@livekit/agents 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/job.cjs +2 -2
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.ts +6 -1
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +2 -2
- package/dist/job.js.map +1 -1
- package/dist/tokenize/basic/basic.cjs +1 -1
- package/dist/tokenize/basic/basic.cjs.map +1 -1
- package/dist/tokenize/basic/basic.d.ts +1 -1
- package/dist/tokenize/basic/basic.d.ts.map +1 -1
- package/dist/tokenize/basic/basic.js +1 -1
- package/dist/tokenize/basic/basic.js.map +1 -1
- package/dist/tokenize/basic/sentence.cjs +14 -8
- package/dist/tokenize/basic/sentence.cjs.map +1 -1
- package/dist/tokenize/basic/sentence.d.ts.map +1 -1
- package/dist/tokenize/basic/sentence.js +14 -8
- package/dist/tokenize/basic/sentence.js.map +1 -1
- package/dist/tokenize/tokenizer.test.cjs +220 -0
- package/dist/tokenize/tokenizer.test.cjs.map +1 -0
- package/dist/tokenize/tokenizer.test.d.ts +2 -0
- package/dist/tokenize/tokenizer.test.d.ts.map +1 -0
- package/dist/tokenize/tokenizer.test.js +219 -0
- package/dist/tokenize/tokenizer.test.js.map +1 -0
- package/dist/worker.cjs +2 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +2 -1
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
- package/src/job.ts +3 -2
- package/src/tokenize/basic/basic.ts +1 -1
- package/src/tokenize/basic/sentence.ts +14 -8
- package/src/tokenize/tokenizer.test.ts +255 -0
- package/src/worker.ts +1 -0
package/dist/job.cjs
CHANGED
|
@@ -214,9 +214,9 @@ class JobRequest {
|
|
|
214
214
|
await this.#onReject();
|
|
215
215
|
}
|
|
216
216
|
/** Accepts the job, launching it on an idle child process. */
|
|
217
|
-
async accept(name = "", identity = "", metadata = "") {
|
|
217
|
+
async accept(name = "", identity = "", metadata = "", attributes) {
|
|
218
218
|
if (identity === "") identity = "agent-" + this.id;
|
|
219
|
-
this.#onAccept({ name, identity, metadata });
|
|
219
|
+
this.#onAccept({ name, identity, metadata, attributes });
|
|
220
220
|
}
|
|
221
221
|
}
|
|
222
222
|
// Annotate the CommonJS export names for ESM import in node:
|
package/dist/job.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/job.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as proto from '@livekit/protocol';\nimport type {\n E2EEOptions,\n LocalParticipant,\n RemoteParticipant,\n Room,\n RtcConfiguration,\n} from '@livekit/rtc-node';\nimport { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';\nimport type { Logger } from 'pino';\nimport { log } from './log.js';\n\n/** Which tracks, if any, should the agent automatically subscribe to? */\nexport enum AutoSubscribe {\n SUBSCRIBE_ALL,\n SUBSCRIBE_NONE,\n VIDEO_ONLY,\n AUDIO_ONLY,\n}\n\nexport type JobAcceptArguments = {\n name: string;\n identity: string;\n metadata: string;\n};\n\nexport type RunningJobInfo = {\n acceptArguments: JobAcceptArguments;\n job: proto.Job;\n url: string;\n token: string;\n};\n\n/** Attempted to add a function callback, but the function already exists. */\nexport class FunctionExistsError extends Error {\n constructor(msg?: string) {\n super(msg);\n Object.setPrototypeOf(this, new.target.prototype);\n }\n}\n\n/** The job and environment context as seen by the agent, accessible by the entrypoint function. */\nexport class JobContext {\n #proc: JobProcess;\n #info: RunningJobInfo;\n #room: Room;\n #onConnect: () => void;\n #onShutdown: (s: string) => void;\n /** @internal */\n shutdownCallbacks: (() => Promise<void>)[] = [];\n #participantEntrypoints: ((job: JobContext, p: RemoteParticipant) => Promise<void>)[] = [];\n #participantTasks: {\n [id: string]: {\n callback: (job: JobContext, p: RemoteParticipant) => Promise<void>;\n result: Promise<void>;\n };\n } = {};\n #logger: Logger;\n\n constructor(\n proc: JobProcess,\n info: RunningJobInfo,\n room: Room,\n onConnect: () => void,\n onShutdown: (s: string) => void,\n ) {\n this.#proc = proc;\n this.#info = info;\n this.#room = room;\n this.#onConnect = onConnect;\n this.#onShutdown = onShutdown;\n this.onParticipantConnected = this.onParticipantConnected.bind(this);\n this.#room.on(RoomEvent.ParticipantConnected, this.onParticipantConnected);\n this.#logger = log().child({ info: this.#info });\n }\n\n get proc(): JobProcess {\n return this.#proc;\n }\n\n get job(): proto.Job {\n return this.#info.job;\n }\n\n /** @returns The room the agent was called into */\n get room(): Room {\n return this.#room;\n }\n\n /** @returns The agent's participant if connected to the room, otherwise `undefined` */\n get agent(): LocalParticipant | undefined {\n return this.#room.localParticipant;\n }\n\n /** Adds a promise to be awaited when {@link JobContext.shutdown | shutdown} is called. */\n addShutdownCallback(callback: () => Promise<void>) {\n this.shutdownCallbacks.push(callback);\n }\n\n async waitForParticipant(identity?: string): Promise<RemoteParticipant> {\n if (!this.#room.isConnected) {\n throw new Error('room is not connected');\n }\n\n for (const p of this.#room.remoteParticipants.values()) {\n if ((!identity || p.identity === identity) && p.info.kind != ParticipantKind.AGENT) {\n return p;\n }\n }\n\n return new Promise((resolve, reject) => {\n const onParticipantConnected = (participant: RemoteParticipant) => {\n if (\n (!identity || participant.identity === identity) &&\n participant.info.kind != ParticipantKind.AGENT\n ) {\n clearHandlers();\n resolve(participant);\n }\n };\n const onDisconnected = () => {\n clearHandlers();\n reject(new Error('Room disconnected while waiting for participant'));\n };\n\n const clearHandlers = () => {\n this.#room.off(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.off(RoomEvent.Disconnected, onDisconnected);\n };\n\n this.#room.on(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.on(RoomEvent.Disconnected, onDisconnected);\n });\n }\n\n /**\n * Connects the agent to the room.\n *\n * @remarks\n * It is recommended to run this command as early in the function as possible, as executing it\n * later may cause noticeable delay between user and agent joins.\n *\n * @see {@link https://github.com/livekit/node-sdks/tree/main/packages/livekit-rtc#readme |\n * @livekit/rtc-node} for more information about the parameters.\n */\n async connect(\n e2ee?: E2EEOptions,\n autoSubscribe: AutoSubscribe = AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig?: RtcConfiguration,\n ) {\n const opts = {\n e2ee,\n autoSubscribe: autoSubscribe == AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig,\n dynacast: false,\n };\n\n await this.#room.connect(this.#info.url, this.#info.token, opts);\n this.#onConnect();\n\n this.#room.remoteParticipants.forEach(this.onParticipantConnected);\n\n if ([AutoSubscribe.AUDIO_ONLY, AutoSubscribe.VIDEO_ONLY].includes(autoSubscribe)) {\n this.#room.remoteParticipants.forEach((p) => {\n p.trackPublications.forEach((pub) => {\n if (\n (autoSubscribe === AutoSubscribe.AUDIO_ONLY && pub.kind === TrackKind.KIND_AUDIO) ||\n (autoSubscribe === AutoSubscribe.VIDEO_ONLY && pub.kind === TrackKind.KIND_VIDEO)\n ) {\n pub.setSubscribed(true);\n }\n });\n });\n }\n }\n\n /**\n * Gracefully shuts down the job, and runs all shutdown promises.\n *\n * @param reason - Optional reason for shutdown\n */\n shutdown(reason = '') {\n this.#onShutdown(reason);\n }\n\n /** @internal */\n onParticipantConnected(p: RemoteParticipant) {\n for (const callback of this.#participantEntrypoints) {\n if (this.#participantTasks[p.identity]?.callback == callback) {\n this.#logger.warn(\n 'a participant has joined before a prior prticipant task matching the same identity has finished:',\n p.identity,\n );\n }\n const result = callback(this, p);\n result.finally(() => delete this.#participantTasks[p.identity]);\n this.#participantTasks[p.identity] = { callback, result };\n }\n }\n\n /**\n * Adds a promise to be awaited whenever a new participant joins the room.\n *\n * @throws {@link FunctionExistsError} if an entrypoint already exists\n */\n addParticipantEntrypoint(callback: (job: JobContext, p: RemoteParticipant) => Promise<void>) {\n if (this.#participantEntrypoints.includes(callback)) {\n throw new FunctionExistsError('entrypoints cannot be added more than once');\n }\n\n this.#participantEntrypoints.push(callback);\n }\n}\n\nexport class JobProcess {\n #pid = process.pid;\n userData: { [id: string]: unknown } = {};\n\n get pid(): number {\n return this.#pid;\n }\n}\n\n/**\n * A request sent by the server to spawn a new agent job.\n *\n * @remarks\n * For most applications, this is best left to the default, which simply accepts the job and\n * handles the logic inside the entrypoint function. This class is useful for vetting which\n * requests should fill idle processes and which should be outright rejected.\n */\nexport class JobRequest {\n #job: proto.Job;\n #onReject: () => Promise<void>;\n #onAccept: (args: JobAcceptArguments) => Promise<void>;\n\n /** @internal */\n constructor(\n job: proto.Job,\n onReject: () => Promise<void>,\n onAccept: (args: JobAcceptArguments) => Promise<void>,\n ) {\n this.#job = job;\n this.#onReject = onReject;\n this.#onAccept = onAccept;\n }\n\n /** @returns The ID of the job, set by the LiveKit server */\n get id(): string {\n return this.#job.id;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get job(): proto.Job {\n return this.#job;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get room(): proto.Room | undefined {\n return this.#job.room;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get publisher(): proto.ParticipantInfo | undefined {\n return this.#job.participant;\n }\n\n /** @returns The agent's name, as set in {@link WorkerOptions} */\n get agentName(): string {\n return this.#job.agentName;\n }\n\n /** Rejects the job. */\n async reject() {\n await this.#onReject();\n }\n\n /** Accepts the job, launching it on an idle child process. */\n async accept(name = '', identity = '', metadata = '') {\n if (identity === '') identity = 'agent-' + this.id;\n\n this.#onAccept({ name, identity, metadata });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAWA,sBAAsD;AAEtD,iBAAoB;AAGb,IAAK,gBAAL,kBAAKA,mBAAL;AACL,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AAJU,SAAAA;AAAA,GAAA;AAqBL,MAAM,4BAA4B,MAAM;AAAA,EAC7C,YAAY,KAAc;AACxB,UAAM,GAAG;AACT,WAAO,eAAe,MAAM,WAAW,SAAS;AAAA,EAClD;AACF;AAGO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA,oBAA6C,CAAC;AAAA,EAC9C,0BAAwF,CAAC;AAAA,EACzF,oBAKI,CAAC;AAAA,EACL;AAAA,EAEA,YACE,MACA,MACA,MACA,WACA,YACA;AACA,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,yBAAyB,KAAK,uBAAuB,KAAK,IAAI;AACnE,SAAK,MAAM,GAAG,0BAAU,sBAAsB,KAAK,sBAAsB;AACzE,SAAK,cAAU,gBAAI,EAAE,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC;AAAA,EACjD;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAiB;AACnB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,OAAa;AACf,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,QAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,oBAAoB,UAA+B;AACjD,SAAK,kBAAkB,KAAK,QAAQ;AAAA,EACtC;AAAA,EAEA,MAAM,mBAAmB,UAA+C;AACtE,QAAI,CAAC,KAAK,MAAM,aAAa;AAC3B,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AAEA,eAAW,KAAK,KAAK,MAAM,mBAAmB,OAAO,GAAG;AACtD,WAAK,CAAC,YAAY,EAAE,aAAa,aAAa,EAAE,KAAK,QAAQ,gCAAgB,OAAO;AAClF,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,YAAM,yBAAyB,CAAC,gBAAmC;AACjE,aACG,CAAC,YAAY,YAAY,aAAa,aACvC,YAAY,KAAK,QAAQ,gCAAgB,OACzC;AACA,wBAAc;AACd,kBAAQ,WAAW;AAAA,QACrB;AAAA,MACF;AACA,YAAM,iBAAiB,MAAM;AAC3B,sBAAc;AACd,eAAO,IAAI,MAAM,iDAAiD,CAAC;AAAA,MACrE;AAEA,YAAM,gBAAgB,MAAM;AAC1B,aAAK,MAAM,IAAI,0BAAU,sBAAsB,sBAAsB;AACrE,aAAK,MAAM,IAAI,0BAAU,cAAc,cAAc;AAAA,MACvD;AAEA,WAAK,MAAM,GAAG,0BAAU,sBAAsB,sBAAsB;AACpE,WAAK,MAAM,GAAG,0BAAU,cAAc,cAAc;AAAA,IACtD,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,QACJ,MACA,gBAA+B,uBAC/B,WACA;AACA,UAAM,OAAO;AAAA,MACX;AAAA,MACA,eAAe,iBAAiB;AAAA,MAChC;AAAA,MACA,UAAU;AAAA,IACZ;AAEA,UAAM,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,KAAK,MAAM,OAAO,IAAI;AAC/D,SAAK,WAAW;AAEhB,SAAK,MAAM,mBAAmB,QAAQ,KAAK,sBAAsB;AAEjE,QAAI,CAAC,oBAA0B,kBAAwB,EAAE,SAAS,aAAa,GAAG;AAChF,WAAK,MAAM,mBAAmB,QAAQ,CAAC,MAAM;AAC3C,UAAE,kBAAkB,QAAQ,CAAC,QAAQ;AACnC,cACG,kBAAkB,sBAA4B,IAAI,SAAS,0BAAU,cACrE,kBAAkB,sBAA4B,IAAI,SAAS,0BAAU,YACtE;AACA,gBAAI,cAAc,IAAI;AAAA,UACxB;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,SAAS,SAAS,IAAI;AACpB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA;AAAA,EAGA,uBAAuB,GAAsB;AA7L/C;AA8LI,eAAW,YAAY,KAAK,yBAAyB;AACnD,YAAI,UAAK,kBAAkB,EAAE,QAAQ,MAAjC,mBAAoC,aAAY,UAAU;AAC5D,aAAK,QAAQ;AAAA,UACX;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AACA,YAAM,SAAS,SAAS,MAAM,CAAC;AAC/B,aAAO,QAAQ,MAAM,OAAO,KAAK,kBAAkB,EAAE,QAAQ,CAAC;AAC9D,WAAK,kBAAkB,EAAE,QAAQ,IAAI,EAAE,UAAU,OAAO;AAAA,IAC1D;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,yBAAyB,UAAoE;AAC3F,QAAI,KAAK,wBAAwB,SAAS,QAAQ,GAAG;AACnD,YAAM,IAAI,oBAAoB,4CAA4C;AAAA,IAC5E;AAEA,SAAK,wBAAwB,KAAK,QAAQ;AAAA,EAC5C;AACF;AAEO,MAAM,WAAW;AAAA,EACtB,OAAO,QAAQ;AAAA,EACf,WAAsC,CAAC;AAAA,EAEvC,IAAI,MAAc;AAChB,WAAO,KAAK;AAAA,EACd;AACF;AAUO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,YACE,KACA,UACA,UACA;AACA,SAAK,OAAO;AACZ,SAAK,YAAY;AACjB,SAAK,YAAY;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,KAAa;AACf,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,MAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,OAA+B;AACjC,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAA+C;AACjD,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAAoB;AACtB,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,MAAM,SAAS;AACb,UAAM,KAAK,UAAU;AAAA,EACvB;AAAA;AAAA,EAGA,MAAM,OAAO,OAAO,IAAI,WAAW,IAAI,WAAW,IAAI;AACpD,QAAI,aAAa,GAAI,YAAW,WAAW,KAAK;AAEhD,SAAK,UAAU,EAAE,MAAM,UAAU,SAAS,CAAC;AAAA,EAC7C;AACF;","names":["AutoSubscribe"]}
|
|
1
|
+
{"version":3,"sources":["../src/job.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as proto from '@livekit/protocol';\nimport type {\n E2EEOptions,\n LocalParticipant,\n RemoteParticipant,\n Room,\n RtcConfiguration,\n} from '@livekit/rtc-node';\nimport { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';\nimport type { Logger } from 'pino';\nimport { log } from './log.js';\n\n/** Which tracks, if any, should the agent automatically subscribe to? */\nexport enum AutoSubscribe {\n SUBSCRIBE_ALL,\n SUBSCRIBE_NONE,\n VIDEO_ONLY,\n AUDIO_ONLY,\n}\n\nexport type JobAcceptArguments = {\n name: string;\n identity: string;\n metadata: string;\n attributes?: { [key: string]: string };\n};\n\nexport type RunningJobInfo = {\n acceptArguments: JobAcceptArguments;\n job: proto.Job;\n url: string;\n token: string;\n};\n\n/** Attempted to add a function callback, but the function already exists. */\nexport class FunctionExistsError extends Error {\n constructor(msg?: string) {\n super(msg);\n Object.setPrototypeOf(this, new.target.prototype);\n }\n}\n\n/** The job and environment context as seen by the agent, accessible by the entrypoint function. */\nexport class JobContext {\n #proc: JobProcess;\n #info: RunningJobInfo;\n #room: Room;\n #onConnect: () => void;\n #onShutdown: (s: string) => void;\n /** @internal */\n shutdownCallbacks: (() => Promise<void>)[] = [];\n #participantEntrypoints: ((job: JobContext, p: RemoteParticipant) => Promise<void>)[] = [];\n #participantTasks: {\n [id: string]: {\n callback: (job: JobContext, p: RemoteParticipant) => Promise<void>;\n result: Promise<void>;\n };\n } = {};\n #logger: Logger;\n\n constructor(\n proc: JobProcess,\n info: RunningJobInfo,\n room: Room,\n onConnect: () => void,\n onShutdown: (s: string) => void,\n ) {\n this.#proc = proc;\n this.#info = info;\n this.#room = room;\n this.#onConnect = onConnect;\n this.#onShutdown = onShutdown;\n this.onParticipantConnected = this.onParticipantConnected.bind(this);\n this.#room.on(RoomEvent.ParticipantConnected, this.onParticipantConnected);\n this.#logger = log().child({ info: this.#info });\n }\n\n get proc(): JobProcess {\n return this.#proc;\n }\n\n get job(): proto.Job {\n return this.#info.job;\n }\n\n /** @returns The room the agent was called into */\n get room(): Room {\n return this.#room;\n }\n\n /** @returns The agent's participant if connected to the room, otherwise `undefined` */\n get agent(): LocalParticipant | undefined {\n return this.#room.localParticipant;\n }\n\n /** Adds a promise to be awaited when {@link JobContext.shutdown | shutdown} is called. */\n addShutdownCallback(callback: () => Promise<void>) {\n this.shutdownCallbacks.push(callback);\n }\n\n async waitForParticipant(identity?: string): Promise<RemoteParticipant> {\n if (!this.#room.isConnected) {\n throw new Error('room is not connected');\n }\n\n for (const p of this.#room.remoteParticipants.values()) {\n if ((!identity || p.identity === identity) && p.info.kind != ParticipantKind.AGENT) {\n return p;\n }\n }\n\n return new Promise((resolve, reject) => {\n const onParticipantConnected = (participant: RemoteParticipant) => {\n if (\n (!identity || participant.identity === identity) &&\n participant.info.kind != ParticipantKind.AGENT\n ) {\n clearHandlers();\n resolve(participant);\n }\n };\n const onDisconnected = () => {\n clearHandlers();\n reject(new Error('Room disconnected while waiting for participant'));\n };\n\n const clearHandlers = () => {\n this.#room.off(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.off(RoomEvent.Disconnected, onDisconnected);\n };\n\n this.#room.on(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.on(RoomEvent.Disconnected, onDisconnected);\n });\n }\n\n /**\n * Connects the agent to the room.\n *\n * @remarks\n * It is recommended to run this command as early in the function as possible, as executing it\n * later may cause noticeable delay between user and agent joins.\n *\n * @see {@link https://github.com/livekit/node-sdks/tree/main/packages/livekit-rtc#readme |\n * @livekit/rtc-node} for more information about the parameters.\n */\n async connect(\n e2ee?: E2EEOptions,\n autoSubscribe: AutoSubscribe = AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig?: RtcConfiguration,\n ) {\n const opts = {\n e2ee,\n autoSubscribe: autoSubscribe == AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig,\n dynacast: false,\n };\n\n await this.#room.connect(this.#info.url, this.#info.token, opts);\n this.#onConnect();\n\n this.#room.remoteParticipants.forEach(this.onParticipantConnected);\n\n if ([AutoSubscribe.AUDIO_ONLY, AutoSubscribe.VIDEO_ONLY].includes(autoSubscribe)) {\n this.#room.remoteParticipants.forEach((p) => {\n p.trackPublications.forEach((pub) => {\n if (\n (autoSubscribe === AutoSubscribe.AUDIO_ONLY && pub.kind === TrackKind.KIND_AUDIO) ||\n (autoSubscribe === AutoSubscribe.VIDEO_ONLY && pub.kind === TrackKind.KIND_VIDEO)\n ) {\n pub.setSubscribed(true);\n }\n });\n });\n }\n }\n\n /**\n * Gracefully shuts down the job, and runs all shutdown promises.\n *\n * @param reason - Optional reason for shutdown\n */\n shutdown(reason = '') {\n this.#onShutdown(reason);\n }\n\n /** @internal */\n onParticipantConnected(p: RemoteParticipant) {\n for (const callback of this.#participantEntrypoints) {\n if (this.#participantTasks[p.identity]?.callback == callback) {\n this.#logger.warn(\n 'a participant has joined before a prior prticipant task matching the same identity has finished:',\n p.identity,\n );\n }\n const result = callback(this, p);\n result.finally(() => delete this.#participantTasks[p.identity]);\n this.#participantTasks[p.identity] = { callback, result };\n }\n }\n\n /**\n * Adds a promise to be awaited whenever a new participant joins the room.\n *\n * @throws {@link FunctionExistsError} if an entrypoint already exists\n */\n addParticipantEntrypoint(callback: (job: JobContext, p: RemoteParticipant) => Promise<void>) {\n if (this.#participantEntrypoints.includes(callback)) {\n throw new FunctionExistsError('entrypoints cannot be added more than once');\n }\n\n this.#participantEntrypoints.push(callback);\n }\n}\n\nexport class JobProcess {\n #pid = process.pid;\n userData: { [id: string]: unknown } = {};\n\n get pid(): number {\n return this.#pid;\n }\n}\n\n/**\n * A request sent by the server to spawn a new agent job.\n *\n * @remarks\n * For most applications, this is best left to the default, which simply accepts the job and\n * handles the logic inside the entrypoint function. This class is useful for vetting which\n * requests should fill idle processes and which should be outright rejected.\n */\nexport class JobRequest {\n #job: proto.Job;\n #onReject: () => Promise<void>;\n #onAccept: (args: JobAcceptArguments) => Promise<void>;\n\n /** @internal */\n constructor(\n job: proto.Job,\n onReject: () => Promise<void>,\n onAccept: (args: JobAcceptArguments) => Promise<void>,\n ) {\n this.#job = job;\n this.#onReject = onReject;\n this.#onAccept = onAccept;\n }\n\n /** @returns The ID of the job, set by the LiveKit server */\n get id(): string {\n return this.#job.id;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get job(): proto.Job {\n return this.#job;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get room(): proto.Room | undefined {\n return this.#job.room;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get publisher(): proto.ParticipantInfo | undefined {\n return this.#job.participant;\n }\n\n /** @returns The agent's name, as set in {@link WorkerOptions} */\n get agentName(): string {\n return this.#job.agentName;\n }\n\n /** Rejects the job. */\n async reject() {\n await this.#onReject();\n }\n\n /** Accepts the job, launching it on an idle child process. */\n async accept(name = '', identity = '', metadata = '', attributes?: { [key: string]: string }) {\n if (identity === '') identity = 'agent-' + this.id;\n\n this.#onAccept({ name, identity, metadata, attributes });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAWA,sBAAsD;AAEtD,iBAAoB;AAGb,IAAK,gBAAL,kBAAKA,mBAAL;AACL,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AAJU,SAAAA;AAAA,GAAA;AAsBL,MAAM,4BAA4B,MAAM;AAAA,EAC7C,YAAY,KAAc;AACxB,UAAM,GAAG;AACT,WAAO,eAAe,MAAM,WAAW,SAAS;AAAA,EAClD;AACF;AAGO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA,oBAA6C,CAAC;AAAA,EAC9C,0BAAwF,CAAC;AAAA,EACzF,oBAKI,CAAC;AAAA,EACL;AAAA,EAEA,YACE,MACA,MACA,MACA,WACA,YACA;AACA,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,yBAAyB,KAAK,uBAAuB,KAAK,IAAI;AACnE,SAAK,MAAM,GAAG,0BAAU,sBAAsB,KAAK,sBAAsB;AACzE,SAAK,cAAU,gBAAI,EAAE,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC;AAAA,EACjD;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAiB;AACnB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,OAAa;AACf,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,QAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,oBAAoB,UAA+B;AACjD,SAAK,kBAAkB,KAAK,QAAQ;AAAA,EACtC;AAAA,EAEA,MAAM,mBAAmB,UAA+C;AACtE,QAAI,CAAC,KAAK,MAAM,aAAa;AAC3B,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AAEA,eAAW,KAAK,KAAK,MAAM,mBAAmB,OAAO,GAAG;AACtD,WAAK,CAAC,YAAY,EAAE,aAAa,aAAa,EAAE,KAAK,QAAQ,gCAAgB,OAAO;AAClF,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,YAAM,yBAAyB,CAAC,gBAAmC;AACjE,aACG,CAAC,YAAY,YAAY,aAAa,aACvC,YAAY,KAAK,QAAQ,gCAAgB,OACzC;AACA,wBAAc;AACd,kBAAQ,WAAW;AAAA,QACrB;AAAA,MACF;AACA,YAAM,iBAAiB,MAAM;AAC3B,sBAAc;AACd,eAAO,IAAI,MAAM,iDAAiD,CAAC;AAAA,MACrE;AAEA,YAAM,gBAAgB,MAAM;AAC1B,aAAK,MAAM,IAAI,0BAAU,sBAAsB,sBAAsB;AACrE,aAAK,MAAM,IAAI,0BAAU,cAAc,cAAc;AAAA,MACvD;AAEA,WAAK,MAAM,GAAG,0BAAU,sBAAsB,sBAAsB;AACpE,WAAK,MAAM,GAAG,0BAAU,cAAc,cAAc;AAAA,IACtD,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,QACJ,MACA,gBAA+B,uBAC/B,WACA;AACA,UAAM,OAAO;AAAA,MACX;AAAA,MACA,eAAe,iBAAiB;AAAA,MAChC;AAAA,MACA,UAAU;AAAA,IACZ;AAEA,UAAM,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,KAAK,MAAM,OAAO,IAAI;AAC/D,SAAK,WAAW;AAEhB,SAAK,MAAM,mBAAmB,QAAQ,KAAK,sBAAsB;AAEjE,QAAI,CAAC,oBAA0B,kBAAwB,EAAE,SAAS,aAAa,GAAG;AAChF,WAAK,MAAM,mBAAmB,QAAQ,CAAC,MAAM;AAC3C,UAAE,kBAAkB,QAAQ,CAAC,QAAQ;AACnC,cACG,kBAAkB,sBAA4B,IAAI,SAAS,0BAAU,cACrE,kBAAkB,sBAA4B,IAAI,SAAS,0BAAU,YACtE;AACA,gBAAI,cAAc,IAAI;AAAA,UACxB;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,SAAS,SAAS,IAAI;AACpB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA;AAAA,EAGA,uBAAuB,GAAsB;AA9L/C;AA+LI,eAAW,YAAY,KAAK,yBAAyB;AACnD,YAAI,UAAK,kBAAkB,EAAE,QAAQ,MAAjC,mBAAoC,aAAY,UAAU;AAC5D,aAAK,QAAQ;AAAA,UACX;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AACA,YAAM,SAAS,SAAS,MAAM,CAAC;AAC/B,aAAO,QAAQ,MAAM,OAAO,KAAK,kBAAkB,EAAE,QAAQ,CAAC;AAC9D,WAAK,kBAAkB,EAAE,QAAQ,IAAI,EAAE,UAAU,OAAO;AAAA,IAC1D;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,yBAAyB,UAAoE;AAC3F,QAAI,KAAK,wBAAwB,SAAS,QAAQ,GAAG;AACnD,YAAM,IAAI,oBAAoB,4CAA4C;AAAA,IAC5E;AAEA,SAAK,wBAAwB,KAAK,QAAQ;AAAA,EAC5C;AACF;AAEO,MAAM,WAAW;AAAA,EACtB,OAAO,QAAQ;AAAA,EACf,WAAsC,CAAC;AAAA,EAEvC,IAAI,MAAc;AAChB,WAAO,KAAK;AAAA,EACd;AACF;AAUO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,YACE,KACA,UACA,UACA;AACA,SAAK,OAAO;AACZ,SAAK,YAAY;AACjB,SAAK,YAAY;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,KAAa;AACf,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,MAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,OAA+B;AACjC,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAA+C;AACjD,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAAoB;AACtB,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,MAAM,SAAS;AACb,UAAM,KAAK,UAAU;AAAA,EACvB;AAAA;AAAA,EAGA,MAAM,OAAO,OAAO,IAAI,WAAW,IAAI,WAAW,IAAI,YAAwC;AAC5F,QAAI,aAAa,GAAI,YAAW,WAAW,KAAK;AAEhD,SAAK,UAAU,EAAE,MAAM,UAAU,UAAU,WAAW,CAAC;AAAA,EACzD;AACF;","names":["AutoSubscribe"]}
|
package/dist/job.d.ts
CHANGED
|
@@ -11,6 +11,9 @@ export type JobAcceptArguments = {
|
|
|
11
11
|
name: string;
|
|
12
12
|
identity: string;
|
|
13
13
|
metadata: string;
|
|
14
|
+
attributes?: {
|
|
15
|
+
[key: string]: string;
|
|
16
|
+
};
|
|
14
17
|
};
|
|
15
18
|
export type RunningJobInfo = {
|
|
16
19
|
acceptArguments: JobAcceptArguments;
|
|
@@ -95,6 +98,8 @@ export declare class JobRequest {
|
|
|
95
98
|
/** Rejects the job. */
|
|
96
99
|
reject(): Promise<void>;
|
|
97
100
|
/** Accepts the job, launching it on an idle child process. */
|
|
98
|
-
accept(name?: string, identity?: string, metadata?: string
|
|
101
|
+
accept(name?: string, identity?: string, metadata?: string, attributes?: {
|
|
102
|
+
[key: string]: string;
|
|
103
|
+
}): Promise<void>;
|
|
99
104
|
}
|
|
100
105
|
//# sourceMappingURL=job.d.ts.map
|
package/dist/job.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"job.d.ts","sourceRoot":"","sources":["../src/job.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,KAAK,KAAK,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EACV,WAAW,EACX,gBAAgB,EAChB,iBAAiB,EACjB,IAAI,EACJ,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAK3B,yEAAyE;AACzE,oBAAY,aAAa;IACvB,aAAa,IAAA;IACb,cAAc,IAAA;IACd,UAAU,IAAA;IACV,UAAU,IAAA;CACX;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"job.d.ts","sourceRoot":"","sources":["../src/job.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,KAAK,KAAK,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EACV,WAAW,EACX,gBAAgB,EAChB,iBAAiB,EACjB,IAAI,EACJ,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAK3B,yEAAyE;AACzE,oBAAY,aAAa;IACvB,aAAa,IAAA;IACb,cAAc,IAAA;IACd,UAAU,IAAA;IACV,UAAU,IAAA;CACX;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,CAAC;CACxC,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,eAAe,EAAE,kBAAkB,CAAC;IACpC,GAAG,EAAE,KAAK,CAAC,GAAG,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,6EAA6E;AAC7E,qBAAa,mBAAoB,SAAQ,KAAK;gBAChC,GAAG,CAAC,EAAE,MAAM;CAIzB;AAED,mGAAmG;AACnG,qBAAa,UAAU;;IAMrB,gBAAgB;IAChB,iBAAiB,EAAE,CAAC,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,EAAE,CAAM;gBAW9C,IAAI,EAAE,UAAU,EAChB,IAAI,EAAE,cAAc,EACpB,IAAI,EAAE,IAAI,EACV,SAAS,EAAE,MAAM,IAAI,EACrB,UAAU,EAAE,CAAC,CAAC,EAAE,MAAM,KAAK,IAAI;IAYjC,IAAI,IAAI,IAAI,UAAU,CAErB;IAED,IAAI,GAAG,IAAI,KAAK,CAAC,GAAG,CAEnB;IAED,kDAAkD;IAClD,IAAI,IAAI,IAAI,IAAI,CAEf;IAED,uFAAuF;IACvF,IAAI,KAAK,IAAI,gBAAgB,GAAG,SAAS,CAExC;IAED,0FAA0F;IAC1F,mBAAmB,CAAC,QAAQ,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC;IAI3C,kBAAkB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAoCvE;;;;;;;;;OASG;IACG,OAAO,CACX,IAAI,CAAC,EAAE,WAAW,EAClB,aAAa,GAAE,aAA2C,EAC1D,SAAS,CAAC,EAAE,gBAAgB;IA4B9B;;;;OAIG;IACH,QAAQ,CAAC,MAAM,SAAK;IAIpB,gBAAgB;IAChB,sBAAsB,CAAC,CAAC,EAAE,iBAAiB;IAc3C;;;;OAIG;IACH,wBAAwB,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,EAAE,iBAAiB,KAAK,OAAO,CAAC,IAAI,CAAC;CAO5F;AAED,qBAAa,UAAU;;IAErB,QAAQ,EAAE;QAAE,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAM;IAEzC,IAAI,GAAG,IAAI,MAAM,CAEhB;CACF;AAED;;;;;;;GAOG;AACH,qBAAa,UAAU;;IAKrB,gBAAgB;gBAEd,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,QAAQ,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,EAC7B,QAAQ,EAAE,CAAC,IAAI,EAAE,kBAAkB,KAAK,OAAO,CAAC,IAAI,CAAC;IAOvD,4DAA4D;IAC5D,IAAI,EAAE,IAAI,MAAM,CAEf;IAED,uFAAuF;IACvF,IAAI,GAAG,IAAI,KAAK,CAAC,GAAG,CAEnB;IAED,uFAAuF;IACvF,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,GAAG,SAAS,CAEjC;IAED,uFAAuF;IACvF,IAAI,SAAS,IAAI,KAAK,CAAC,eAAe,GAAG,SAAS,CAEjD;IAED,iEAAiE;IACjE,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,uBAAuB;IACjB,MAAM;IAIZ,8DAA8D;IACxD,MAAM,CAAC,IAAI,SAAK,EAAE,QAAQ,SAAK,EAAE,QAAQ,SAAK,EAAE,UAAU,CAAC,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE;CAK7F"}
|
package/dist/job.js
CHANGED
|
@@ -187,9 +187,9 @@ class JobRequest {
|
|
|
187
187
|
await this.#onReject();
|
|
188
188
|
}
|
|
189
189
|
/** Accepts the job, launching it on an idle child process. */
|
|
190
|
-
async accept(name = "", identity = "", metadata = "") {
|
|
190
|
+
async accept(name = "", identity = "", metadata = "", attributes) {
|
|
191
191
|
if (identity === "") identity = "agent-" + this.id;
|
|
192
|
-
this.#onAccept({ name, identity, metadata });
|
|
192
|
+
this.#onAccept({ name, identity, metadata, attributes });
|
|
193
193
|
}
|
|
194
194
|
}
|
|
195
195
|
export {
|
package/dist/job.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/job.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as proto from '@livekit/protocol';\nimport type {\n E2EEOptions,\n LocalParticipant,\n RemoteParticipant,\n Room,\n RtcConfiguration,\n} from '@livekit/rtc-node';\nimport { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';\nimport type { Logger } from 'pino';\nimport { log } from './log.js';\n\n/** Which tracks, if any, should the agent automatically subscribe to? */\nexport enum AutoSubscribe {\n SUBSCRIBE_ALL,\n SUBSCRIBE_NONE,\n VIDEO_ONLY,\n AUDIO_ONLY,\n}\n\nexport type JobAcceptArguments = {\n name: string;\n identity: string;\n metadata: string;\n};\n\nexport type RunningJobInfo = {\n acceptArguments: JobAcceptArguments;\n job: proto.Job;\n url: string;\n token: string;\n};\n\n/** Attempted to add a function callback, but the function already exists. */\nexport class FunctionExistsError extends Error {\n constructor(msg?: string) {\n super(msg);\n Object.setPrototypeOf(this, new.target.prototype);\n }\n}\n\n/** The job and environment context as seen by the agent, accessible by the entrypoint function. */\nexport class JobContext {\n #proc: JobProcess;\n #info: RunningJobInfo;\n #room: Room;\n #onConnect: () => void;\n #onShutdown: (s: string) => void;\n /** @internal */\n shutdownCallbacks: (() => Promise<void>)[] = [];\n #participantEntrypoints: ((job: JobContext, p: RemoteParticipant) => Promise<void>)[] = [];\n #participantTasks: {\n [id: string]: {\n callback: (job: JobContext, p: RemoteParticipant) => Promise<void>;\n result: Promise<void>;\n };\n } = {};\n #logger: Logger;\n\n constructor(\n proc: JobProcess,\n info: RunningJobInfo,\n room: Room,\n onConnect: () => void,\n onShutdown: (s: string) => void,\n ) {\n this.#proc = proc;\n this.#info = info;\n this.#room = room;\n this.#onConnect = onConnect;\n this.#onShutdown = onShutdown;\n this.onParticipantConnected = this.onParticipantConnected.bind(this);\n this.#room.on(RoomEvent.ParticipantConnected, this.onParticipantConnected);\n this.#logger = log().child({ info: this.#info });\n }\n\n get proc(): JobProcess {\n return this.#proc;\n }\n\n get job(): proto.Job {\n return this.#info.job;\n }\n\n /** @returns The room the agent was called into */\n get room(): Room {\n return this.#room;\n }\n\n /** @returns The agent's participant if connected to the room, otherwise `undefined` */\n get agent(): LocalParticipant | undefined {\n return this.#room.localParticipant;\n }\n\n /** Adds a promise to be awaited when {@link JobContext.shutdown | shutdown} is called. */\n addShutdownCallback(callback: () => Promise<void>) {\n this.shutdownCallbacks.push(callback);\n }\n\n async waitForParticipant(identity?: string): Promise<RemoteParticipant> {\n if (!this.#room.isConnected) {\n throw new Error('room is not connected');\n }\n\n for (const p of this.#room.remoteParticipants.values()) {\n if ((!identity || p.identity === identity) && p.info.kind != ParticipantKind.AGENT) {\n return p;\n }\n }\n\n return new Promise((resolve, reject) => {\n const onParticipantConnected = (participant: RemoteParticipant) => {\n if (\n (!identity || participant.identity === identity) &&\n participant.info.kind != ParticipantKind.AGENT\n ) {\n clearHandlers();\n resolve(participant);\n }\n };\n const onDisconnected = () => {\n clearHandlers();\n reject(new Error('Room disconnected while waiting for participant'));\n };\n\n const clearHandlers = () => {\n this.#room.off(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.off(RoomEvent.Disconnected, onDisconnected);\n };\n\n this.#room.on(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.on(RoomEvent.Disconnected, onDisconnected);\n });\n }\n\n /**\n * Connects the agent to the room.\n *\n * @remarks\n * It is recommended to run this command as early in the function as possible, as executing it\n * later may cause noticeable delay between user and agent joins.\n *\n * @see {@link https://github.com/livekit/node-sdks/tree/main/packages/livekit-rtc#readme |\n * @livekit/rtc-node} for more information about the parameters.\n */\n async connect(\n e2ee?: E2EEOptions,\n autoSubscribe: AutoSubscribe = AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig?: RtcConfiguration,\n ) {\n const opts = {\n e2ee,\n autoSubscribe: autoSubscribe == AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig,\n dynacast: false,\n };\n\n await this.#room.connect(this.#info.url, this.#info.token, opts);\n this.#onConnect();\n\n this.#room.remoteParticipants.forEach(this.onParticipantConnected);\n\n if ([AutoSubscribe.AUDIO_ONLY, AutoSubscribe.VIDEO_ONLY].includes(autoSubscribe)) {\n this.#room.remoteParticipants.forEach((p) => {\n p.trackPublications.forEach((pub) => {\n if (\n (autoSubscribe === AutoSubscribe.AUDIO_ONLY && pub.kind === TrackKind.KIND_AUDIO) ||\n (autoSubscribe === AutoSubscribe.VIDEO_ONLY && pub.kind === TrackKind.KIND_VIDEO)\n ) {\n pub.setSubscribed(true);\n }\n });\n });\n }\n }\n\n /**\n * Gracefully shuts down the job, and runs all shutdown promises.\n *\n * @param reason - Optional reason for shutdown\n */\n shutdown(reason = '') {\n this.#onShutdown(reason);\n }\n\n /** @internal */\n onParticipantConnected(p: RemoteParticipant) {\n for (const callback of this.#participantEntrypoints) {\n if (this.#participantTasks[p.identity]?.callback == callback) {\n this.#logger.warn(\n 'a participant has joined before a prior prticipant task matching the same identity has finished:',\n p.identity,\n );\n }\n const result = callback(this, p);\n result.finally(() => delete this.#participantTasks[p.identity]);\n this.#participantTasks[p.identity] = { callback, result };\n }\n }\n\n /**\n * Adds a promise to be awaited whenever a new participant joins the room.\n *\n * @throws {@link FunctionExistsError} if an entrypoint already exists\n */\n addParticipantEntrypoint(callback: (job: JobContext, p: RemoteParticipant) => Promise<void>) {\n if (this.#participantEntrypoints.includes(callback)) {\n throw new FunctionExistsError('entrypoints cannot be added more than once');\n }\n\n this.#participantEntrypoints.push(callback);\n }\n}\n\nexport class JobProcess {\n #pid = process.pid;\n userData: { [id: string]: unknown } = {};\n\n get pid(): number {\n return this.#pid;\n }\n}\n\n/**\n * A request sent by the server to spawn a new agent job.\n *\n * @remarks\n * For most applications, this is best left to the default, which simply accepts the job and\n * handles the logic inside the entrypoint function. This class is useful for vetting which\n * requests should fill idle processes and which should be outright rejected.\n */\nexport class JobRequest {\n #job: proto.Job;\n #onReject: () => Promise<void>;\n #onAccept: (args: JobAcceptArguments) => Promise<void>;\n\n /** @internal */\n constructor(\n job: proto.Job,\n onReject: () => Promise<void>,\n onAccept: (args: JobAcceptArguments) => Promise<void>,\n ) {\n this.#job = job;\n this.#onReject = onReject;\n this.#onAccept = onAccept;\n }\n\n /** @returns The ID of the job, set by the LiveKit server */\n get id(): string {\n return this.#job.id;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get job(): proto.Job {\n return this.#job;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get room(): proto.Room | undefined {\n return this.#job.room;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get publisher(): proto.ParticipantInfo | undefined {\n return this.#job.participant;\n }\n\n /** @returns The agent's name, as set in {@link WorkerOptions} */\n get agentName(): string {\n return this.#job.agentName;\n }\n\n /** Rejects the job. */\n async reject() {\n await this.#onReject();\n }\n\n /** Accepts the job, launching it on an idle child process. */\n async accept(name = '', identity = '', metadata = '') {\n if (identity === '') identity = 'agent-' + this.id;\n\n this.#onAccept({ name, identity, metadata });\n }\n}\n"],"mappings":"AAWA,SAAS,iBAAiB,WAAW,iBAAiB;AAEtD,SAAS,WAAW;AAGb,IAAK,gBAAL,kBAAKA,mBAAL;AACL,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AAJU,SAAAA;AAAA,GAAA;AAqBL,MAAM,4BAA4B,MAAM;AAAA,EAC7C,YAAY,KAAc;AACxB,UAAM,GAAG;AACT,WAAO,eAAe,MAAM,WAAW,SAAS;AAAA,EAClD;AACF;AAGO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA,oBAA6C,CAAC;AAAA,EAC9C,0BAAwF,CAAC;AAAA,EACzF,oBAKI,CAAC;AAAA,EACL;AAAA,EAEA,YACE,MACA,MACA,MACA,WACA,YACA;AACA,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,yBAAyB,KAAK,uBAAuB,KAAK,IAAI;AACnE,SAAK,MAAM,GAAG,UAAU,sBAAsB,KAAK,sBAAsB;AACzE,SAAK,UAAU,IAAI,EAAE,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC;AAAA,EACjD;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAiB;AACnB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,OAAa;AACf,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,QAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,oBAAoB,UAA+B;AACjD,SAAK,kBAAkB,KAAK,QAAQ;AAAA,EACtC;AAAA,EAEA,MAAM,mBAAmB,UAA+C;AACtE,QAAI,CAAC,KAAK,MAAM,aAAa;AAC3B,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AAEA,eAAW,KAAK,KAAK,MAAM,mBAAmB,OAAO,GAAG;AACtD,WAAK,CAAC,YAAY,EAAE,aAAa,aAAa,EAAE,KAAK,QAAQ,gBAAgB,OAAO;AAClF,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,YAAM,yBAAyB,CAAC,gBAAmC;AACjE,aACG,CAAC,YAAY,YAAY,aAAa,aACvC,YAAY,KAAK,QAAQ,gBAAgB,OACzC;AACA,wBAAc;AACd,kBAAQ,WAAW;AAAA,QACrB;AAAA,MACF;AACA,YAAM,iBAAiB,MAAM;AAC3B,sBAAc;AACd,eAAO,IAAI,MAAM,iDAAiD,CAAC;AAAA,MACrE;AAEA,YAAM,gBAAgB,MAAM;AAC1B,aAAK,MAAM,IAAI,UAAU,sBAAsB,sBAAsB;AACrE,aAAK,MAAM,IAAI,UAAU,cAAc,cAAc;AAAA,MACvD;AAEA,WAAK,MAAM,GAAG,UAAU,sBAAsB,sBAAsB;AACpE,WAAK,MAAM,GAAG,UAAU,cAAc,cAAc;AAAA,IACtD,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,QACJ,MACA,gBAA+B,uBAC/B,WACA;AACA,UAAM,OAAO;AAAA,MACX;AAAA,MACA,eAAe,iBAAiB;AAAA,MAChC;AAAA,MACA,UAAU;AAAA,IACZ;AAEA,UAAM,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,KAAK,MAAM,OAAO,IAAI;AAC/D,SAAK,WAAW;AAEhB,SAAK,MAAM,mBAAmB,QAAQ,KAAK,sBAAsB;AAEjE,QAAI,CAAC,oBAA0B,kBAAwB,EAAE,SAAS,aAAa,GAAG;AAChF,WAAK,MAAM,mBAAmB,QAAQ,CAAC,MAAM;AAC3C,UAAE,kBAAkB,QAAQ,CAAC,QAAQ;AACnC,cACG,kBAAkB,sBAA4B,IAAI,SAAS,UAAU,cACrE,kBAAkB,sBAA4B,IAAI,SAAS,UAAU,YACtE;AACA,gBAAI,cAAc,IAAI;AAAA,UACxB;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,SAAS,SAAS,IAAI;AACpB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA;AAAA,EAGA,uBAAuB,GAAsB;AA7L/C;AA8LI,eAAW,YAAY,KAAK,yBAAyB;AACnD,YAAI,UAAK,kBAAkB,EAAE,QAAQ,MAAjC,mBAAoC,aAAY,UAAU;AAC5D,aAAK,QAAQ;AAAA,UACX;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AACA,YAAM,SAAS,SAAS,MAAM,CAAC;AAC/B,aAAO,QAAQ,MAAM,OAAO,KAAK,kBAAkB,EAAE,QAAQ,CAAC;AAC9D,WAAK,kBAAkB,EAAE,QAAQ,IAAI,EAAE,UAAU,OAAO;AAAA,IAC1D;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,yBAAyB,UAAoE;AAC3F,QAAI,KAAK,wBAAwB,SAAS,QAAQ,GAAG;AACnD,YAAM,IAAI,oBAAoB,4CAA4C;AAAA,IAC5E;AAEA,SAAK,wBAAwB,KAAK,QAAQ;AAAA,EAC5C;AACF;AAEO,MAAM,WAAW;AAAA,EACtB,OAAO,QAAQ;AAAA,EACf,WAAsC,CAAC;AAAA,EAEvC,IAAI,MAAc;AAChB,WAAO,KAAK;AAAA,EACd;AACF;AAUO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,YACE,KACA,UACA,UACA;AACA,SAAK,OAAO;AACZ,SAAK,YAAY;AACjB,SAAK,YAAY;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,KAAa;AACf,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,MAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,OAA+B;AACjC,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAA+C;AACjD,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAAoB;AACtB,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,MAAM,SAAS;AACb,UAAM,KAAK,UAAU;AAAA,EACvB;AAAA;AAAA,EAGA,MAAM,OAAO,OAAO,IAAI,WAAW,IAAI,WAAW,IAAI;AACpD,QAAI,aAAa,GAAI,YAAW,WAAW,KAAK;AAEhD,SAAK,UAAU,EAAE,MAAM,UAAU,SAAS,CAAC;AAAA,EAC7C;AACF;","names":["AutoSubscribe"]}
|
|
1
|
+
{"version":3,"sources":["../src/job.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as proto from '@livekit/protocol';\nimport type {\n E2EEOptions,\n LocalParticipant,\n RemoteParticipant,\n Room,\n RtcConfiguration,\n} from '@livekit/rtc-node';\nimport { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';\nimport type { Logger } from 'pino';\nimport { log } from './log.js';\n\n/** Which tracks, if any, should the agent automatically subscribe to? */\nexport enum AutoSubscribe {\n SUBSCRIBE_ALL,\n SUBSCRIBE_NONE,\n VIDEO_ONLY,\n AUDIO_ONLY,\n}\n\nexport type JobAcceptArguments = {\n name: string;\n identity: string;\n metadata: string;\n attributes?: { [key: string]: string };\n};\n\nexport type RunningJobInfo = {\n acceptArguments: JobAcceptArguments;\n job: proto.Job;\n url: string;\n token: string;\n};\n\n/** Attempted to add a function callback, but the function already exists. */\nexport class FunctionExistsError extends Error {\n constructor(msg?: string) {\n super(msg);\n Object.setPrototypeOf(this, new.target.prototype);\n }\n}\n\n/** The job and environment context as seen by the agent, accessible by the entrypoint function. */\nexport class JobContext {\n #proc: JobProcess;\n #info: RunningJobInfo;\n #room: Room;\n #onConnect: () => void;\n #onShutdown: (s: string) => void;\n /** @internal */\n shutdownCallbacks: (() => Promise<void>)[] = [];\n #participantEntrypoints: ((job: JobContext, p: RemoteParticipant) => Promise<void>)[] = [];\n #participantTasks: {\n [id: string]: {\n callback: (job: JobContext, p: RemoteParticipant) => Promise<void>;\n result: Promise<void>;\n };\n } = {};\n #logger: Logger;\n\n constructor(\n proc: JobProcess,\n info: RunningJobInfo,\n room: Room,\n onConnect: () => void,\n onShutdown: (s: string) => void,\n ) {\n this.#proc = proc;\n this.#info = info;\n this.#room = room;\n this.#onConnect = onConnect;\n this.#onShutdown = onShutdown;\n this.onParticipantConnected = this.onParticipantConnected.bind(this);\n this.#room.on(RoomEvent.ParticipantConnected, this.onParticipantConnected);\n this.#logger = log().child({ info: this.#info });\n }\n\n get proc(): JobProcess {\n return this.#proc;\n }\n\n get job(): proto.Job {\n return this.#info.job;\n }\n\n /** @returns The room the agent was called into */\n get room(): Room {\n return this.#room;\n }\n\n /** @returns The agent's participant if connected to the room, otherwise `undefined` */\n get agent(): LocalParticipant | undefined {\n return this.#room.localParticipant;\n }\n\n /** Adds a promise to be awaited when {@link JobContext.shutdown | shutdown} is called. */\n addShutdownCallback(callback: () => Promise<void>) {\n this.shutdownCallbacks.push(callback);\n }\n\n async waitForParticipant(identity?: string): Promise<RemoteParticipant> {\n if (!this.#room.isConnected) {\n throw new Error('room is not connected');\n }\n\n for (const p of this.#room.remoteParticipants.values()) {\n if ((!identity || p.identity === identity) && p.info.kind != ParticipantKind.AGENT) {\n return p;\n }\n }\n\n return new Promise((resolve, reject) => {\n const onParticipantConnected = (participant: RemoteParticipant) => {\n if (\n (!identity || participant.identity === identity) &&\n participant.info.kind != ParticipantKind.AGENT\n ) {\n clearHandlers();\n resolve(participant);\n }\n };\n const onDisconnected = () => {\n clearHandlers();\n reject(new Error('Room disconnected while waiting for participant'));\n };\n\n const clearHandlers = () => {\n this.#room.off(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.off(RoomEvent.Disconnected, onDisconnected);\n };\n\n this.#room.on(RoomEvent.ParticipantConnected, onParticipantConnected);\n this.#room.on(RoomEvent.Disconnected, onDisconnected);\n });\n }\n\n /**\n * Connects the agent to the room.\n *\n * @remarks\n * It is recommended to run this command as early in the function as possible, as executing it\n * later may cause noticeable delay between user and agent joins.\n *\n * @see {@link https://github.com/livekit/node-sdks/tree/main/packages/livekit-rtc#readme |\n * @livekit/rtc-node} for more information about the parameters.\n */\n async connect(\n e2ee?: E2EEOptions,\n autoSubscribe: AutoSubscribe = AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig?: RtcConfiguration,\n ) {\n const opts = {\n e2ee,\n autoSubscribe: autoSubscribe == AutoSubscribe.SUBSCRIBE_ALL,\n rtcConfig,\n dynacast: false,\n };\n\n await this.#room.connect(this.#info.url, this.#info.token, opts);\n this.#onConnect();\n\n this.#room.remoteParticipants.forEach(this.onParticipantConnected);\n\n if ([AutoSubscribe.AUDIO_ONLY, AutoSubscribe.VIDEO_ONLY].includes(autoSubscribe)) {\n this.#room.remoteParticipants.forEach((p) => {\n p.trackPublications.forEach((pub) => {\n if (\n (autoSubscribe === AutoSubscribe.AUDIO_ONLY && pub.kind === TrackKind.KIND_AUDIO) ||\n (autoSubscribe === AutoSubscribe.VIDEO_ONLY && pub.kind === TrackKind.KIND_VIDEO)\n ) {\n pub.setSubscribed(true);\n }\n });\n });\n }\n }\n\n /**\n * Gracefully shuts down the job, and runs all shutdown promises.\n *\n * @param reason - Optional reason for shutdown\n */\n shutdown(reason = '') {\n this.#onShutdown(reason);\n }\n\n /** @internal */\n onParticipantConnected(p: RemoteParticipant) {\n for (const callback of this.#participantEntrypoints) {\n if (this.#participantTasks[p.identity]?.callback == callback) {\n this.#logger.warn(\n 'a participant has joined before a prior prticipant task matching the same identity has finished:',\n p.identity,\n );\n }\n const result = callback(this, p);\n result.finally(() => delete this.#participantTasks[p.identity]);\n this.#participantTasks[p.identity] = { callback, result };\n }\n }\n\n /**\n * Adds a promise to be awaited whenever a new participant joins the room.\n *\n * @throws {@link FunctionExistsError} if an entrypoint already exists\n */\n addParticipantEntrypoint(callback: (job: JobContext, p: RemoteParticipant) => Promise<void>) {\n if (this.#participantEntrypoints.includes(callback)) {\n throw new FunctionExistsError('entrypoints cannot be added more than once');\n }\n\n this.#participantEntrypoints.push(callback);\n }\n}\n\nexport class JobProcess {\n #pid = process.pid;\n userData: { [id: string]: unknown } = {};\n\n get pid(): number {\n return this.#pid;\n }\n}\n\n/**\n * A request sent by the server to spawn a new agent job.\n *\n * @remarks\n * For most applications, this is best left to the default, which simply accepts the job and\n * handles the logic inside the entrypoint function. This class is useful for vetting which\n * requests should fill idle processes and which should be outright rejected.\n */\nexport class JobRequest {\n #job: proto.Job;\n #onReject: () => Promise<void>;\n #onAccept: (args: JobAcceptArguments) => Promise<void>;\n\n /** @internal */\n constructor(\n job: proto.Job,\n onReject: () => Promise<void>,\n onAccept: (args: JobAcceptArguments) => Promise<void>,\n ) {\n this.#job = job;\n this.#onReject = onReject;\n this.#onAccept = onAccept;\n }\n\n /** @returns The ID of the job, set by the LiveKit server */\n get id(): string {\n return this.#job.id;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get job(): proto.Job {\n return this.#job;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get room(): proto.Room | undefined {\n return this.#job.room;\n }\n\n /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */\n get publisher(): proto.ParticipantInfo | undefined {\n return this.#job.participant;\n }\n\n /** @returns The agent's name, as set in {@link WorkerOptions} */\n get agentName(): string {\n return this.#job.agentName;\n }\n\n /** Rejects the job. */\n async reject() {\n await this.#onReject();\n }\n\n /** Accepts the job, launching it on an idle child process. */\n async accept(name = '', identity = '', metadata = '', attributes?: { [key: string]: string }) {\n if (identity === '') identity = 'agent-' + this.id;\n\n this.#onAccept({ name, identity, metadata, attributes });\n }\n}\n"],"mappings":"AAWA,SAAS,iBAAiB,WAAW,iBAAiB;AAEtD,SAAS,WAAW;AAGb,IAAK,gBAAL,kBAAKA,mBAAL;AACL,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AACA,EAAAA,8BAAA;AAJU,SAAAA;AAAA,GAAA;AAsBL,MAAM,4BAA4B,MAAM;AAAA,EAC7C,YAAY,KAAc;AACxB,UAAM,GAAG;AACT,WAAO,eAAe,MAAM,WAAW,SAAS;AAAA,EAClD;AACF;AAGO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA,oBAA6C,CAAC;AAAA,EAC9C,0BAAwF,CAAC;AAAA,EACzF,oBAKI,CAAC;AAAA,EACL;AAAA,EAEA,YACE,MACA,MACA,MACA,WACA,YACA;AACA,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,yBAAyB,KAAK,uBAAuB,KAAK,IAAI;AACnE,SAAK,MAAM,GAAG,UAAU,sBAAsB,KAAK,sBAAsB;AACzE,SAAK,UAAU,IAAI,EAAE,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC;AAAA,EACjD;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAiB;AACnB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,OAAa;AACf,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,QAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,oBAAoB,UAA+B;AACjD,SAAK,kBAAkB,KAAK,QAAQ;AAAA,EACtC;AAAA,EAEA,MAAM,mBAAmB,UAA+C;AACtE,QAAI,CAAC,KAAK,MAAM,aAAa;AAC3B,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AAEA,eAAW,KAAK,KAAK,MAAM,mBAAmB,OAAO,GAAG;AACtD,WAAK,CAAC,YAAY,EAAE,aAAa,aAAa,EAAE,KAAK,QAAQ,gBAAgB,OAAO;AAClF,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,YAAM,yBAAyB,CAAC,gBAAmC;AACjE,aACG,CAAC,YAAY,YAAY,aAAa,aACvC,YAAY,KAAK,QAAQ,gBAAgB,OACzC;AACA,wBAAc;AACd,kBAAQ,WAAW;AAAA,QACrB;AAAA,MACF;AACA,YAAM,iBAAiB,MAAM;AAC3B,sBAAc;AACd,eAAO,IAAI,MAAM,iDAAiD,CAAC;AAAA,MACrE;AAEA,YAAM,gBAAgB,MAAM;AAC1B,aAAK,MAAM,IAAI,UAAU,sBAAsB,sBAAsB;AACrE,aAAK,MAAM,IAAI,UAAU,cAAc,cAAc;AAAA,MACvD;AAEA,WAAK,MAAM,GAAG,UAAU,sBAAsB,sBAAsB;AACpE,WAAK,MAAM,GAAG,UAAU,cAAc,cAAc;AAAA,IACtD,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,QACJ,MACA,gBAA+B,uBAC/B,WACA;AACA,UAAM,OAAO;AAAA,MACX;AAAA,MACA,eAAe,iBAAiB;AAAA,MAChC;AAAA,MACA,UAAU;AAAA,IACZ;AAEA,UAAM,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,KAAK,MAAM,OAAO,IAAI;AAC/D,SAAK,WAAW;AAEhB,SAAK,MAAM,mBAAmB,QAAQ,KAAK,sBAAsB;AAEjE,QAAI,CAAC,oBAA0B,kBAAwB,EAAE,SAAS,aAAa,GAAG;AAChF,WAAK,MAAM,mBAAmB,QAAQ,CAAC,MAAM;AAC3C,UAAE,kBAAkB,QAAQ,CAAC,QAAQ;AACnC,cACG,kBAAkB,sBAA4B,IAAI,SAAS,UAAU,cACrE,kBAAkB,sBAA4B,IAAI,SAAS,UAAU,YACtE;AACA,gBAAI,cAAc,IAAI;AAAA,UACxB;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,SAAS,SAAS,IAAI;AACpB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA;AAAA,EAGA,uBAAuB,GAAsB;AA9L/C;AA+LI,eAAW,YAAY,KAAK,yBAAyB;AACnD,YAAI,UAAK,kBAAkB,EAAE,QAAQ,MAAjC,mBAAoC,aAAY,UAAU;AAC5D,aAAK,QAAQ;AAAA,UACX;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AACA,YAAM,SAAS,SAAS,MAAM,CAAC;AAC/B,aAAO,QAAQ,MAAM,OAAO,KAAK,kBAAkB,EAAE,QAAQ,CAAC;AAC9D,WAAK,kBAAkB,EAAE,QAAQ,IAAI,EAAE,UAAU,OAAO;AAAA,IAC1D;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,yBAAyB,UAAoE;AAC3F,QAAI,KAAK,wBAAwB,SAAS,QAAQ,GAAG;AACnD,YAAM,IAAI,oBAAoB,4CAA4C;AAAA,IAC5E;AAEA,SAAK,wBAAwB,KAAK,QAAQ;AAAA,EAC5C;AACF;AAEO,MAAM,WAAW;AAAA,EACtB,OAAO,QAAQ;AAAA,EACf,WAAsC,CAAC;AAAA,EAEvC,IAAI,MAAc;AAChB,WAAO,KAAK;AAAA,EACd;AACF;AAUO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,YACE,KACA,UACA,UACA;AACA,SAAK,OAAO;AACZ,SAAK,YAAY;AACjB,SAAK,YAAY;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,KAAa;AACf,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,MAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,OAA+B;AACjC,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAA+C;AACjD,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,IAAI,YAAoB;AACtB,WAAO,KAAK,KAAK;AAAA,EACnB;AAAA;AAAA,EAGA,MAAM,SAAS;AACb,UAAM,KAAK,UAAU;AAAA,EACvB;AAAA;AAAA,EAGA,MAAM,OAAO,OAAO,IAAI,WAAW,IAAI,WAAW,IAAI,YAAwC;AAC5F,QAAI,aAAa,GAAI,YAAW,WAAW,KAAK;AAEhD,SAAK,UAAU,EAAE,MAAM,UAAU,UAAU,WAAW,CAAC;AAAA,EACzD;AACF;","names":["AutoSubscribe"]}
|
|
@@ -34,8 +34,8 @@ __export(basic_exports, {
|
|
|
34
34
|
tokenizeParagraphs: () => tokenizeParagraphs
|
|
35
35
|
});
|
|
36
36
|
module.exports = __toCommonJS(basic_exports);
|
|
37
|
-
var tokenizer = __toESM(require("../index.cjs"), 1);
|
|
38
37
|
var import_token_stream = require("../token_stream.cjs");
|
|
38
|
+
var tokenizer = __toESM(require("../tokenizer.cjs"), 1);
|
|
39
39
|
var import_hyphenator = require("./hyphenator.cjs");
|
|
40
40
|
var import_paragraph = require("./paragraph.cjs");
|
|
41
41
|
var import_sentence = require("./sentence.cjs");
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport
|
|
1
|
+
{"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';\nimport * as tokenizer from '../tokenizer.js';\nimport { hyphenator } from './hyphenator.js';\nimport { splitParagraphs } from './paragraph.js';\nimport { splitSentences } from './sentence.js';\nimport { splitWords } from './word.js';\n\ninterface TokenizerOptions {\n language: string;\n minSentenceLength: number;\n streamContextLength: number;\n}\n\nexport class SentenceTokenizer extends tokenizer.SentenceTokenizer {\n #config: TokenizerOptions;\n\n constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {\n super();\n this.#config = {\n language,\n minSentenceLength,\n streamContextLength,\n };\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.SentenceStream {\n return new BufferedSentenceStream(\n (text: string) => splitSentences(text, this.#config.minSentenceLength),\n this.#config.minSentenceLength,\n this.#config.streamContextLength,\n );\n }\n}\n\nexport class WordTokenizer extends tokenizer.WordTokenizer {\n #ignorePunctuation: boolean;\n\n constructor(ignorePunctuation = true) {\n super();\n this.#ignorePunctuation = ignorePunctuation;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.WordStream {\n return new BufferedWordStream(\n (text: string) => splitWords(text, this.#ignorePunctuation),\n 1,\n 1,\n );\n }\n}\n\nexport const hyphenateWord = (word: string): string[] => {\n return hyphenator.hyphenateWord(word);\n};\n\nexport const tokenizeParagraphs = (text: string): string[] => {\n return splitParagraphs(text).map((tok) => tok[0]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,0BAA2D;AAC3D,gBAA2B;AAC3B,wBAA2B;AAC3B,uBAAgC;AAChC,sBAA+B;AAC/B,kBAA2B;AAQpB,MAAM,0BAA0B,UAAU,kBAAkB;AAAA,EACjE;AAAA,EAEA,YAAY,WAAW,SAAS,oBAAoB,IAAI,sBAAsB,IAAI;AAChF,UAAM;AACN,SAAK,UAAU;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,eAAO,gCAAe,MAAM,KAAK,QAAQ,iBAAiB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACjF;AAAA;AAAA,EAGA,OAAO,UAA6C;AAClD,WAAO,IAAI;AAAA,MACT,CAAC,aAAiB,gCAAe,MAAM,KAAK,QAAQ,iBAAiB;AAAA,MACrE,KAAK,QAAQ;AAAA,MACb,KAAK,QAAQ;AAAA,IACf;AAAA,EACF;AACF;AAEO,MAAM,sBAAsB,UAAU,cAAc;AAAA,EACzD;AAAA,EAEA,YAAY,oBAAoB,MAAM;AACpC,UAAM;AACN,SAAK,qBAAqB;AAAA,EAC5B;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,eAAO,wBAAW,MAAM,KAAK,kBAAkB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACtE;AAAA;AAAA,EAGA,OAAO,UAAyC;AAC9C,WAAO,IAAI;AAAA,MACT,CAAC,aAAiB,wBAAW,MAAM,KAAK,kBAAkB;AAAA,MAC1D;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,gBAAgB,CAAC,SAA2B;AACvD,SAAO,6BAAW,cAAc,IAAI;AACtC;AAEO,MAAM,qBAAqB,CAAC,SAA2B;AAC5D,aAAO,kCAAgB,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAClD;","names":[]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import * as tokenizer from '../
|
|
1
|
+
import * as tokenizer from '../tokenizer.js';
|
|
2
2
|
export declare class SentenceTokenizer extends tokenizer.SentenceTokenizer {
|
|
3
3
|
#private;
|
|
4
4
|
constructor(language?: string, minSentenceLength?: number, streamContextLength?: number);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"basic.d.ts","sourceRoot":"","sources":["../../../src/tokenize/basic/basic.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"basic.d.ts","sourceRoot":"","sources":["../../../src/tokenize/basic/basic.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,SAAS,MAAM,iBAAiB,CAAC;AAY7C,qBAAa,iBAAkB,SAAQ,SAAS,CAAC,iBAAiB;;gBAGpD,QAAQ,SAAU,EAAE,iBAAiB,SAAK,EAAE,mBAAmB,SAAK;IAUhF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,cAAc;CAOpD;AAED,qBAAa,aAAc,SAAQ,SAAS,CAAC,aAAa;;gBAG5C,iBAAiB,UAAO;IAMpC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,UAAU;CAOhD;AAED,eAAO,MAAM,aAAa,SAAU,MAAM,KAAG,MAAM,EAElD,CAAC;AAEF,eAAO,MAAM,kBAAkB,SAAU,MAAM,KAAG,MAAM,EAEvD,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import * as tokenizer from "../index.js";
|
|
2
1
|
import { BufferedSentenceStream, BufferedWordStream } from "../token_stream.js";
|
|
2
|
+
import * as tokenizer from "../tokenizer.js";
|
|
3
3
|
import { hyphenator } from "./hyphenator.js";
|
|
4
4
|
import { splitParagraphs } from "./paragraph.js";
|
|
5
5
|
import { splitSentences } from "./sentence.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport
|
|
1
|
+
{"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';\nimport * as tokenizer from '../tokenizer.js';\nimport { hyphenator } from './hyphenator.js';\nimport { splitParagraphs } from './paragraph.js';\nimport { splitSentences } from './sentence.js';\nimport { splitWords } from './word.js';\n\ninterface TokenizerOptions {\n language: string;\n minSentenceLength: number;\n streamContextLength: number;\n}\n\nexport class SentenceTokenizer extends tokenizer.SentenceTokenizer {\n #config: TokenizerOptions;\n\n constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {\n super();\n this.#config = {\n language,\n minSentenceLength,\n streamContextLength,\n };\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.SentenceStream {\n return new BufferedSentenceStream(\n (text: string) => splitSentences(text, this.#config.minSentenceLength),\n this.#config.minSentenceLength,\n this.#config.streamContextLength,\n );\n }\n}\n\nexport class WordTokenizer extends tokenizer.WordTokenizer {\n #ignorePunctuation: boolean;\n\n constructor(ignorePunctuation = true) {\n super();\n this.#ignorePunctuation = ignorePunctuation;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.WordStream {\n return new BufferedWordStream(\n (text: string) => splitWords(text, this.#ignorePunctuation),\n 1,\n 1,\n );\n }\n}\n\nexport const hyphenateWord = (word: string): string[] => {\n return hyphenator.hyphenateWord(word);\n};\n\nexport const tokenizeParagraphs = (text: string): string[] => {\n return splitParagraphs(text).map((tok) => tok[0]);\n};\n"],"mappings":"AAGA,SAAS,wBAAwB,0BAA0B;AAC3D,YAAY,eAAe;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,uBAAuB;AAChC,SAAS,sBAAsB;AAC/B,SAAS,kBAAkB;AAQpB,MAAM,0BAA0B,UAAU,kBAAkB;AAAA,EACjE;AAAA,EAEA,YAAY,WAAW,SAAS,oBAAoB,IAAI,sBAAsB,IAAI;AAChF,UAAM;AACN,SAAK,UAAU;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,WAAO,eAAe,MAAM,KAAK,QAAQ,iBAAiB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACjF;AAAA;AAAA,EAGA,OAAO,UAA6C;AAClD,WAAO,IAAI;AAAA,MACT,CAAC,SAAiB,eAAe,MAAM,KAAK,QAAQ,iBAAiB;AAAA,MACrE,KAAK,QAAQ;AAAA,MACb,KAAK,QAAQ;AAAA,IACf;AAAA,EACF;AACF;AAEO,MAAM,sBAAsB,UAAU,cAAc;AAAA,EACzD;AAAA,EAEA,YAAY,oBAAoB,MAAM;AACpC,UAAM;AACN,SAAK,qBAAqB;AAAA,EAC5B;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,WAAO,WAAW,MAAM,KAAK,kBAAkB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACtE;AAAA;AAAA,EAGA,OAAO,UAAyC;AAC9C,WAAO,IAAI;AAAA,MACT,CAAC,SAAiB,WAAW,MAAM,KAAK,kBAAkB;AAAA,MAC1D;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,gBAAgB,CAAC,SAA2B;AACvD,SAAO,WAAW,cAAc,IAAI;AACtC;AAEO,MAAM,qBAAqB,CAAC,SAA2B;AAC5D,SAAO,gBAAgB,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAClD;","names":[]}
|
|
@@ -33,19 +33,25 @@ const splitSentences = (text, minLength = 20) => {
|
|
|
33
33
|
text = text.replaceAll("\n", " ");
|
|
34
34
|
text = text.replaceAll(prefixes, "$1<prd>");
|
|
35
35
|
text = text.replaceAll(websites, "<prd>$2");
|
|
36
|
-
text = text.replaceAll(new RegExp(`${digits}[.]${digits}`, "g"), "$1<prd>$2");
|
|
36
|
+
text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, "g"), "$1<prd>$2");
|
|
37
37
|
text = text.replaceAll(dots, (match) => "<prd>".repeat(match.length));
|
|
38
38
|
text = text.replaceAll("Ph.D.", "Ph<prd>D<prd>");
|
|
39
|
-
text = text.replaceAll(new RegExp(
|
|
40
|
-
text = text.replaceAll(new RegExp(`${acronyms} ${starters}`, "g"), "$1<stop> $2");
|
|
39
|
+
text = text.replaceAll(new RegExp(`\\s${alphabets.source}[.] `, "g"), " $1<prd> ");
|
|
40
|
+
text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, "g"), "$1<stop> $2");
|
|
41
41
|
text = text.replaceAll(
|
|
42
|
-
new RegExp(`${alphabets}[.]${alphabets}[.]${alphabets}[.]`, "g"),
|
|
42
|
+
new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, "g"),
|
|
43
43
|
"$1<prd>$2<prd>$3<prd>"
|
|
44
44
|
);
|
|
45
|
-
text = text.replaceAll(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
text = text.replaceAll(
|
|
46
|
+
new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, "g"),
|
|
47
|
+
"$1<prd>$2<prd>"
|
|
48
|
+
);
|
|
49
|
+
text = text.replaceAll(
|
|
50
|
+
new RegExp(` ${suffixes.source}[.] ${starters.source}`, "g"),
|
|
51
|
+
"$1<stop> $2"
|
|
52
|
+
);
|
|
53
|
+
text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, "g"), "$1<prd>");
|
|
54
|
+
text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, "g"), "$1<prd>");
|
|
49
55
|
text = text.replaceAll(".\u201D", "\u201D.");
|
|
50
56
|
text = text.replaceAll('."', '".');
|
|
51
57
|
text = text.replaceAll('!"', '"!');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (text: string, minLength = 20): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /[.](com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n text = text.replaceAll('\\n', ' ');\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replaceAll(websites, '<prd>$2');\n text = text.replaceAll(new RegExp(`${digits}[.]${digits}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(
|
|
1
|
+
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (text: string, minLength = 20): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /[.](com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n text = text.replaceAll('\\n', ' ');\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replaceAll(websites, '<prd>$2');\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replaceAll('.', '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n for (const match of split) {\n const sentence = match.trim();\n if (!sentence) continue;\n\n buf += ' ' + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(1), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(1), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAOO,MAAM,iBAAiB,CAAC,MAAc,YAAY,OAAmC;AAC1F,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,SAAO,KAAK,WAAW,MAAM,GAAG;AAChC,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,MAAM,KAAK;AAC5B,QAAI,CAAC,SAAU;AAEf,WAAO,MAAM;AACb,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,CAAC,GAAG,OAAO,GAAG,CAAC;AACzC,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,CAAC,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACvD;AAEA,SAAO;AACT;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/tokenize/basic/sentence.ts"],"names":[],"mappings":"AAIA;;GAEG;AACH,eAAO,MAAM,cAAc,SAAU,MAAM,yBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/tokenize/basic/sentence.ts"],"names":[],"mappings":"AAIA;;GAEG;AACH,eAAO,MAAM,cAAc,SAAU,MAAM,yBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAmErF,CAAC"}
|
|
@@ -10,19 +10,25 @@ const splitSentences = (text, minLength = 20) => {
|
|
|
10
10
|
text = text.replaceAll("\n", " ");
|
|
11
11
|
text = text.replaceAll(prefixes, "$1<prd>");
|
|
12
12
|
text = text.replaceAll(websites, "<prd>$2");
|
|
13
|
-
text = text.replaceAll(new RegExp(`${digits}[.]${digits}`, "g"), "$1<prd>$2");
|
|
13
|
+
text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, "g"), "$1<prd>$2");
|
|
14
14
|
text = text.replaceAll(dots, (match) => "<prd>".repeat(match.length));
|
|
15
15
|
text = text.replaceAll("Ph.D.", "Ph<prd>D<prd>");
|
|
16
|
-
text = text.replaceAll(new RegExp(
|
|
17
|
-
text = text.replaceAll(new RegExp(`${acronyms} ${starters}`, "g"), "$1<stop> $2");
|
|
16
|
+
text = text.replaceAll(new RegExp(`\\s${alphabets.source}[.] `, "g"), " $1<prd> ");
|
|
17
|
+
text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, "g"), "$1<stop> $2");
|
|
18
18
|
text = text.replaceAll(
|
|
19
|
-
new RegExp(`${alphabets}[.]${alphabets}[.]${alphabets}[.]`, "g"),
|
|
19
|
+
new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, "g"),
|
|
20
20
|
"$1<prd>$2<prd>$3<prd>"
|
|
21
21
|
);
|
|
22
|
-
text = text.replaceAll(
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
22
|
+
text = text.replaceAll(
|
|
23
|
+
new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, "g"),
|
|
24
|
+
"$1<prd>$2<prd>"
|
|
25
|
+
);
|
|
26
|
+
text = text.replaceAll(
|
|
27
|
+
new RegExp(` ${suffixes.source}[.] ${starters.source}`, "g"),
|
|
28
|
+
"$1<stop> $2"
|
|
29
|
+
);
|
|
30
|
+
text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, "g"), "$1<prd>");
|
|
31
|
+
text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, "g"), "$1<prd>");
|
|
26
32
|
text = text.replaceAll(".\u201D", "\u201D.");
|
|
27
33
|
text = text.replaceAll('."', '".');
|
|
28
34
|
text = text.replaceAll('!"', '"!');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (text: string, minLength = 20): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /[.](com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n text = text.replaceAll('\\n', ' ');\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replaceAll(websites, '<prd>$2');\n text = text.replaceAll(new RegExp(`${digits}[.]${digits}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(
|
|
1
|
+
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (text: string, minLength = 20): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /[.](com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n text = text.replaceAll('\\n', ' ');\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replaceAll(websites, '<prd>$2');\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replaceAll('.', '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n for (const match of split) {\n const sentence = match.trim();\n if (!sentence) continue;\n\n buf += ' ' + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(1), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(1), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":"AAOO,MAAM,iBAAiB,CAAC,MAAc,YAAY,OAAmC;AAC1F,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,SAAO,KAAK,WAAW,MAAM,GAAG;AAChC,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,MAAM,KAAK;AAC5B,QAAI,CAAC,SAAU;AAEf,WAAO,MAAM;AACb,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,CAAC,GAAG,OAAO,GAAG,CAAC;AACzC,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,CAAC,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACvD;AAEA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var import_vitest = require("vitest");
|
|
3
|
+
var import_basic = require("./basic/index.cjs");
|
|
4
|
+
var import_paragraph = require("./basic/paragraph.cjs");
|
|
5
|
+
const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
|
|
6
|
+
const EXPECTED_MIN_20 = [
|
|
7
|
+
"Hi! LiveKit is a platform for live audio and video applications and services.",
|
|
8
|
+
"R.T.C stands for Real-Time Communication... again R.T.C.",
|
|
9
|
+
"Mr. Theo is testing the sentence tokenizer.",
|
|
10
|
+
"This is a test. Another test.",
|
|
11
|
+
"A short sentence. A longer sentence that is longer than the previous sentence.",
|
|
12
|
+
"f(x) = x * 2.54 + 42.",
|
|
13
|
+
"Hey! Hi! Hello!"
|
|
14
|
+
];
|
|
15
|
+
const WORDS_TEXT = "This is a test. Blabla another test! multiple consecutive spaces: done";
|
|
16
|
+
const WORDS_EXPECTED = [
|
|
17
|
+
"This",
|
|
18
|
+
"is",
|
|
19
|
+
"a",
|
|
20
|
+
"test",
|
|
21
|
+
"Blabla",
|
|
22
|
+
"another",
|
|
23
|
+
"test",
|
|
24
|
+
"multiple",
|
|
25
|
+
"consecutive",
|
|
26
|
+
"spaces",
|
|
27
|
+
"done"
|
|
28
|
+
];
|
|
29
|
+
const WORDS_PUNCT_TEXT = 'This is <phoneme alphabet="cmu-arpabet" ph="AE K CH UW AH L IY">actually</phoneme> tricky to handle.';
|
|
30
|
+
const WORDS_PUNCT_EXPECTED = [
|
|
31
|
+
"This",
|
|
32
|
+
"is",
|
|
33
|
+
"<phoneme",
|
|
34
|
+
'alphabet="cmu-arpabet"',
|
|
35
|
+
'ph="AE',
|
|
36
|
+
"K",
|
|
37
|
+
"CH",
|
|
38
|
+
"UW",
|
|
39
|
+
"AH",
|
|
40
|
+
"L",
|
|
41
|
+
'IY">actually</phoneme>',
|
|
42
|
+
"tricky",
|
|
43
|
+
"to",
|
|
44
|
+
"handle."
|
|
45
|
+
];
|
|
46
|
+
const HYPHENATOR_TEXT = ["Segment", "expected", "communication", "window", "welcome", "bedroom"];
|
|
47
|
+
const HYPHENATOR_EXPECTED = [
|
|
48
|
+
["Seg", "ment"],
|
|
49
|
+
["ex", "pect", "ed"],
|
|
50
|
+
["com", "mu", "ni", "ca", "tion"],
|
|
51
|
+
["win", "dow"],
|
|
52
|
+
["wel", "come"],
|
|
53
|
+
["bed", "room"]
|
|
54
|
+
];
|
|
55
|
+
const PARAGRAPH_TEST_CASES = [
|
|
56
|
+
["Single paragraph.", [["Single paragraph.", 0, 17]]],
|
|
57
|
+
[
|
|
58
|
+
"Paragraph 1.\n\nParagraph 2.",
|
|
59
|
+
[
|
|
60
|
+
["Paragraph 1.", 0, 12],
|
|
61
|
+
["Paragraph 2.", 14, 26]
|
|
62
|
+
]
|
|
63
|
+
],
|
|
64
|
+
[
|
|
65
|
+
"Para 1.\n\nPara 2.\n\nPara 3.",
|
|
66
|
+
[
|
|
67
|
+
["Para 1.", 0, 7],
|
|
68
|
+
["Para 2.", 9, 16],
|
|
69
|
+
["Para 3.", 18, 25]
|
|
70
|
+
]
|
|
71
|
+
],
|
|
72
|
+
["\n\nParagraph with leading newlines.", [["Paragraph with leading newlines.", 2, 34]]],
|
|
73
|
+
["Paragraph with trailing newlines.\n\n", [["Paragraph with trailing newlines.", 0, 33]]],
|
|
74
|
+
[
|
|
75
|
+
"\n\n Paragraph with leading and trailing spaces. \n\n",
|
|
76
|
+
[["Paragraph with leading and trailing spaces.", 4, 47]]
|
|
77
|
+
],
|
|
78
|
+
[
|
|
79
|
+
"Para 1.\n\n\n\nPara 2.",
|
|
80
|
+
// Multiple newlines between paragraphs
|
|
81
|
+
[
|
|
82
|
+
["Para 1.", 0, 7],
|
|
83
|
+
["Para 2.", 11, 18]
|
|
84
|
+
]
|
|
85
|
+
],
|
|
86
|
+
[
|
|
87
|
+
"Para 1.\n \n \nPara 2.",
|
|
88
|
+
// Newlines with spaces between paragraphs
|
|
89
|
+
[
|
|
90
|
+
["Para 1.", 0, 7],
|
|
91
|
+
["Para 2.", 12, 19]
|
|
92
|
+
]
|
|
93
|
+
],
|
|
94
|
+
[
|
|
95
|
+
"",
|
|
96
|
+
// Empty string
|
|
97
|
+
[]
|
|
98
|
+
],
|
|
99
|
+
[
|
|
100
|
+
"\n\n\n",
|
|
101
|
+
// Only newlines
|
|
102
|
+
[]
|
|
103
|
+
],
|
|
104
|
+
[
|
|
105
|
+
"Line 1\nLine 2\nLine 3",
|
|
106
|
+
// Single paragraph with newlines
|
|
107
|
+
[["Line 1\nLine 2\nLine 3", 0, 20]]
|
|
108
|
+
]
|
|
109
|
+
];
|
|
110
|
+
(0, import_vitest.describe)("tokenizer", () => {
|
|
111
|
+
(0, import_vitest.describe)("SentenceTokenizer", () => {
|
|
112
|
+
const tokenizer = new import_basic.SentenceTokenizer();
|
|
113
|
+
(0, import_vitest.it)("should tokenize sentences correctly", () => {
|
|
114
|
+
(0, import_vitest.expect)(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();
|
|
115
|
+
});
|
|
116
|
+
(0, import_vitest.it)("should stream tokenize sentences correctly", async () => {
|
|
117
|
+
const pattern = [1, 2, 4];
|
|
118
|
+
let text = TEXT;
|
|
119
|
+
const chunks = [];
|
|
120
|
+
const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0))).fill(pattern).flat()[Symbol.iterator]();
|
|
121
|
+
for (const size of patternIter) {
|
|
122
|
+
if (!text) break;
|
|
123
|
+
chunks.push(text.slice(void 0, size));
|
|
124
|
+
text = text.slice(size);
|
|
125
|
+
}
|
|
126
|
+
const stream = tokenizer.stream();
|
|
127
|
+
for (const chunk of chunks) {
|
|
128
|
+
stream.pushText(chunk);
|
|
129
|
+
}
|
|
130
|
+
stream.endInput();
|
|
131
|
+
stream.close();
|
|
132
|
+
for (const x of EXPECTED_MIN_20) {
|
|
133
|
+
await stream.next().then((value) => {
|
|
134
|
+
if (value.value) {
|
|
135
|
+
(0, import_vitest.expect)(value.value.token).toStrictEqual(x);
|
|
136
|
+
}
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
});
|
|
141
|
+
(0, import_vitest.describe)("WordTokenizer", () => {
|
|
142
|
+
const tokenizer = new import_basic.WordTokenizer();
|
|
143
|
+
(0, import_vitest.it)("should tokenize words correctly", () => {
|
|
144
|
+
(0, import_vitest.expect)(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();
|
|
145
|
+
});
|
|
146
|
+
(0, import_vitest.it)("should stream tokenize words correctly", async () => {
|
|
147
|
+
const pattern = [1, 2, 4];
|
|
148
|
+
let text = WORDS_TEXT;
|
|
149
|
+
const chunks = [];
|
|
150
|
+
const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0))).fill(pattern).flat()[Symbol.iterator]();
|
|
151
|
+
for (const size of patternIter) {
|
|
152
|
+
if (!text) break;
|
|
153
|
+
chunks.push(text.slice(void 0, size));
|
|
154
|
+
text = text.slice(size);
|
|
155
|
+
}
|
|
156
|
+
const stream = tokenizer.stream();
|
|
157
|
+
for (const chunk of chunks) {
|
|
158
|
+
stream.pushText(chunk);
|
|
159
|
+
}
|
|
160
|
+
stream.endInput();
|
|
161
|
+
stream.close();
|
|
162
|
+
for (const x of WORDS_EXPECTED) {
|
|
163
|
+
await stream.next().then((value) => {
|
|
164
|
+
if (value.value) {
|
|
165
|
+
(0, import_vitest.expect)(value.value.token).toStrictEqual(x);
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
});
|
|
170
|
+
(0, import_vitest.describe)("punctuation handling", () => {
|
|
171
|
+
const tokenizerPunct = new import_basic.WordTokenizer(false);
|
|
172
|
+
(0, import_vitest.it)("should tokenize words correctly", () => {
|
|
173
|
+
(0, import_vitest.expect)(
|
|
174
|
+
tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x)
|
|
175
|
+
).toBeTruthy();
|
|
176
|
+
});
|
|
177
|
+
(0, import_vitest.it)("should stream tokenize words correctly", async () => {
|
|
178
|
+
const pattern = [1, 2, 4];
|
|
179
|
+
let text = WORDS_PUNCT_TEXT;
|
|
180
|
+
const chunks = [];
|
|
181
|
+
const patternIter = Array(
|
|
182
|
+
Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0))
|
|
183
|
+
).fill(pattern).flat()[Symbol.iterator]();
|
|
184
|
+
for (const size of patternIter) {
|
|
185
|
+
if (!text) break;
|
|
186
|
+
chunks.push(text.slice(void 0, size));
|
|
187
|
+
text = text.slice(size);
|
|
188
|
+
}
|
|
189
|
+
const stream = tokenizerPunct.stream();
|
|
190
|
+
for (const chunk of chunks) {
|
|
191
|
+
stream.pushText(chunk);
|
|
192
|
+
}
|
|
193
|
+
stream.endInput();
|
|
194
|
+
stream.close();
|
|
195
|
+
for (const x of WORDS_PUNCT_EXPECTED) {
|
|
196
|
+
await stream.next().then((value) => {
|
|
197
|
+
if (value.value) {
|
|
198
|
+
(0, import_vitest.expect)(value.value.token).toStrictEqual(x);
|
|
199
|
+
}
|
|
200
|
+
});
|
|
201
|
+
}
|
|
202
|
+
});
|
|
203
|
+
});
|
|
204
|
+
});
|
|
205
|
+
(0, import_vitest.describe)("hyphenateWord", () => {
|
|
206
|
+
(0, import_vitest.it)("should hyphenate correctly", () => {
|
|
207
|
+
HYPHENATOR_TEXT.forEach((x, i) => {
|
|
208
|
+
(0, import_vitest.expect)((0, import_basic.hyphenateWord)(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);
|
|
209
|
+
});
|
|
210
|
+
});
|
|
211
|
+
});
|
|
212
|
+
(0, import_vitest.describe)("splitParagraphs", () => {
|
|
213
|
+
(0, import_vitest.it)("should tokenize paragraphs correctly", () => {
|
|
214
|
+
PARAGRAPH_TEST_CASES.forEach(([a, b]) => {
|
|
215
|
+
(0, import_vitest.expect)((0, import_paragraph.splitParagraphs)(a)).toStrictEqual(b);
|
|
216
|
+
});
|
|
217
|
+
});
|
|
218
|
+
});
|
|
219
|
+
});
|
|
220
|
+
//# sourceMappingURL=tokenizer.test.cjs.map
|