@livekit/agents 1.0.39 → 1.0.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,21 +23,24 @@ __export(http_server_exports, {
23
23
  module.exports = __toCommonJS(http_server_exports);
24
24
  var import_node_http = require("node:http");
25
25
  var import_log = require("./log.cjs");
26
- const healthCheck = async (res) => {
27
- res.writeHead(200);
28
- res.end("OK");
29
- };
30
26
  class HTTPServer {
31
27
  host;
32
28
  port;
33
29
  app;
34
30
  #logger = (0, import_log.log)();
35
- constructor(host, port, workerListener) {
31
+ constructor(host, port, healthCheckListener, workerListener) {
36
32
  this.host = host;
37
33
  this.port = port;
38
34
  this.app = (0, import_node_http.createServer)((req, res) => {
39
35
  if (req.url === "/") {
40
- healthCheck(res);
36
+ const result = healthCheckListener();
37
+ if (result.healthy) {
38
+ res.writeHead(200);
39
+ res.end(result.message);
40
+ } else {
41
+ res.writeHead(503);
42
+ res.end(result.message);
43
+ }
41
44
  } else if (req.url === "/worker") {
42
45
  res.writeHead(200, { "Content-Type": "application/json" });
43
46
  res.end(JSON.stringify(workerListener()));
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/http_server.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type IncomingMessage, type Server, type ServerResponse, createServer } from 'node:http';\nimport { log } from './log.js';\n\nconst healthCheck = async (res: ServerResponse) => {\n res.writeHead(200);\n res.end('OK');\n};\n\ninterface WorkerResponse {\n agent_name: string;\n worker_type: string;\n active_jobs: number;\n sdk_version: string;\n project_type: string;\n}\n\nexport class HTTPServer {\n host: string;\n port: number;\n app: Server;\n #logger = log();\n\n constructor(host: string, port: number, workerListener: () => WorkerResponse) {\n this.host = host;\n this.port = port;\n\n this.app = createServer((req: IncomingMessage, res: ServerResponse) => {\n if (req.url === '/') {\n healthCheck(res);\n } else if (req.url === '/worker') {\n res.writeHead(200, { 'Content-Type': 'application/json' });\n res.end(JSON.stringify(workerListener()));\n } else {\n res.writeHead(404);\n res.end('not found');\n }\n });\n }\n\n async run(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.listen(this.port, this.host, (err?: Error) => {\n if (err) reject(err);\n const address = this.app.address();\n if (typeof address! !== 'string') {\n this.#logger.info(`Server is listening on port ${address!.port}`);\n }\n resolve();\n });\n });\n }\n\n async close(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.close((err?: Error) => {\n if (err) reject(err);\n resolve();\n });\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,uBAAqF;AACrF,iBAAoB;AAEpB,MAAM,cAAc,OAAO,QAAwB;AACjD,MAAI,UAAU,GAAG;AACjB,MAAI,IAAI,IAAI;AACd;AAUO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,MAAc,MAAc,gBAAsC;AAC5E,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,UAAM,+BAAa,CAAC,KAAsB,QAAwB;AACrE,UAAI,IAAI,QAAQ,KAAK;AACnB,oBAAY,GAAG;AAAA,MACjB,WAAW,IAAI,QAAQ,WAAW;AAChC,YAAI,UAAU,KAAK,EAAE,gBAAgB,mBAAmB,CAAC;AACzD,YAAI,IAAI,KAAK,UAAU,eAAe,CAAC,CAAC;AAAA,MAC1C,OAAO;AACL,YAAI,UAAU,GAAG;AACjB,YAAI,IAAI,WAAW;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,MAAqB;AACzB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,QAAgB;AACrD,YAAI,IAAK,QAAO,GAAG;AACnB,cAAM,UAAU,KAAK,IAAI,QAAQ;AACjC,YAAI,OAAO,YAAa,UAAU;AAChC,eAAK,QAAQ,KAAK,+BAA+B,QAAS,IAAI,EAAE;AAAA,QAClE;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAuB;AAC3B,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,MAAM,CAAC,QAAgB;AAC9B,YAAI,IAAK,QAAO,GAAG;AACnB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/http_server.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type IncomingMessage, type Server, type ServerResponse, createServer } from 'node:http';\nimport { log } from './log.js';\n\nexport interface HealthCheckResult {\n healthy: boolean;\n message: string;\n}\n\ninterface WorkerResponse {\n agent_name: string;\n worker_type: string;\n active_jobs: number;\n sdk_version: string;\n project_type: string;\n}\n\nexport class HTTPServer {\n host: string;\n port: number;\n app: Server;\n #logger = log();\n\n constructor(\n host: string,\n port: number,\n healthCheckListener: () => HealthCheckResult,\n workerListener: () => WorkerResponse,\n ) {\n this.host = host;\n this.port = port;\n\n this.app = createServer((req: IncomingMessage, res: ServerResponse) => {\n if (req.url === '/') {\n const result = healthCheckListener();\n if (result.healthy) {\n res.writeHead(200);\n res.end(result.message);\n } else {\n res.writeHead(503);\n res.end(result.message);\n }\n } else if (req.url === '/worker') {\n res.writeHead(200, { 'Content-Type': 'application/json' });\n res.end(JSON.stringify(workerListener()));\n } else {\n res.writeHead(404);\n res.end('not found');\n }\n });\n }\n\n async run(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.listen(this.port, this.host, (err?: Error) => {\n if (err) reject(err);\n const address = this.app.address();\n if (typeof address! !== 'string') {\n this.#logger.info(`Server is listening on port ${address!.port}`);\n }\n resolve();\n });\n });\n }\n\n async close(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.close((err?: Error) => {\n if (err) reject(err);\n resolve();\n });\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,uBAAqF;AACrF,iBAAoB;AAeb,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YACE,MACA,MACA,qBACA,gBACA;AACA,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,UAAM,+BAAa,CAAC,KAAsB,QAAwB;AACrE,UAAI,IAAI,QAAQ,KAAK;AACnB,cAAM,SAAS,oBAAoB;AACnC,YAAI,OAAO,SAAS;AAClB,cAAI,UAAU,GAAG;AACjB,cAAI,IAAI,OAAO,OAAO;AAAA,QACxB,OAAO;AACL,cAAI,UAAU,GAAG;AACjB,cAAI,IAAI,OAAO,OAAO;AAAA,QACxB;AAAA,MACF,WAAW,IAAI,QAAQ,WAAW;AAChC,YAAI,UAAU,KAAK,EAAE,gBAAgB,mBAAmB,CAAC;AACzD,YAAI,IAAI,KAAK,UAAU,eAAe,CAAC,CAAC;AAAA,MAC1C,OAAO;AACL,YAAI,UAAU,GAAG;AACjB,YAAI,IAAI,WAAW;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,MAAqB;AACzB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,QAAgB;AACrD,YAAI,IAAK,QAAO,GAAG;AACnB,cAAM,UAAU,KAAK,IAAI,QAAQ;AACjC,YAAI,OAAO,YAAa,UAAU;AAChC,eAAK,QAAQ,KAAK,+BAA+B,QAAS,IAAI,EAAE;AAAA,QAClE;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAuB;AAC3B,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,MAAM,CAAC,QAAgB;AAC9B,YAAI,IAAK,QAAO,GAAG;AACnB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":[]}
@@ -1,5 +1,9 @@
1
1
  /// <reference types="node" resolution-mode="require"/>
2
2
  import { type Server } from 'node:http';
3
+ export interface HealthCheckResult {
4
+ healthy: boolean;
5
+ message: string;
6
+ }
3
7
  interface WorkerResponse {
4
8
  agent_name: string;
5
9
  worker_type: string;
@@ -12,7 +16,7 @@ export declare class HTTPServer {
12
16
  host: string;
13
17
  port: number;
14
18
  app: Server;
15
- constructor(host: string, port: number, workerListener: () => WorkerResponse);
19
+ constructor(host: string, port: number, healthCheckListener: () => HealthCheckResult, workerListener: () => WorkerResponse);
16
20
  run(): Promise<void>;
17
21
  close(): Promise<void>;
18
22
  }
@@ -1,5 +1,9 @@
1
1
  /// <reference types="node" resolution-mode="require"/>
2
2
  import { type Server } from 'node:http';
3
+ export interface HealthCheckResult {
4
+ healthy: boolean;
5
+ message: string;
6
+ }
3
7
  interface WorkerResponse {
4
8
  agent_name: string;
5
9
  worker_type: string;
@@ -12,7 +16,7 @@ export declare class HTTPServer {
12
16
  host: string;
13
17
  port: number;
14
18
  app: Server;
15
- constructor(host: string, port: number, workerListener: () => WorkerResponse);
19
+ constructor(host: string, port: number, healthCheckListener: () => HealthCheckResult, workerListener: () => WorkerResponse);
16
20
  run(): Promise<void>;
17
21
  close(): Promise<void>;
18
22
  }
@@ -1 +1 @@
1
- {"version":3,"file":"http_server.d.ts","sourceRoot":"","sources":["../src/http_server.ts"],"names":[],"mappings":";AAGA,OAAO,EAAwB,KAAK,MAAM,EAAqC,MAAM,WAAW,CAAC;AAQjG,UAAU,cAAc;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,UAAU;;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;gBAGA,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,cAAc;IAiBtE,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAapB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAQ7B"}
1
+ {"version":3,"file":"http_server.d.ts","sourceRoot":"","sources":["../src/http_server.ts"],"names":[],"mappings":";AAGA,OAAO,EAAwB,KAAK,MAAM,EAAqC,MAAM,WAAW,CAAC;AAGjG,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,cAAc;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,UAAU;;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;gBAIV,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,EACZ,mBAAmB,EAAE,MAAM,iBAAiB,EAC5C,cAAc,EAAE,MAAM,cAAc;IAyBhC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAapB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAQ7B"}
@@ -1,20 +1,23 @@
1
1
  import { createServer } from "node:http";
2
2
  import { log } from "./log.js";
3
- const healthCheck = async (res) => {
4
- res.writeHead(200);
5
- res.end("OK");
6
- };
7
3
  class HTTPServer {
8
4
  host;
9
5
  port;
10
6
  app;
11
7
  #logger = log();
12
- constructor(host, port, workerListener) {
8
+ constructor(host, port, healthCheckListener, workerListener) {
13
9
  this.host = host;
14
10
  this.port = port;
15
11
  this.app = createServer((req, res) => {
16
12
  if (req.url === "/") {
17
- healthCheck(res);
13
+ const result = healthCheckListener();
14
+ if (result.healthy) {
15
+ res.writeHead(200);
16
+ res.end(result.message);
17
+ } else {
18
+ res.writeHead(503);
19
+ res.end(result.message);
20
+ }
18
21
  } else if (req.url === "/worker") {
19
22
  res.writeHead(200, { "Content-Type": "application/json" });
20
23
  res.end(JSON.stringify(workerListener()));
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/http_server.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type IncomingMessage, type Server, type ServerResponse, createServer } from 'node:http';\nimport { log } from './log.js';\n\nconst healthCheck = async (res: ServerResponse) => {\n res.writeHead(200);\n res.end('OK');\n};\n\ninterface WorkerResponse {\n agent_name: string;\n worker_type: string;\n active_jobs: number;\n sdk_version: string;\n project_type: string;\n}\n\nexport class HTTPServer {\n host: string;\n port: number;\n app: Server;\n #logger = log();\n\n constructor(host: string, port: number, workerListener: () => WorkerResponse) {\n this.host = host;\n this.port = port;\n\n this.app = createServer((req: IncomingMessage, res: ServerResponse) => {\n if (req.url === '/') {\n healthCheck(res);\n } else if (req.url === '/worker') {\n res.writeHead(200, { 'Content-Type': 'application/json' });\n res.end(JSON.stringify(workerListener()));\n } else {\n res.writeHead(404);\n res.end('not found');\n }\n });\n }\n\n async run(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.listen(this.port, this.host, (err?: Error) => {\n if (err) reject(err);\n const address = this.app.address();\n if (typeof address! !== 'string') {\n this.#logger.info(`Server is listening on port ${address!.port}`);\n }\n resolve();\n });\n });\n }\n\n async close(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.close((err?: Error) => {\n if (err) reject(err);\n resolve();\n });\n });\n }\n}\n"],"mappings":"AAGA,SAAiE,oBAAoB;AACrF,SAAS,WAAW;AAEpB,MAAM,cAAc,OAAO,QAAwB;AACjD,MAAI,UAAU,GAAG;AACjB,MAAI,IAAI,IAAI;AACd;AAUO,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,MAAc,MAAc,gBAAsC;AAC5E,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,MAAM,aAAa,CAAC,KAAsB,QAAwB;AACrE,UAAI,IAAI,QAAQ,KAAK;AACnB,oBAAY,GAAG;AAAA,MACjB,WAAW,IAAI,QAAQ,WAAW;AAChC,YAAI,UAAU,KAAK,EAAE,gBAAgB,mBAAmB,CAAC;AACzD,YAAI,IAAI,KAAK,UAAU,eAAe,CAAC,CAAC;AAAA,MAC1C,OAAO;AACL,YAAI,UAAU,GAAG;AACjB,YAAI,IAAI,WAAW;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,MAAqB;AACzB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,QAAgB;AACrD,YAAI,IAAK,QAAO,GAAG;AACnB,cAAM,UAAU,KAAK,IAAI,QAAQ;AACjC,YAAI,OAAO,YAAa,UAAU;AAChC,eAAK,QAAQ,KAAK,+BAA+B,QAAS,IAAI,EAAE;AAAA,QAClE;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAuB;AAC3B,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,MAAM,CAAC,QAAgB;AAC9B,YAAI,IAAK,QAAO,GAAG;AACnB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/http_server.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type IncomingMessage, type Server, type ServerResponse, createServer } from 'node:http';\nimport { log } from './log.js';\n\nexport interface HealthCheckResult {\n healthy: boolean;\n message: string;\n}\n\ninterface WorkerResponse {\n agent_name: string;\n worker_type: string;\n active_jobs: number;\n sdk_version: string;\n project_type: string;\n}\n\nexport class HTTPServer {\n host: string;\n port: number;\n app: Server;\n #logger = log();\n\n constructor(\n host: string,\n port: number,\n healthCheckListener: () => HealthCheckResult,\n workerListener: () => WorkerResponse,\n ) {\n this.host = host;\n this.port = port;\n\n this.app = createServer((req: IncomingMessage, res: ServerResponse) => {\n if (req.url === '/') {\n const result = healthCheckListener();\n if (result.healthy) {\n res.writeHead(200);\n res.end(result.message);\n } else {\n res.writeHead(503);\n res.end(result.message);\n }\n } else if (req.url === '/worker') {\n res.writeHead(200, { 'Content-Type': 'application/json' });\n res.end(JSON.stringify(workerListener()));\n } else {\n res.writeHead(404);\n res.end('not found');\n }\n });\n }\n\n async run(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.listen(this.port, this.host, (err?: Error) => {\n if (err) reject(err);\n const address = this.app.address();\n if (typeof address! !== 'string') {\n this.#logger.info(`Server is listening on port ${address!.port}`);\n }\n resolve();\n });\n });\n }\n\n async close(): Promise<void> {\n return new Promise((resolve, reject) => {\n this.app.close((err?: Error) => {\n if (err) reject(err);\n resolve();\n });\n });\n }\n}\n"],"mappings":"AAGA,SAAiE,oBAAoB;AACrF,SAAS,WAAW;AAeb,MAAM,WAAW;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YACE,MACA,MACA,qBACA,gBACA;AACA,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,MAAM,aAAa,CAAC,KAAsB,QAAwB;AACrE,UAAI,IAAI,QAAQ,KAAK;AACnB,cAAM,SAAS,oBAAoB;AACnC,YAAI,OAAO,SAAS;AAClB,cAAI,UAAU,GAAG;AACjB,cAAI,IAAI,OAAO,OAAO;AAAA,QACxB,OAAO;AACL,cAAI,UAAU,GAAG;AACjB,cAAI,IAAI,OAAO,OAAO;AAAA,QACxB;AAAA,MACF,WAAW,IAAI,QAAQ,WAAW;AAChC,YAAI,UAAU,KAAK,EAAE,gBAAgB,mBAAmB,CAAC;AACzD,YAAI,IAAI,KAAK,UAAU,eAAe,CAAC,CAAC;AAAA,MAC1C,OAAO;AACL,YAAI,UAAU,GAAG;AACjB,YAAI,IAAI,WAAW;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,MAAqB;AACzB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,OAAO,KAAK,MAAM,KAAK,MAAM,CAAC,QAAgB;AACrD,YAAI,IAAK,QAAO,GAAG;AACnB,cAAM,UAAU,KAAK,IAAI,QAAQ;AACjC,YAAI,OAAO,YAAa,UAAU;AAChC,eAAK,QAAQ,KAAK,+BAA+B,QAAS,IAAI,EAAE;AAAA,QAClE;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAuB;AAC3B,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,MAAM,CAAC,QAAgB;AAC9B,YAAI,IAAK,QAAO,GAAG;AACnB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":[]}
@@ -61,6 +61,10 @@ class SupervisedProc {
61
61
  get started() {
62
62
  return this.#started;
63
63
  }
64
+ get isAlive() {
65
+ var _a;
66
+ return this.#started && !this.#closing && !!((_a = this.proc) == null ? void 0 : _a.connected);
67
+ }
64
68
  get runningJob() {
65
69
  return this.#runningJob;
66
70
  }
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/ipc/supervised_proc.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ChildProcess } from 'node:child_process';\nimport { once } from 'node:events';\nimport pidusage from 'pidusage';\nimport type { RunningJobInfo } from '../job.js';\nimport { log, loggerOptions } from '../log.js';\nimport { Future } from '../utils.js';\nimport type { IPCMessage } from './message.js';\n\nexport interface ProcOpts {\n initializeTimeout: number;\n closeTimeout: number;\n memoryWarnMB: number;\n memoryLimitMB: number;\n pingInterval: number;\n pingTimeout: number;\n highPingThreshold: number;\n}\n\nexport abstract class SupervisedProc {\n #opts: ProcOpts;\n #started = false;\n #closing = false;\n #runningJob?: RunningJobInfo = undefined;\n proc?: ChildProcess;\n #pingInterval?: ReturnType<typeof setInterval>;\n #memoryMonitorInterval?: ReturnType<typeof setInterval>;\n #pongTimeout?: ReturnType<typeof setTimeout>;\n protected init = new Future();\n #join = new Future();\n #logger = log().child({ runningJob: this.#runningJob });\n\n constructor(\n initializeTimeout: number,\n closeTimeout: number,\n memoryWarnMB: number,\n memoryLimitMB: number,\n pingInterval: number,\n pingTimeout: number,\n highPingThreshold: number,\n ) {\n this.#opts = {\n initializeTimeout,\n closeTimeout,\n memoryWarnMB,\n memoryLimitMB,\n pingInterval,\n pingTimeout,\n highPingThreshold,\n };\n }\n\n abstract createProcess(): ChildProcess;\n abstract mainTask(child: ChildProcess): Promise<void>;\n\n get started(): boolean {\n return this.#started;\n }\n\n get runningJob(): RunningJobInfo | undefined {\n return this.#runningJob;\n }\n\n async start() {\n if (this.#started) {\n throw new Error('runner already started');\n } else if (this.#closing) {\n throw new Error('runner is closed');\n }\n\n this.proc = this.createProcess();\n\n this.#started = true;\n this.run();\n }\n\n async run() {\n await this.init.await;\n\n this.#pingInterval = setInterval(() => {\n if (this.proc?.connected) {\n this.proc.send({ case: 'pingRequest', value: { timestamp: Date.now() } });\n }\n }, this.#opts.pingInterval);\n\n this.#pongTimeout = setTimeout(() => {\n this.#logger.warn('job is unresponsive');\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n this.proc!.kill();\n this.#join.resolve();\n }, this.#opts.pingTimeout);\n\n this.#memoryMonitorInterval = setInterval(async () => {\n const memoryMB = await this.getChildMemoryUsageMB();\n if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {\n this.#logger\n .child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })\n .error('process exceeded memory limit, killing process');\n this.close();\n } else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {\n this.#logger\n .child({\n memoryUsageMB: memoryMB,\n memoryWarnMB: this.#opts.memoryWarnMB,\n memoryLimitMB: this.#opts.memoryLimitMB,\n })\n .warn('process memory usage is high');\n }\n }, 5000);\n\n const listener = (msg: IPCMessage) => {\n switch (msg.case) {\n case 'pongResponse': {\n const delay = Date.now() - msg.value.timestamp;\n if (delay > this.#opts.highPingThreshold) {\n this.#logger.child({ delay }).warn('job executor is unresponsive');\n }\n this.#pongTimeout?.refresh();\n break;\n }\n case 'exiting': {\n this.#logger.child({ reason: msg.value.reason }).debug('job exiting');\n break;\n }\n case 'done': {\n this.#closing = true;\n this.proc!.off('message', listener);\n break;\n }\n }\n };\n this.proc!.on('message', listener);\n this.proc!.on('error', (err) => {\n if (this.#closing) return;\n this.#logger\n .child({ err })\n .warn('job process exited unexpectedly; this likely means the error above caused a crash');\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.proc!.on('exit', () => {\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.mainTask(this.proc!);\n\n await this.#join.await;\n }\n\n async join() {\n if (!this.#started) {\n throw new Error('runner not started');\n }\n\n await this.#join.await;\n }\n\n async initialize() {\n const timer = setTimeout(() => {\n this.init.reject(new Error('runner initialization timed out'));\n }, this.#opts.initializeTimeout);\n if (!this.proc?.connected) {\n this.init.reject(new Error('process not connected'));\n return;\n }\n this.proc.send({\n case: 'initializeRequest',\n value: {\n loggerOptions,\n pingInterval: this.#opts.pingInterval,\n pingTimeout: this.#opts.pingTimeout,\n highPingThreshold: this.#opts.highPingThreshold,\n },\n });\n await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {\n clearTimeout(timer);\n if (msg!.case !== 'initializeResponse') {\n throw new Error('first message must be InitializeResponse');\n }\n });\n this.init.resolve();\n }\n\n async close() {\n if (!this.#started) {\n return;\n }\n this.#closing = true;\n\n if (this.proc?.connected) {\n this.proc.send({ case: 'shutdownRequest' });\n }\n\n const timer = setTimeout(() => {\n this.#logger.error('job shutdown is taking too much time');\n this.proc!.kill();\n }, this.#opts.closeTimeout);\n await this.#join.await.then(() => {\n clearTimeout(timer);\n this.clearTimers();\n });\n }\n\n async launchJob(info: RunningJobInfo) {\n if (this.#runningJob) {\n throw new Error('executor already has a running job');\n }\n if (!this.proc?.connected) {\n throw new Error('process not connected');\n }\n this.#runningJob = info;\n this.proc.send({ case: 'startJobRequest', value: { runningJob: info } });\n }\n\n private async getChildMemoryUsageMB(): Promise<number> {\n const pid = this.proc?.pid;\n if (!pid) {\n return 0;\n }\n try {\n const stats = await pidusage(pid);\n return stats.memory / (1024 * 1024);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === 'ENOENT' || code === 'ESRCH') {\n return 0;\n }\n throw err;\n }\n }\n\n private clearTimers() {\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n clearInterval(this.#memoryMonitorInterval);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,yBAAqB;AACrB,sBAAqB;AAErB,iBAAmC;AACnC,mBAAuB;AAahB,MAAe,eAAe;AAAA,EACnC;AAAA,EACA,WAAW;AAAA,EACX,WAAW;AAAA,EACX,cAA+B;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACU,OAAO,IAAI,oBAAO;AAAA,EAC5B,QAAQ,IAAI,oBAAO;AAAA,EACnB,cAAU,gBAAI,EAAE,MAAM,EAAE,YAAY,KAAK,YAAY,CAAC;AAAA,EAEtD,YACE,mBACA,cACA,cACA,eACA,cACA,aACA,mBACA;AACA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAKA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAyC;AAC3C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C,WAAW,KAAK,UAAU;AACxB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,OAAO,KAAK,cAAc;AAE/B,SAAK,WAAW;AAChB,SAAK,IAAI;AAAA,EACX;AAAA,EAEA,MAAM,MAAM;AACV,UAAM,KAAK,KAAK;AAEhB,SAAK,gBAAgB,YAAY,MAAM;AAjF3C;AAkFM,WAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,aAAK,KAAK,KAAK,EAAE,MAAM,eAAe,OAAO,EAAE,WAAW,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,MAC1E;AAAA,IACF,GAAG,KAAK,MAAM,YAAY;AAE1B,SAAK,eAAe,WAAW,MAAM;AACnC,WAAK,QAAQ,KAAK,qBAAqB;AACvC,mBAAa,KAAK,YAAY;AAC9B,oBAAc,KAAK,aAAa;AAChC,WAAK,KAAM,KAAK;AAChB,WAAK,MAAM,QAAQ;AAAA,IACrB,GAAG,KAAK,MAAM,WAAW;AAEzB,SAAK,yBAAyB,YAAY,YAAY;AACpD,YAAM,WAAW,MAAM,KAAK,sBAAsB;AAClD,UAAI,KAAK,MAAM,gBAAgB,KAAK,WAAW,KAAK,MAAM,eAAe;AACvE,aAAK,QACF,MAAM,EAAE,eAAe,UAAU,eAAe,KAAK,MAAM,cAAc,CAAC,EAC1E,MAAM,gDAAgD;AACzD,aAAK,MAAM;AAAA,MACb,WAAW,KAAK,MAAM,eAAe,KAAK,WAAW,KAAK,MAAM,cAAc;AAC5E,aAAK,QACF,MAAM;AAAA,UACL,eAAe;AAAA,UACf,cAAc,KAAK,MAAM;AAAA,UACzB,eAAe,KAAK,MAAM;AAAA,QAC5B,CAAC,EACA,KAAK,8BAA8B;AAAA,MACxC;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,QAAoB;AAjH1C;AAkHM,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK,gBAAgB;AACnB,gBAAM,QAAQ,KAAK,IAAI,IAAI,IAAI,MAAM;AACrC,cAAI,QAAQ,KAAK,MAAM,mBAAmB;AACxC,iBAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AAAA,UACnE;AACA,qBAAK,iBAAL,mBAAmB;AACnB;AAAA,QACF;AAAA,QACA,KAAK,WAAW;AACd,eAAK,QAAQ,MAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,CAAC,EAAE,MAAM,aAAa;AACpE;AAAA,QACF;AAAA,QACA,KAAK,QAAQ;AACX,eAAK,WAAW;AAChB,eAAK,KAAM,IAAI,WAAW,QAAQ;AAClC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAM,GAAG,WAAW,QAAQ;AACjC,SAAK,KAAM,GAAG,SAAS,CAAC,QAAQ;AAC9B,UAAI,KAAK,SAAU;AACnB,WAAK,QACF,MAAM,EAAE,IAAI,CAAC,EACb,KAAK,mFAAmF;AAC3F,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,KAAM,GAAG,QAAQ,MAAM;AAC1B,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,SAAS,KAAK,IAAK;AAExB,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,OAAO;AACX,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAEA,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,aAAa;AAlKrB;AAmKI,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,KAAK,OAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,IAC/D,GAAG,KAAK,MAAM,iBAAiB;AAC/B,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,WAAK,KAAK,OAAO,IAAI,MAAM,uBAAuB,CAAC;AACnD;AAAA,IACF;AACA,SAAK,KAAK,KAAK;AAAA,MACb,MAAM;AAAA,MACN,OAAO;AAAA,QACL;AAAA,QACA,cAAc,KAAK,MAAM;AAAA,QACzB,aAAa,KAAK,MAAM;AAAA,QACxB,mBAAmB,KAAK,MAAM;AAAA,MAChC;AAAA,IACF,CAAC;AACD,cAAM,yBAAK,KAAK,MAAO,SAAS,EAAE,KAAK,CAAC,CAAC,GAAG,MAAoB;AAC9D,mBAAa,KAAK;AAClB,UAAI,IAAK,SAAS,sBAAsB;AACtC,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AAAA,IACF,CAAC;AACD,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AA5LhB;AA6LI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AACA,SAAK,WAAW;AAEhB,SAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,WAAK,KAAK,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAAA,IAC5C;AAEA,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,QAAQ,MAAM,sCAAsC;AACzD,WAAK,KAAM,KAAK;AAAA,IAClB,GAAG,KAAK,MAAM,YAAY;AAC1B,UAAM,KAAK,MAAM,MAAM,KAAK,MAAM;AAChC,mBAAa,KAAK;AAClB,WAAK,YAAY;AAAA,IACnB,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAU,MAAsB;AAhNxC;AAiNI,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AACA,SAAK,cAAc;AACnB,SAAK,KAAK,KAAK,EAAE,MAAM,mBAAmB,OAAO,EAAE,YAAY,KAAK,EAAE,CAAC;AAAA,EACzE;AAAA,EAEA,MAAc,wBAAyC;AA3NzD;AA4NI,UAAM,OAAM,UAAK,SAAL,mBAAW;AACvB,QAAI,CAAC,KAAK;AACR,aAAO;AAAA,IACT;AACA,QAAI;AACF,YAAM,QAAQ,UAAM,gBAAAA,SAAS,GAAG;AAChC,aAAO,MAAM,UAAU,OAAO;AAAA,IAChC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,YAAY,SAAS,SAAS;AACzC,eAAO;AAAA,MACT;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,iBAAa,KAAK,YAAY;AAC9B,kBAAc,KAAK,aAAa;AAChC,kBAAc,KAAK,sBAAsB;AAAA,EAC3C;AACF;","names":["pidusage"]}
1
+ {"version":3,"sources":["../../src/ipc/supervised_proc.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ChildProcess } from 'node:child_process';\nimport { once } from 'node:events';\nimport pidusage from 'pidusage';\nimport type { RunningJobInfo } from '../job.js';\nimport { log, loggerOptions } from '../log.js';\nimport { Future } from '../utils.js';\nimport type { IPCMessage } from './message.js';\n\nexport interface ProcOpts {\n initializeTimeout: number;\n closeTimeout: number;\n memoryWarnMB: number;\n memoryLimitMB: number;\n pingInterval: number;\n pingTimeout: number;\n highPingThreshold: number;\n}\n\nexport abstract class SupervisedProc {\n #opts: ProcOpts;\n #started = false;\n #closing = false;\n #runningJob?: RunningJobInfo = undefined;\n proc?: ChildProcess;\n #pingInterval?: ReturnType<typeof setInterval>;\n #memoryMonitorInterval?: ReturnType<typeof setInterval>;\n #pongTimeout?: ReturnType<typeof setTimeout>;\n protected init = new Future();\n #join = new Future();\n #logger = log().child({ runningJob: this.#runningJob });\n\n constructor(\n initializeTimeout: number,\n closeTimeout: number,\n memoryWarnMB: number,\n memoryLimitMB: number,\n pingInterval: number,\n pingTimeout: number,\n highPingThreshold: number,\n ) {\n this.#opts = {\n initializeTimeout,\n closeTimeout,\n memoryWarnMB,\n memoryLimitMB,\n pingInterval,\n pingTimeout,\n highPingThreshold,\n };\n }\n\n abstract createProcess(): ChildProcess;\n abstract mainTask(child: ChildProcess): Promise<void>;\n\n get started(): boolean {\n return this.#started;\n }\n\n get isAlive(): boolean {\n return this.#started && !this.#closing && !!this.proc?.connected;\n }\n\n get runningJob(): RunningJobInfo | undefined {\n return this.#runningJob;\n }\n\n async start() {\n if (this.#started) {\n throw new Error('runner already started');\n } else if (this.#closing) {\n throw new Error('runner is closed');\n }\n\n this.proc = this.createProcess();\n\n this.#started = true;\n this.run();\n }\n\n async run() {\n await this.init.await;\n\n this.#pingInterval = setInterval(() => {\n if (this.proc?.connected) {\n this.proc.send({ case: 'pingRequest', value: { timestamp: Date.now() } });\n }\n }, this.#opts.pingInterval);\n\n this.#pongTimeout = setTimeout(() => {\n this.#logger.warn('job is unresponsive');\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n this.proc!.kill();\n this.#join.resolve();\n }, this.#opts.pingTimeout);\n\n this.#memoryMonitorInterval = setInterval(async () => {\n const memoryMB = await this.getChildMemoryUsageMB();\n if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {\n this.#logger\n .child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })\n .error('process exceeded memory limit, killing process');\n this.close();\n } else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {\n this.#logger\n .child({\n memoryUsageMB: memoryMB,\n memoryWarnMB: this.#opts.memoryWarnMB,\n memoryLimitMB: this.#opts.memoryLimitMB,\n })\n .warn('process memory usage is high');\n }\n }, 5000);\n\n const listener = (msg: IPCMessage) => {\n switch (msg.case) {\n case 'pongResponse': {\n const delay = Date.now() - msg.value.timestamp;\n if (delay > this.#opts.highPingThreshold) {\n this.#logger.child({ delay }).warn('job executor is unresponsive');\n }\n this.#pongTimeout?.refresh();\n break;\n }\n case 'exiting': {\n this.#logger.child({ reason: msg.value.reason }).debug('job exiting');\n break;\n }\n case 'done': {\n this.#closing = true;\n this.proc!.off('message', listener);\n break;\n }\n }\n };\n this.proc!.on('message', listener);\n this.proc!.on('error', (err) => {\n if (this.#closing) return;\n this.#logger\n .child({ err })\n .warn('job process exited unexpectedly; this likely means the error above caused a crash');\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.proc!.on('exit', () => {\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.mainTask(this.proc!);\n\n await this.#join.await;\n }\n\n async join() {\n if (!this.#started) {\n throw new Error('runner not started');\n }\n\n await this.#join.await;\n }\n\n async initialize() {\n const timer = setTimeout(() => {\n this.init.reject(new Error('runner initialization timed out'));\n }, this.#opts.initializeTimeout);\n if (!this.proc?.connected) {\n this.init.reject(new Error('process not connected'));\n return;\n }\n this.proc.send({\n case: 'initializeRequest',\n value: {\n loggerOptions,\n pingInterval: this.#opts.pingInterval,\n pingTimeout: this.#opts.pingTimeout,\n highPingThreshold: this.#opts.highPingThreshold,\n },\n });\n await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {\n clearTimeout(timer);\n if (msg!.case !== 'initializeResponse') {\n throw new Error('first message must be InitializeResponse');\n }\n });\n this.init.resolve();\n }\n\n async close() {\n if (!this.#started) {\n return;\n }\n this.#closing = true;\n\n if (this.proc?.connected) {\n this.proc.send({ case: 'shutdownRequest' });\n }\n\n const timer = setTimeout(() => {\n this.#logger.error('job shutdown is taking too much time');\n this.proc!.kill();\n }, this.#opts.closeTimeout);\n await this.#join.await.then(() => {\n clearTimeout(timer);\n this.clearTimers();\n });\n }\n\n async launchJob(info: RunningJobInfo) {\n if (this.#runningJob) {\n throw new Error('executor already has a running job');\n }\n if (!this.proc?.connected) {\n throw new Error('process not connected');\n }\n this.#runningJob = info;\n this.proc.send({ case: 'startJobRequest', value: { runningJob: info } });\n }\n\n private async getChildMemoryUsageMB(): Promise<number> {\n const pid = this.proc?.pid;\n if (!pid) {\n return 0;\n }\n try {\n const stats = await pidusage(pid);\n return stats.memory / (1024 * 1024);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === 'ENOENT' || code === 'ESRCH') {\n return 0;\n }\n throw err;\n }\n }\n\n private clearTimers() {\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n clearInterval(this.#memoryMonitorInterval);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,yBAAqB;AACrB,sBAAqB;AAErB,iBAAmC;AACnC,mBAAuB;AAahB,MAAe,eAAe;AAAA,EACnC;AAAA,EACA,WAAW;AAAA,EACX,WAAW;AAAA,EACX,cAA+B;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACU,OAAO,IAAI,oBAAO;AAAA,EAC5B,QAAQ,IAAI,oBAAO;AAAA,EACnB,cAAU,gBAAI,EAAE,MAAM,EAAE,YAAY,KAAK,YAAY,CAAC;AAAA,EAEtD,YACE,mBACA,cACA,cACA,eACA,cACA,aACA,mBACA;AACA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAKA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAmB;AA7DzB;AA8DI,WAAO,KAAK,YAAY,CAAC,KAAK,YAAY,CAAC,GAAC,UAAK,SAAL,mBAAW;AAAA,EACzD;AAAA,EAEA,IAAI,aAAyC;AAC3C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C,WAAW,KAAK,UAAU;AACxB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,OAAO,KAAK,cAAc;AAE/B,SAAK,WAAW;AAChB,SAAK,IAAI;AAAA,EACX;AAAA,EAEA,MAAM,MAAM;AACV,UAAM,KAAK,KAAK;AAEhB,SAAK,gBAAgB,YAAY,MAAM;AArF3C;AAsFM,WAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,aAAK,KAAK,KAAK,EAAE,MAAM,eAAe,OAAO,EAAE,WAAW,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,MAC1E;AAAA,IACF,GAAG,KAAK,MAAM,YAAY;AAE1B,SAAK,eAAe,WAAW,MAAM;AACnC,WAAK,QAAQ,KAAK,qBAAqB;AACvC,mBAAa,KAAK,YAAY;AAC9B,oBAAc,KAAK,aAAa;AAChC,WAAK,KAAM,KAAK;AAChB,WAAK,MAAM,QAAQ;AAAA,IACrB,GAAG,KAAK,MAAM,WAAW;AAEzB,SAAK,yBAAyB,YAAY,YAAY;AACpD,YAAM,WAAW,MAAM,KAAK,sBAAsB;AAClD,UAAI,KAAK,MAAM,gBAAgB,KAAK,WAAW,KAAK,MAAM,eAAe;AACvE,aAAK,QACF,MAAM,EAAE,eAAe,UAAU,eAAe,KAAK,MAAM,cAAc,CAAC,EAC1E,MAAM,gDAAgD;AACzD,aAAK,MAAM;AAAA,MACb,WAAW,KAAK,MAAM,eAAe,KAAK,WAAW,KAAK,MAAM,cAAc;AAC5E,aAAK,QACF,MAAM;AAAA,UACL,eAAe;AAAA,UACf,cAAc,KAAK,MAAM;AAAA,UACzB,eAAe,KAAK,MAAM;AAAA,QAC5B,CAAC,EACA,KAAK,8BAA8B;AAAA,MACxC;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,QAAoB;AArH1C;AAsHM,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK,gBAAgB;AACnB,gBAAM,QAAQ,KAAK,IAAI,IAAI,IAAI,MAAM;AACrC,cAAI,QAAQ,KAAK,MAAM,mBAAmB;AACxC,iBAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AAAA,UACnE;AACA,qBAAK,iBAAL,mBAAmB;AACnB;AAAA,QACF;AAAA,QACA,KAAK,WAAW;AACd,eAAK,QAAQ,MAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,CAAC,EAAE,MAAM,aAAa;AACpE;AAAA,QACF;AAAA,QACA,KAAK,QAAQ;AACX,eAAK,WAAW;AAChB,eAAK,KAAM,IAAI,WAAW,QAAQ;AAClC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAM,GAAG,WAAW,QAAQ;AACjC,SAAK,KAAM,GAAG,SAAS,CAAC,QAAQ;AAC9B,UAAI,KAAK,SAAU;AACnB,WAAK,QACF,MAAM,EAAE,IAAI,CAAC,EACb,KAAK,mFAAmF;AAC3F,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,KAAM,GAAG,QAAQ,MAAM;AAC1B,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,SAAS,KAAK,IAAK;AAExB,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,OAAO;AACX,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAEA,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,aAAa;AAtKrB;AAuKI,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,KAAK,OAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,IAC/D,GAAG,KAAK,MAAM,iBAAiB;AAC/B,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,WAAK,KAAK,OAAO,IAAI,MAAM,uBAAuB,CAAC;AACnD;AAAA,IACF;AACA,SAAK,KAAK,KAAK;AAAA,MACb,MAAM;AAAA,MACN,OAAO;AAAA,QACL;AAAA,QACA,cAAc,KAAK,MAAM;AAAA,QACzB,aAAa,KAAK,MAAM;AAAA,QACxB,mBAAmB,KAAK,MAAM;AAAA,MAChC;AAAA,IACF,CAAC;AACD,cAAM,yBAAK,KAAK,MAAO,SAAS,EAAE,KAAK,CAAC,CAAC,GAAG,MAAoB;AAC9D,mBAAa,KAAK;AAClB,UAAI,IAAK,SAAS,sBAAsB;AACtC,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AAAA,IACF,CAAC;AACD,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AAhMhB;AAiMI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AACA,SAAK,WAAW;AAEhB,SAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,WAAK,KAAK,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAAA,IAC5C;AAEA,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,QAAQ,MAAM,sCAAsC;AACzD,WAAK,KAAM,KAAK;AAAA,IAClB,GAAG,KAAK,MAAM,YAAY;AAC1B,UAAM,KAAK,MAAM,MAAM,KAAK,MAAM;AAChC,mBAAa,KAAK;AAClB,WAAK,YAAY;AAAA,IACnB,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAU,MAAsB;AApNxC;AAqNI,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AACA,SAAK,cAAc;AACnB,SAAK,KAAK,KAAK,EAAE,MAAM,mBAAmB,OAAO,EAAE,YAAY,KAAK,EAAE,CAAC;AAAA,EACzE;AAAA,EAEA,MAAc,wBAAyC;AA/NzD;AAgOI,UAAM,OAAM,UAAK,SAAL,mBAAW;AACvB,QAAI,CAAC,KAAK;AACR,aAAO;AAAA,IACT;AACA,QAAI;AACF,YAAM,QAAQ,UAAM,gBAAAA,SAAS,GAAG;AAChC,aAAO,MAAM,UAAU,OAAO;AAAA,IAChC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,YAAY,SAAS,SAAS;AACzC,eAAO;AAAA,MACT;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,iBAAa,KAAK,YAAY;AAC9B,kBAAc,KAAK,aAAa;AAChC,kBAAc,KAAK,sBAAsB;AAAA,EAC3C;AACF;","names":["pidusage"]}
@@ -19,6 +19,7 @@ export declare abstract class SupervisedProc {
19
19
  abstract createProcess(): ChildProcess;
20
20
  abstract mainTask(child: ChildProcess): Promise<void>;
21
21
  get started(): boolean;
22
+ get isAlive(): boolean;
22
23
  get runningJob(): RunningJobInfo | undefined;
23
24
  start(): Promise<void>;
24
25
  run(): Promise<void>;
@@ -19,6 +19,7 @@ export declare abstract class SupervisedProc {
19
19
  abstract createProcess(): ChildProcess;
20
20
  abstract mainTask(child: ChildProcess): Promise<void>;
21
21
  get started(): boolean;
22
+ get isAlive(): boolean;
22
23
  get runningJob(): RunningJobInfo | undefined;
23
24
  start(): Promise<void>;
24
25
  run(): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"supervised_proc.d.ts","sourceRoot":"","sources":["../../src/ipc/supervised_proc.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAEhD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,QAAQ;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,8BAAsB,cAAc;;IAKlC,IAAI,CAAC,EAAE,YAAY,CAAC;IAIpB,SAAS,CAAC,IAAI,eAAgB;gBAK5B,iBAAiB,EAAE,MAAM,EACzB,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,MAAM,EACrB,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,iBAAiB,EAAE,MAAM;IAa3B,QAAQ,CAAC,aAAa,IAAI,YAAY;IACtC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAErD,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,UAAU,IAAI,cAAc,GAAG,SAAS,CAE3C;IAEK,KAAK;IAaL,GAAG;IA4EH,IAAI;IAQJ,UAAU;IA0BV,KAAK;IAoBL,SAAS,CAAC,IAAI,EAAE,cAAc;YAWtB,qBAAqB;IAiBnC,OAAO,CAAC,WAAW;CAKpB"}
1
+ {"version":3,"file":"supervised_proc.d.ts","sourceRoot":"","sources":["../../src/ipc/supervised_proc.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAEhD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,QAAQ;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,8BAAsB,cAAc;;IAKlC,IAAI,CAAC,EAAE,YAAY,CAAC;IAIpB,SAAS,CAAC,IAAI,eAAgB;gBAK5B,iBAAiB,EAAE,MAAM,EACzB,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,MAAM,EACrB,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,iBAAiB,EAAE,MAAM;IAa3B,QAAQ,CAAC,aAAa,IAAI,YAAY;IACtC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAErD,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,UAAU,IAAI,cAAc,GAAG,SAAS,CAE3C;IAEK,KAAK;IAaL,GAAG;IA4EH,IAAI;IAQJ,UAAU;IA0BV,KAAK;IAoBL,SAAS,CAAC,IAAI,EAAE,cAAc;YAWtB,qBAAqB;IAiBnC,OAAO,CAAC,WAAW;CAKpB"}
@@ -28,6 +28,10 @@ class SupervisedProc {
28
28
  get started() {
29
29
  return this.#started;
30
30
  }
31
+ get isAlive() {
32
+ var _a;
33
+ return this.#started && !this.#closing && !!((_a = this.proc) == null ? void 0 : _a.connected);
34
+ }
31
35
  get runningJob() {
32
36
  return this.#runningJob;
33
37
  }
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/ipc/supervised_proc.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ChildProcess } from 'node:child_process';\nimport { once } from 'node:events';\nimport pidusage from 'pidusage';\nimport type { RunningJobInfo } from '../job.js';\nimport { log, loggerOptions } from '../log.js';\nimport { Future } from '../utils.js';\nimport type { IPCMessage } from './message.js';\n\nexport interface ProcOpts {\n initializeTimeout: number;\n closeTimeout: number;\n memoryWarnMB: number;\n memoryLimitMB: number;\n pingInterval: number;\n pingTimeout: number;\n highPingThreshold: number;\n}\n\nexport abstract class SupervisedProc {\n #opts: ProcOpts;\n #started = false;\n #closing = false;\n #runningJob?: RunningJobInfo = undefined;\n proc?: ChildProcess;\n #pingInterval?: ReturnType<typeof setInterval>;\n #memoryMonitorInterval?: ReturnType<typeof setInterval>;\n #pongTimeout?: ReturnType<typeof setTimeout>;\n protected init = new Future();\n #join = new Future();\n #logger = log().child({ runningJob: this.#runningJob });\n\n constructor(\n initializeTimeout: number,\n closeTimeout: number,\n memoryWarnMB: number,\n memoryLimitMB: number,\n pingInterval: number,\n pingTimeout: number,\n highPingThreshold: number,\n ) {\n this.#opts = {\n initializeTimeout,\n closeTimeout,\n memoryWarnMB,\n memoryLimitMB,\n pingInterval,\n pingTimeout,\n highPingThreshold,\n };\n }\n\n abstract createProcess(): ChildProcess;\n abstract mainTask(child: ChildProcess): Promise<void>;\n\n get started(): boolean {\n return this.#started;\n }\n\n get runningJob(): RunningJobInfo | undefined {\n return this.#runningJob;\n }\n\n async start() {\n if (this.#started) {\n throw new Error('runner already started');\n } else if (this.#closing) {\n throw new Error('runner is closed');\n }\n\n this.proc = this.createProcess();\n\n this.#started = true;\n this.run();\n }\n\n async run() {\n await this.init.await;\n\n this.#pingInterval = setInterval(() => {\n if (this.proc?.connected) {\n this.proc.send({ case: 'pingRequest', value: { timestamp: Date.now() } });\n }\n }, this.#opts.pingInterval);\n\n this.#pongTimeout = setTimeout(() => {\n this.#logger.warn('job is unresponsive');\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n this.proc!.kill();\n this.#join.resolve();\n }, this.#opts.pingTimeout);\n\n this.#memoryMonitorInterval = setInterval(async () => {\n const memoryMB = await this.getChildMemoryUsageMB();\n if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {\n this.#logger\n .child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })\n .error('process exceeded memory limit, killing process');\n this.close();\n } else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {\n this.#logger\n .child({\n memoryUsageMB: memoryMB,\n memoryWarnMB: this.#opts.memoryWarnMB,\n memoryLimitMB: this.#opts.memoryLimitMB,\n })\n .warn('process memory usage is high');\n }\n }, 5000);\n\n const listener = (msg: IPCMessage) => {\n switch (msg.case) {\n case 'pongResponse': {\n const delay = Date.now() - msg.value.timestamp;\n if (delay > this.#opts.highPingThreshold) {\n this.#logger.child({ delay }).warn('job executor is unresponsive');\n }\n this.#pongTimeout?.refresh();\n break;\n }\n case 'exiting': {\n this.#logger.child({ reason: msg.value.reason }).debug('job exiting');\n break;\n }\n case 'done': {\n this.#closing = true;\n this.proc!.off('message', listener);\n break;\n }\n }\n };\n this.proc!.on('message', listener);\n this.proc!.on('error', (err) => {\n if (this.#closing) return;\n this.#logger\n .child({ err })\n .warn('job process exited unexpectedly; this likely means the error above caused a crash');\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.proc!.on('exit', () => {\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.mainTask(this.proc!);\n\n await this.#join.await;\n }\n\n async join() {\n if (!this.#started) {\n throw new Error('runner not started');\n }\n\n await this.#join.await;\n }\n\n async initialize() {\n const timer = setTimeout(() => {\n this.init.reject(new Error('runner initialization timed out'));\n }, this.#opts.initializeTimeout);\n if (!this.proc?.connected) {\n this.init.reject(new Error('process not connected'));\n return;\n }\n this.proc.send({\n case: 'initializeRequest',\n value: {\n loggerOptions,\n pingInterval: this.#opts.pingInterval,\n pingTimeout: this.#opts.pingTimeout,\n highPingThreshold: this.#opts.highPingThreshold,\n },\n });\n await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {\n clearTimeout(timer);\n if (msg!.case !== 'initializeResponse') {\n throw new Error('first message must be InitializeResponse');\n }\n });\n this.init.resolve();\n }\n\n async close() {\n if (!this.#started) {\n return;\n }\n this.#closing = true;\n\n if (this.proc?.connected) {\n this.proc.send({ case: 'shutdownRequest' });\n }\n\n const timer = setTimeout(() => {\n this.#logger.error('job shutdown is taking too much time');\n this.proc!.kill();\n }, this.#opts.closeTimeout);\n await this.#join.await.then(() => {\n clearTimeout(timer);\n this.clearTimers();\n });\n }\n\n async launchJob(info: RunningJobInfo) {\n if (this.#runningJob) {\n throw new Error('executor already has a running job');\n }\n if (!this.proc?.connected) {\n throw new Error('process not connected');\n }\n this.#runningJob = info;\n this.proc.send({ case: 'startJobRequest', value: { runningJob: info } });\n }\n\n private async getChildMemoryUsageMB(): Promise<number> {\n const pid = this.proc?.pid;\n if (!pid) {\n return 0;\n }\n try {\n const stats = await pidusage(pid);\n return stats.memory / (1024 * 1024);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === 'ENOENT' || code === 'ESRCH') {\n return 0;\n }\n throw err;\n }\n }\n\n private clearTimers() {\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n clearInterval(this.#memoryMonitorInterval);\n }\n}\n"],"mappings":"AAIA,SAAS,YAAY;AACrB,OAAO,cAAc;AAErB,SAAS,KAAK,qBAAqB;AACnC,SAAS,cAAc;AAahB,MAAe,eAAe;AAAA,EACnC;AAAA,EACA,WAAW;AAAA,EACX,WAAW;AAAA,EACX,cAA+B;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACU,OAAO,IAAI,OAAO;AAAA,EAC5B,QAAQ,IAAI,OAAO;AAAA,EACnB,UAAU,IAAI,EAAE,MAAM,EAAE,YAAY,KAAK,YAAY,CAAC;AAAA,EAEtD,YACE,mBACA,cACA,cACA,eACA,cACA,aACA,mBACA;AACA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAKA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAyC;AAC3C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C,WAAW,KAAK,UAAU;AACxB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,OAAO,KAAK,cAAc;AAE/B,SAAK,WAAW;AAChB,SAAK,IAAI;AAAA,EACX;AAAA,EAEA,MAAM,MAAM;AACV,UAAM,KAAK,KAAK;AAEhB,SAAK,gBAAgB,YAAY,MAAM;AAjF3C;AAkFM,WAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,aAAK,KAAK,KAAK,EAAE,MAAM,eAAe,OAAO,EAAE,WAAW,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,MAC1E;AAAA,IACF,GAAG,KAAK,MAAM,YAAY;AAE1B,SAAK,eAAe,WAAW,MAAM;AACnC,WAAK,QAAQ,KAAK,qBAAqB;AACvC,mBAAa,KAAK,YAAY;AAC9B,oBAAc,KAAK,aAAa;AAChC,WAAK,KAAM,KAAK;AAChB,WAAK,MAAM,QAAQ;AAAA,IACrB,GAAG,KAAK,MAAM,WAAW;AAEzB,SAAK,yBAAyB,YAAY,YAAY;AACpD,YAAM,WAAW,MAAM,KAAK,sBAAsB;AAClD,UAAI,KAAK,MAAM,gBAAgB,KAAK,WAAW,KAAK,MAAM,eAAe;AACvE,aAAK,QACF,MAAM,EAAE,eAAe,UAAU,eAAe,KAAK,MAAM,cAAc,CAAC,EAC1E,MAAM,gDAAgD;AACzD,aAAK,MAAM;AAAA,MACb,WAAW,KAAK,MAAM,eAAe,KAAK,WAAW,KAAK,MAAM,cAAc;AAC5E,aAAK,QACF,MAAM;AAAA,UACL,eAAe;AAAA,UACf,cAAc,KAAK,MAAM;AAAA,UACzB,eAAe,KAAK,MAAM;AAAA,QAC5B,CAAC,EACA,KAAK,8BAA8B;AAAA,MACxC;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,QAAoB;AAjH1C;AAkHM,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK,gBAAgB;AACnB,gBAAM,QAAQ,KAAK,IAAI,IAAI,IAAI,MAAM;AACrC,cAAI,QAAQ,KAAK,MAAM,mBAAmB;AACxC,iBAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AAAA,UACnE;AACA,qBAAK,iBAAL,mBAAmB;AACnB;AAAA,QACF;AAAA,QACA,KAAK,WAAW;AACd,eAAK,QAAQ,MAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,CAAC,EAAE,MAAM,aAAa;AACpE;AAAA,QACF;AAAA,QACA,KAAK,QAAQ;AACX,eAAK,WAAW;AAChB,eAAK,KAAM,IAAI,WAAW,QAAQ;AAClC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAM,GAAG,WAAW,QAAQ;AACjC,SAAK,KAAM,GAAG,SAAS,CAAC,QAAQ;AAC9B,UAAI,KAAK,SAAU;AACnB,WAAK,QACF,MAAM,EAAE,IAAI,CAAC,EACb,KAAK,mFAAmF;AAC3F,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,KAAM,GAAG,QAAQ,MAAM;AAC1B,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,SAAS,KAAK,IAAK;AAExB,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,OAAO;AACX,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAEA,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,aAAa;AAlKrB;AAmKI,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,KAAK,OAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,IAC/D,GAAG,KAAK,MAAM,iBAAiB;AAC/B,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,WAAK,KAAK,OAAO,IAAI,MAAM,uBAAuB,CAAC;AACnD;AAAA,IACF;AACA,SAAK,KAAK,KAAK;AAAA,MACb,MAAM;AAAA,MACN,OAAO;AAAA,QACL;AAAA,QACA,cAAc,KAAK,MAAM;AAAA,QACzB,aAAa,KAAK,MAAM;AAAA,QACxB,mBAAmB,KAAK,MAAM;AAAA,MAChC;AAAA,IACF,CAAC;AACD,UAAM,KAAK,KAAK,MAAO,SAAS,EAAE,KAAK,CAAC,CAAC,GAAG,MAAoB;AAC9D,mBAAa,KAAK;AAClB,UAAI,IAAK,SAAS,sBAAsB;AACtC,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AAAA,IACF,CAAC;AACD,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AA5LhB;AA6LI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AACA,SAAK,WAAW;AAEhB,SAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,WAAK,KAAK,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAAA,IAC5C;AAEA,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,QAAQ,MAAM,sCAAsC;AACzD,WAAK,KAAM,KAAK;AAAA,IAClB,GAAG,KAAK,MAAM,YAAY;AAC1B,UAAM,KAAK,MAAM,MAAM,KAAK,MAAM;AAChC,mBAAa,KAAK;AAClB,WAAK,YAAY;AAAA,IACnB,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAU,MAAsB;AAhNxC;AAiNI,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AACA,SAAK,cAAc;AACnB,SAAK,KAAK,KAAK,EAAE,MAAM,mBAAmB,OAAO,EAAE,YAAY,KAAK,EAAE,CAAC;AAAA,EACzE;AAAA,EAEA,MAAc,wBAAyC;AA3NzD;AA4NI,UAAM,OAAM,UAAK,SAAL,mBAAW;AACvB,QAAI,CAAC,KAAK;AACR,aAAO;AAAA,IACT;AACA,QAAI;AACF,YAAM,QAAQ,MAAM,SAAS,GAAG;AAChC,aAAO,MAAM,UAAU,OAAO;AAAA,IAChC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,YAAY,SAAS,SAAS;AACzC,eAAO;AAAA,MACT;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,iBAAa,KAAK,YAAY;AAC9B,kBAAc,KAAK,aAAa;AAChC,kBAAc,KAAK,sBAAsB;AAAA,EAC3C;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/ipc/supervised_proc.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ChildProcess } from 'node:child_process';\nimport { once } from 'node:events';\nimport pidusage from 'pidusage';\nimport type { RunningJobInfo } from '../job.js';\nimport { log, loggerOptions } from '../log.js';\nimport { Future } from '../utils.js';\nimport type { IPCMessage } from './message.js';\n\nexport interface ProcOpts {\n initializeTimeout: number;\n closeTimeout: number;\n memoryWarnMB: number;\n memoryLimitMB: number;\n pingInterval: number;\n pingTimeout: number;\n highPingThreshold: number;\n}\n\nexport abstract class SupervisedProc {\n #opts: ProcOpts;\n #started = false;\n #closing = false;\n #runningJob?: RunningJobInfo = undefined;\n proc?: ChildProcess;\n #pingInterval?: ReturnType<typeof setInterval>;\n #memoryMonitorInterval?: ReturnType<typeof setInterval>;\n #pongTimeout?: ReturnType<typeof setTimeout>;\n protected init = new Future();\n #join = new Future();\n #logger = log().child({ runningJob: this.#runningJob });\n\n constructor(\n initializeTimeout: number,\n closeTimeout: number,\n memoryWarnMB: number,\n memoryLimitMB: number,\n pingInterval: number,\n pingTimeout: number,\n highPingThreshold: number,\n ) {\n this.#opts = {\n initializeTimeout,\n closeTimeout,\n memoryWarnMB,\n memoryLimitMB,\n pingInterval,\n pingTimeout,\n highPingThreshold,\n };\n }\n\n abstract createProcess(): ChildProcess;\n abstract mainTask(child: ChildProcess): Promise<void>;\n\n get started(): boolean {\n return this.#started;\n }\n\n get isAlive(): boolean {\n return this.#started && !this.#closing && !!this.proc?.connected;\n }\n\n get runningJob(): RunningJobInfo | undefined {\n return this.#runningJob;\n }\n\n async start() {\n if (this.#started) {\n throw new Error('runner already started');\n } else if (this.#closing) {\n throw new Error('runner is closed');\n }\n\n this.proc = this.createProcess();\n\n this.#started = true;\n this.run();\n }\n\n async run() {\n await this.init.await;\n\n this.#pingInterval = setInterval(() => {\n if (this.proc?.connected) {\n this.proc.send({ case: 'pingRequest', value: { timestamp: Date.now() } });\n }\n }, this.#opts.pingInterval);\n\n this.#pongTimeout = setTimeout(() => {\n this.#logger.warn('job is unresponsive');\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n this.proc!.kill();\n this.#join.resolve();\n }, this.#opts.pingTimeout);\n\n this.#memoryMonitorInterval = setInterval(async () => {\n const memoryMB = await this.getChildMemoryUsageMB();\n if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {\n this.#logger\n .child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })\n .error('process exceeded memory limit, killing process');\n this.close();\n } else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {\n this.#logger\n .child({\n memoryUsageMB: memoryMB,\n memoryWarnMB: this.#opts.memoryWarnMB,\n memoryLimitMB: this.#opts.memoryLimitMB,\n })\n .warn('process memory usage is high');\n }\n }, 5000);\n\n const listener = (msg: IPCMessage) => {\n switch (msg.case) {\n case 'pongResponse': {\n const delay = Date.now() - msg.value.timestamp;\n if (delay > this.#opts.highPingThreshold) {\n this.#logger.child({ delay }).warn('job executor is unresponsive');\n }\n this.#pongTimeout?.refresh();\n break;\n }\n case 'exiting': {\n this.#logger.child({ reason: msg.value.reason }).debug('job exiting');\n break;\n }\n case 'done': {\n this.#closing = true;\n this.proc!.off('message', listener);\n break;\n }\n }\n };\n this.proc!.on('message', listener);\n this.proc!.on('error', (err) => {\n if (this.#closing) return;\n this.#logger\n .child({ err })\n .warn('job process exited unexpectedly; this likely means the error above caused a crash');\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.proc!.on('exit', () => {\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.mainTask(this.proc!);\n\n await this.#join.await;\n }\n\n async join() {\n if (!this.#started) {\n throw new Error('runner not started');\n }\n\n await this.#join.await;\n }\n\n async initialize() {\n const timer = setTimeout(() => {\n this.init.reject(new Error('runner initialization timed out'));\n }, this.#opts.initializeTimeout);\n if (!this.proc?.connected) {\n this.init.reject(new Error('process not connected'));\n return;\n }\n this.proc.send({\n case: 'initializeRequest',\n value: {\n loggerOptions,\n pingInterval: this.#opts.pingInterval,\n pingTimeout: this.#opts.pingTimeout,\n highPingThreshold: this.#opts.highPingThreshold,\n },\n });\n await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {\n clearTimeout(timer);\n if (msg!.case !== 'initializeResponse') {\n throw new Error('first message must be InitializeResponse');\n }\n });\n this.init.resolve();\n }\n\n async close() {\n if (!this.#started) {\n return;\n }\n this.#closing = true;\n\n if (this.proc?.connected) {\n this.proc.send({ case: 'shutdownRequest' });\n }\n\n const timer = setTimeout(() => {\n this.#logger.error('job shutdown is taking too much time');\n this.proc!.kill();\n }, this.#opts.closeTimeout);\n await this.#join.await.then(() => {\n clearTimeout(timer);\n this.clearTimers();\n });\n }\n\n async launchJob(info: RunningJobInfo) {\n if (this.#runningJob) {\n throw new Error('executor already has a running job');\n }\n if (!this.proc?.connected) {\n throw new Error('process not connected');\n }\n this.#runningJob = info;\n this.proc.send({ case: 'startJobRequest', value: { runningJob: info } });\n }\n\n private async getChildMemoryUsageMB(): Promise<number> {\n const pid = this.proc?.pid;\n if (!pid) {\n return 0;\n }\n try {\n const stats = await pidusage(pid);\n return stats.memory / (1024 * 1024);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === 'ENOENT' || code === 'ESRCH') {\n return 0;\n }\n throw err;\n }\n }\n\n private clearTimers() {\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n clearInterval(this.#memoryMonitorInterval);\n }\n}\n"],"mappings":"AAIA,SAAS,YAAY;AACrB,OAAO,cAAc;AAErB,SAAS,KAAK,qBAAqB;AACnC,SAAS,cAAc;AAahB,MAAe,eAAe;AAAA,EACnC;AAAA,EACA,WAAW;AAAA,EACX,WAAW;AAAA,EACX,cAA+B;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACU,OAAO,IAAI,OAAO;AAAA,EAC5B,QAAQ,IAAI,OAAO;AAAA,EACnB,UAAU,IAAI,EAAE,MAAM,EAAE,YAAY,KAAK,YAAY,CAAC;AAAA,EAEtD,YACE,mBACA,cACA,cACA,eACA,cACA,aACA,mBACA;AACA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAKA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAmB;AA7DzB;AA8DI,WAAO,KAAK,YAAY,CAAC,KAAK,YAAY,CAAC,GAAC,UAAK,SAAL,mBAAW;AAAA,EACzD;AAAA,EAEA,IAAI,aAAyC;AAC3C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C,WAAW,KAAK,UAAU;AACxB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,OAAO,KAAK,cAAc;AAE/B,SAAK,WAAW;AAChB,SAAK,IAAI;AAAA,EACX;AAAA,EAEA,MAAM,MAAM;AACV,UAAM,KAAK,KAAK;AAEhB,SAAK,gBAAgB,YAAY,MAAM;AArF3C;AAsFM,WAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,aAAK,KAAK,KAAK,EAAE,MAAM,eAAe,OAAO,EAAE,WAAW,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,MAC1E;AAAA,IACF,GAAG,KAAK,MAAM,YAAY;AAE1B,SAAK,eAAe,WAAW,MAAM;AACnC,WAAK,QAAQ,KAAK,qBAAqB;AACvC,mBAAa,KAAK,YAAY;AAC9B,oBAAc,KAAK,aAAa;AAChC,WAAK,KAAM,KAAK;AAChB,WAAK,MAAM,QAAQ;AAAA,IACrB,GAAG,KAAK,MAAM,WAAW;AAEzB,SAAK,yBAAyB,YAAY,YAAY;AACpD,YAAM,WAAW,MAAM,KAAK,sBAAsB;AAClD,UAAI,KAAK,MAAM,gBAAgB,KAAK,WAAW,KAAK,MAAM,eAAe;AACvE,aAAK,QACF,MAAM,EAAE,eAAe,UAAU,eAAe,KAAK,MAAM,cAAc,CAAC,EAC1E,MAAM,gDAAgD;AACzD,aAAK,MAAM;AAAA,MACb,WAAW,KAAK,MAAM,eAAe,KAAK,WAAW,KAAK,MAAM,cAAc;AAC5E,aAAK,QACF,MAAM;AAAA,UACL,eAAe;AAAA,UACf,cAAc,KAAK,MAAM;AAAA,UACzB,eAAe,KAAK,MAAM;AAAA,QAC5B,CAAC,EACA,KAAK,8BAA8B;AAAA,MACxC;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,QAAoB;AArH1C;AAsHM,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK,gBAAgB;AACnB,gBAAM,QAAQ,KAAK,IAAI,IAAI,IAAI,MAAM;AACrC,cAAI,QAAQ,KAAK,MAAM,mBAAmB;AACxC,iBAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AAAA,UACnE;AACA,qBAAK,iBAAL,mBAAmB;AACnB;AAAA,QACF;AAAA,QACA,KAAK,WAAW;AACd,eAAK,QAAQ,MAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,CAAC,EAAE,MAAM,aAAa;AACpE;AAAA,QACF;AAAA,QACA,KAAK,QAAQ;AACX,eAAK,WAAW;AAChB,eAAK,KAAM,IAAI,WAAW,QAAQ;AAClC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAM,GAAG,WAAW,QAAQ;AACjC,SAAK,KAAM,GAAG,SAAS,CAAC,QAAQ;AAC9B,UAAI,KAAK,SAAU;AACnB,WAAK,QACF,MAAM,EAAE,IAAI,CAAC,EACb,KAAK,mFAAmF;AAC3F,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,KAAM,GAAG,QAAQ,MAAM;AAC1B,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,SAAS,KAAK,IAAK;AAExB,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,OAAO;AACX,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAEA,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,aAAa;AAtKrB;AAuKI,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,KAAK,OAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,IAC/D,GAAG,KAAK,MAAM,iBAAiB;AAC/B,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,WAAK,KAAK,OAAO,IAAI,MAAM,uBAAuB,CAAC;AACnD;AAAA,IACF;AACA,SAAK,KAAK,KAAK;AAAA,MACb,MAAM;AAAA,MACN,OAAO;AAAA,QACL;AAAA,QACA,cAAc,KAAK,MAAM;AAAA,QACzB,aAAa,KAAK,MAAM;AAAA,QACxB,mBAAmB,KAAK,MAAM;AAAA,MAChC;AAAA,IACF,CAAC;AACD,UAAM,KAAK,KAAK,MAAO,SAAS,EAAE,KAAK,CAAC,CAAC,GAAG,MAAoB;AAC9D,mBAAa,KAAK;AAClB,UAAI,IAAK,SAAS,sBAAsB;AACtC,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AAAA,IACF,CAAC;AACD,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AAhMhB;AAiMI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AACA,SAAK,WAAW;AAEhB,SAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,WAAK,KAAK,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAAA,IAC5C;AAEA,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,QAAQ,MAAM,sCAAsC;AACzD,WAAK,KAAM,KAAK;AAAA,IAClB,GAAG,KAAK,MAAM,YAAY;AAC1B,UAAM,KAAK,MAAM,MAAM,KAAK,MAAM;AAChC,mBAAa,KAAK;AAClB,WAAK,YAAY;AAAA,IACnB,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAU,MAAsB;AApNxC;AAqNI,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AACA,SAAK,cAAc;AACnB,SAAK,KAAK,KAAK,EAAE,MAAM,mBAAmB,OAAO,EAAE,YAAY,KAAK,EAAE,CAAC;AAAA,EACzE;AAAA,EAEA,MAAc,wBAAyC;AA/NzD;AAgOI,UAAM,OAAM,UAAK,SAAL,mBAAW;AACvB,QAAI,CAAC,KAAK;AACR,aAAO;AAAA,IACT;AACA,QAAI;AACF,YAAM,QAAQ,MAAM,SAAS,GAAG;AAChC,aAAO,MAAM,UAAU,OAAO;AAAA,IAChC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,YAAY,SAAS,SAAS;AACzC,eAAO;AAAA,MACT;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,iBAAa,KAAK,YAAY;AAC9B,kBAAc,KAAK,aAAa;AAChC,kBAAc,KAAK,sBAAsB;AAAA,EAC3C;AACF;","names":[]}
@@ -27,7 +27,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
27
27
  const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
28
28
  const starters = /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)/g;
29
29
  const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;
30
- const websites = /[.](com|net|org|io|gov|edu|me)/g;
30
+ const websites = /(\w+\.)+(com|net|org|io|gov|edu|me)/g;
31
31
  const digits = /([0-9])/g;
32
32
  const dots = /\.{2,}/g;
33
33
  if (retainFormat) {
@@ -36,7 +36,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
36
36
  text = text.replaceAll("\n", " ");
37
37
  }
38
38
  text = text.replaceAll(prefixes, "$1<prd>");
39
- text = text.replaceAll(websites, "<prd>$2");
39
+ text = text.replace(websites, (match) => match.replaceAll(".", "<prd>"));
40
40
  text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, "g"), "$1<prd>$2");
41
41
  text = text.replaceAll(dots, (match) => "<prd>".repeat(match.length));
42
42
  text = text.replaceAll("Ph.D.", "Ph<prd>D<prd>");
@@ -60,7 +60,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
60
60
  text = text.replaceAll('."', '".');
61
61
  text = text.replaceAll('!"', '"!');
62
62
  text = text.replaceAll('?"', '"?');
63
- text = text.replaceAll(".", ".<stop>");
63
+ text = text.replace(/\.(?=\s|$)/g, ".<stop>");
64
64
  text = text.replaceAll("?", "?<stop>");
65
65
  text = text.replaceAll("!", "!<stop>");
66
66
  text = text.replaceAll("<prd>", ".");
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /[.](com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n if (retainFormat) {\n text = text.replaceAll('\\n', '<nel><stop>');\n } else {\n text = text.replaceAll('\\n', ' ');\n }\n\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replaceAll(websites, '<prd>$2');\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replaceAll('.', '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n if (retainFormat) {\n text = text.replaceAll('<nel>', '\\n');\n }\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n const prePad = retainFormat ? '' : ' ';\n for (const match of split) {\n const sentence = retainFormat ? match : match.trim();\n if (!sentence) continue;\n\n buf += prePad + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(prePad.length), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(prePad.length), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAOO,MAAM,iBAAiB,CAC5B,MACA,YAAY,IACZ,eAAwB,UACO;AAC/B,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,MAAM,aAAa;AAAA,EAC5C,OAAO;AACL,WAAO,KAAK,WAAW,MAAM,GAAG;AAAA,EAClC;AAEA,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,SAAS,IAAI;AAAA,EACtC;AAEA,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,QAAM,SAAS,eAAe,KAAK;AACnC,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,eAAe,QAAQ,MAAM,KAAK;AACnD,QAAI,CAAC,SAAU;AAEf,WAAO,SAAS;AAChB,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,GAAG,CAAC;AACrD,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACnE;AAEA,SAAO;AACT;","names":[]}
1
+ {"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /(\\w+\\.)+(com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n if (retainFormat) {\n text = text.replaceAll('\\n', '<nel><stop>');\n } else {\n text = text.replaceAll('\\n', ' ');\n }\n\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replace(websites, (match) => match.replaceAll('.', '<prd>'));\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replace(/\\.(?=\\s|$)/g, '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n if (retainFormat) {\n text = text.replaceAll('<nel>', '\\n');\n }\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n const prePad = retainFormat ? '' : ' ';\n for (const match of split) {\n const sentence = retainFormat ? match : match.trim();\n if (!sentence) continue;\n\n buf += prePad + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(prePad.length), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(prePad.length), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAOO,MAAM,iBAAiB,CAC5B,MACA,YAAY,IACZ,eAAwB,UACO;AAC/B,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,MAAM,aAAa;AAAA,EAC5C,OAAO;AACL,WAAO,KAAK,WAAW,MAAM,GAAG;AAAA,EAClC;AAEA,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,QAAQ,UAAU,CAAC,UAAU,MAAM,WAAW,KAAK,OAAO,CAAC;AACvE,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,QAAQ,eAAe,SAAS;AAC5C,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,SAAS,IAAI;AAAA,EACtC;AAEA,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,QAAM,SAAS,eAAe,KAAK;AACnC,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,eAAe,QAAQ,MAAM,KAAK;AACnD,QAAI,CAAC,SAAU;AAEf,WAAO,SAAS;AAChB,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,GAAG,CAAC;AACrD,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACnE;AAEA,SAAO;AACT;","names":[]}
@@ -4,7 +4,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
4
4
  const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
5
5
  const starters = /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)/g;
6
6
  const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;
7
- const websites = /[.](com|net|org|io|gov|edu|me)/g;
7
+ const websites = /(\w+\.)+(com|net|org|io|gov|edu|me)/g;
8
8
  const digits = /([0-9])/g;
9
9
  const dots = /\.{2,}/g;
10
10
  if (retainFormat) {
@@ -13,7 +13,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
13
13
  text = text.replaceAll("\n", " ");
14
14
  }
15
15
  text = text.replaceAll(prefixes, "$1<prd>");
16
- text = text.replaceAll(websites, "<prd>$2");
16
+ text = text.replace(websites, (match) => match.replaceAll(".", "<prd>"));
17
17
  text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, "g"), "$1<prd>$2");
18
18
  text = text.replaceAll(dots, (match) => "<prd>".repeat(match.length));
19
19
  text = text.replaceAll("Ph.D.", "Ph<prd>D<prd>");
@@ -37,7 +37,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
37
37
  text = text.replaceAll('."', '".');
38
38
  text = text.replaceAll('!"', '"!');
39
39
  text = text.replaceAll('?"', '"?');
40
- text = text.replaceAll(".", ".<stop>");
40
+ text = text.replace(/\.(?=\s|$)/g, ".<stop>");
41
41
  text = text.replaceAll("?", "?<stop>");
42
42
  text = text.replaceAll("!", "!<stop>");
43
43
  text = text.replaceAll("<prd>", ".");
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /[.](com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n if (retainFormat) {\n text = text.replaceAll('\\n', '<nel><stop>');\n } else {\n text = text.replaceAll('\\n', ' ');\n }\n\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replaceAll(websites, '<prd>$2');\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replaceAll('.', '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n if (retainFormat) {\n text = text.replaceAll('<nel>', '\\n');\n }\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n const prePad = retainFormat ? '' : ' ';\n for (const match of split) {\n const sentence = retainFormat ? match : match.trim();\n if (!sentence) continue;\n\n buf += prePad + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(prePad.length), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(prePad.length), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":"AAOO,MAAM,iBAAiB,CAC5B,MACA,YAAY,IACZ,eAAwB,UACO;AAC/B,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,MAAM,aAAa;AAAA,EAC5C,OAAO;AACL,WAAO,KAAK,WAAW,MAAM,GAAG;AAAA,EAClC;AAEA,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,SAAS,IAAI;AAAA,EACtC;AAEA,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,QAAM,SAAS,eAAe,KAAK;AACnC,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,eAAe,QAAQ,MAAM,KAAK;AACnD,QAAI,CAAC,SAAU;AAEf,WAAO,SAAS;AAChB,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,GAAG,CAAC;AACrD,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACnE;AAEA,SAAO;AACT;","names":[]}
1
+ {"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /(\\w+\\.)+(com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n if (retainFormat) {\n text = text.replaceAll('\\n', '<nel><stop>');\n } else {\n text = text.replaceAll('\\n', ' ');\n }\n\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replace(websites, (match) => match.replaceAll('.', '<prd>'));\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replace(/\\.(?=\\s|$)/g, '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n if (retainFormat) {\n text = text.replaceAll('<nel>', '\\n');\n }\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n const prePad = retainFormat ? '' : ' ';\n for (const match of split) {\n const sentence = retainFormat ? match : match.trim();\n if (!sentence) continue;\n\n buf += prePad + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(prePad.length), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(prePad.length), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":"AAOO,MAAM,iBAAiB,CAC5B,MACA,YAAY,IACZ,eAAwB,UACO;AAC/B,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,MAAM,aAAa;AAAA,EAC5C,OAAO;AACL,WAAO,KAAK,WAAW,MAAM,GAAG;AAAA,EAClC;AAEA,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,QAAQ,UAAU,CAAC,UAAU,MAAM,WAAW,KAAK,OAAO,CAAC;AACvE,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,QAAQ,eAAe,SAAS;AAC5C,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,SAAS,IAAI;AAAA,EACtC;AAEA,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,QAAM,SAAS,eAAe,KAAK;AACnC,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,eAAe,QAAQ,MAAM,KAAK;AACnD,QAAI,CAAC,SAAU;AAEf,WAAO,SAAS;AAChB,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,GAAG,CAAC;AACrD,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACnE;AAEA,SAAO;AACT;","names":[]}
@@ -2,13 +2,15 @@
2
2
  var import_vitest = require("vitest");
3
3
  var import_basic = require("./basic/index.cjs");
4
4
  var import_paragraph = require("./basic/paragraph.cjs");
5
- const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
5
+ const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. Find additional resources on livekit.com. Find additional resources on docs.livekit.com. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
6
6
  const EXPECTED_MIN_20 = [
7
7
  "Hi! LiveKit is a platform for live audio and video applications and services.",
8
8
  "R.T.C stands for Real-Time Communication... again R.T.C.",
9
9
  "Mr. Theo is testing the sentence tokenizer.",
10
10
  "This is a test. Another test.",
11
11
  "A short sentence. A longer sentence that is longer than the previous sentence.",
12
+ "Find additional resources on livekit.com.",
13
+ "Find additional resources on docs.livekit.com.",
12
14
  "f(x) = x * 2.54 + 42.",
13
15
  "Hey! Hi! Hello!"
14
16
  ];
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":";AAGA,oBAAqC;AACrC,mBAAgE;AAChE,uBAAgC;AAEhC,MAAM,OACJ;AAUF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAAA,IAEA,wBAAS,aAAa,MAAM;AAC1B,8BAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,+BAAkB;AAExC,0BAAG,uCAAuC,MAAM;AAC9C,gCAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,0BAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,2BAAc;AAEpC,0BAAG,mCAAmC,MAAM;AAC1C,gCAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,0BAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,gCAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,2BAAc,KAAK;AAE9C,4BAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,4BAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,wCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,0BAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,sCAAO,4BAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,mBAAmB,MAAM;AAChC,0BAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,sCAAO,kCAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
1
+ {"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'Find additional resources on livekit.com. ' +\n 'Find additional resources on docs.livekit.com. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'Find additional resources on livekit.com.',\n 'Find additional resources on docs.livekit.com.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":";AAGA,oBAAqC;AACrC,mBAAgE;AAChE,uBAAgC;AAEhC,MAAM,OACJ;AAYF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAAA,IAEA,wBAAS,aAAa,MAAM;AAC1B,8BAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,+BAAkB;AAExC,0BAAG,uCAAuC,MAAM;AAC9C,gCAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,0BAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,2BAAc;AAEpC,0BAAG,mCAAmC,MAAM;AAC1C,gCAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,0BAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,gCAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,2BAAc,KAAK;AAE9C,4BAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,4BAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,wCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,0BAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,sCAAO,4BAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,mBAAmB,MAAM;AAChC,0BAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,sCAAO,kCAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
@@ -1,13 +1,15 @@
1
1
  import { describe, expect, it } from "vitest";
2
2
  import { SentenceTokenizer, WordTokenizer, hyphenateWord } from "./basic/index.js";
3
3
  import { splitParagraphs } from "./basic/paragraph.js";
4
- const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
4
+ const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. Find additional resources on livekit.com. Find additional resources on docs.livekit.com. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
5
5
  const EXPECTED_MIN_20 = [
6
6
  "Hi! LiveKit is a platform for live audio and video applications and services.",
7
7
  "R.T.C stands for Real-Time Communication... again R.T.C.",
8
8
  "Mr. Theo is testing the sentence tokenizer.",
9
9
  "This is a test. Another test.",
10
10
  "A short sentence. A longer sentence that is longer than the previous sentence.",
11
+ "Find additional resources on livekit.com.",
12
+ "Find additional resources on docs.livekit.com.",
11
13
  "f(x) = x * 2.54 + 42.",
12
14
  "Hey! Hi! Hello!"
13
15
  ];
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":"AAGA,SAAS,UAAU,QAAQ,UAAU;AACrC,SAAS,mBAAmB,eAAe,qBAAqB;AAChE,SAAS,uBAAuB;AAEhC,MAAM,OACJ;AAUF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAEA,SAAS,aAAa,MAAM;AAC1B,WAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,kBAAkB;AAExC,OAAG,uCAAuC,MAAM;AAC9C,aAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,OAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,cAAc;AAEpC,OAAG,mCAAmC,MAAM;AAC1C,aAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,OAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,aAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,cAAc,KAAK;AAE9C,SAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,SAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,qBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,OAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,eAAO,cAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,mBAAmB,MAAM;AAChC,OAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,eAAO,gBAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
1
+ {"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'Find additional resources on livekit.com. ' +\n 'Find additional resources on docs.livekit.com. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'Find additional resources on livekit.com.',\n 'Find additional resources on docs.livekit.com.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":"AAGA,SAAS,UAAU,QAAQ,UAAU;AACrC,SAAS,mBAAmB,eAAe,qBAAqB;AAChE,SAAS,uBAAuB;AAEhC,MAAM,OACJ;AAYF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAEA,SAAS,aAAa,MAAM;AAC1B,WAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,kBAAkB;AAExC,OAAG,uCAAuC,MAAM;AAC9C,aAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,OAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,cAAc;AAEpC,OAAG,mCAAmC,MAAM;AAC1C,aAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,OAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,aAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,cAAc,KAAK;AAE9C,SAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,SAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,qBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,OAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,eAAO,cAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,mBAAmB,MAAM;AAChC,OAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,eAAO,gBAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}