@supertone/supertone 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +119 -69
  2. package/custom_test/realtime_tts_player.ts +177 -12
  3. package/custom_test/test_pronunciation_dictionary.ts +227 -0
  4. package/custom_test/test_real_api.ts +1677 -162
  5. package/custom_test/test_text_utils_chunk_text_punctuation.ts +55 -0
  6. package/dist/commonjs/lib/config.d.ts +2 -2
  7. package/dist/commonjs/lib/config.d.ts.map +1 -1
  8. package/dist/commonjs/lib/config.js +2 -2
  9. package/dist/commonjs/lib/config.js.map +1 -1
  10. package/dist/commonjs/lib/custom_utils/index.d.ts +1 -0
  11. package/dist/commonjs/lib/custom_utils/index.d.ts.map +1 -1
  12. package/dist/commonjs/lib/custom_utils/index.js +5 -1
  13. package/dist/commonjs/lib/custom_utils/index.js.map +1 -1
  14. package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts +24 -0
  15. package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
  16. package/dist/commonjs/lib/custom_utils/pronunciation_utils.js +145 -0
  17. package/dist/commonjs/lib/custom_utils/pronunciation_utils.js.map +1 -0
  18. package/dist/commonjs/lib/custom_utils/text_utils.d.ts +8 -1
  19. package/dist/commonjs/lib/custom_utils/text_utils.d.ts.map +1 -1
  20. package/dist/commonjs/lib/custom_utils/text_utils.js +125 -7
  21. package/dist/commonjs/lib/custom_utils/text_utils.js.map +1 -1
  22. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
  23. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
  24. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js +48 -3
  25. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
  26. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
  27. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
  28. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js +46 -3
  29. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js.map +1 -1
  30. package/dist/commonjs/sdk/texttospeech.d.ts +17 -6
  31. package/dist/commonjs/sdk/texttospeech.d.ts.map +1 -1
  32. package/dist/commonjs/sdk/texttospeech.js +48 -25
  33. package/dist/commonjs/sdk/texttospeech.js.map +1 -1
  34. package/dist/esm/lib/config.d.ts +2 -2
  35. package/dist/esm/lib/config.d.ts.map +1 -1
  36. package/dist/esm/lib/config.js +2 -2
  37. package/dist/esm/lib/config.js.map +1 -1
  38. package/dist/esm/lib/custom_utils/index.d.ts +1 -0
  39. package/dist/esm/lib/custom_utils/index.d.ts.map +1 -1
  40. package/dist/esm/lib/custom_utils/index.js +2 -0
  41. package/dist/esm/lib/custom_utils/index.js.map +1 -1
  42. package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts +24 -0
  43. package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
  44. package/dist/esm/lib/custom_utils/pronunciation_utils.js +140 -0
  45. package/dist/esm/lib/custom_utils/pronunciation_utils.js.map +1 -0
  46. package/dist/esm/lib/custom_utils/text_utils.d.ts +8 -1
  47. package/dist/esm/lib/custom_utils/text_utils.d.ts.map +1 -1
  48. package/dist/esm/lib/custom_utils/text_utils.js +125 -7
  49. package/dist/esm/lib/custom_utils/text_utils.js.map +1 -1
  50. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
  51. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
  52. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js +47 -2
  53. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
  54. package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
  55. package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
  56. package/dist/esm/models/predictttsdurationusingcharacterrequest.js +45 -2
  57. package/dist/esm/models/predictttsdurationusingcharacterrequest.js.map +1 -1
  58. package/dist/esm/sdk/texttospeech.d.ts +17 -6
  59. package/dist/esm/sdk/texttospeech.d.ts.map +1 -1
  60. package/dist/esm/sdk/texttospeech.js +49 -26
  61. package/dist/esm/sdk/texttospeech.js.map +1 -1
  62. package/examples/custom_voices/create_cloned_voice.ts +4 -3
  63. package/examples/custom_voices/delete_custom_voice.ts +2 -7
  64. package/examples/custom_voices/edit_custom_voice.ts +2 -6
  65. package/examples/custom_voices/get_custom_voice.ts +2 -7
  66. package/examples/custom_voices/list_custom_voices.ts +2 -7
  67. package/examples/custom_voices/search_custom_voices.ts +2 -6
  68. package/examples/text_to_speech/create_speech.ts +3 -8
  69. package/examples/text_to_speech/create_speech_long_text.ts +3 -7
  70. package/examples/text_to_speech/create_speech_with_phonemes.ts +3 -7
  71. package/examples/text_to_speech/create_speech_with_voice_settings.ts +3 -8
  72. package/examples/text_to_speech/predict_duration.ts +3 -7
  73. package/examples/text_to_speech/stream_speech.ts +3 -7
  74. package/examples/text_to_speech/stream_speech_long_text.ts +3 -7
  75. package/examples/text_to_speech/stream_speech_with_phonemes.ts +3 -7
  76. package/examples/text_to_speech/stream_speech_with_voice_settings.ts +3 -7
  77. package/examples/usage/get_credit_balance.ts +2 -6
  78. package/examples/usage/get_usage.ts +2 -6
  79. package/examples/usage/get_voice_usage.ts +2 -7
  80. package/examples/voices/get_voice.ts +2 -6
  81. package/examples/voices/list_voices.ts +2 -6
  82. package/examples/voices/search_voices.ts +2 -7
  83. package/jsr.json +1 -1
  84. package/openapi.json +101 -9
  85. package/package.json +1 -1
  86. package/src/lib/config.ts +41 -41
  87. package/src/lib/custom_utils/index.ts +7 -0
  88. package/src/lib/custom_utils/pronunciation_utils.ts +193 -0
  89. package/src/lib/custom_utils/text_utils.ts +138 -7
  90. package/src/models/apiconverttexttospeechusingcharacterrequest.ts +62 -3
  91. package/src/models/predictttsdurationusingcharacterrequest.ts +64 -3
  92. package/src/sdk/texttospeech.ts +99 -68
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Smoke test for multilingual sentence punctuation splitting in chunkText().
4
+ *
5
+ * Run:
6
+ * npx ts-node custom_test/test_text_utils_chunk_text_punctuation.ts
7
+ * # or after build:
8
+ * node dist/custom_test/test_text_utils_chunk_text_punctuation.js
9
+ */
10
+
11
+ import { chunkText } from "../src/lib/custom_utils/text_utils.js";
12
+
13
/**
 * Run chunkText() on `text` and fail unless the result serializes to the
 * same JSON as `expectedChunks`.
 *
 * @param text - Input handed to chunkText()
 * @param expectedChunks - Chunks the call is expected to produce, in order
 * @param maxLength - Chunk size limit forwarded to chunkText()
 * @throws Error whose message lists the input, the expected chunks and the actual chunks
 */
function assertSplits(
    text: string,
    expectedChunks: string[],
    maxLength: number
): void {
    const actualJson = JSON.stringify(chunkText(text, maxLength));
    const expectedJson = JSON.stringify(expectedChunks);

    // Comparing the serialized forms checks both content and order in one step.
    if (actualJson === expectedJson) {
        return;
    }

    throw new Error(
        `\ntext=${JSON.stringify(text)}\nexpected=${expectedJson}\ngot=${actualJson}`
    );
}
27
+
28
/**
 * Smoke-test entry point: checks one sample per script/punctuation family,
 * in order, and prints a success line when every sample splits as expected.
 * The first failing sample aborts the run via the error thrown by assertSplits().
 */
function main(): void {
    // Each row is [input text, expected chunks, maxLength].
    const cases: Array<[string, string[], number]> = [
        // English / many EU languages
        ["Hello. World!", ["Hello. ", "World!"], 8],
        // Korean (mostly ASCII punctuation in practice, plus ellipsis)
        ["안...반가… 네.", ["안...", "반가… ", "네."], 4],
        // Japanese
        ["こんにちは。元気ですか?はい!", ["こんにちは。", "元気ですか?", "はい!"], 6],
        // Arabic (short samples to avoid max_length merge issues)
        ["مر؟ نعم۔", ["مر؟ ", "نعم۔"], 5],
        // Hindi
        ["हाँ। नहीं॥", ["हाँ। ", "नहीं॥"], 6],
        // Greek question mark (U+037E)
        ["Γεια;Καλά.", ["Γεια;", "Καλά."], 5],
    ];

    for (const [text, expectedChunks, maxLength] of cases) {
        assertSplits(text, expectedChunks, maxLength);
    }

    console.log("OK: chunkText punctuation smoke test passed");
}
53
+
54
+ main();
55
+
@@ -31,8 +31,8 @@ export declare function serverURLFromOptions(options: SDKOptions): URL | null;
31
31
  export declare const SDK_METADATA: {
32
32
  readonly language: "typescript";
33
33
  readonly openapiDocVersion: "0.8.69";
34
- readonly sdkVersion: "0.1.1";
34
+ readonly sdkVersion: "0.1.3";
35
35
  readonly genVersion: "2.686.7";
36
- readonly userAgent: "speakeasy-sdk/typescript 0.1.1 2.686.7 0.8.69 @supertone/supertone";
36
+ readonly userAgent: "speakeasy-sdk/typescript 0.1.2 2.686.7 0.8.69 @supertone/supertone";
37
37
  };
38
38
  //# sourceMappingURL=config.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../../src/lib/config.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C;;GAEG;AACH,eAAO,MAAM,UAAU,uCAKb,CAAC;AAEX,MAAM,MAAM,UAAU,GAAG;IACvB,MAAM,CAAC,EAAE,MAAM,GAAG,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,SAAS,CAAC;IAEtD,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,CAAC;AAEF,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,UAAU,GAAG,GAAG,GAAG,IAAI,CAepE;AAED,eAAO,MAAM,YAAY;;;;;;CAOf,CAAC"}
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../../src/lib/config.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C;;GAEG;AACH,eAAO,MAAM,UAAU,uCAKb,CAAC;AAEX,MAAM,MAAM,UAAU,GAAG;IACxB,MAAM,CAAC,EAAE,MAAM,GAAG,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,SAAS,CAAC;IAEtD,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,UAAU,GAAG,GAAG,GAAG,IAAI,CAepE;AAED,eAAO,MAAM,YAAY;;;;;;CAOf,CAAC"}
@@ -31,8 +31,8 @@ function serverURLFromOptions(options) {
31
31
  exports.SDK_METADATA = {
32
32
  language: "typescript",
33
33
  openapiDocVersion: "0.8.69",
34
- sdkVersion: "0.1.1",
34
+ sdkVersion: "0.1.3",
35
35
  genVersion: "2.686.7",
36
- userAgent: "speakeasy-sdk/typescript 0.1.1 2.686.7 0.8.69 @supertone/supertone",
36
+ userAgent: "speakeasy-sdk/typescript 0.1.2 2.686.7 0.8.69 @supertone/supertone",
37
37
  };
38
38
  //# sourceMappingURL=config.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"config.js","sourceRoot":"","sources":["../../../src/lib/config.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAyCH,oDAeC;AAnDD,qCAA8C;AAE9C;;GAEG;AACU,QAAA,UAAU,GAAG;IACxB;;OAEG;IACH,0BAA0B;CAClB,CAAC;AA0BX,SAAgB,oBAAoB,CAAC,OAAmB;IACtD,IAAI,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IAElC,MAAM,MAAM,GAAW,EAAE,CAAC;IAE1B,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,CAAC,CAAC;QACzC,IAAI,SAAS,GAAG,CAAC,IAAI,SAAS,IAAI,kBAAU,CAAC,MAAM,EAAE,CAAC;YACpD,MAAM,IAAI,KAAK,CAAC,wBAAwB,SAAS,EAAE,CAAC,CAAC;QACvD,CAAC;QACD,SAAS,GAAG,kBAAU,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IAED,MAAM,CAAC,GAAG,IAAA,mBAAU,EAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;IACxC,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC;AAEY,QAAA,YAAY,GAAG;IAC1B,QAAQ,EAAE,YAAY;IACtB,iBAAiB,EAAE,QAAQ;IAC3B,UAAU,EAAE,OAAO;IACnB,UAAU,EAAE,SAAS;IACrB,SAAS,EACP,oEAAoE;CAC9D,CAAC"}
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../../../src/lib/config.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAyCH,oDAeC;AAnDD,qCAA8C;AAE9C;;GAEG;AACU,QAAA,UAAU,GAAG;IACzB;;OAEG;IACH,0BAA0B;CACjB,CAAC;AA0BX,SAAgB,oBAAoB,CAAC,OAAmB;IACvD,IAAI,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IAElC,MAAM,MAAM,GAAW,EAAE,CAAC;IAE1B,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,CAAC,CAAC;QACzC,IAAI,SAAS,GAAG,CAAC,IAAI,SAAS,IAAI,kBAAU,CAAC,MAAM,EAAE,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,wBAAwB,SAAS,EAAE,CAAC,CAAC;QACtD,CAAC;QACD,SAAS,GAAG,kBAAU,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,MAAM,CAAC,GAAG,IAAA,mBAAU,EAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;IACxC,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;AACnB,CAAC;AAEY,QAAA,YAAY,GAAG;IAC3B,QAAQ,EAAE,YAAY;IACtB,iBAAiB,EAAE,QAAQ;IAC3B,UAAU,EAAE,OAAO;IACnB,UAAU,EAAE,SAAS;IACrB,SAAS,EACR,oEAAoE;CAC5D,CAAC"}
@@ -6,6 +6,7 @@
6
6
  */
7
7
  export * from "./constants.js";
8
8
  export { chunkText, extractAudioFromNdjson } from "./text_utils.js";
9
+ export { applyPronunciationDictionary, PronunciationDictionaryValidationError, type PronunciationDictionaryEntry, } from "./pronunciation_utils.js";
9
10
  export { mergeWavBinary, mergeMp3Binary, removeWavHeader, removeMp3Header, detectAudioFormat, extractAudioFromResponse, extractAudioFromResponses, } from "./audio_utils.js";
10
11
  export { mergePhonemeData, adjustPhonemeTiming, createEmptyPhonemeDict, type PhonemeData, } from "./phoneme_utils.js";
11
12
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/custom_utils/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,cAAc,gBAAgB,CAAC;AAG/B,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAE,MAAM,iBAAiB,CAAC;AAGpE,OAAO,EACN,cAAc,EACd,cAAc,EACd,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,wBAAwB,EACxB,yBAAyB,GACzB,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EACN,gBAAgB,EAChB,mBAAmB,EACnB,sBAAsB,EACtB,KAAK,WAAW,GAChB,MAAM,oBAAoB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/custom_utils/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,cAAc,gBAAgB,CAAC;AAG/B,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAE,MAAM,iBAAiB,CAAC;AAGpE,OAAO,EACN,4BAA4B,EAC5B,sCAAsC,EACtC,KAAK,4BAA4B,GACjC,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EACN,cAAc,EACd,cAAc,EACd,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,wBAAwB,EACxB,yBAAyB,GACzB,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EACN,gBAAgB,EAChB,mBAAmB,EACnB,sBAAsB,EACtB,KAAK,WAAW,GAChB,MAAM,oBAAoB,CAAC"}
@@ -20,13 +20,17 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
20
20
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
21
21
  };
22
22
  Object.defineProperty(exports, "__esModule", { value: true });
23
- exports.createEmptyPhonemeDict = exports.adjustPhonemeTiming = exports.mergePhonemeData = exports.extractAudioFromResponses = exports.extractAudioFromResponse = exports.detectAudioFormat = exports.removeMp3Header = exports.removeWavHeader = exports.mergeMp3Binary = exports.mergeWavBinary = exports.extractAudioFromNdjson = exports.chunkText = void 0;
23
+ exports.createEmptyPhonemeDict = exports.adjustPhonemeTiming = exports.mergePhonemeData = exports.extractAudioFromResponses = exports.extractAudioFromResponse = exports.detectAudioFormat = exports.removeMp3Header = exports.removeWavHeader = exports.mergeMp3Binary = exports.mergeWavBinary = exports.PronunciationDictionaryValidationError = exports.applyPronunciationDictionary = exports.extractAudioFromNdjson = exports.chunkText = void 0;
24
24
  // Export all constants
25
25
  __exportStar(require("./constants.js"), exports);
26
26
  // Export text utilities
27
27
  var text_utils_js_1 = require("./text_utils.js");
28
28
  Object.defineProperty(exports, "chunkText", { enumerable: true, get: function () { return text_utils_js_1.chunkText; } });
29
29
  Object.defineProperty(exports, "extractAudioFromNdjson", { enumerable: true, get: function () { return text_utils_js_1.extractAudioFromNdjson; } });
30
+ // Export pronunciation utilities
31
+ var pronunciation_utils_js_1 = require("./pronunciation_utils.js");
32
+ Object.defineProperty(exports, "applyPronunciationDictionary", { enumerable: true, get: function () { return pronunciation_utils_js_1.applyPronunciationDictionary; } });
33
+ Object.defineProperty(exports, "PronunciationDictionaryValidationError", { enumerable: true, get: function () { return pronunciation_utils_js_1.PronunciationDictionaryValidationError; } });
30
34
  // Export audio utilities
31
35
  var audio_utils_js_1 = require("./audio_utils.js");
32
36
  Object.defineProperty(exports, "mergeWavBinary", { enumerable: true, get: function () { return audio_utils_js_1.mergeWavBinary; } });
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/lib/custom_utils/index.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;;;;;;;;;;;;;;;AAEH,uBAAuB;AACvB,iDAA+B;AAE/B,wBAAwB;AACxB,iDAAoE;AAA3D,0GAAA,SAAS,OAAA;AAAE,uHAAA,sBAAsB,OAAA;AAE1C,yBAAyB;AACzB,mDAQ0B;AAPzB,gHAAA,cAAc,OAAA;AACd,gHAAA,cAAc,OAAA;AACd,iHAAA,eAAe,OAAA;AACf,iHAAA,eAAe,OAAA;AACf,mHAAA,iBAAiB,OAAA;AACjB,0HAAA,wBAAwB,OAAA;AACxB,2HAAA,yBAAyB,OAAA;AAG1B,2BAA2B;AAC3B,uDAK4B;AAJ3B,oHAAA,gBAAgB,OAAA;AAChB,uHAAA,mBAAmB,OAAA;AACnB,0HAAA,sBAAsB,OAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/lib/custom_utils/index.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;;;;;;;;;;;;;;;AAEH,uBAAuB;AACvB,iDAA+B;AAE/B,wBAAwB;AACxB,iDAAoE;AAA3D,0GAAA,SAAS,OAAA;AAAE,uHAAA,sBAAsB,OAAA;AAE1C,iCAAiC;AACjC,mEAIkC;AAHjC,sIAAA,4BAA4B,OAAA;AAC5B,gJAAA,sCAAsC,OAAA;AAIvC,yBAAyB;AACzB,mDAQ0B;AAPzB,gHAAA,cAAc,OAAA;AACd,gHAAA,cAAc,OAAA;AACd,iHAAA,eAAe,OAAA;AACf,iHAAA,eAAe,OAAA;AACf,mHAAA,iBAAiB,OAAA;AACjB,0HAAA,wBAAwB,OAAA;AACxB,2HAAA,yBAAyB,OAAA;AAG1B,2BAA2B;AAC3B,uDAK4B;AAJ3B,oHAAA,gBAAgB,OAAA;AAChB,uHAAA,mBAAmB,OAAA;AACnB,0HAAA,sBAAsB,OAAA"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Pronunciation dictionary substitution utilities.
3
+ *
4
+ * Mirrors the Python implementation policy:
5
+ * - Apply rules in input order
6
+ * - partial_match=false: word-boundary exact matches only
7
+ * - partial_match=true: substring matches (no boundaries)
8
+ * - No re-substitution: replaced segments are shielded via opaque tokens
9
+ *
10
+ * Validation:
11
+ * - pronunciation_dictionary omitted/undefined/null/empty array -> return original text
12
+ * - pronunciation_dictionary must be an array of objects
13
+ * - each object must have: text (string, non-empty), pronunciation (string, non-empty), partial_match (boolean)
14
+ */
15
+ export declare class PronunciationDictionaryValidationError extends Error {
16
+ constructor(message: string);
17
+ }
18
+ export type PronunciationDictionaryEntry = {
19
+ text: string;
20
+ pronunciation: string;
21
+ partial_match: boolean;
22
+ };
23
+ export declare function applyPronunciationDictionary(text: string, pronunciation_dictionary?: unknown): string;
24
+ //# sourceMappingURL=pronunciation_utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pronunciation_utils.d.ts","sourceRoot":"","sources":["../../../../src/lib/custom_utils/pronunciation_utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,qBAAa,sCAAuC,SAAQ,KAAK;gBACnD,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,MAAM,4BAA4B,GAAG;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,MAAM,EACZ,wBAAwB,CAAC,EAAE,OAAO,GACjC,MAAM,CAuER"}
@@ -0,0 +1,145 @@
1
+ "use strict";
2
+ /**
3
+ * Pronunciation dictionary substitution utilities.
4
+ *
5
+ * Mirrors the Python implementation policy:
6
+ * - Apply rules in input order
7
+ * - partial_match=false: word-boundary exact matches only
8
+ * - partial_match=true: substring matches (no boundaries)
9
+ * - No re-substitution: replaced segments are shielded via opaque tokens
10
+ *
11
+ * Validation:
12
+ * - pronunciation_dictionary omitted/undefined/null/empty array -> return original text
13
+ * - pronunciation_dictionary must be an array of objects
14
+ * - each object must have: text (string, non-empty), pronunciation (string, non-empty), partial_match (boolean)
15
+ */
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.PronunciationDictionaryValidationError = void 0;
18
+ exports.applyPronunciationDictionary = applyPronunciationDictionary;
19
/**
 * Error type thrown by the pronunciation dictionary helpers in this module
 * when the text or a dictionary entry fails validation.
 */
class PronunciationDictionaryValidationError extends Error {
    /**
     * @param message - Human-readable description of the validation failure
     */
    constructor(message) {
        super(message);
        // Override the default "Error" so logs identify the failure category.
        const errorName = "PronunciationDictionaryValidationError";
        this.name = errorName;
    }
}
25
+ exports.PronunciationDictionaryValidationError = PronunciationDictionaryValidationError;
26
/**
 * Substitute dictionary pronunciations into `text`.
 *
 * Rules are applied in input order. Each match is first swapped for an opaque
 * placeholder and the placeholders are expanded only after every rule has run,
 * so text produced by one rule is never rewritten by a later rule.
 *
 * @param text - Text to transform
 * @param pronunciation_dictionary - Array of { text, pronunciation, partial_match } entries;
 *   null, undefined or an empty array returns `text` unchanged
 * @returns The text with every matched source replaced by its pronunciation
 * @throws PronunciationDictionaryValidationError if `text` is not a string, the
 *   dictionary is not an array, or an entry fails validateEntry()
 */
function applyPronunciationDictionary(text, pronunciation_dictionary) {
    const isEmptyArray = Array.isArray(pronunciation_dictionary) &&
        pronunciation_dictionary.length === 0;
    // Nothing to apply: hand the input back before any validation runs.
    if (pronunciation_dictionary == null || isEmptyArray) {
        return text;
    }
    if (typeof text !== "string") {
        throw new PronunciationDictionaryValidationError(`\`text\` must be string, got ${typeof text}`);
    }
    if (!Array.isArray(pronunciation_dictionary)) {
        throw new PronunciationDictionaryValidationError("`pronunciation_dictionary` must be an array of objects");
    }

    // "Word character" = letter, number or underscore. Kept without surrounding
    // brackets because it is embedded inside negated character classes below.
    const WORD_CHARS = "\\p{L}\\p{N}_";
    const pronunciationByPlaceholder = new Map();
    let shielded = text;

    // entries() also yields holes (as undefined) so they reach validateEntry().
    for (const [position, rawEntry] of pronunciation_dictionary.entries()) {
        const rule = validateEntry(rawEntry, position);
        const placeholder = makeUniqueToken(position, shielded, pronunciationByPlaceholder);

        let matcher;
        let replacement;
        if (rule.partial_match) {
            // Substring mode: match the source anywhere.
            matcher = new RegExp(escapeRegExp(rule.text), "g");
            replacement = placeholder;
        }
        else {
            // Whole-word mode: the left boundary is captured (no lookbehind) and
            // written back via $1; the right boundary is a lookahead.
            const escapedSource = escapeRegExp(rule.text);
            matcher = new RegExp(`(^|[^${WORD_CHARS}])(${escapedSource})(?=[^${WORD_CHARS}]|$)`, "gu");
            replacement = `$1${placeholder}`;
        }

        const substituted = shielded.replace(matcher, replacement);
        if (substituted !== shielded) {
            // Only remember placeholders that were actually inserted.
            pronunciationByPlaceholder.set(placeholder, rule.pronunciation);
            shielded = substituted;
        }
    }

    // Expand every placeholder into its pronunciation.
    for (const [placeholder, pronunciation] of pronunciationByPlaceholder) {
        shielded = shielded.split(placeholder).join(pronunciation);
    }
    return shielded;
}
84
/**
 * Validate one pronunciation dictionary entry and return a normalized copy.
 *
 * Checks run in a fixed order: the entry is a non-array object, all three
 * fields are present, each field has the right type, and neither string is empty.
 *
 * @param raw - Candidate entry taken from the dictionary array
 * @param idx - Position of the entry, used only in error messages
 * @returns A fresh { text, pronunciation, partial_match } object
 * @throws PronunciationDictionaryValidationError on the first failed check
 */
function validateEntry(raw, idx) {
    // Every message shares the same "pronunciation_dictionary[idx]" prefix.
    const fail = (detail) => new PronunciationDictionaryValidationError(`pronunciation_dictionary[${idx}]${detail}`);

    if (raw == null || typeof raw !== "object" || Array.isArray(raw)) {
        let actualKind;
        if (raw === null) {
            actualKind = "null";
        }
        else if (Array.isArray(raw)) {
            actualKind = "array";
        }
        else {
            actualKind = typeof raw;
        }
        throw fail(` must be an object, got ${actualKind}`);
    }

    // Report every absent field at once rather than one per attempt.
    const absent = ["text", "pronunciation", "partial_match"].filter((field) => !(field in raw));
    if (absent.length) {
        throw fail(` missing required field(s): ${absent.join(", ")}`);
    }

    const { text: source, pronunciation: spoken, partial_match: partial } = raw;

    if (typeof source !== "string") {
        throw fail(`.text must be string, got ${typeof source}`);
    }
    if (typeof spoken !== "string") {
        throw fail(`.pronunciation must be string, got ${typeof spoken}`);
    }
    if (typeof partial !== "boolean") {
        throw fail(`.partial_match must be boolean, got ${typeof partial}`);
    }
    if (source === "") {
        throw fail(".text must not be empty");
    }
    if (spoken === "") {
        throw fail(".pronunciation must not be empty");
    }

    return { text: source, pronunciation: spoken, partial_match: partial };
}
119
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the
 * result can be embedded in a RegExp source and match `s` literally.
 *
 * @param s - Literal text
 * @returns `s` with each of . * + ? ^ $ { } ( ) | [ ] \ preceded by a backslash
 */
function escapeRegExp(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
122
/**
 * Build an opaque placeholder for rule `idx` that occurs neither in the
 * working text nor among the placeholders already handed out.
 *
 * The placeholder consists ONLY of Unicode Private Use Area code points.
 * An earlier format embedded ASCII letters and digits ("PD", the index, a hex
 * suffix). A later substring rule whose source was "P", "D", a digit or a
 * hex letter then matched inside a previously inserted placeholder, corrupting
 * it and leaking marker characters into the final text. Keeping every
 * character in the private-use range means ordinary dictionary sources can no
 * longer match any part of a placeholder.
 *
 * @param idx - Index of the rule the placeholder stands for
 * @param working - Current working text; the placeholder must not occur in it
 * @param existing - Map keyed by placeholders already in use
 * @returns A placeholder string unique with respect to `working` and `existing`
 */
function makeUniqueToken(idx, working, existing) {
    const OPEN = "\uE000";
    const CLOSE = "\uE001";
    const SEPARATOR = "\uE002";
    // ASCII characters of a number's decimal form are shifted into U+E100..U+E17F.
    const PRIVATE_USE_OFFSET = 0xe100;
    const encode = (value) => Array.from(String(value), (ch) => String.fromCharCode(PRIVATE_USE_OFFSET + ch.charCodeAt(0))).join("");
    const isFree = (candidate) => !working.includes(candidate) && !existing.has(candidate);

    const base = `${OPEN}${encode(idx)}${CLOSE}`;
    if (isFree(base)) {
        return base;
    }
    // Collision (the text already contains the base form): append a counter.
    // The loop terminates because `working` is finite and `existing` is a finite map.
    for (let attempt = 0;; attempt++) {
        const token = `${OPEN}${encode(idx)}${SEPARATOR}${encode(attempt)}${CLOSE}`;
        if (isFree(token)) {
            return token;
        }
    }
}
134
/**
 * Produce a random lowercase hexadecimal string.
 *
 * Uses crypto.randomUUID() from the global scope when it exists (dashes
 * removed); otherwise concatenates two Math.random() fractions and the current
 * timestamp, all rendered in base 16.
 *
 * @returns Hex string; not cryptographically strong on the fallback path
 */
function safeRandomHex() {
    const { crypto: cryptoApi } = globalThis;
    if (cryptoApi && typeof cryptoApi.randomUUID === "function") {
        return String(cryptoApi.randomUUID()).replace(/-/g, "");
    }
    // Fallback: slice(2) drops the leading "0." of the base-16 fraction.
    const randomFraction = () => Math.random().toString(16).slice(2);
    const first = randomFraction();
    const second = randomFraction();
    return first + second + Date.now().toString(16);
}
145
+ //# sourceMappingURL=pronunciation_utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pronunciation_utils.js","sourceRoot":"","sources":["../../../../src/lib/custom_utils/pronunciation_utils.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;GAaG;;;AAeH,oEA0EC;AAvFD,MAAa,sCAAuC,SAAQ,KAAK;IAC/D,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,wCAAwC,CAAC;IACvD,CAAC;CACF;AALD,wFAKC;AAQD,SAAgB,4BAA4B,CAC1C,IAAY,EACZ,wBAAkC;IAElC,0EAA0E;IAC1E,IACE,wBAAwB,IAAI,IAAI;QAChC,CAAC,KAAK,CAAC,OAAO,CAAC,wBAAwB,CAAC;YACtC,wBAAwB,CAAC,MAAM,KAAK,CAAC,CAAC,EACxC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,sCAAsC,CAC9C,gCAAgC,OAAO,IAAI,EAAE,CAC9C,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,wBAAwB,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,sCAAsC,CAC9C,wDAAwD,CACzD,CAAC;IACJ,CAAC;IAED,2BAA2B;IAC3B,mDAAmD;IACnD,mDAAmD;IACnD,MAAM,oBAAoB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvD,IAAI,OAAO,GAAG,IAAI,CAAC;IAEnB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,wBAAwB,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC/D,MAAM,KAAK,GAAG,aAAa,CAAC,wBAAwB,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;QAChE,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC;QACvB,MAAM,GAAG,GAAG,KAAK,CAAC,aAAa,CAAC;QAChC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC;QAEpC,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,EAAE,OAAO,EAAE,oBAAoB,CAAC,CAAC;QAElE,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;YAC9C,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;YAC9C,IAAI,UAAU,KAAK,OAAO;gBAAE,SAAS,CAAC,iBAAiB;YACvD,oBAAoB,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YACrC,OAAO,GAAG,UAAU,CAAC;YACrB,SAAS;QACX,CAAC;QAED,gEAAgE;QAChE,6FAA6F;QAC7F,4DAA4D;QAC5D,EAAE;QACF,wFAAwF;QACxF,EAAE;QACF,uDAAuD;QACvD,4FAA4F;QAC5F,MAAM,UAAU,GAAG,eAAe,CAAC;QACnC,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,OAAO,GAAG,QAAQ,UAAU,MAAM,MAAM,SAAS,UAAU,MAAM,CAAC;QACxE,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAErC,8CAA8C;QAC9C,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,KAAK,EAAE,CAAC,CAAC;QACrD,IAAI,UAAU,KAAK,OAAO;YAAE,SAAS,CAAC,iBAAiB;QACvD,oBAAoB,CAAC,GAAG,C
AAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACrC,OAAO,GAAG,UAAU,CAAC;IACvB,CAAC;IAED,qCAAqC;IACrC,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,oBAAoB,CAAC,OAAO,EAAE,EAAE,CAAC;QAC3D,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,GAAY,EAAE,GAAW;IAC9C,IAAI,GAAG,IAAI,IAAI,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACjE,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,4BAC7B,GAAG,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,GAChE,EAAE,CACH,CAAC;IACJ,CAAC;IAED,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,IAAI,CAAC,CAAC,MAAM,IAAI,GAAG,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3C,IAAI,CAAC,CAAC,eAAe,IAAI,GAAG,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC7D,IAAI,CAAC,CAAC,eAAe,IAAI,GAAG,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC7D,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,gCAAgC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACpF,CAAC;IACJ,CAAC;IAED,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;IACxB,MAAM,GAAG,GAAG,GAAG,CAAC,eAAe,CAAC,CAAC;IACjC,MAAM,OAAO,GAAG,GAAG,CAAC,eAAe,CAAC,CAAC;IAErC,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,8BAA8B,OAAO,GAAG,EAAE,CAC1E,CAAC;IACJ,CAAC;IACD,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,uCAAuC,OAAO,GAAG,EAAE,CACnF,CAAC;IACJ,CAAC;IACD,IAAI,OAAO,OAAO,KAAK,SAAS,EAAE,CAAC;QACjC,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,wCAAwC,OAAO,OAAO,EAAE,CACxF,CAAC;IACJ,CAAC;IAED,IAAI,GAAG,KAAK,EAAE,EAAE,CAAC;QACf,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,0BAA0B,CAC1D,CAAC;IACJ,CAAC;IACD,IAAI,GAAG,KAAK,EAAE,EAAE,CAAC;QACf,MAAM,IAAI,sCAAsC,CAC9C,4BAA4B,GAAG,mCAAmC,CACnE,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,aAAa,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC;AACnE,CAAC;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,eAAe,CACtB,GAAW,EACX,OAAe,EACf,QAA6B;IAE
7B,oEAAoE;IACpE,MAAM,IAAI,GAAG,WAAW,GAAG,QAAQ,CAAC;IACpC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAEhE,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,aAAa,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,WAAW,GAAG,IAAI,MAAM,QAAQ,CAAC;QAC/C,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;IACrE,CAAC;AACH,CAAC;AAED,SAAS,aAAa;IACpB,sEAAsE;IACtE,MAAM,CAAC,GAAI,UAAkB,CAAC,MAAM,CAAC;IACrC,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,UAAU,KAAK,UAAU,EAAE,CAAC;QAC5C,OAAO,MAAM,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,gFAAgF;IAChF,OAAO,CACL,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;QACnC,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CACxB,CAAC;AACJ,CAAC"}
@@ -12,9 +12,16 @@
12
12
  * It handles various punctuation patterns and provides graceful fallback to
13
13
  * word/character boundaries when necessary.
14
14
  *
15
+ * Chunking Strategy:
16
+ * 1. First, split by sentence boundaries (multilingual punctuation)
17
+ * 2. Merge sentences into chunks up to maxLength
18
+ * 3. If a sentence exceeds maxLength:
19
+ * - For text with spaces: split by words
20
+ * - For text without spaces (Japanese, etc.): split by characters
21
+ *
15
22
  * @param text - Input text to be segmented
16
23
  * @param maxLength - Maximum length of each chunk
17
- * @returns Array of text chunks
24
+ * @returns Array of text chunks, each guaranteed to be <= maxLength
18
25
  */
19
26
  export declare function chunkText(text: string, maxLength?: number): string[];
20
27
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"text_utils.d.ts","sourceRoot":"","sources":["../../../../src/lib/custom_utils/text_utils.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;;;;;;;;;GAWG;AACH,wBAAgB,SAAS,CACxB,IAAI,EAAE,MAAM,EACZ,SAAS,GAAE,MAAgC,GACzC,MAAM,EAAE,CA2BV;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,CA8BpE"}
1
+ {"version":3,"file":"text_utils.d.ts","sourceRoot":"","sources":["../../../../src/lib/custom_utils/text_utils.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAkHH;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,SAAS,CACxB,IAAI,EAAE,MAAM,EACZ,SAAS,GAAE,MAAgC,GACzC,MAAM,EAAE,CAyCV;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,CA8BpE"}
@@ -9,6 +9,103 @@ Object.defineProperty(exports, "__esModule", { value: true });
9
9
  exports.chunkText = chunkText;
10
10
  exports.extractAudioFromNdjson = extractAudioFromNdjson;
11
11
  const constants_js_1 = require("./constants.js");
12
/**
 * Sentence-ending punctuation used to find sentence boundaries in
 * multilingual text.
 *
 * Every non-ASCII character is written as a \u escape on purpose: several of
 * them (fullwidth and halfwidth forms, the Greek question mark) collapse onto
 * ASCII or onto each other under Unicode compatibility normalization, so a
 * literal copy of this string that passes through a normalizing tool silently
 * loses entries. Escapes survive that.
 *
 * Punctuation groups:
 * - ASCII basics: . ! ? ; :
 * - Ellipsis: U+2026 (horizontal ellipsis), U+2025 (two dot leader)
 * - CJK: U+3002 (ideographic full stop), U+FF01 / U+FF1F / U+FF1B / U+FF1A
 *   (fullwidth ! ? ; :), U+FF61 (halfwidth ideographic full stop),
 *   U+3001 (ideographic comma), U+FF64 (halfwidth ideographic comma)
 * - Arabic/Urdu: U+061F (question mark), U+061B (semicolon),
 *   U+06D4 (Urdu full stop), U+060C (comma)
 * - Devanagari: U+0964 (danda), U+0965 (double danda)
 * - Greek: U+037E (Greek question mark)
 */
const SENTENCE_PUNCTUATION = [
    ".!?;:",
    "\u2026\u2025",
    "\u3002\uFF01\uFF1F\uFF1B\uFF1A\uFF61\u3001\uFF64",
    "\u061F\u061B\u06D4\u060C",
    "\u0964\u0965",
    "\u037E",
].join("");
/**
 * Matches one run of sentence punctuation plus any whitespace that follows it.
 * The capture group makes String.prototype.split keep the delimiters, so the
 * result alternates text, delimiter, text, ...
 */
const SENTENCE_SPLIT_PATTERN = new RegExp(`([${SENTENCE_PUNCTUATION}]+\\s*)`, "u");
30
/**
 * Report whether the text contains at least one whitespace character.
 *
 * Used to decide whether an oversized chunk can be broken at word boundaries.
 * Any character matched by the regular expression class \s counts, not only
 * the ASCII space.
 *
 * @param text - Text to inspect
 * @returns true when some whitespace character occurs in text
 */
function hasSpaces(text) {
    const firstWhitespace = /\s/.exec(text);
    return firstWhitespace !== null;
}
39
/**
 * Split text at whitespace so that every returned chunk fits in maxLength.
 * Used for languages that separate words with spaces (English, Korean, etc.)
 *
 * Words are packed greedily. Whitespace between words inside a chunk is kept
 * as-is; whitespace at a chunk boundary is dropped and never counted against
 * the next chunk's budget. A single word longer than maxLength is handed to
 * splitByCharacters.
 *
 * @param text - Text to split
 * @param maxLength - Maximum length of each chunk (must be >= 1)
 * @returns Array of trimmed, non-empty text chunks
 * @throws RangeError when maxLength is not a number >= 1
 */
function splitByWords(text, maxLength) {
    // Written as a negated >= so that NaN is rejected too.
    if (!(maxLength >= 1)) {
        throw new RangeError(`maxLength must be a number >= 1, received ${String(maxLength)}`);
    }
    // The capture group keeps the whitespace runs as their own tokens.
    const tokens = text.split(/(\s+)/);
    const chunks = [];
    let currentChunk = "";
    for (const token of tokens) {
        const isWhitespace = token.trim() === "";
        // Whitespace must not open a chunk: it would be trimmed away on output
        // but would still consume budget and force an early, needless split.
        if (isWhitespace && currentChunk === "") {
            continue;
        }
        if (currentChunk.length + token.length <= maxLength) {
            currentChunk += token;
            continue;
        }
        // The token does not fit: close the running chunk first.
        const finished = currentChunk.trim();
        if (finished) {
            chunks.push(finished);
        }
        if (isWhitespace) {
            currentChunk = "";
        }
        else if (token.length > maxLength) {
            // A single word exceeds maxLength, fall back to character splitting.
            chunks.push(...splitByCharacters(token, maxLength));
            currentChunk = "";
        }
        else {
            currentChunk = token;
        }
    }
    const last = currentChunk.trim();
    if (last) {
        chunks.push(last);
    }
    return chunks;
}
75
/**
 * Split text into consecutive pieces of at most maxLength UTF-16 code units.
 * Used for text without spaces (Japanese, Chinese, etc.) and for single words
 * that are longer than maxLength.
 *
 * The text is walked by code point, so a surrogate pair (emoji, rare CJK
 * ideographs) is never cut in half. Consequence: a piece may be one unit
 * shorter than maxLength, and when maxLength is 1 a supplementary character
 * is emitted alone as a piece of length 2.
 *
 * @param text - Text to split
 * @param maxLength - Maximum length of each chunk (must be >= 1)
 * @returns Array of text chunks; empty array for empty text
 * @throws RangeError when maxLength is not a number >= 1
 */
function splitByCharacters(text, maxLength) {
    // A step of zero or less would never advance; NaN would drop the text.
    // Written as a negated >= so that NaN is rejected too.
    if (!(maxLength >= 1)) {
        throw new RangeError(`maxLength must be a number >= 1, received ${String(maxLength)}`);
    }
    const chunks = [];
    let current = "";
    // String iteration yields whole code points, not UTF-16 code units.
    for (const symbol of text) {
        if (current.length > 0 && current.length + symbol.length > maxLength) {
            chunks.push(current);
            current = "";
        }
        current += symbol;
    }
    if (current.length > 0) {
        chunks.push(current);
    }
    return chunks;
}
90
/**
 * Break one chunk into pieces when it is longer than maxLength.
 *
 * A chunk that already fits is returned unchanged as a single-element array.
 * Otherwise the chunk is delegated to splitByWords when it contains
 * whitespace, and to splitByCharacters when it does not (Japanese, Chinese,
 * etc.).
 *
 * @param chunk - Text chunk to split
 * @param maxLength - Maximum length of each chunk
 * @returns Array of text chunks produced by the selected strategy
 */
function splitOversizedChunk(chunk, maxLength) {
    const alreadyFits = chunk.length <= maxLength;
    if (alreadyFits) {
        return [chunk];
    }
    // Word boundaries are preferred whenever the text offers any.
    const strategy = hasSpaces(chunk) ? splitByWords : splitByCharacters;
    return strategy(chunk, maxLength);
}
12
109
  /**
13
110
  * Split input text into sentence chunks suitable for TTS processing.
14
111
  *
@@ -17,33 +114,54 @@ const constants_js_1 = require("./constants.js");
17
114
  * It handles various punctuation patterns and provides graceful fallback to
18
115
  * word/character boundaries when necessary.
19
116
  *
117
+ * Chunking Strategy:
118
+ * 1. First, split by sentence boundaries (multilingual punctuation)
119
+ * 2. Merge sentences into chunks up to maxLength
120
+ * 3. If a sentence exceeds maxLength:
121
+ * - For text with spaces: split by words
122
+ * - For text without spaces (Japanese, etc.): split by characters
123
+ *
20
124
  * @param text - Input text to be segmented
21
125
  * @param maxLength - Maximum length of each chunk
22
- * @returns Array of text chunks
126
+ * @returns Array of text chunks, each guaranteed to be <= maxLength
23
127
  */
24
128
  function chunkText(text, maxLength = constants_js_1.DEFAULT_MAX_TEXT_LENGTH) {
25
129
  if (text.length <= maxLength) {
26
130
  return [text];
27
131
  }
28
- // Split by sentence boundaries
29
- const sentences = text.split(/([.!?;:]+\s*)/);
30
- const chunks = [];
132
+ // Step 1: Split by sentence boundaries (multilingual punctuation)
133
+ const sentences = text.split(SENTENCE_SPLIT_PATTERN);
134
+ const preliminaryChunks = [];
31
135
  let currentChunk = "";
136
+ // Step 2: Merge sentences into chunks up to maxLength
32
137
  for (const sentence of sentences) {
33
138
  if (currentChunk.length + sentence.length <= maxLength) {
34
139
  currentChunk += sentence;
35
140
  }
36
141
  else {
37
142
  if (currentChunk) {
38
- chunks.push(currentChunk);
143
+ preliminaryChunks.push(currentChunk);
39
144
  }
40
145
  currentChunk = sentence;
41
146
  }
42
147
  }
43
148
  if (currentChunk) {
44
- chunks.push(currentChunk);
149
+ preliminaryChunks.push(currentChunk);
45
150
  }
46
- return chunks;
151
+ // Step 3: Handle oversized chunks (split by words or characters)
152
+ const finalChunks = [];
153
+ for (const chunk of preliminaryChunks) {
154
+ if (chunk.length <= maxLength) {
155
+ finalChunks.push(chunk);
156
+ }
157
+ else {
158
+ // Chunk exceeds maxLength, need to split further
159
+ const subChunks = splitOversizedChunk(chunk, maxLength);
160
+ finalChunks.push(...subChunks);
161
+ }
162
+ }
163
+ // Filter out empty chunks
164
+ return finalChunks.filter((chunk) => chunk.length > 0);
47
165
  }
48
166
  /**
49
167
  * Extract audio data from NDJSON response.
@@ -1 +1 @@
1
- {"version":3,"file":"text_utils.js","sourceRoot":"","sources":["../../../../src/lib/custom_utils/text_utils.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;AAgBH,8BA8BC;AAWD,wDA8BC;AArFD,iDAAyD;AAEzD;;;;;;;;;;;GAWG;AACH,SAAgB,SAAS,CACxB,IAAY,EACZ,YAAoB,sCAAuB;IAE3C,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,+BAA+B;IAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IAE9C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QAClC,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YACxD,YAAY,IAAI,QAAQ,CAAC;QAC1B,CAAC;aAAM,CAAC;YACP,IAAI,YAAY,EAAE,CAAC;gBAClB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAC3B,CAAC;YACD,YAAY,GAAG,QAAQ,CAAC;QACzB,CAAC;IACF,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QAClB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;;;;;;GAQG;AACH,SAAgB,sBAAsB,CAAC,SAAiB;IACvD,qCAAqC;IACrC,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,OAAO,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,CAAC;IACF,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACZ,8CAA8C;IAC/C,CAAC;IAED,kCAAkC;IAClC,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;YACjB,IAAI,CAAC;gBACJ,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC9B,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;oBACvB,WAAW,CAAC,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;gBACzD,CAAC;YACF,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACZ,SAAS;YACV,CAAC;QACF,CAAC;IACF,CAAC;IAED,mBAAmB;IACnB,OAAO,gBAAgB,CAAC,WAAW,CAAC,CAAC;AACtC,CAAC;AAED;;;;;GAKG;AACH,SAAS,kBAAkB,CAAC,MAAc;IACzC,+CAA+C;IAC/C,IAAI,OAAO,MAAM,KAAK,WAAW,EAAE,CAAC;QACnC,UAAU;QACV,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;IACtD,CAAC;SAAM,CAAC;QACP,UAAU;QACV,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,KAAK,IAAI
,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED;;;;;GAKG;AACH,SAAS,gBAAgB,CAAC,MAAoB;IAC7C,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;IAC3C,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC;IACtB,CAAC;IACD,OAAO,MAAM,CAAC;AACf,CAAC"}
1
+ {"version":3,"file":"text_utils.js","sourceRoot":"","sources":["../../../../src/lib/custom_utils/text_utils.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;AAqIH,8BA4CC;AAWD,wDA8BC;AAxND,iDAAyD;AAEzD;;;;;;;;;;;;;;;GAeG;AACH,MAAM,oBAAoB,GAAG,4BAA4B,CAAC;AAC1D,MAAM,sBAAsB,GAAG,IAAI,MAAM,CACxC,KAAK,oBAAoB,SAAS,EAClC,GAAG,CACH,CAAC;AAEF;;;;;GAKG;AACH,SAAS,SAAS,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,YAAY,CAAC,IAAY,EAAE,SAAiB;IACpD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAClC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,YAAY,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YACpD,YAAY,IAAI,IAAI,CAAC;QACtB,CAAC;aAAM,CAAC;YACP,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;gBACzB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YAClC,CAAC;YACD,0DAA0D;YAC1D,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;gBACpC,MAAM,UAAU,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,SAAS,CAAC,CAAC;gBAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;gBAC3B,YAAY,GAAG,EAAE,CAAC;YACnB,CAAC;iBAAM,CAAC;gBACP,YAAY,GAAG,IAAI,CAAC;YACrB,CAAC;QACF,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IAClC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,iBAAiB,CAAC,IAAY,EAAE,SAAiB;IACzD,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,mBAAmB,CAAC,KAAa,EAAE,SAAiB;IAC5D,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,CAAC;IAChB,CAAC;IAED,2DAA2D;IAC3D,IAAI,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,YAAY,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACvC,CAAC;IAED,qEAAqE;IACrE,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,SAAgB,SAAS,CACxB,IAAY,EACZ,YAAoB,sCAAuB;IAE3C,IA
AI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,kEAAkE;IAClE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IAErD,MAAM,iBAAiB,GAAa,EAAE,CAAC;IACvC,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,sDAAsD;IACtD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QAClC,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YACxD,YAAY,IAAI,QAAQ,CAAC;QAC1B,CAAC;aAAM,CAAC;YACP,IAAI,YAAY,EAAE,CAAC;gBAClB,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACtC,CAAC;YACD,YAAY,GAAG,QAAQ,CAAC;QACzB,CAAC;IACF,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QAClB,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACtC,CAAC;IAED,iEAAiE;IACjE,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,KAAK,IAAI,iBAAiB,EAAE,CAAC;QACvC,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YAC/B,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzB,CAAC;aAAM,CAAC;YACP,iDAAiD;YACjD,MAAM,SAAS,GAAG,mBAAmB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YACxD,WAAW,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;QAChC,CAAC;IACF,CAAC;IAED,0BAA0B;IAC1B,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACxD,CAAC;AAED;;;;;;;;GAQG;AACH,SAAgB,sBAAsB,CAAC,SAAiB;IACvD,qCAAqC;IACrC,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,OAAO,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,CAAC;IACF,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACZ,8CAA8C;IAC/C,CAAC;IAED,kCAAkC;IAClC,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;YACjB,IAAI,CAAC;gBACJ,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC9B,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;oBACvB,WAAW,CAAC,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;gBACzD,CAAC;YACF,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACZ,SAAS;YACV,CAAC;QACF,CAAC;IACF,CAAC;IAED,mBAAmB;IACnB,OAAO,gBAAgB,CAAC,WAAW,CAAC,CAAC;AACtC,CAAC;AAED;;;;;GAKG;AACH,SAAS,kBAAkB,CAAC,MAAc;IACzC,+CAA+C;IAC/C,IAAI,OAAO,MAAM,KAAK,WAAW,EAAE,CAAC;QACnC,UAAU;QACV,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QA
AQ,CAAC,CAAC,CAAC;IACtD,CAAC;SAAM,CAAC;QACP,UAAU;QACV,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED;;;;;GAKG;AACH,SAAS,gBAAgB,CAAC,MAAoB;IAC7C,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;IAC3C,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC;IACtB,CAAC;IACD,OAAO,MAAM,CAAC;AACf,CAAC"}