refacil-sdd-ai 5.2.2 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/NOTICE.md +46 -0
  2. package/README.md +209 -42
  3. package/agents/auditor.md +46 -0
  4. package/agents/debugger.md +41 -1
  5. package/agents/implementer.md +76 -10
  6. package/agents/investigator.md +36 -0
  7. package/agents/proposer.md +46 -2
  8. package/agents/tester.md +45 -8
  9. package/agents/validator.md +67 -13
  10. package/bin/cli.js +428 -83
  11. package/bin/postinstall.js +20 -0
  12. package/lib/bus/broker.js +121 -3
  13. package/lib/bus/spawn.js +189 -121
  14. package/lib/check-review.js +102 -0
  15. package/lib/codegraph-telemetry.js +135 -0
  16. package/lib/codegraph.js +273 -0
  17. package/lib/commands/autopilot.js +120 -0
  18. package/lib/commands/bus.js +29 -36
  19. package/lib/commands/compact.js +185 -46
  20. package/lib/commands/read-spec.js +352 -0
  21. package/lib/commands/sdd.js +429 -44
  22. package/lib/compact-guidance.js +122 -77
  23. package/lib/config.js +136 -0
  24. package/lib/global-paths.js +56 -20
  25. package/lib/hooks.js +32 -4
  26. package/lib/ide-detection.js +1 -1
  27. package/lib/ignore-files.js +5 -1
  28. package/lib/installer.js +202 -19
  29. package/lib/kapso.js +241 -0
  30. package/lib/methodology-migration-pending.js +13 -0
  31. package/lib/open-browser.js +32 -0
  32. package/lib/opencode-migrate.js +148 -0
  33. package/lib/opencode-plugin/index.js +84 -104
  34. package/lib/opencode-plugin/rules.js +236 -0
  35. package/lib/project-root.js +154 -0
  36. package/lib/repo-ide-sync.js +5 -0
  37. package/lib/spec-reader/lang.js +72 -0
  38. package/lib/spec-reader/md-parser.js +299 -0
  39. package/lib/spec-reader/session.js +139 -0
  40. package/lib/spec-reader/ui/app.js +685 -0
  41. package/lib/spec-reader/ui/index.html +59 -0
  42. package/lib/spec-reader/ui/mixed-lang.js +200 -0
  43. package/lib/spec-reader/ui/model-cache.js +117 -0
  44. package/lib/spec-reader/ui/style.css +294 -0
  45. package/lib/spec-reader/ui/supertonic-helper.js +565 -0
  46. package/lib/spec-sync.js +258 -0
  47. package/lib/test-scope.js +713 -0
  48. package/lib/testing-policy-sync.js +14 -2
  49. package/package.json +6 -3
  50. package/skills/apply/SKILL.md +39 -64
  51. package/skills/archive/SKILL.md +74 -48
  52. package/skills/ask/SKILL.md +43 -8
  53. package/skills/autopilot/SKILL.md +476 -0
  54. package/skills/bug/SKILL.md +52 -53
  55. package/skills/explore/SKILL.md +48 -1
  56. package/skills/guide/SKILL.md +31 -13
  57. package/skills/inbox/SKILL.md +9 -0
  58. package/skills/join/SKILL.md +1 -1
  59. package/skills/prereqs/BUS-CROSS-REPO.md +33 -16
  60. package/skills/prereqs/METHODOLOGY-CONTRACT.md +96 -17
  61. package/skills/prereqs/SKILL.md +1 -1
  62. package/skills/propose/SKILL.md +74 -19
  63. package/skills/read-spec/SKILL.md +76 -0
  64. package/skills/reply/SKILL.md +42 -9
  65. package/skills/review/SKILL.md +63 -25
  66. package/skills/review/checklist.md +2 -2
  67. package/skills/say/SKILL.md +40 -4
  68. package/skills/setup/SKILL.md +59 -5
  69. package/skills/setup/troubleshooting.md +11 -3
  70. package/skills/stats/SKILL.md +157 -0
  71. package/skills/test/SKILL.md +35 -10
  72. package/skills/up-code/SKILL.md +20 -13
  73. package/skills/update/SKILL.md +32 -1
  74. package/skills/verify/SKILL.md +78 -41
  75. package/templates/compact-guidance.md +10 -0
  76. package/templates/methodology-guide.md +5 -0
@@ -0,0 +1,565 @@
1
+ import * as ort from 'onnxruntime-web';
2
+
3
/**
 * Language codes accepted by the multilingual TTS text processor.
 * `isValidLang` checks membership in this list and `preprocessText`
 * prints it in its "Invalid language" error message.
 */
export const AVAILABLE_LANGS = [
  'en', 'ko', 'ja', 'ar', 'bg', 'cs', 'da', 'de',
  'el', 'es', 'et', 'fi', 'fr', 'hi', 'hr', 'hu',
  'id', 'it', 'lt', 'lv', 'nl', 'pl', 'pt', 'ro',
  'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'vi', 'na',
];
5
+
6
/**
 * Tells whether `lang` is one of the codes listed in AVAILABLE_LANGS.
 *
 * @param {*} lang - Candidate language code.
 * @returns {boolean} true when the code is listed, false otherwise.
 */
export function isValidLang(lang) {
  return AVAILABLE_LANGS.some((code) => code === lang);
}
9
+
10
/**
 * Unicode text processor: normalizes raw text, wraps it in language tags and
 * converts it into a padded matrix of ids plus a matching mask.
 *
 * `indexer` is used as an array-like lookup table indexed by Unicode code
 * point (`indexer[codePoint]`, bounded by `indexer.length`).
 */
export class UnicodeProcessor {
  /**
   * @param {ArrayLike<number>} indexer - Code point -> id lookup table.
   */
  constructor(indexer) {
    this.indexer = indexer;
  }

  /**
   * Preprocesses every text and encodes it as one row of ids per text.
   *
   * Rows are built per Unicode code point (not per UTF-16 unit), so a
   * character outside the BMP yields exactly one id instead of an extra
   * entry for its low surrogate. Code points beyond the indexer map to -1;
   * rows are right-padded with 0 up to the longest row.
   *
   * @param {string[]} textList - Raw texts.
   * @param {string[]} langList - Language code for the text at the same index.
   * @returns {{textIds: number[][], textMask: number[][][]}} Padded ids and a
   *   mask with 1.0 on real positions and 0.0 on padding.
   * @throws {Error} When a language code is rejected by `isValidLang`.
   */
  call(textList, langList) {
    const codePointRows = textList.map((text, i) =>
      Array.from(this.preprocessText(text, langList[i]), (ch) => ch.codePointAt(0))
    );

    const textIdsLengths = codePointRows.map((codePoints) => codePoints.length);
    const maxLen = Math.max(...textIdsLengths);

    const textIds = codePointRows.map((codePoints) => {
      const row = new Array(maxLen).fill(0);
      codePoints.forEach((codePoint, j) => {
        row[j] = (codePoint < this.indexer.length) ? this.indexer[codePoint] : -1;
      });
      return row;
    });

    const textMask = this.getTextMask(textIdsLengths);
    return { textIds, textMask };
  }

  /**
   * Normalizes one text and wraps it as `<lang>text</lang>`.
   *
   * @param {string} text - Raw text.
   * @param {string} lang - Language code; must satisfy `isValidLang`.
   * @returns {string} The cleaned, tagged text.
   * @throws {Error} When `lang` is not a valid language.
   */
  preprocessText(text, lang) {
    // Validate first so an unsupported language fails before any work is done.
    if (!isValidLang(lang)) {
      throw new Error(`Invalid language: ${lang}. Available: ${AVAILABLE_LANGS.join(', ')}`);
    }

    // U+00B4 (acute accent) has the compatibility decomposition
    // "space + U+0301", so it must be mapped to an apostrophe BEFORE NFKD;
    // afterwards the character no longer exists in the string.
    text = text.replaceAll('\u00B4', "'");

    // TODO: Need advanced normalizer for better performance
    // NFKD decomposes accented chars into base + combining mark (e.g. ó → o + ◌́).
    // The model indexer was trained on NFKD codepoints — do NOT use NFC here, because
    // NFC keeps ó as a single codepoint U+00F3 which may be absent from the indexer,
    // causing it to return -1 and mispronounce the character.
    text = text.normalize('NFKD');

    // Remove emojis (wide Unicode range)
    const emojiPattern = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]+/gu;
    text = text.replace(emojiPattern, '');

    // Replace various dashes and symbols
    const replacements = {
      '\u2013': '-', // en dash
      '\u2010': '-', // hyphen; NFKD also turns U+2011 (non-breaking hyphen) into this
      '\u2014': '-', // em dash
      '_': ' ',
      '\u201C': '"', // left double quote
      '\u201D': '"', // right double quote
      '\u2018': "'", // left single quote
      '\u2019': "'", // right single quote
      '`': "'",
      '[': ' ',
      ']': ' ',
      '|': ' ',
      '/': ' ',
      '#': ' ',
      '→': ' ',
      '←': ' ',
    };
    for (const [k, v] of Object.entries(replacements)) {
      text = text.replaceAll(k, v);
    }

    // Remove special symbols
    text = text.replace(/[♥☆♡©\\]/g, '');

    // Replace known expressions
    const exprReplacements = {
      '@': ' at ',
      'e.g.,': 'for example, ',
      'i.e.,': 'that is, ',
    };
    for (const [k, v] of Object.entries(exprReplacements)) {
      text = text.replaceAll(k, v);
    }

    // Fix spacing around punctuation: drop one space directly before a mark.
    text = text.replace(/ ([,.!?;:'])/g, '$1');

    // Collapse runs of identical quotes. Backticks were already mapped to
    // apostrophes above, so no separate backtick pass is needed.
    text = text.replace(/"{2,}/g, '"');
    text = text.replace(/'{2,}/g, "'");

    // Remove extra spaces
    text = text.replace(/\s+/g, ' ').trim();

    // If text doesn't end with punctuation, quotes, or closing brackets, add a period
    if (!/[.!?;:,'\"')\]}…。」』】〉》›»]$/.test(text)) {
      text += '.';
    }

    // Wrap text with language tags
    return `<${lang}>${text}</${lang}>`;
  }

  /**
   * Builds the mask for rows of the given lengths, padded to the longest one.
   *
   * @param {number[]} textIdsLengths - Real length of each row.
   * @returns {number[][][]} One `[row]` entry per input length.
   */
  getTextMask(textIdsLengths) {
    const maxLen = Math.max(...textIdsLengths);
    return this.lengthToMask(textIdsLengths, maxLen);
  }

  /**
   * Converts lengths into 0/1 rows: the first `len` positions are 1.0, the
   * rest 0.0. A falsy `maxLen` falls back to the largest length.
   *
   * @param {number[]} lengths - Number of valid positions per row.
   * @param {?number} [maxLen=null] - Row width.
   * @returns {number[][][]} Each row wrapped in a single-element array.
   */
  lengthToMask(lengths, maxLen = null) {
    const actualMaxLen = maxLen || Math.max(...lengths);
    return lengths.map((len) => {
      const row = new Array(actualMaxLen).fill(0.0);
      for (let j = 0; j < Math.min(len, actualMaxLen); j++) {
        row[j] = 1.0;
      }
      return [row];
    });
  }
}
141
+
142
/**
 * Plain holder for the two style tensors of a voice.
 * `ttl` and `dp` are stored exactly as passed to the constructor.
 */
export class Style {
  /**
   * @param {*} ttlTensor - Stored as `this.ttl`.
   * @param {*} dpTensor - Stored as `this.dp`.
   */
  constructor(ttlTensor, dpTensor) {
    Object.assign(this, { ttl: ttlTensor, dp: dpTensor });
  }
}
151
+
152
/**
 * Text-to-Speech pipeline that chains four inference sessions:
 * duration predictor -> text encoder -> iterative vector estimator -> vocoder.
 *
 * Each session only needs an async `run(feeds)` method; tensors are created
 * with `ort.Tensor`.
 */
export class TextToSpeech {
  /**
   * @param {object} cfgs - Configuration; reads `ae.sample_rate`,
   *   `ae.base_chunk_size`, `ttl.chunk_compress_factor` and `ttl.latent_dim`.
   * @param {object} textProcessor - Object whose `call(textList, langList)`
   *   returns `{ textIds, textMask }`.
   * @param {object} dpOrt - Duration predictor session.
   * @param {object} textEncOrt - Text encoder session.
   * @param {object} vectorEstOrt - Vector estimator (denoiser) session.
   * @param {object} vocoderOrt - Vocoder session.
   */
  constructor(cfgs, textProcessor, dpOrt, textEncOrt, vectorEstOrt, vocoderOrt) {
    this.cfgs = cfgs;
    this.textProcessor = textProcessor;
    this.dpOrt = dpOrt;
    this.textEncOrt = textEncOrt;
    this.vectorEstOrt = vectorEstOrt;
    this.vocoderOrt = vocoderOrt;
    this.sampleRate = cfgs.ae.sample_rate;
  }

  /**
   * Runs the full pipeline for one batch of texts.
   *
   * The latent is kept as a flat Float32Array between denoising steps instead
   * of being rebuilt as nested arrays and flattened again on every step.
   *
   * @param {string[]} textList - Texts of the batch.
   * @param {string[]} langList - Language code per text.
   * @param {Style} style - Provides `style.ttl` and `style.dp`.
   * @param {number} totalStep - Number of denoising iterations.
   * @param {number} [speed=1.05] - Predicted durations are divided by this.
   * @param {?Function} [progressCallback=null] - Called as
   *   `(step + 1, totalStep)` at the start of every denoising iteration.
   * @returns {Promise<{wav: number[], duration: number[]}>} Vocoder output
   *   (`wav_tts`) as a plain array and the speed-adjusted durations.
   */
  async _infer(textList, langList, style, totalStep, speed = 1.05, progressCallback = null) {
    const bsz = textList.length;

    // Process text
    const { textIds, textMask } = this.textProcessor.call(textList, langList);

    const textIdsTensor = new ort.Tensor(
      'int64',
      BigInt64Array.from(textIds.flat(), (id) => BigInt(id)),
      [bsz, textIds[0].length]
    );
    const textMaskTensor = new ort.Tensor(
      'float32',
      new Float32Array(textMask.flat(2)),
      [bsz, 1, textMask[0][0].length]
    );

    // Predict duration and apply the speed factor
    const dpOutputs = await this.dpOrt.run({
      text_ids: textIdsTensor,
      style_dp: style.dp,
      text_mask: textMaskTensor
    });
    const duration = Array.from(dpOutputs.duration.data, (d) => d / speed);

    // Encode text
    const textEncOutputs = await this.textEncOrt.run({
      text_ids: textIdsTensor,
      style_ttl: style.ttl,
      text_mask: textMaskTensor
    });
    const textEmb = textEncOutputs.text_emb;

    // Sample noisy latent
    const { xt, latentMask } = this.sampleNoisyLatent(
      duration,
      this.sampleRate,
      this.cfgs.ae.base_chunk_size,
      this.cfgs.ttl.chunk_compress_factor,
      this.cfgs.ttl.latent_dim
    );

    const latentShape = [bsz, xt[0].length, xt[0][0].length];
    const latentSize = latentShape[0] * latentShape[1] * latentShape[2];
    let latentFlat = new Float32Array(xt.flat(2));

    const latentMaskTensor = new ort.Tensor(
      'float32',
      new Float32Array(latentMask.flat(2)),
      [bsz, 1, latentMask[0][0].length]
    );

    // Prepare constant arrays
    const totalStepTensor = new ort.Tensor('float32', new Float32Array(bsz).fill(totalStep), [bsz]);

    // Denoising loop
    for (let step = 0; step < totalStep; step++) {
      if (progressCallback) {
        progressCallback(step + 1, totalStep);
      }

      const currentStepTensor = new ort.Tensor('float32', new Float32Array(bsz).fill(step), [bsz]);
      const xtTensor = new ort.Tensor('float32', latentFlat, latentShape);

      const vectorEstOutputs = await this.vectorEstOrt.run({
        noisy_latent: xtTensor,
        text_emb: textEmb,
        style_ttl: style.ttl,
        latent_mask: latentMaskTensor,
        text_mask: textMaskTensor,
        current_step: currentStepTensor,
        total_step: totalStepTensor
      });

      // Copy exactly `latentSize` values so the next input owns its buffer
      // and always matches `latentShape`.
      const denoised = vectorEstOutputs.denoised_latent.data;
      latentFlat = Float32Array.from({ length: latentSize }, (_, i) => denoised[i]);
    }

    // Generate waveform
    const vocoderOutputs = await this.vocoderOrt.run({
      latent: new ort.Tensor('float32', latentFlat, latentShape)
    });

    const wav = Array.from(vocoderOutputs.wav_tts.data);

    return { wav, duration };
  }

  /**
   * Synthesizes one (possibly long) text for a single speaker. The text is
   * split with `chunkText` (max 120 characters for 'ko'/'ja', 300 otherwise),
   * each chunk is inferred on its own, and the waveforms are joined with
   * `silenceDuration` seconds of zeros between consecutive chunks.
   *
   * Samples are appended to a single output array, so the cost is linear in
   * the total number of samples, and the "first chunk" decision is based on
   * the chunk index rather than on the accumulated waveform being empty.
   *
   * @param {string} text - Text to synthesize.
   * @param {string} lang - Language code applied to every chunk.
   * @param {Style} style - Must hold exactly one style (`style.ttl.dims[0] === 1`).
   * @param {number} totalStep - Number of denoising iterations per chunk.
   * @param {number} [speed=1.05] - Passed through to `_infer`.
   * @param {number} [silenceDuration=0.3] - Seconds of silence between chunks.
   * @param {?Function} [progressCallback=null] - Passed through to `_infer`.
   * @returns {Promise<{wav: number[], duration: number[]}>} Joined waveform and
   *   a one-element array with the summed duration including silences.
   * @throws {Error} When `style` holds more than one style.
   */
  async call(text, lang, style, totalStep, speed = 1.05, silenceDuration = 0.3, progressCallback = null) {
    if (style.ttl.dims[0] !== 1) {
      throw new Error('Single speaker text to speech only supports single style');
    }
    const maxLen = (lang === 'ko' || lang === 'ja') ? 120 : 300;
    const textList = chunkText(text, maxLen);
    const silenceLen = Math.floor(silenceDuration * this.sampleRate);

    const wavCat = [];
    let durCat = 0;

    for (let i = 0; i < textList.length; i++) {
      const { wav, duration } = await this._infer([textList[i]], [lang], style, totalStep, speed, progressCallback);

      if (i === 0) {
        durCat = duration[0];
      } else {
        for (let s = 0; s < silenceLen; s++) {
          wavCat.push(0);
        }
        durCat += duration[0] + silenceDuration;
      }
      // Element-wise append: avoids both re-copying the accumulated samples
      // and spreading a very large array into call arguments.
      for (const sample of wav) {
        wavCat.push(sample);
      }
    }

    return { wav: wavCat, duration: [durCat] };
  }

  /**
   * Synthesizes a batch of texts in one `_infer` pass.
   *
   * @param {string[]} textList - Texts of the batch.
   * @param {string[]} langList - Language code per text.
   * @param {Style} style - Style tensors.
   * @param {number} totalStep - Number of denoising iterations.
   * @param {number} [speed=1.05] - Passed through to `_infer`.
   * @param {?Function} [progressCallback=null] - Passed through to `_infer`.
   * @returns {Promise<{wav: number[], duration: number[]}>} Result of `_infer`.
   */
  async batch(textList, langList, style, totalStep, speed = 1.05, progressCallback = null) {
    return await this._infer(textList, langList, style, totalStep, speed, progressCallback);
  }

  /**
   * Draws the initial latent from a standard normal distribution
   * (Box-Muller transform over `Math.random`) and zeroes every position
   * beyond each item's own latent length.
   *
   * @param {number[]} duration - Duration per batch item; multiplied by
   *   `sampleRate` to get a waveform length.
   * @param {number} sampleRate - Samples per unit of `duration`.
   * @param {number} baseChunkSize - Multiplied by `chunkCompress` to get the
   *   number of waveform samples per latent position.
   * @param {number} chunkCompress - Compression factor.
   * @param {number} latentDim - Multiplied by `chunkCompress` to get the
   *   number of latent channels.
   * @returns {{xt: number[][][], latentMask: number[][][]}} Latent indexed as
   *   `xt[batch][channel][position]` and the mask from `lengthToMask`.
   */
  sampleNoisyLatent(duration, sampleRate, baseChunkSize, chunkCompress, latentDim) {
    const maxDur = Math.max(...duration);
    const wavLenMax = Math.floor(maxDur * sampleRate);
    const wavLengths = duration.map((d) => Math.floor(d * sampleRate));

    const chunkSize = baseChunkSize * chunkCompress;
    // Ceiling division: number of latent positions needed to cover a waveform.
    const toLatentLength = (wavLen) => Math.floor((wavLen + chunkSize - 1) / chunkSize);
    const latentLen = toLatentLength(wavLenMax);
    const latentDimVal = latentDim * chunkCompress;

    const sampleStandardNormal = () => {
      // Box-Muller transform; u1 is clamped away from 0 so log(u1) stays finite.
      const u1 = Math.max(0.0001, Math.random());
      const u2 = Math.random();
      return Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
    };

    const xt = duration.map(() =>
      Array.from({ length: latentDimVal }, () =>
        Array.from({ length: latentLen }, sampleStandardNormal)
      )
    );

    const latentMask = this.lengthToMask(wavLengths.map(toLatentLength), latentLen);

    // Apply mask
    xt.forEach((channels, b) => {
      const maskRow = latentMask[b][0];
      for (const row of channels) {
        for (let t = 0; t < latentLen; t++) {
          row[t] *= maskRow[t];
        }
      }
    });

    return { xt, latentMask };
  }

  /**
   * Converts lengths into 0/1 rows: the first `len` positions are 1.0, the
   * rest 0.0. A falsy `maxLen` falls back to the largest length.
   *
   * @param {number[]} lengths - Number of valid positions per row.
   * @param {?number} [maxLen=null] - Row width.
   * @returns {number[][][]} Each row wrapped in a single-element array.
   */
  lengthToMask(lengths, maxLen = null) {
    const actualMaxLen = maxLen || Math.max(...lengths);
    return lengths.map((len) => {
      const row = new Array(actualMaxLen).fill(0.0);
      for (let j = 0; j < Math.min(len, actualMaxLen); j++) {
        row[j] = 1.0;
      }
      return [row];
    });
  }
}
360
+
361
/**
 * Loads one or more voice style JSON files and stacks them into a single
 * `Style` whose tensors have the batch size as first dimension.
 *
 * Every path is fetched exactly once and all requests run concurrently.
 * The trailing two dimensions are taken from the first file
 * (`style_ttl.dims[1..2]`, `style_dp.dims[1..2]`).
 *
 * @param {string[]} voiceStylePaths - URLs/paths passed to `fetch`.
 * @param {boolean} [verbose=false] - Log the number of loaded styles.
 * @returns {Promise<Style>} Style holding the stacked float32 tensors.
 * @throws {Error} When no path is given or a request answers with a
 *   non-OK HTTP status.
 */
export async function loadVoiceStyle(voiceStylePaths, verbose = false) {
  const bsz = voiceStylePaths.length;
  if (bsz === 0) {
    throw new Error('loadVoiceStyle requires at least one voice style path');
  }

  const voiceStyles = await Promise.all(
    Array.from(voiceStylePaths, async (path) => {
      const response = await fetch(path);
      if (!response.ok) {
        throw new Error(`Failed to load voice style ${path}: HTTP ${response.status}`);
      }
      return response.json();
    })
  );

  // The first file defines the per-style dimensions.
  const [, ttlDim1, ttlDim2] = voiceStyles[0].style_ttl.dims;
  const [, dpDim1, dpDim2] = voiceStyles[0].style_dp.dims;
  const ttlStride = ttlDim1 * ttlDim2;
  const dpStride = dpDim1 * dpDim2;

  // Pre-allocate arrays with full batch size
  const ttlFlat = new Float32Array(bsz * ttlStride);
  const dpFlat = new Float32Array(bsz * dpStride);

  // Copy each style's flattened data into its slot
  voiceStyles.forEach((voiceStyle, i) => {
    ttlFlat.set(voiceStyle.style_ttl.data.flat(Infinity), i * ttlStride);
    dpFlat.set(voiceStyle.style_dp.data.flat(Infinity), i * dpStride);
  });

  const ttlTensor = new ort.Tensor('float32', ttlFlat, [bsz, ttlDim1, ttlDim2]);
  const dpTensor = new ort.Tensor('float32', dpFlat, [bsz, dpDim1, dpDim2]);

  if (verbose) {
    console.log(`Loaded ${bsz} voice styles`);
  }

  return new Style(ttlTensor, dpTensor);
}
413
+
414
/**
 * Loads the TTS configuration from `${onnxDir}/tts.json`.
 *
 * @param {string} onnxDir - Base URL/directory of the model files.
 * @returns {Promise<object>} The parsed JSON document.
 * @throws {Error} When the request answers with a non-OK HTTP status, so a
 *   missing file is reported as such instead of as a JSON parse failure.
 */
export async function loadCfgs(onnxDir) {
  const url = `${onnxDir}/tts.json`;
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load TTS config ${url}: HTTP ${response.status}`);
  }
  return response.json();
}
422
+
423
/**
 * Loads `${onnxDir}/unicode_indexer.json` and wraps it in a
 * `UnicodeProcessor`.
 *
 * @param {string} onnxDir - Base URL/directory of the model files.
 * @returns {Promise<UnicodeProcessor>} Processor built from the parsed indexer.
 * @throws {Error} When the request answers with a non-OK HTTP status, so a
 *   missing file is reported as such instead of as a JSON parse failure.
 */
export async function loadTextProcessor(onnxDir) {
  const url = `${onnxDir}/unicode_indexer.json`;
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load unicode indexer ${url}: HTTP ${response.status}`);
  }
  const indexer = await response.json();
  return new UnicodeProcessor(indexer);
}
431
+
432
/**
 * Creates an inference session for one model via
 * `ort.InferenceSession.create`.
 *
 * @param {*} onnxPath - Forwarded unchanged as the first argument.
 * @param {*} options - Forwarded unchanged as the second argument.
 * @returns {Promise<*>} The created session.
 */
export async function loadOnnx(onnxPath, options) {
  return ort.InferenceSession.create(onnxPath, options);
}
439
+
440
/**
 * Loads everything needed for synthesis from `onnxDir`: the configuration,
 * the four ONNX models (one after another, in a fixed order) and the text
 * processor, then assembles a `TextToSpeech` instance.
 *
 * @param {string} onnxDir - Base URL/directory of the model files.
 * @param {object} [sessionOptions={}] - Passed to `loadOnnx` for every model.
 * @param {?Function} [progressCallback=null] - Called before each model load
 *   as `(modelName, index, total)` with a 1-based index.
 * @returns {Promise<{textToSpeech: TextToSpeech, cfgs: object}>}
 */
export async function loadTextToSpeech(onnxDir, sessionOptions = {}, progressCallback = null) {
  console.log('Using WebAssembly/WebGPU for inference');

  const cfgs = await loadCfgs(onnxDir);

  // Display name and file name of each model, in loading order.
  const models = [
    ['Duration Predictor', 'duration_predictor.onnx'],
    ['Text Encoder', 'text_encoder.onnx'],
    ['Vector Estimator', 'vector_estimator.onnx'],
    ['Vocoder', 'vocoder.onnx'],
  ];

  const sessions = [];
  for (const [position, [label, fileName]] of models.entries()) {
    if (progressCallback) {
      progressCallback(label, position + 1, models.length);
    }
    sessions.push(await loadOnnx(`${onnxDir}/${fileName}`, sessionOptions));
  }

  const textProcessor = await loadTextProcessor(onnxDir);
  const textToSpeech = new TextToSpeech(cfgs, textProcessor, ...sessions);

  return { textToSpeech, cfgs };
}
476
+
477
/**
 * Breaks a sentence that is longer than `maxLen` into pieces of at most
 * `maxLen` UTF-16 units: first at whitespace, and, for a single word that is
 * still too long (e.g. text written without spaces), at code point
 * boundaries. A sentence that already fits is returned unchanged.
 */
function splitLongSentence(sentence, maxLen) {
  // `!(maxLen >= 1)` also covers NaN; without a usable limit nothing is split.
  if (sentence.length <= maxLen || !(maxLen >= 1)) {
    return [sentence];
  }

  const pieces = [];
  let current = '';

  for (const word of sentence.split(/\s+/)) {
    if (!word) continue;

    if (current && current.length + 1 + word.length <= maxLen) {
      current += ` ${word}`;
      continue;
    }
    if (current) {
      pieces.push(current);
      current = '';
    }
    if (word.length <= maxLen) {
      current = word;
      continue;
    }

    // Hard cut; iterating the string keeps surrogate pairs together.
    for (const ch of word) {
      if (current && current.length + ch.length > maxLen) {
        pieces.push(current);
        current = '';
      }
      current += ch;
    }
  }

  if (current) {
    pieces.push(current);
  }
  return pieces;
}

/**
 * Chunk text into manageable segments.
 *
 * Paragraphs (separated by blank lines) are never merged. Inside a
 * paragraph, sentences are packed greedily into chunks of at most `maxLen`
 * characters, joined by a single space. A sentence that alone exceeds
 * `maxLen` is broken up by `splitLongSentence`, so no chunk is longer than
 * `maxLen` (as long as `maxLen` is at least the length of one character).
 *
 * @param {string} text - Text to split.
 * @param {number} [maxLen=300] - Maximum chunk length in UTF-16 units.
 * @returns {string[]} Chunks in reading order; empty for blank text.
 * @throws {Error} When `text` is not a string.
 */
function chunkText(text, maxLen = 300) {
  if (typeof text !== 'string') {
    throw new Error(`chunkText expects a string, got ${typeof text}`);
  }

  // Split by sentence boundaries (period, question mark, exclamation mark followed by space)
  // But exclude common abbreviations like Mr., Mrs., Dr., etc. and single capital letters like F.
  const sentenceBoundary = /(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+/;

  const chunks = [];

  // Split by paragraph (two or more newlines)
  for (const rawParagraph of text.trim().split(/\n\s*\n+/)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) continue;

    let currentChunk = '';

    for (const sentence of paragraph.split(sentenceBoundary)) {
      for (const piece of splitLongSentence(sentence, maxLen)) {
        if (currentChunk && currentChunk.length + piece.length + 1 <= maxLen) {
          currentChunk += ` ${piece}`;
        } else {
          if (currentChunk) {
            chunks.push(currentChunk.trim());
          }
          currentChunk = piece;
        }
      }
    }

    if (currentChunk) {
      chunks.push(currentChunk.trim());
    }
  }

  return chunks;
}
518
+
519
/**
 * Encodes samples as a mono, 16-bit PCM WAV file.
 *
 * Samples are clamped to [-1, 1], scaled by 32767 and floored. They are
 * written through the DataView with an explicit little-endian flag, because
 * WAV data is little-endian regardless of the platform's byte order.
 *
 * @param {ArrayLike<number>} audioData - Samples, nominally in [-1, 1].
 * @param {number} sampleRate - Sample rate written to the header.
 * @returns {ArrayBuffer} 44-byte header followed by the PCM data.
 */
export function writeWavFile(audioData, sampleRate) {
  const headerSize = 44;
  const numChannels = 1;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = audioData.length * bytesPerSample;

  const buffer = new ArrayBuffer(headerSize + dataSize);
  const view = new DataView(buffer);

  const writeString = (offset, string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  };

  // Write WAV header
  writeString(0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // file size minus the 8-byte RIFF preamble
  writeString(8, 'WAVE');
  writeString(12, 'fmt ');
  view.setUint32(16, 16, true); // size of the fmt chunk
  view.setUint16(20, 1, true); // PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);
  writeString(36, 'data');
  view.setUint32(40, dataSize, true);

  // Write audio data
  for (let i = 0; i < audioData.length; i++) {
    const clamped = Math.max(-1.0, Math.min(1.0, audioData[i]));
    view.setInt16(headerSize + i * bytesPerSample, Math.floor(clamped * 32767), true);
  }

  return buffer;
}