@contractspec/lib.voice 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/dist/audio/audio-concatenator.d.ts +15 -0
  2. package/dist/audio/audio-concatenator.js +57 -0
  3. package/dist/audio/duration-estimator.d.ts +31 -0
  4. package/dist/audio/duration-estimator.js +22 -0
  5. package/dist/audio/format-converter.d.ts +17 -0
  6. package/dist/audio/format-converter.js +28 -0
  7. package/dist/audio/index.d.ts +4 -0
  8. package/dist/audio/index.js +121 -0
  9. package/dist/audio/silence-generator.d.ts +16 -0
  10. package/dist/audio/silence-generator.js +20 -0
  11. package/dist/browser/audio/audio-concatenator.js +56 -0
  12. package/dist/browser/audio/duration-estimator.js +21 -0
  13. package/dist/browser/audio/format-converter.js +27 -0
  14. package/dist/browser/audio/index.js +120 -0
  15. package/dist/browser/audio/silence-generator.js +19 -0
  16. package/dist/browser/conversational/index.js +241 -0
  17. package/dist/browser/conversational/response-orchestrator.js +62 -0
  18. package/dist/browser/conversational/transcript-builder.js +63 -0
  19. package/dist/browser/conversational/turn-detector.js +43 -0
  20. package/dist/browser/conversational/types.js +0 -0
  21. package/dist/browser/conversational/voice-session-manager.js +137 -0
  22. package/dist/browser/docs/conversational.docblock.js +5 -0
  23. package/dist/browser/docs/stt.docblock.js +5 -0
  24. package/dist/browser/docs/sync.docblock.js +5 -0
  25. package/dist/browser/docs/tts.docblock.js +5 -0
  26. package/dist/browser/docs/voice.docblock.js +5 -0
  27. package/dist/browser/i18n/catalogs/en.js +91 -0
  28. package/dist/browser/i18n/catalogs/es.js +91 -0
  29. package/dist/browser/i18n/catalogs/fr.js +91 -0
  30. package/dist/browser/i18n/catalogs/index.js +271 -0
  31. package/dist/browser/i18n/index.js +335 -0
  32. package/dist/browser/i18n/keys.js +38 -0
  33. package/dist/browser/i18n/locale.js +13 -0
  34. package/dist/browser/i18n/messages.js +283 -0
  35. package/dist/browser/index.js +1070 -0
  36. package/dist/browser/stt/diarization-mapper.js +42 -0
  37. package/dist/browser/stt/index.js +222 -0
  38. package/dist/browser/stt/segment-splitter.js +36 -0
  39. package/dist/browser/stt/subtitle-formatter.js +51 -0
  40. package/dist/browser/stt/transcriber.js +219 -0
  41. package/dist/browser/stt/types.js +0 -0
  42. package/dist/browser/sync/duration-negotiator.js +69 -0
  43. package/dist/browser/sync/index.js +165 -0
  44. package/dist/browser/sync/scene-adapter.js +52 -0
  45. package/dist/browser/sync/timing-calculator.js +46 -0
  46. package/dist/browser/tts/audio-assembler.js +120 -0
  47. package/dist/browser/tts/emphasis-planner.js +134 -0
  48. package/dist/browser/tts/index.js +439 -0
  49. package/dist/browser/tts/pace-analyzer.js +67 -0
  50. package/dist/browser/tts/segment-synthesizer.js +36 -0
  51. package/dist/browser/tts/types.js +0 -0
  52. package/dist/browser/tts/voice-synthesizer.js +435 -0
  53. package/dist/browser/types.js +0 -0
  54. package/dist/conversational/index.d.ts +5 -0
  55. package/dist/conversational/index.js +242 -0
  56. package/dist/conversational/response-orchestrator.d.ts +26 -0
  57. package/dist/conversational/response-orchestrator.js +63 -0
  58. package/dist/conversational/transcript-builder.d.ts +25 -0
  59. package/dist/conversational/transcript-builder.js +64 -0
  60. package/dist/conversational/turn-detector.d.ts +31 -0
  61. package/dist/conversational/turn-detector.js +44 -0
  62. package/dist/conversational/types.d.ts +55 -0
  63. package/dist/conversational/types.js +1 -0
  64. package/dist/conversational/voice-session-manager.d.ts +17 -0
  65. package/dist/conversational/voice-session-manager.js +138 -0
  66. package/dist/docs/conversational.docblock.d.ts +14 -0
  67. package/dist/docs/conversational.docblock.js +6 -0
  68. package/dist/docs/stt.docblock.d.ts +12 -0
  69. package/dist/docs/stt.docblock.js +6 -0
  70. package/dist/docs/sync.docblock.d.ts +12 -0
  71. package/dist/docs/sync.docblock.js +6 -0
  72. package/dist/docs/tts.docblock.d.ts +12 -0
  73. package/dist/docs/tts.docblock.js +6 -0
  74. package/dist/docs/voice.docblock.d.ts +22 -0
  75. package/dist/docs/voice.docblock.js +6 -0
  76. package/dist/i18n/catalogs/en.d.ts +6 -0
  77. package/dist/i18n/catalogs/en.js +92 -0
  78. package/dist/i18n/catalogs/es.d.ts +4 -0
  79. package/dist/i18n/catalogs/es.js +92 -0
  80. package/dist/i18n/catalogs/fr.d.ts +4 -0
  81. package/dist/i18n/catalogs/fr.js +92 -0
  82. package/dist/i18n/catalogs/index.d.ts +3 -0
  83. package/dist/i18n/catalogs/index.js +272 -0
  84. package/dist/i18n/index.d.ts +20 -0
  85. package/dist/i18n/index.js +336 -0
  86. package/dist/i18n/keys.d.ts +50 -0
  87. package/dist/i18n/keys.js +39 -0
  88. package/dist/i18n/locale.d.ts +6 -0
  89. package/dist/i18n/locale.js +14 -0
  90. package/dist/i18n/messages.d.ts +13 -0
  91. package/dist/i18n/messages.js +284 -0
  92. package/dist/index.d.ts +6 -0
  93. package/dist/index.js +1071 -0
  94. package/dist/node/audio/audio-concatenator.js +56 -0
  95. package/dist/node/audio/duration-estimator.js +21 -0
  96. package/dist/node/audio/format-converter.js +27 -0
  97. package/dist/node/audio/index.js +120 -0
  98. package/dist/node/audio/silence-generator.js +19 -0
  99. package/dist/node/conversational/index.js +241 -0
  100. package/dist/node/conversational/response-orchestrator.js +62 -0
  101. package/dist/node/conversational/transcript-builder.js +63 -0
  102. package/dist/node/conversational/turn-detector.js +43 -0
  103. package/dist/node/conversational/types.js +0 -0
  104. package/dist/node/conversational/voice-session-manager.js +137 -0
  105. package/dist/node/docs/conversational.docblock.js +5 -0
  106. package/dist/node/docs/stt.docblock.js +5 -0
  107. package/dist/node/docs/sync.docblock.js +5 -0
  108. package/dist/node/docs/tts.docblock.js +5 -0
  109. package/dist/node/docs/voice.docblock.js +5 -0
  110. package/dist/node/i18n/catalogs/en.js +91 -0
  111. package/dist/node/i18n/catalogs/es.js +91 -0
  112. package/dist/node/i18n/catalogs/fr.js +91 -0
  113. package/dist/node/i18n/catalogs/index.js +271 -0
  114. package/dist/node/i18n/index.js +335 -0
  115. package/dist/node/i18n/keys.js +38 -0
  116. package/dist/node/i18n/locale.js +13 -0
  117. package/dist/node/i18n/messages.js +283 -0
  118. package/dist/node/index.js +1070 -0
  119. package/dist/node/stt/diarization-mapper.js +42 -0
  120. package/dist/node/stt/index.js +222 -0
  121. package/dist/node/stt/segment-splitter.js +36 -0
  122. package/dist/node/stt/subtitle-formatter.js +51 -0
  123. package/dist/node/stt/transcriber.js +219 -0
  124. package/dist/node/stt/types.js +0 -0
  125. package/dist/node/sync/duration-negotiator.js +69 -0
  126. package/dist/node/sync/index.js +165 -0
  127. package/dist/node/sync/scene-adapter.js +52 -0
  128. package/dist/node/sync/timing-calculator.js +46 -0
  129. package/dist/node/tts/audio-assembler.js +120 -0
  130. package/dist/node/tts/emphasis-planner.js +134 -0
  131. package/dist/node/tts/index.js +439 -0
  132. package/dist/node/tts/pace-analyzer.js +67 -0
  133. package/dist/node/tts/segment-synthesizer.js +36 -0
  134. package/dist/node/tts/types.js +0 -0
  135. package/dist/node/tts/voice-synthesizer.js +435 -0
  136. package/dist/node/types.js +0 -0
  137. package/dist/stt/diarization-mapper.d.ts +19 -0
  138. package/dist/stt/diarization-mapper.js +43 -0
  139. package/dist/stt/index.d.ts +5 -0
  140. package/dist/stt/index.js +223 -0
  141. package/dist/stt/segment-splitter.d.ts +19 -0
  142. package/dist/stt/segment-splitter.js +37 -0
  143. package/dist/stt/subtitle-formatter.d.ts +19 -0
  144. package/dist/stt/subtitle-formatter.js +52 -0
  145. package/dist/stt/transcriber.d.ts +21 -0
  146. package/dist/stt/transcriber.js +220 -0
  147. package/dist/stt/types.d.ts +44 -0
  148. package/dist/stt/types.js +1 -0
  149. package/dist/sync/duration-negotiator.d.ts +37 -0
  150. package/dist/sync/duration-negotiator.js +70 -0
  151. package/dist/sync/index.d.ts +3 -0
  152. package/dist/sync/index.js +166 -0
  153. package/dist/sync/scene-adapter.d.ts +29 -0
  154. package/dist/sync/scene-adapter.js +53 -0
  155. package/dist/sync/timing-calculator.d.ts +21 -0
  156. package/dist/sync/timing-calculator.js +47 -0
  157. package/dist/tts/audio-assembler.d.ts +19 -0
  158. package/dist/tts/audio-assembler.js +121 -0
  159. package/dist/tts/emphasis-planner.d.ts +24 -0
  160. package/dist/tts/emphasis-planner.js +135 -0
  161. package/dist/tts/index.d.ts +6 -0
  162. package/dist/tts/index.js +440 -0
  163. package/dist/tts/pace-analyzer.d.ts +30 -0
  164. package/dist/tts/pace-analyzer.js +68 -0
  165. package/dist/tts/segment-synthesizer.d.ts +21 -0
  166. package/dist/tts/segment-synthesizer.js +37 -0
  167. package/dist/tts/types.d.ts +76 -0
  168. package/dist/tts/types.js +1 -0
  169. package/dist/tts/voice-synthesizer.d.ts +28 -0
  170. package/dist/tts/voice-synthesizer.js +436 -0
  171. package/dist/types.d.ts +12 -0
  172. package/dist/types.js +1 -0
  173. package/package.json +760 -0
@@ -0,0 +1,165 @@
1
+ // src/audio/duration-estimator.ts
2
/**
 * Converts between narration text and speaking time using a
 * words-per-minute speaking rate.
 */
class DurationEstimator {
  /** Speaking rate (words per minute) used when no usable rate is supplied. */
  static DEFAULT_WPM = 150;

  /**
   * Internal: chooses the speaking rate for an estimate.
   * Anything that is not a positive, finite number falls back to DEFAULT_WPM,
   * so the estimators never divide by zero or return NaN/Infinity.
   */
  static resolveWpm(wordsPerMinute) {
    const usable =
      typeof wordsPerMinute === "number" &&
      Number.isFinite(wordsPerMinute) &&
      wordsPerMinute > 0;
    return usable ? wordsPerMinute : DurationEstimator.DEFAULT_WPM;
  }

  /**
   * Internal: counts whitespace-separated words.
   * Non-string input (e.g. undefined) counts as zero words instead of throwing.
   */
  static countWords(text) {
    if (typeof text !== "string") {
      return 0;
    }
    return text.split(/\s+/).filter(Boolean).length;
  }

  /**
   * Estimated speaking time in whole seconds, rounded up.
   * @param text Narration text.
   * @param wordsPerMinute Optional speaking rate; invalid values use DEFAULT_WPM.
   * @returns Seconds needed to speak the text (0 for empty text).
   */
  estimateSeconds(text, wordsPerMinute) {
    const wpm = DurationEstimator.resolveWpm(wordsPerMinute);
    // Multiply before dividing so whole-number inputs are not nudged past an
    // integer boundary by rounding error before Math.ceil is applied.
    return Math.ceil(DurationEstimator.countWords(text) * 60 / wpm);
  }

  /**
   * Estimated speaking time in whole milliseconds, rounded up.
   * @param text Narration text.
   * @param wordsPerMinute Optional speaking rate; invalid values use DEFAULT_WPM.
   * @returns Milliseconds needed to speak the text (0 for empty text).
   */
  estimateMs(text, wordsPerMinute) {
    const wpm = DurationEstimator.resolveWpm(wordsPerMinute);
    return Math.ceil(DurationEstimator.countWords(text) * 60 * 1000 / wpm);
  }

  /**
   * Inverse estimate: how many words fit in a given speaking time.
   * @param durationSeconds Available speaking time in seconds.
   * @param wordsPerMinute Optional speaking rate; invalid values use DEFAULT_WPM.
   * @returns Word count rounded to the nearest integer.
   */
  estimateWordCount(durationSeconds, wordsPerMinute) {
    const wpm = DurationEstimator.resolveWpm(wordsPerMinute);
    return Math.round(durationSeconds * wpm / 60);
  }
}
19
+
20
+ // src/sync/timing-calculator.ts
21
/**
 * Turns per-scene voice durations (milliseconds) into frame counts for a
 * given frame rate.
 */
class TimingCalculator {
  /**
   * Internal: converts milliseconds to a whole number of frames, rounding up.
   * The product is taken before the division so whole-number inputs are not
   * pushed over an integer boundary by rounding error (dividing first turns
   * 70 ms at 100 fps into 8 frames instead of 7).
   */
  static msToFrames(durationMs, fps) {
    return Math.ceil(durationMs * fps / 1000);
  }

  /**
   * Builds a timing map for the given segments.
   * @param segments Items carrying `sceneId`, `durationMs` and optionally `wordTimings`.
   * @param fps Frames per second used for the conversion.
   * @param breathingRoomFactor Multiplier applied to the voice frame count to
   *   get the recommended scene length (default 1.15).
   * @returns `{ totalDurationMs, segments, fps }`, where each segment has
   *   `durationInFrames` and `recommendedSceneDurationInFrames`.
   */
  calculate(segments, fps, breathingRoomFactor = 1.15) {
    const timingSegments = segments.map((seg) => {
      const durationInFrames = TimingCalculator.msToFrames(seg.durationMs, fps);
      const recommendedSceneDurationInFrames = Math.ceil(durationInFrames * breathingRoomFactor);
      // Copy only the three timing fields; anything else on a word timing is dropped.
      const wordTimings = seg.wordTimings?.map((wt) => ({
        word: wt.word,
        startMs: wt.startMs,
        endMs: wt.endMs
      }));
      return {
        sceneId: seg.sceneId,
        durationMs: seg.durationMs,
        durationInFrames,
        recommendedSceneDurationInFrames,
        wordTimings
      };
    });
    const totalDurationMs = segments.reduce((sum, s) => sum + s.durationMs, 0);
    return {
      totalDurationMs,
      segments: timingSegments,
      fps
    };
  }

  /**
   * Re-derives frame counts for a different frame rate, keeping each segment's
   * recommended-to-voice frame proportion.
   * @param timingMap A map shaped like the result of `calculate`.
   * @param newFps Target frames per second.
   * @returns A new timing map; the input is not mutated.
   */
  recalculateForFps(timingMap, newFps) {
    const segments = timingMap.segments.map((seg) => {
      const durationInFrames = TimingCalculator.msToFrames(seg.durationMs, newFps);
      // Guard against a zero previous frame count.
      const previousFrames = Math.max(seg.durationInFrames, 1);
      return {
        ...seg,
        durationInFrames,
        // Scale with a single division at the end: building the ratio first can
        // land just above an integer (22/20 * 50 -> 55.00000000000001 -> 56).
        recommendedSceneDurationInFrames: Math.ceil(
          durationInFrames * seg.recommendedSceneDurationInFrames / previousFrames
        )
      };
    });
    return {
      ...timingMap,
      segments,
      fps: newFps
    };
  }
}
63
+
64
+ // src/sync/scene-adapter.ts
65
/**
 * Derives a narration script from a scene plan: one segment per scene that
 * has non-blank narration text.
 */
class SceneAdapter {
  durationEstimator = new DurationEstimator();

  /**
   * @param scenePlan Object with a `scenes` array; each scene is read for `id`
   *   and `narrationText`.
   * @returns `{ fullText, segments, estimatedDurationSeconds }`, where
   *   `fullText` is the segment texts joined by single spaces and the total is
   *   the sum of the per-segment estimates.
   */
  adapt(scenePlan) {
    // Scenes whose narration is missing or whitespace-only are left out.
    const narrated = scenePlan.scenes.filter(
      (scene) => Boolean(scene.narrationText) && scene.narrationText.trim().length > 0
    );
    const segments = [];
    const spokenParts = [];
    let estimatedDurationSeconds = 0;
    narrated.forEach((scene, position) => {
      const text = scene.narrationText ?? "";
      const seconds = this.durationEstimator.estimateSeconds(text);
      segments.push({
        sceneId: scene.id,
        text,
        estimatedDurationSeconds: seconds,
        contentType: this.inferContentType(position, narrated.length)
      });
      spokenParts.push(text);
      estimatedDurationSeconds += seconds;
    });
    return { fullText: spokenParts.join(" "), segments, estimatedDurationSeconds };
  }

  /**
   * Assigns a content type purely from the segment's position.
   * First is "intro" and last is "cta"; with more than three segments the
   * second is "problem" and the second-to-last is "metric"; all others are
   * "solution".
   */
  inferContentType(index, total) {
    const lastIndex = total - 1;
    if (index === 0) {
      return "intro";
    }
    if (index === lastIndex) {
      return "cta";
    }
    const hasRoomForExtras = total > 3;
    if (hasRoomForExtras && index === 1) {
      return "problem";
    }
    if (hasRoomForExtras && index === lastIndex - 1) {
      return "metric";
    }
    return "solution";
  }
}
94
+
95
+ // src/sync/duration-negotiator.ts
96
/**
 * Compares each segment's voice length with the scene length the caller
 * planned and records what to do about the difference.
 */
class DurationNegotiator {
  /** Voice/scene ratio above which the voice counts as too long. */
  static UPPER_THRESHOLD = 1.1;
  /** Voice/scene ratio below which the voice counts as too short. */
  static LOWER_THRESHOLD = 0.7;
  /** Highest rate ever suggested. */
  static MAX_RATE = 1.3;
  /** Lowest rate ever suggested. */
  static MIN_RATE = 0.8;

  /**
   * @param timingMap Object with a `segments` array; each segment is read for
   *   `sceneId`, `durationInFrames` and `recommendedSceneDurationInFrames`.
   * @param sceneDurations Lookup with a `get(sceneId)` method returning the
   *   planned scene length in frames, or `undefined` when there is none.
   * @returns `{ timingMap, adjustments }` with one adjustment per segment.
   *   Only "pad_silence" segments are replaced (their recommended length
   *   becomes the planned length); all others are passed through as-is.
   */
  negotiate(timingMap, sceneDurations) {
    const { UPPER_THRESHOLD, LOWER_THRESHOLD, MAX_RATE, MIN_RATE } = DurationNegotiator;
    const adjustments = [];
    const updatedSegments = timingMap.segments.map((segment) => {
      const plannedFrames = sceneDurations.get(segment.sceneId);
      const voiceFrames = segment.durationInFrames;
      const recommendedFrames = segment.recommendedSceneDurationInFrames;
      const hasPlan = plannedFrames !== undefined;

      let action = "no_change";
      let rate;
      let hasRate = false;
      let padToPlan = false;
      if (hasPlan) {
        const voiceToScene = voiceFrames / plannedFrames;
        if (voiceToScene > UPPER_THRESHOLD) {
          // Too long: suggest speeding up, or extend the scene once the
          // required rate exceeds the cap.
          rate = Math.min(voiceToScene, MAX_RATE);
          hasRate = true;
          action = voiceToScene > MAX_RATE ? "extend_scene" : "suggest_rate_change";
        } else if (voiceToScene < LOWER_THRESHOLD) {
          // Too short: keep the planned scene length and fill with silence.
          rate = Math.max(voiceToScene, MIN_RATE);
          hasRate = true;
          action = "pad_silence";
          padToPlan = true;
        }
      }

      adjustments.push({
        sceneId: segment.sceneId,
        originalSceneDurationInFrames: hasPlan ? plannedFrames : recommendedFrames,
        voiceDurationInFrames: voiceFrames,
        action,
        ...(hasRate ? { suggestedRate: rate } : {}),
        finalSceneDurationInFrames: padToPlan ? plannedFrames : recommendedFrames
      });
      return padToPlan
        ? { ...segment, recommendedSceneDurationInFrames: plannedFrames }
        : segment;
    });
    return {
      timingMap: { ...timingMap, segments: updatedSegments },
      adjustments
    };
  }
}
161
+ export {
162
+ TimingCalculator,
163
+ SceneAdapter,
164
+ DurationNegotiator
165
+ };
@@ -0,0 +1,52 @@
1
+ // src/audio/duration-estimator.ts
2
/**
 * Converts between narration text and speaking time using a
 * words-per-minute speaking rate.
 */
class DurationEstimator {
  /** Speaking rate (words per minute) used when no usable rate is supplied. */
  static DEFAULT_WPM = 150;

  /**
   * Internal: chooses the speaking rate for an estimate.
   * Anything that is not a positive, finite number falls back to DEFAULT_WPM,
   * so the estimators never divide by zero or return NaN/Infinity.
   */
  static resolveWpm(wordsPerMinute) {
    const usable =
      typeof wordsPerMinute === "number" &&
      Number.isFinite(wordsPerMinute) &&
      wordsPerMinute > 0;
    return usable ? wordsPerMinute : DurationEstimator.DEFAULT_WPM;
  }

  /**
   * Internal: counts whitespace-separated words.
   * Non-string input (e.g. undefined) counts as zero words instead of throwing.
   */
  static countWords(text) {
    if (typeof text !== "string") {
      return 0;
    }
    return text.split(/\s+/).filter(Boolean).length;
  }

  /**
   * Estimated speaking time in whole seconds, rounded up.
   * @param text Narration text.
   * @param wordsPerMinute Optional speaking rate; invalid values use DEFAULT_WPM.
   * @returns Seconds needed to speak the text (0 for empty text).
   */
  estimateSeconds(text, wordsPerMinute) {
    const wpm = DurationEstimator.resolveWpm(wordsPerMinute);
    // Multiply before dividing so whole-number inputs are not nudged past an
    // integer boundary by rounding error before Math.ceil is applied.
    return Math.ceil(DurationEstimator.countWords(text) * 60 / wpm);
  }

  /**
   * Estimated speaking time in whole milliseconds, rounded up.
   * @param text Narration text.
   * @param wordsPerMinute Optional speaking rate; invalid values use DEFAULT_WPM.
   * @returns Milliseconds needed to speak the text (0 for empty text).
   */
  estimateMs(text, wordsPerMinute) {
    const wpm = DurationEstimator.resolveWpm(wordsPerMinute);
    return Math.ceil(DurationEstimator.countWords(text) * 60 * 1000 / wpm);
  }

  /**
   * Inverse estimate: how many words fit in a given speaking time.
   * @param durationSeconds Available speaking time in seconds.
   * @param wordsPerMinute Optional speaking rate; invalid values use DEFAULT_WPM.
   * @returns Word count rounded to the nearest integer.
   */
  estimateWordCount(durationSeconds, wordsPerMinute) {
    const wpm = DurationEstimator.resolveWpm(wordsPerMinute);
    return Math.round(durationSeconds * wpm / 60);
  }
}
19
+
20
+ // src/sync/scene-adapter.ts
21
/**
 * Derives a narration script from a scene plan: one segment per scene that
 * has non-blank narration text.
 */
class SceneAdapter {
  durationEstimator = new DurationEstimator();

  /**
   * @param scenePlan Object with a `scenes` array; each scene is read for `id`
   *   and `narrationText`.
   * @returns `{ fullText, segments, estimatedDurationSeconds }`, where
   *   `fullText` is the segment texts joined by single spaces and the total is
   *   the sum of the per-segment estimates.
   */
  adapt(scenePlan) {
    // Scenes whose narration is missing or whitespace-only are left out.
    const narrated = scenePlan.scenes.filter(
      (scene) => Boolean(scene.narrationText) && scene.narrationText.trim().length > 0
    );
    const segments = [];
    const spokenParts = [];
    let estimatedDurationSeconds = 0;
    narrated.forEach((scene, position) => {
      const text = scene.narrationText ?? "";
      const seconds = this.durationEstimator.estimateSeconds(text);
      segments.push({
        sceneId: scene.id,
        text,
        estimatedDurationSeconds: seconds,
        contentType: this.inferContentType(position, narrated.length)
      });
      spokenParts.push(text);
      estimatedDurationSeconds += seconds;
    });
    return { fullText: spokenParts.join(" "), segments, estimatedDurationSeconds };
  }

  /**
   * Assigns a content type purely from the segment's position.
   * First is "intro" and last is "cta"; with more than three segments the
   * second is "problem" and the second-to-last is "metric"; all others are
   * "solution".
   */
  inferContentType(index, total) {
    const lastIndex = total - 1;
    if (index === 0) {
      return "intro";
    }
    if (index === lastIndex) {
      return "cta";
    }
    const hasRoomForExtras = total > 3;
    if (hasRoomForExtras && index === 1) {
      return "problem";
    }
    if (hasRoomForExtras && index === lastIndex - 1) {
      return "metric";
    }
    return "solution";
  }
}
50
+ export {
51
+ SceneAdapter
52
+ };
@@ -0,0 +1,46 @@
1
+ // src/sync/timing-calculator.ts
2
/**
 * Turns per-scene voice durations (milliseconds) into frame counts for a
 * given frame rate.
 */
class TimingCalculator {
  /**
   * Internal: converts milliseconds to a whole number of frames, rounding up.
   * The product is taken before the division so whole-number inputs are not
   * pushed over an integer boundary by rounding error (dividing first turns
   * 70 ms at 100 fps into 8 frames instead of 7).
   */
  static msToFrames(durationMs, fps) {
    return Math.ceil(durationMs * fps / 1000);
  }

  /**
   * Builds a timing map for the given segments.
   * @param segments Items carrying `sceneId`, `durationMs` and optionally `wordTimings`.
   * @param fps Frames per second used for the conversion.
   * @param breathingRoomFactor Multiplier applied to the voice frame count to
   *   get the recommended scene length (default 1.15).
   * @returns `{ totalDurationMs, segments, fps }`, where each segment has
   *   `durationInFrames` and `recommendedSceneDurationInFrames`.
   */
  calculate(segments, fps, breathingRoomFactor = 1.15) {
    const timingSegments = segments.map((seg) => {
      const durationInFrames = TimingCalculator.msToFrames(seg.durationMs, fps);
      const recommendedSceneDurationInFrames = Math.ceil(durationInFrames * breathingRoomFactor);
      // Copy only the three timing fields; anything else on a word timing is dropped.
      const wordTimings = seg.wordTimings?.map((wt) => ({
        word: wt.word,
        startMs: wt.startMs,
        endMs: wt.endMs
      }));
      return {
        sceneId: seg.sceneId,
        durationMs: seg.durationMs,
        durationInFrames,
        recommendedSceneDurationInFrames,
        wordTimings
      };
    });
    const totalDurationMs = segments.reduce((sum, s) => sum + s.durationMs, 0);
    return {
      totalDurationMs,
      segments: timingSegments,
      fps
    };
  }

  /**
   * Re-derives frame counts for a different frame rate, keeping each segment's
   * recommended-to-voice frame proportion.
   * @param timingMap A map shaped like the result of `calculate`.
   * @param newFps Target frames per second.
   * @returns A new timing map; the input is not mutated.
   */
  recalculateForFps(timingMap, newFps) {
    const segments = timingMap.segments.map((seg) => {
      const durationInFrames = TimingCalculator.msToFrames(seg.durationMs, newFps);
      // Guard against a zero previous frame count.
      const previousFrames = Math.max(seg.durationInFrames, 1);
      return {
        ...seg,
        durationInFrames,
        // Scale with a single division at the end: building the ratio first can
        // land just above an integer (22/20 * 50 -> 55.00000000000001 -> 56).
        recommendedSceneDurationInFrames: Math.ceil(
          durationInFrames * seg.recommendedSceneDurationInFrames / previousFrames
        )
      };
    });
    return {
      ...timingMap,
      segments,
      fps: newFps
    };
  }
}
44
+ export {
45
+ TimingCalculator
46
+ };
@@ -0,0 +1,120 @@
1
+ // src/audio/audio-concatenator.ts
2
/**
 * Joins audio segments end to end by appending their byte buffers.
 */
class AudioConcatenator {
  /**
   * @param segments Audio objects read for `data` (byte array), `format`,
   *   `sampleRateHz`, and optionally `durationMs` and `channels`.
   * @returns One audio object. An empty list (or a missing first entry) yields
   *   empty mono 44.1 kHz "wav" audio; a single segment yields a shallow copy
   *   of it.
   * @throws Error when a segment's format, sample rate or channel count
   *   differs from the first segment's.
   */
  concatenate(segments) {
    const [firstSegment] = segments;
    if (!firstSegment) {
      return {
        data: new Uint8Array(0),
        format: "wav",
        sampleRateHz: 44100,
        durationMs: 0,
        channels: 1
      };
    }
    if (segments.length === 1) {
      return { ...firstSegment };
    }
    const referenceFormat = firstSegment.format;
    const referenceSampleRate = firstSegment.sampleRateHz;
    // A missing channel count is treated as mono throughout.
    const referenceChannels = firstSegment.channels ?? 1;
    for (const seg of segments) {
      if (seg.format !== referenceFormat) {
        throw new Error(`Format mismatch: expected ${referenceFormat}, got ${seg.format}`);
      }
      if (seg.sampleRateHz !== referenceSampleRate) {
        throw new Error(`Sample rate mismatch: expected ${referenceSampleRate}, got ${seg.sampleRateHz}`);
      }
      // The result is labelled with the first segment's channel count, so a
      // differing segment would be misinterpreted when played back.
      const segChannels = seg.channels ?? 1;
      if (segChannels !== referenceChannels) {
        throw new Error(`Channel count mismatch: expected ${referenceChannels}, got ${segChannels}`);
      }
    }
    // NOTE(review): bytes are appended verbatim; this assumes `data` carries no
    // per-segment container header — confirm against the audio producers.
    const totalBytes = segments.reduce((sum, s) => sum + s.data.length, 0);
    const combined = new Uint8Array(totalBytes);
    let offset = 0;
    for (const seg of segments) {
      combined.set(seg.data, offset);
      offset += seg.data.length;
    }
    const totalDurationMs = segments.reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
    return {
      data: combined,
      format: referenceFormat,
      sampleRateHz: referenceSampleRate,
      durationMs: totalDurationMs,
      channels: referenceChannels
    };
  }
}
54
+
55
+ // src/audio/silence-generator.ts
56
/**
 * Produces zero-filled audio buffers to use as pauses.
 */
class SilenceGenerator {
  /**
   * @param durationMs Requested pause length in milliseconds. Negative or
   *   non-finite values are treated as 0, so the call returns empty audio
   *   instead of throwing from the typed-array constructor or reporting NaN.
   * @param format Format label copied onto the result (default "wav").
   * @param sampleRateHz Samples per second (default 44100).
   * @param channels Channel count (default 1).
   * @returns Audio object whose `data` is all zero bytes, sized at two bytes
   *   per sample per channel.
   */
  generate(durationMs, format = "wav", sampleRateHz = 44100, channels = 1) {
    const effectiveDurationMs = Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
    const totalSamples = Math.ceil(sampleRateHz * effectiveDurationMs / 1000);
    // NOTE(review): two bytes per sample assumes 16-bit samples, and zero bytes
    // only represent silence for uncompressed data — confirm for other formats.
    const bytesPerSample = 2;
    const data = new Uint8Array(totalSamples * bytesPerSample * channels);
    return {
      data,
      format,
      sampleRateHz,
      durationMs: effectiveDurationMs,
      channels
    };
  }
}
71
+
72
+ // src/tts/audio-assembler.ts
73
/**
 * Stitches synthesized scene audio into one track, inserting silence before
 * and after segments according to per-scene pacing directives.
 */
class AudioAssembler {
  concatenator = new AudioConcatenator();
  silenceGenerator = new SilenceGenerator();

  /**
   * @param segments Items read for `sceneId` and `audio`.
   * @param directives Optional pacing directives read for `sceneId`,
   *   `leadingSilenceMs` and `trailingSilenceMs`. May be omitted or null, in
   *   which case only the default pause applies.
   * @param defaultPauseMs Pause inserted after every segment except the last
   *   when its directive gives no trailing silence (default 500).
   * @returns The concatenated audio, or empty mono 44.1 kHz "wav" audio when
   *   there are no segments or the first entry is missing.
   */
  assemble(segments, directives = [], defaultPauseMs = 500) {
    const [firstSegment] = segments;
    if (!firstSegment) {
      return {
        data: new Uint8Array(0),
        format: "wav",
        sampleRateHz: 44100,
        durationMs: 0,
        channels: 1
      };
    }
    const directiveMap = new Map((directives ?? []).map((d) => [d.sceneId, d]));
    // All generated silence copies the first segment's audio parameters.
    const reference = firstSegment.audio;
    const makeSilence = (ms) =>
      this.silenceGenerator.generate(ms, reference.format, reference.sampleRateHz, reference.channels ?? 1);
    const parts = [];
    for (let i = 0; i < segments.length; i++) {
      const segment = segments[i];
      if (!segment) {
        continue;
      }
      const directive = directiveMap.get(segment.sceneId);
      const leadingSilenceMs = directive?.leadingSilenceMs ?? 0;
      if (leadingSilenceMs > 0) {
        parts.push(makeSilence(leadingSilenceMs));
      }
      parts.push(segment.audio);
      // An explicit directive value wins (including 0); otherwise pause
      // between segments but not after the final one.
      const trailingSilenceMs = directive?.trailingSilenceMs ?? (i < segments.length - 1 ? defaultPauseMs : 0);
      if (trailingSilenceMs > 0) {
        parts.push(makeSilence(trailingSilenceMs));
      }
    }
    return this.concatenator.concatenate(parts);
  }
}
118
+ export {
119
+ AudioAssembler
120
+ };
@@ -0,0 +1,134 @@
1
+ // src/tts/pace-analyzer.ts
2
/**
 * Default pacing per content type: speaking-rate multiplier, emphasis level,
 * tone, and the silence (ms) placed before and after the segment.
 */
var CONTENT_TYPE_PACING = {
  intro: {
    rate: 0.95,
    emphasis: "normal",
    tone: "authoritative",
    leadingSilenceMs: 0,
    trailingSilenceMs: 500
  },
  problem: {
    rate: 0.9,
    emphasis: "strong",
    tone: "urgent",
    leadingSilenceMs: 300,
    trailingSilenceMs: 500
  },
  solution: {
    rate: 1,
    emphasis: "normal",
    tone: "calm",
    leadingSilenceMs: 300,
    trailingSilenceMs: 500
  },
  metric: {
    rate: 0.85,
    emphasis: "strong",
    tone: "excited",
    leadingSilenceMs: 300,
    trailingSilenceMs: 600
  },
  cta: {
    rate: 0.9,
    emphasis: "strong",
    tone: "authoritative",
    leadingSilenceMs: 400,
    trailingSilenceMs: 0
  },
  transition: {
    rate: 1.1,
    emphasis: "reduced",
    tone: "neutral",
    leadingSilenceMs: 200,
    trailingSilenceMs: 300
  }
};

/**
 * Rule-based pacing: maps each segment's content type to its default
 * directive from CONTENT_TYPE_PACING.
 */
class PaceAnalyzer {
  /**
   * Internal: looks up a table entry by own property only, so names inherited
   * from Object.prototype (e.g. "constructor") are not mistaken for content
   * types.
   * @returns The table entry, or undefined when the type is not in the table.
   */
  static lookup(contentType) {
    return Object.prototype.hasOwnProperty.call(CONTENT_TYPE_PACING, contentType)
      ? CONTENT_TYPE_PACING[contentType]
      : undefined;
  }

  /**
   * @param segments Items read for `sceneId` and `contentType`.
   * @param baseRate Multiplier applied to each default rate (default 1).
   * @returns One directive per segment, in input order.
   * @throws TypeError naming the offending scene when a content type is not
   *   in the table.
   */
  analyze(segments, baseRate = 1) {
    return segments.map((segment) => {
      const defaults = PaceAnalyzer.lookup(segment.contentType);
      if (!defaults) {
        throw new TypeError(
          `Unknown content type "${String(segment.contentType)}" for scene "${String(segment.sceneId)}"`
        );
      }
      return {
        sceneId: segment.sceneId,
        rate: defaults.rate * baseRate,
        emphasis: defaults.emphasis,
        tone: defaults.tone,
        leadingSilenceMs: defaults.leadingSilenceMs,
        trailingSilenceMs: defaults.trailingSilenceMs
      };
    });
  }

  /**
   * @param contentType Key into the pacing table.
   * @returns A fresh copy of the defaults for that type, so callers can mutate
   *   it freely. An unknown type yields an empty object.
   */
  getDefaults(contentType) {
    return { ...PaceAnalyzer.lookup(contentType) };
  }
}
65
+
66
+ // src/tts/emphasis-planner.ts
67
/**
 * Plans pacing directives for narration segments, using an LLM when one is
 * configured and the rule-based PaceAnalyzer otherwise.
 */
class EmphasisPlanner {
  /** Emphasis values the prompt allows the model to return. */
  static ALLOWED_EMPHASIS = ["reduced", "normal", "strong"];
  /** Tone values the prompt allows the model to return. */
  static ALLOWED_TONES = ["neutral", "urgent", "excited", "calm", "authoritative"];
  /** Rate range the prompt asks the model to stay within. */
  static MIN_PLANNED_RATE = 0.7;
  static MAX_PLANNED_RATE = 1.3;

  llm;
  model;
  paceAnalyzer;

  /**
   * @param options Optional `{ llm, model }`. Without `llm`, planning is purely
   *   rule-based.
   */
  constructor(options) {
    this.llm = options?.llm;
    this.model = options?.model;
    this.paceAnalyzer = new PaceAnalyzer();
  }

  /**
   * @param segments Items read for `sceneId`, `text` and `contentType`.
   * @param baseRate Multiplier applied to every planned rate (default 1).
   * @returns Promise of pacing directives.
   */
  async plan(segments, baseRate = 1) {
    if (!this.llm) {
      return this.paceAnalyzer.analyze(segments, baseRate);
    }
    try {
      return await this.planWithLlm(segments, baseRate);
    } catch {
      // Deliberate best effort: any LLM or parsing failure falls back to the
      // rule-based plan.
      return this.paceAnalyzer.analyze(segments, baseRate);
    }
  }

  /**
   * Asks the model for directives and validates its answer.
   *
   * The result has exactly one directive per input segment, in input order.
   * Entries for unknown scene ids are ignored. Omitted segments, and individual
   * fields that are missing or out of range, are filled from the rule-based
   * defaults. Rates are clamped to the range given in the prompt before
   * `baseRate` is applied.
   * @throws Whatever `llm.chat` or `JSON.parse` throws; `plan` catches these.
   */
  async planWithLlm(segments, baseRate) {
    if (!this.llm) {
      return this.paceAnalyzer.analyze(segments, baseRate);
    }
    const response = await this.llm.chat([
      {
        role: "system",
        content: [
          {
            type: "text",
            text: [
              "You are a voice director planning emphasis and pacing for TTS narration.",
              "For each segment, return a JSON array of directives.",
              "Each directive has: sceneId, rate (0.7-1.3), emphasis (reduced|normal|strong),",
              "tone (neutral|urgent|excited|calm|authoritative), leadingSilenceMs, trailingSilenceMs.",
              "Return ONLY a JSON array, no other text."
            ].join("\n")
          }
        ]
      },
      {
        role: "user",
        content: [
          {
            type: "text",
            text: JSON.stringify(segments.map((s) => ({
              sceneId: s.sceneId,
              text: s.text,
              contentType: s.contentType
            })))
          }
        ]
      }
    ], { model: this.model, temperature: 0.3, responseFormat: "json" });
    const text = response.message.content.find((p) => p.type === "text");
    if (!text || text.type !== "text") {
      return this.paceAnalyzer.analyze(segments, baseRate);
    }
    const parsed = JSON.parse(text.text);
    if (!Array.isArray(parsed)) {
      // Valid JSON but not the requested array: nothing usable to merge.
      return this.paceAnalyzer.analyze(segments, baseRate);
    }
    const plannedByScene = EmphasisPlanner.indexBySceneId(parsed);
    return segments.map((segment) => {
      // Rule-based defaults are computed lazily, only for segments that need them.
      let defaults;
      const getDefaults = () => {
        if (defaults === undefined) {
          [defaults] = this.paceAnalyzer.analyze([segment], baseRate);
        }
        return defaults;
      };
      const planned = plannedByScene.get(String(segment.sceneId));
      if (!planned) {
        return getDefaults();
      }
      return EmphasisPlanner.mergeDirective(planned, segment.sceneId, baseRate, getDefaults);
    });
  }

  /**
   * Internal: indexes the model's entries by stringified scene id, skipping
   * anything that is not an object with a scene id. The first entry for an id
   * wins.
   */
  static indexBySceneId(parsed) {
    const byScene = new Map();
    for (const entry of parsed) {
      if (entry === null || typeof entry !== "object" || entry.sceneId == null) {
        continue;
      }
      const key = String(entry.sceneId);
      if (!byScene.has(key)) {
        byScene.set(key, entry);
      }
    }
    return byScene;
  }

  /**
   * Internal: builds a directive from a model entry, keeping each field only
   * if it is valid and otherwise taking it from `getDefaults()`. Defaults
   * already include `baseRate`, so it is applied here only to model rates.
   */
  static mergeDirective(planned, sceneId, baseRate, getDefaults) {
    const isNumber = (value) => typeof value === "number" && Number.isFinite(value);
    const isDuration = (value) => isNumber(value) && value >= 0;
    const clampRate = (value) =>
      Math.min(Math.max(value, EmphasisPlanner.MIN_PLANNED_RATE), EmphasisPlanner.MAX_PLANNED_RATE);
    return {
      sceneId,
      rate: isNumber(planned.rate) ? clampRate(planned.rate) * baseRate : getDefaults().rate,
      emphasis: EmphasisPlanner.ALLOWED_EMPHASIS.includes(planned.emphasis)
        ? planned.emphasis
        : getDefaults().emphasis,
      tone: EmphasisPlanner.ALLOWED_TONES.includes(planned.tone) ? planned.tone : getDefaults().tone,
      leadingSilenceMs: isDuration(planned.leadingSilenceMs)
        ? planned.leadingSilenceMs
        : getDefaults().leadingSilenceMs,
      trailingSilenceMs: isDuration(planned.trailingSilenceMs)
        ? planned.trailingSilenceMs
        : getDefaults().trailingSilenceMs
    };
  }
}
132
+ export {
133
+ EmphasisPlanner
134
+ };