@contractspec/lib.voice 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/dist/audio/audio-concatenator.d.ts +15 -0
  2. package/dist/audio/audio-concatenator.js +57 -0
  3. package/dist/audio/duration-estimator.d.ts +31 -0
  4. package/dist/audio/duration-estimator.js +22 -0
  5. package/dist/audio/format-converter.d.ts +17 -0
  6. package/dist/audio/format-converter.js +28 -0
  7. package/dist/audio/index.d.ts +4 -0
  8. package/dist/audio/index.js +121 -0
  9. package/dist/audio/silence-generator.d.ts +16 -0
  10. package/dist/audio/silence-generator.js +20 -0
  11. package/dist/browser/audio/audio-concatenator.js +56 -0
  12. package/dist/browser/audio/duration-estimator.js +21 -0
  13. package/dist/browser/audio/format-converter.js +27 -0
  14. package/dist/browser/audio/index.js +120 -0
  15. package/dist/browser/audio/silence-generator.js +19 -0
  16. package/dist/browser/conversational/index.js +241 -0
  17. package/dist/browser/conversational/response-orchestrator.js +62 -0
  18. package/dist/browser/conversational/transcript-builder.js +63 -0
  19. package/dist/browser/conversational/turn-detector.js +43 -0
  20. package/dist/browser/conversational/types.js +0 -0
  21. package/dist/browser/conversational/voice-session-manager.js +137 -0
  22. package/dist/browser/docs/conversational.docblock.js +5 -0
  23. package/dist/browser/docs/stt.docblock.js +5 -0
  24. package/dist/browser/docs/sync.docblock.js +5 -0
  25. package/dist/browser/docs/tts.docblock.js +5 -0
  26. package/dist/browser/docs/voice.docblock.js +5 -0
  27. package/dist/browser/i18n/catalogs/en.js +91 -0
  28. package/dist/browser/i18n/catalogs/es.js +91 -0
  29. package/dist/browser/i18n/catalogs/fr.js +91 -0
  30. package/dist/browser/i18n/catalogs/index.js +271 -0
  31. package/dist/browser/i18n/index.js +335 -0
  32. package/dist/browser/i18n/keys.js +38 -0
  33. package/dist/browser/i18n/locale.js +13 -0
  34. package/dist/browser/i18n/messages.js +283 -0
  35. package/dist/browser/index.js +1070 -0
  36. package/dist/browser/stt/diarization-mapper.js +42 -0
  37. package/dist/browser/stt/index.js +222 -0
  38. package/dist/browser/stt/segment-splitter.js +36 -0
  39. package/dist/browser/stt/subtitle-formatter.js +51 -0
  40. package/dist/browser/stt/transcriber.js +219 -0
  41. package/dist/browser/stt/types.js +0 -0
  42. package/dist/browser/sync/duration-negotiator.js +69 -0
  43. package/dist/browser/sync/index.js +165 -0
  44. package/dist/browser/sync/scene-adapter.js +52 -0
  45. package/dist/browser/sync/timing-calculator.js +46 -0
  46. package/dist/browser/tts/audio-assembler.js +120 -0
  47. package/dist/browser/tts/emphasis-planner.js +134 -0
  48. package/dist/browser/tts/index.js +439 -0
  49. package/dist/browser/tts/pace-analyzer.js +67 -0
  50. package/dist/browser/tts/segment-synthesizer.js +36 -0
  51. package/dist/browser/tts/types.js +0 -0
  52. package/dist/browser/tts/voice-synthesizer.js +435 -0
  53. package/dist/browser/types.js +0 -0
  54. package/dist/conversational/index.d.ts +5 -0
  55. package/dist/conversational/index.js +242 -0
  56. package/dist/conversational/response-orchestrator.d.ts +26 -0
  57. package/dist/conversational/response-orchestrator.js +63 -0
  58. package/dist/conversational/transcript-builder.d.ts +25 -0
  59. package/dist/conversational/transcript-builder.js +64 -0
  60. package/dist/conversational/turn-detector.d.ts +31 -0
  61. package/dist/conversational/turn-detector.js +44 -0
  62. package/dist/conversational/types.d.ts +55 -0
  63. package/dist/conversational/types.js +1 -0
  64. package/dist/conversational/voice-session-manager.d.ts +17 -0
  65. package/dist/conversational/voice-session-manager.js +138 -0
  66. package/dist/docs/conversational.docblock.d.ts +14 -0
  67. package/dist/docs/conversational.docblock.js +6 -0
  68. package/dist/docs/stt.docblock.d.ts +12 -0
  69. package/dist/docs/stt.docblock.js +6 -0
  70. package/dist/docs/sync.docblock.d.ts +12 -0
  71. package/dist/docs/sync.docblock.js +6 -0
  72. package/dist/docs/tts.docblock.d.ts +12 -0
  73. package/dist/docs/tts.docblock.js +6 -0
  74. package/dist/docs/voice.docblock.d.ts +22 -0
  75. package/dist/docs/voice.docblock.js +6 -0
  76. package/dist/i18n/catalogs/en.d.ts +6 -0
  77. package/dist/i18n/catalogs/en.js +92 -0
  78. package/dist/i18n/catalogs/es.d.ts +4 -0
  79. package/dist/i18n/catalogs/es.js +92 -0
  80. package/dist/i18n/catalogs/fr.d.ts +4 -0
  81. package/dist/i18n/catalogs/fr.js +92 -0
  82. package/dist/i18n/catalogs/index.d.ts +3 -0
  83. package/dist/i18n/catalogs/index.js +272 -0
  84. package/dist/i18n/index.d.ts +20 -0
  85. package/dist/i18n/index.js +336 -0
  86. package/dist/i18n/keys.d.ts +50 -0
  87. package/dist/i18n/keys.js +39 -0
  88. package/dist/i18n/locale.d.ts +6 -0
  89. package/dist/i18n/locale.js +14 -0
  90. package/dist/i18n/messages.d.ts +13 -0
  91. package/dist/i18n/messages.js +284 -0
  92. package/dist/index.d.ts +6 -0
  93. package/dist/index.js +1071 -0
  94. package/dist/node/audio/audio-concatenator.js +56 -0
  95. package/dist/node/audio/duration-estimator.js +21 -0
  96. package/dist/node/audio/format-converter.js +27 -0
  97. package/dist/node/audio/index.js +120 -0
  98. package/dist/node/audio/silence-generator.js +19 -0
  99. package/dist/node/conversational/index.js +241 -0
  100. package/dist/node/conversational/response-orchestrator.js +62 -0
  101. package/dist/node/conversational/transcript-builder.js +63 -0
  102. package/dist/node/conversational/turn-detector.js +43 -0
  103. package/dist/node/conversational/types.js +0 -0
  104. package/dist/node/conversational/voice-session-manager.js +137 -0
  105. package/dist/node/docs/conversational.docblock.js +5 -0
  106. package/dist/node/docs/stt.docblock.js +5 -0
  107. package/dist/node/docs/sync.docblock.js +5 -0
  108. package/dist/node/docs/tts.docblock.js +5 -0
  109. package/dist/node/docs/voice.docblock.js +5 -0
  110. package/dist/node/i18n/catalogs/en.js +91 -0
  111. package/dist/node/i18n/catalogs/es.js +91 -0
  112. package/dist/node/i18n/catalogs/fr.js +91 -0
  113. package/dist/node/i18n/catalogs/index.js +271 -0
  114. package/dist/node/i18n/index.js +335 -0
  115. package/dist/node/i18n/keys.js +38 -0
  116. package/dist/node/i18n/locale.js +13 -0
  117. package/dist/node/i18n/messages.js +283 -0
  118. package/dist/node/index.js +1070 -0
  119. package/dist/node/stt/diarization-mapper.js +42 -0
  120. package/dist/node/stt/index.js +222 -0
  121. package/dist/node/stt/segment-splitter.js +36 -0
  122. package/dist/node/stt/subtitle-formatter.js +51 -0
  123. package/dist/node/stt/transcriber.js +219 -0
  124. package/dist/node/stt/types.js +0 -0
  125. package/dist/node/sync/duration-negotiator.js +69 -0
  126. package/dist/node/sync/index.js +165 -0
  127. package/dist/node/sync/scene-adapter.js +52 -0
  128. package/dist/node/sync/timing-calculator.js +46 -0
  129. package/dist/node/tts/audio-assembler.js +120 -0
  130. package/dist/node/tts/emphasis-planner.js +134 -0
  131. package/dist/node/tts/index.js +439 -0
  132. package/dist/node/tts/pace-analyzer.js +67 -0
  133. package/dist/node/tts/segment-synthesizer.js +36 -0
  134. package/dist/node/tts/types.js +0 -0
  135. package/dist/node/tts/voice-synthesizer.js +435 -0
  136. package/dist/node/types.js +0 -0
  137. package/dist/stt/diarization-mapper.d.ts +19 -0
  138. package/dist/stt/diarization-mapper.js +43 -0
  139. package/dist/stt/index.d.ts +5 -0
  140. package/dist/stt/index.js +223 -0
  141. package/dist/stt/segment-splitter.d.ts +19 -0
  142. package/dist/stt/segment-splitter.js +37 -0
  143. package/dist/stt/subtitle-formatter.d.ts +19 -0
  144. package/dist/stt/subtitle-formatter.js +52 -0
  145. package/dist/stt/transcriber.d.ts +21 -0
  146. package/dist/stt/transcriber.js +220 -0
  147. package/dist/stt/types.d.ts +44 -0
  148. package/dist/stt/types.js +1 -0
  149. package/dist/sync/duration-negotiator.d.ts +37 -0
  150. package/dist/sync/duration-negotiator.js +70 -0
  151. package/dist/sync/index.d.ts +3 -0
  152. package/dist/sync/index.js +166 -0
  153. package/dist/sync/scene-adapter.d.ts +29 -0
  154. package/dist/sync/scene-adapter.js +53 -0
  155. package/dist/sync/timing-calculator.d.ts +21 -0
  156. package/dist/sync/timing-calculator.js +47 -0
  157. package/dist/tts/audio-assembler.d.ts +19 -0
  158. package/dist/tts/audio-assembler.js +121 -0
  159. package/dist/tts/emphasis-planner.d.ts +24 -0
  160. package/dist/tts/emphasis-planner.js +135 -0
  161. package/dist/tts/index.d.ts +6 -0
  162. package/dist/tts/index.js +440 -0
  163. package/dist/tts/pace-analyzer.d.ts +30 -0
  164. package/dist/tts/pace-analyzer.js +68 -0
  165. package/dist/tts/segment-synthesizer.d.ts +21 -0
  166. package/dist/tts/segment-synthesizer.js +37 -0
  167. package/dist/tts/types.d.ts +76 -0
  168. package/dist/tts/types.js +1 -0
  169. package/dist/tts/voice-synthesizer.d.ts +28 -0
  170. package/dist/tts/voice-synthesizer.js +436 -0
  171. package/dist/types.d.ts +12 -0
  172. package/dist/types.js +1 -0
  173. package/package.json +760 -0
@@ -0,0 +1,241 @@
1
+ // src/conversational/transcript-builder.ts
2
/**
 * Accumulates conversation events into an ordered list of completed turns.
 *
 * A recorded turn has the shape `{ role, startMs, text, endMs }`. Both
 * timestamps are milliseconds elapsed since the most recent
 * `session_started` event (or since construction / `reset()` when no such
 * event has been processed).
 */
class TranscriptBuilder {
  /** Completed turns, in the order they finished. */
  turns = [];
  /** Turn that has started but not ended yet, or `null`. */
  currentTurn = null;
  /** `Date.now()` value that turn offsets are measured against. */
  sessionStartMs = Date.now();

  /**
   * @returns {object[]} A shallow copy of the completed turns. The array is
   *   new, but the turn objects are shared with the builder.
   */
  getTranscript() {
    return [...this.turns];
  }

  /**
   * Renders the transcript as one `[role] text` line per completed turn.
   * A turn recorded without a transcript renders with empty text instead of
   * the literal string "undefined".
   *
   * @returns {string}
   */
  toText() {
    return this.turns
      .map((turn) => `[${turn.role}] ${turn.text ?? ""}`)
      .join("\n");
  }

  /** @returns {number} Number of completed turns. */
  getTurnCount() {
    return this.turns.length;
  }

  /**
   * Updates the transcript from a single conversation event.
   *
   * @param {object} event Event with a `type` discriminator. Reads
   *   `event.transcript` for `user_speech_ended` and `event.text` for
   *   `agent_speech_started`. Unrecognised types are ignored.
   */
  processEvent(event) {
    switch (event.type) {
      case "session_started":
        this.sessionStartMs = Date.now();
        break;
      case "user_speech_started":
        this.currentTurn = {
          role: "user",
          startMs: Date.now() - this.sessionStartMs
        };
        break;
      case "user_speech_ended": {
        const endMs = Date.now() - this.sessionStartMs;
        const openUserTurn =
          this.currentTurn?.role === "user" ? this.currentTurn : null;
        // Some producers emit `user_speech_ended` without a matching start
        // event. Record a zero-length turn then, so the transcript is not lost.
        const turn = openUserTurn ?? { role: "user", startMs: endMs };
        turn.text = event.transcript;
        turn.endMs = endMs;
        this.turns.push(turn);
        // Only clear the open turn when it is the one we just closed; an
        // in-progress agent turn must stay open.
        if (openUserTurn) {
          this.currentTurn = null;
        }
        break;
      }
      case "agent_speech_started":
        this.currentTurn = {
          role: "agent",
          text: event.text,
          startMs: Date.now() - this.sessionStartMs
        };
        break;
      case "agent_speech_ended":
        // The agent's text only arrives with the start event, so an end
        // event without an open agent turn has nothing to record.
        if (this.currentTurn && this.currentTurn.role === "agent") {
          this.currentTurn.endMs = Date.now() - this.sessionStartMs;
          this.turns.push(this.currentTurn);
          this.currentTurn = null;
        }
        break;
      case "transcript":
        // No effect on the recorded turns.
        break;
      default:
        break;
    }
  }

  /** Drops all turns and restarts the session clock. */
  reset() {
    this.turns.length = 0;
    this.currentTurn = null;
    this.sessionStartMs = Date.now();
  }
}
61
+
62
+ // src/conversational/voice-session-manager.ts
63
/**
 * Opens conversational voice sessions through a provider and mirrors the
 * provider's event stream into a plain, mutable `state` object.
 */
class VoiceSessionManager {
  /** Conversational provider; `startSession(config)` is called on it. */
  provider;

  /**
   * @param {object} options
   * @param {object} options.conversational Provider used to open sessions.
   */
  constructor(options) {
    this.provider = options.conversational;
  }

  /**
   * Starts a provider session and returns a handle around it.
   *
   * `state` is only updated while the returned `events` iterable is being
   * consumed, because the bookkeeping runs inside that async generator.
   *
   * @param {object} config Session settings; the fields listed below are
   *   forwarded to the provider.
   * @returns {Promise<object>} `{ state, sendAudio, sendText, interrupt, close, events }`.
   * @throws {TypeError} When no usable provider was configured.
   */
  async startSession(config) {
    // Fail with an actionable message instead of an opaque
    // "cannot read properties of undefined" error.
    if (typeof this.provider?.startSession !== "function") {
      throw new TypeError(
        "VoiceSessionManager requires a conversational provider with a startSession() method"
      );
    }
    const transcriptBuilder = new TranscriptBuilder();
    const session = await this.provider.startSession({
      voiceId: config.voiceId,
      language: config.language,
      systemPrompt: config.systemPrompt,
      llmModel: config.llmModel,
      inputFormat: config.inputFormat,
      outputFormat: config.outputFormat,
      turnDetection: config.turnDetection,
      silenceThresholdMs: config.silenceThresholdMs,
      maxDurationSeconds: config.maxDurationSeconds
    });
    const state = {
      sessionId: "",
      status: "connecting",
      currentTurn: "idle",
      turnCount: 0,
      durationMs: 0,
      transcript: []
    };
    return {
      state,
      sendAudio: (chunk) => session.sendAudio(chunk),
      sendText: (text) => session.sendText(text),
      interrupt: () => session.interrupt(),
      close: async () => {
        const summary = await session.close();
        state.status = "ended";
        return summary;
      },
      events: this.wrapEvents(session.events, state, transcriptBuilder)
    };
  }

  /**
   * Re-yields every provider event after applying it to `state` and to the
   * transcript builder.
   *
   * @param {AsyncIterable<object>} events Provider event stream.
   * @param {object} state Session state object mutated in place.
   * @param {object} transcriptBuilder Receives every event via `processEvent`.
   */
  async* wrapEvents(events, state, transcriptBuilder) {
    for await (const event of events) {
      transcriptBuilder.processEvent(event);
      switch (event.type) {
        case "session_started":
          state.sessionId = event.sessionId;
          state.status = "active";
          break;
        case "user_speech_started":
          state.currentTurn = "user";
          break;
        case "user_speech_ended":
          state.currentTurn = "idle";
          state.turnCount += 1;
          break;
        case "agent_speech_started":
          state.currentTurn = "agent";
          break;
        case "agent_speech_ended":
          state.currentTurn = "idle";
          state.turnCount += 1;
          break;
        case "session_ended":
          state.status = "ended";
          state.durationMs = event.durationMs;
          break;
      }
      state.transcript = transcriptBuilder.getTranscript();
      yield event;
    }
    // The stream finished normally, so no further event can update the
    // state. Do not leave it stuck on "connecting"/"active" when the
    // provider never sent `session_ended`.
    state.status = "ended";
  }
}
135
+
136
+ // src/conversational/turn-detector.ts
137
/**
 * Silence-based end-of-turn detector.
 *
 * Each audio chunk is classified as speech or silence by comparing its RMS
 * energy against `energyThreshold`. A turn is reported as finished once
 * silence has lasted at least `silenceThresholdMs`.
 */
class TurnDetector {
  silenceThresholdMs;
  energyThreshold;
  /** Timestamp of the first chunk in the current silent run, or `null`. */
  silenceStartMs = null;

  /**
   * @param {number} [silenceThresholdMs=800] Silence length that ends a turn.
   * @param {number} [energyThreshold=0.01] RMS level above which a chunk counts as speech.
   */
  constructor(silenceThresholdMs = 800, energyThreshold = 0.01) {
    this.silenceThresholdMs = silenceThresholdMs;
    this.energyThreshold = energyThreshold;
  }

  /**
   * Feeds one chunk into the detector.
   *
   * @param {ArrayLike<number>} chunk Audio bytes, read as 16-bit samples.
   * @param {number} timestampMs Timestamp of this chunk in milliseconds.
   * @returns {boolean} `true` when the current silent run has reached the threshold.
   */
  processChunk(chunk, timestampMs) {
    const chunkEnergy = this.calculateEnergy(chunk);
    if (chunkEnergy > this.energyThreshold) {
      // Speech interrupts any silent run in progress.
      this.silenceStartMs = null;
      return false;
    }
    if (this.silenceStartMs === null) {
      this.silenceStartMs = timestampMs;
    }
    const elapsedSilenceMs = timestampMs - this.silenceStartMs;
    return elapsedSilenceMs >= this.silenceThresholdMs;
  }

  /** Forgets any silent run in progress. */
  reset() {
    this.silenceStartMs = null;
  }

  /**
   * Computes the root-mean-square level of a chunk, reading consecutive byte
   * pairs as little-endian signed 16-bit samples scaled by 1/32768.
   *
   * @param {ArrayLike<number>} chunk Audio bytes.
   * @returns {number} RMS level; `0` when the chunk holds fewer than two bytes.
   */
  calculateEnergy(chunk) {
    if (chunk.length < 2) {
      return 0;
    }
    const sampleCount = Math.floor(chunk.length / 2);
    let squareSum = 0;
    for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
      const offset = sampleIndex * 2;
      const lowByte = chunk[offset] ?? 0;
      const highByte = chunk[offset + 1] ?? 0;
      // Shift up and back down to sign-extend the 16-bit value.
      const signedSample = ((lowByte | (highByte << 8)) << 16) >> 16;
      const amplitude = signedSample / 32768;
      squareSum += amplitude * amplitude;
    }
    return Math.sqrt(squareSum / sampleCount);
  }
}
176
+
177
+ // src/conversational/response-orchestrator.ts
178
/**
 * Runs one user turn through a speech-to-text → LLM → text-to-speech
 * pipeline and reports progress as a stream of conversation events.
 *
 * The text of every user and assistant message is kept in
 * `conversationHistory` and replayed to the LLM on each turn.
 */
class ResponseOrchestrator {
  stt;
  llm;
  tts;
  /** `{ role, content }` entries, oldest first. */
  conversationHistory = [];

  /**
   * @param {object} stt Speech-to-text provider; `transcribe()` is called on it.
   * @param {object} llm LLM provider; `chat()` is called on it.
   * @param {object} tts Text-to-speech provider; `synthesize()` is called on it.
   */
  constructor(stt, llm, tts) {
    this.stt = stt;
    this.llm = llm;
    this.tts = tts;
  }

  /**
   * Transcribes the user's audio, asks the LLM for a reply and synthesizes it.
   *
   * Yields, in order: `user_speech_ended`, `transcript` (user),
   * `agent_speech_started`, `agent_audio`, `agent_speech_ended`,
   * `transcript` (agent).
   *
   * @param {*} userAudio Audio passed to the STT provider unchanged.
   * @param {object} config Reads `language`, `systemPrompt`, `llmModel`,
   *   `voiceId` and `outputFormat`.
   */
  async* processUserTurn(userAudio, config) {
    const transcription = await this.stt.transcribe({
      audio: userAudio,
      language: config.language,
      wordTimestamps: false
    });
    const userText = transcription.text;
    yield { type: "user_speech_ended", transcript: userText };
    yield {
      type: "transcript",
      role: "user",
      text: userText,
      timestamp: Date.now()
    };
    this.conversationHistory.push({ role: "user", content: userText });

    const toMessage = (role, text) => ({
      role,
      content: [{ type: "text", text }]
    });
    const messages = this.conversationHistory.map((entry) =>
      toMessage(entry.role, entry.content)
    );
    // Only send a system message when a prompt was actually configured;
    // otherwise the LLM would receive a part whose text is `undefined`.
    const systemPrompt = config.systemPrompt ?? "";
    if (systemPrompt !== "") {
      messages.unshift(toMessage("system", systemPrompt));
    }
    const llmResponse = await this.llm.chat(messages, { model: config.llmModel });

    // Use the first non-empty text part. A missing content array or an
    // empty reply falls back to the apology instead of throwing or
    // synthesizing silence.
    const parts = llmResponse?.message?.content;
    const textPart = Array.isArray(parts)
      ? parts.find(
          (part) =>
            part?.type === "text" &&
            typeof part.text === "string" &&
            part.text.trim() !== ""
        )
      : undefined;
    const agentText = textPart
      ? textPart.text
      : "I apologize, I could not generate a response.";
    this.conversationHistory.push({ role: "assistant", content: agentText });

    yield { type: "agent_speech_started", text: agentText };
    const synthesis = await this.tts.synthesize({
      text: agentText,
      voiceId: config.voiceId,
      language: config.language,
      format: config.outputFormat
    });
    yield { type: "agent_audio", audio: synthesis.audio.data };
    yield { type: "agent_speech_ended" };
    yield {
      type: "transcript",
      role: "agent",
      text: agentText,
      timestamp: Date.now()
    };
  }

  /** Clears the stored conversation history. */
  reset() {
    this.conversationHistory.length = 0;
  }
}
236
+ export {
237
+ VoiceSessionManager,
238
+ TurnDetector,
239
+ TranscriptBuilder,
240
+ ResponseOrchestrator
241
+ };
@@ -0,0 +1,62 @@
1
+ // src/conversational/response-orchestrator.ts
2
/**
 * Runs one user turn through a speech-to-text → LLM → text-to-speech
 * pipeline and reports progress as a stream of conversation events.
 *
 * The text of every user and assistant message is kept in
 * `conversationHistory` and replayed to the LLM on each turn.
 */
class ResponseOrchestrator {
  stt;
  llm;
  tts;
  /** `{ role, content }` entries, oldest first. */
  conversationHistory = [];

  /**
   * @param {object} stt Speech-to-text provider; `transcribe()` is called on it.
   * @param {object} llm LLM provider; `chat()` is called on it.
   * @param {object} tts Text-to-speech provider; `synthesize()` is called on it.
   */
  constructor(stt, llm, tts) {
    this.stt = stt;
    this.llm = llm;
    this.tts = tts;
  }

  /**
   * Transcribes the user's audio, asks the LLM for a reply and synthesizes it.
   *
   * Yields, in order: `user_speech_ended`, `transcript` (user),
   * `agent_speech_started`, `agent_audio`, `agent_speech_ended`,
   * `transcript` (agent).
   *
   * @param {*} userAudio Audio passed to the STT provider unchanged.
   * @param {object} config Reads `language`, `systemPrompt`, `llmModel`,
   *   `voiceId` and `outputFormat`.
   */
  async* processUserTurn(userAudio, config) {
    const transcription = await this.stt.transcribe({
      audio: userAudio,
      language: config.language,
      wordTimestamps: false
    });
    const userText = transcription.text;
    yield { type: "user_speech_ended", transcript: userText };
    yield {
      type: "transcript",
      role: "user",
      text: userText,
      timestamp: Date.now()
    };
    this.conversationHistory.push({ role: "user", content: userText });

    const toMessage = (role, text) => ({
      role,
      content: [{ type: "text", text }]
    });
    const messages = this.conversationHistory.map((entry) =>
      toMessage(entry.role, entry.content)
    );
    // Only send a system message when a prompt was actually configured;
    // otherwise the LLM would receive a part whose text is `undefined`.
    const systemPrompt = config.systemPrompt ?? "";
    if (systemPrompt !== "") {
      messages.unshift(toMessage("system", systemPrompt));
    }
    const llmResponse = await this.llm.chat(messages, { model: config.llmModel });

    // Use the first non-empty text part. A missing content array or an
    // empty reply falls back to the apology instead of throwing or
    // synthesizing silence.
    const parts = llmResponse?.message?.content;
    const textPart = Array.isArray(parts)
      ? parts.find(
          (part) =>
            part?.type === "text" &&
            typeof part.text === "string" &&
            part.text.trim() !== ""
        )
      : undefined;
    const agentText = textPart
      ? textPart.text
      : "I apologize, I could not generate a response.";
    this.conversationHistory.push({ role: "assistant", content: agentText });

    yield { type: "agent_speech_started", text: agentText };
    const synthesis = await this.tts.synthesize({
      text: agentText,
      voiceId: config.voiceId,
      language: config.language,
      format: config.outputFormat
    });
    yield { type: "agent_audio", audio: synthesis.audio.data };
    yield { type: "agent_speech_ended" };
    yield {
      type: "transcript",
      role: "agent",
      text: agentText,
      timestamp: Date.now()
    };
  }

  /** Clears the stored conversation history. */
  reset() {
    this.conversationHistory.length = 0;
  }
}
60
+ export {
61
+ ResponseOrchestrator
62
+ };
@@ -0,0 +1,63 @@
1
+ // src/conversational/transcript-builder.ts
2
/**
 * Accumulates conversation events into an ordered list of completed turns.
 *
 * A recorded turn has the shape `{ role, startMs, text, endMs }`. Both
 * timestamps are milliseconds elapsed since the most recent
 * `session_started` event (or since construction / `reset()` when no such
 * event has been processed).
 */
class TranscriptBuilder {
  /** Completed turns, in the order they finished. */
  turns = [];
  /** Turn that has started but not ended yet, or `null`. */
  currentTurn = null;
  /** `Date.now()` value that turn offsets are measured against. */
  sessionStartMs = Date.now();

  /**
   * @returns {object[]} A shallow copy of the completed turns. The array is
   *   new, but the turn objects are shared with the builder.
   */
  getTranscript() {
    return [...this.turns];
  }

  /**
   * Renders the transcript as one `[role] text` line per completed turn.
   * A turn recorded without a transcript renders with empty text instead of
   * the literal string "undefined".
   *
   * @returns {string}
   */
  toText() {
    return this.turns
      .map((turn) => `[${turn.role}] ${turn.text ?? ""}`)
      .join("\n");
  }

  /** @returns {number} Number of completed turns. */
  getTurnCount() {
    return this.turns.length;
  }

  /**
   * Updates the transcript from a single conversation event.
   *
   * @param {object} event Event with a `type` discriminator. Reads
   *   `event.transcript` for `user_speech_ended` and `event.text` for
   *   `agent_speech_started`. Unrecognised types are ignored.
   */
  processEvent(event) {
    switch (event.type) {
      case "session_started":
        this.sessionStartMs = Date.now();
        break;
      case "user_speech_started":
        this.currentTurn = {
          role: "user",
          startMs: Date.now() - this.sessionStartMs
        };
        break;
      case "user_speech_ended": {
        const endMs = Date.now() - this.sessionStartMs;
        const openUserTurn =
          this.currentTurn?.role === "user" ? this.currentTurn : null;
        // Some producers emit `user_speech_ended` without a matching start
        // event. Record a zero-length turn then, so the transcript is not lost.
        const turn = openUserTurn ?? { role: "user", startMs: endMs };
        turn.text = event.transcript;
        turn.endMs = endMs;
        this.turns.push(turn);
        // Only clear the open turn when it is the one we just closed; an
        // in-progress agent turn must stay open.
        if (openUserTurn) {
          this.currentTurn = null;
        }
        break;
      }
      case "agent_speech_started":
        this.currentTurn = {
          role: "agent",
          text: event.text,
          startMs: Date.now() - this.sessionStartMs
        };
        break;
      case "agent_speech_ended":
        // The agent's text only arrives with the start event, so an end
        // event without an open agent turn has nothing to record.
        if (this.currentTurn && this.currentTurn.role === "agent") {
          this.currentTurn.endMs = Date.now() - this.sessionStartMs;
          this.turns.push(this.currentTurn);
          this.currentTurn = null;
        }
        break;
      case "transcript":
        // No effect on the recorded turns.
        break;
      default:
        break;
    }
  }

  /** Drops all turns and restarts the session clock. */
  reset() {
    this.turns.length = 0;
    this.currentTurn = null;
    this.sessionStartMs = Date.now();
  }
}
61
+ export {
62
+ TranscriptBuilder
63
+ };
@@ -0,0 +1,43 @@
1
+ // src/conversational/turn-detector.ts
2
/**
 * Silence-based end-of-turn detector.
 *
 * Each audio chunk is classified as speech or silence by comparing its RMS
 * energy against `energyThreshold`. A turn is reported as finished once
 * silence has lasted at least `silenceThresholdMs`.
 */
class TurnDetector {
  silenceThresholdMs;
  energyThreshold;
  /** Timestamp of the first chunk in the current silent run, or `null`. */
  silenceStartMs = null;

  /**
   * @param {number} [silenceThresholdMs=800] Silence length that ends a turn.
   * @param {number} [energyThreshold=0.01] RMS level above which a chunk counts as speech.
   */
  constructor(silenceThresholdMs = 800, energyThreshold = 0.01) {
    this.silenceThresholdMs = silenceThresholdMs;
    this.energyThreshold = energyThreshold;
  }

  /**
   * Feeds one chunk into the detector.
   *
   * @param {ArrayLike<number>} chunk Audio bytes, read as 16-bit samples.
   * @param {number} timestampMs Timestamp of this chunk in milliseconds.
   * @returns {boolean} `true` when the current silent run has reached the threshold.
   */
  processChunk(chunk, timestampMs) {
    const chunkEnergy = this.calculateEnergy(chunk);
    if (chunkEnergy > this.energyThreshold) {
      // Speech interrupts any silent run in progress.
      this.silenceStartMs = null;
      return false;
    }
    if (this.silenceStartMs === null) {
      this.silenceStartMs = timestampMs;
    }
    const elapsedSilenceMs = timestampMs - this.silenceStartMs;
    return elapsedSilenceMs >= this.silenceThresholdMs;
  }

  /** Forgets any silent run in progress. */
  reset() {
    this.silenceStartMs = null;
  }

  /**
   * Computes the root-mean-square level of a chunk, reading consecutive byte
   * pairs as little-endian signed 16-bit samples scaled by 1/32768.
   *
   * @param {ArrayLike<number>} chunk Audio bytes.
   * @returns {number} RMS level; `0` when the chunk holds fewer than two bytes.
   */
  calculateEnergy(chunk) {
    if (chunk.length < 2) {
      return 0;
    }
    const sampleCount = Math.floor(chunk.length / 2);
    let squareSum = 0;
    for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
      const offset = sampleIndex * 2;
      const lowByte = chunk[offset] ?? 0;
      const highByte = chunk[offset + 1] ?? 0;
      // Shift up and back down to sign-extend the 16-bit value.
      const signedSample = ((lowByte | (highByte << 8)) << 16) >> 16;
      const amplitude = signedSample / 32768;
      squareSum += amplitude * amplitude;
    }
    return Math.sqrt(squareSum / sampleCount);
  }
}
41
+ export {
42
+ TurnDetector
43
+ };
File without changes
@@ -0,0 +1,137 @@
1
+ // src/conversational/transcript-builder.ts
2
/**
 * Accumulates conversation events into an ordered list of completed turns.
 *
 * A recorded turn has the shape `{ role, startMs, text, endMs }`. Both
 * timestamps are milliseconds elapsed since the most recent
 * `session_started` event (or since construction / `reset()` when no such
 * event has been processed).
 */
class TranscriptBuilder {
  /** Completed turns, in the order they finished. */
  turns = [];
  /** Turn that has started but not ended yet, or `null`. */
  currentTurn = null;
  /** `Date.now()` value that turn offsets are measured against. */
  sessionStartMs = Date.now();

  /**
   * @returns {object[]} A shallow copy of the completed turns. The array is
   *   new, but the turn objects are shared with the builder.
   */
  getTranscript() {
    return [...this.turns];
  }

  /**
   * Renders the transcript as one `[role] text` line per completed turn.
   * A turn recorded without a transcript renders with empty text instead of
   * the literal string "undefined".
   *
   * @returns {string}
   */
  toText() {
    return this.turns
      .map((turn) => `[${turn.role}] ${turn.text ?? ""}`)
      .join("\n");
  }

  /** @returns {number} Number of completed turns. */
  getTurnCount() {
    return this.turns.length;
  }

  /**
   * Updates the transcript from a single conversation event.
   *
   * @param {object} event Event with a `type` discriminator. Reads
   *   `event.transcript` for `user_speech_ended` and `event.text` for
   *   `agent_speech_started`. Unrecognised types are ignored.
   */
  processEvent(event) {
    switch (event.type) {
      case "session_started":
        this.sessionStartMs = Date.now();
        break;
      case "user_speech_started":
        this.currentTurn = {
          role: "user",
          startMs: Date.now() - this.sessionStartMs
        };
        break;
      case "user_speech_ended": {
        const endMs = Date.now() - this.sessionStartMs;
        const openUserTurn =
          this.currentTurn?.role === "user" ? this.currentTurn : null;
        // Some producers emit `user_speech_ended` without a matching start
        // event. Record a zero-length turn then, so the transcript is not lost.
        const turn = openUserTurn ?? { role: "user", startMs: endMs };
        turn.text = event.transcript;
        turn.endMs = endMs;
        this.turns.push(turn);
        // Only clear the open turn when it is the one we just closed; an
        // in-progress agent turn must stay open.
        if (openUserTurn) {
          this.currentTurn = null;
        }
        break;
      }
      case "agent_speech_started":
        this.currentTurn = {
          role: "agent",
          text: event.text,
          startMs: Date.now() - this.sessionStartMs
        };
        break;
      case "agent_speech_ended":
        // The agent's text only arrives with the start event, so an end
        // event without an open agent turn has nothing to record.
        if (this.currentTurn && this.currentTurn.role === "agent") {
          this.currentTurn.endMs = Date.now() - this.sessionStartMs;
          this.turns.push(this.currentTurn);
          this.currentTurn = null;
        }
        break;
      case "transcript":
        // No effect on the recorded turns.
        break;
      default:
        break;
    }
  }

  /** Drops all turns and restarts the session clock. */
  reset() {
    this.turns.length = 0;
    this.currentTurn = null;
    this.sessionStartMs = Date.now();
  }
}
61
+
62
+ // src/conversational/voice-session-manager.ts
63
/**
 * Opens conversational voice sessions through a provider and mirrors the
 * provider's event stream into a plain, mutable `state` object.
 */
class VoiceSessionManager {
  /** Conversational provider; `startSession(config)` is called on it. */
  provider;

  /**
   * @param {object} options
   * @param {object} options.conversational Provider used to open sessions.
   */
  constructor(options) {
    this.provider = options.conversational;
  }

  /**
   * Starts a provider session and returns a handle around it.
   *
   * `state` is only updated while the returned `events` iterable is being
   * consumed, because the bookkeeping runs inside that async generator.
   *
   * @param {object} config Session settings; the fields listed below are
   *   forwarded to the provider.
   * @returns {Promise<object>} `{ state, sendAudio, sendText, interrupt, close, events }`.
   * @throws {TypeError} When no usable provider was configured.
   */
  async startSession(config) {
    // Fail with an actionable message instead of an opaque
    // "cannot read properties of undefined" error.
    if (typeof this.provider?.startSession !== "function") {
      throw new TypeError(
        "VoiceSessionManager requires a conversational provider with a startSession() method"
      );
    }
    const transcriptBuilder = new TranscriptBuilder();
    const session = await this.provider.startSession({
      voiceId: config.voiceId,
      language: config.language,
      systemPrompt: config.systemPrompt,
      llmModel: config.llmModel,
      inputFormat: config.inputFormat,
      outputFormat: config.outputFormat,
      turnDetection: config.turnDetection,
      silenceThresholdMs: config.silenceThresholdMs,
      maxDurationSeconds: config.maxDurationSeconds
    });
    const state = {
      sessionId: "",
      status: "connecting",
      currentTurn: "idle",
      turnCount: 0,
      durationMs: 0,
      transcript: []
    };
    return {
      state,
      sendAudio: (chunk) => session.sendAudio(chunk),
      sendText: (text) => session.sendText(text),
      interrupt: () => session.interrupt(),
      close: async () => {
        const summary = await session.close();
        state.status = "ended";
        return summary;
      },
      events: this.wrapEvents(session.events, state, transcriptBuilder)
    };
  }

  /**
   * Re-yields every provider event after applying it to `state` and to the
   * transcript builder.
   *
   * @param {AsyncIterable<object>} events Provider event stream.
   * @param {object} state Session state object mutated in place.
   * @param {object} transcriptBuilder Receives every event via `processEvent`.
   */
  async* wrapEvents(events, state, transcriptBuilder) {
    for await (const event of events) {
      transcriptBuilder.processEvent(event);
      switch (event.type) {
        case "session_started":
          state.sessionId = event.sessionId;
          state.status = "active";
          break;
        case "user_speech_started":
          state.currentTurn = "user";
          break;
        case "user_speech_ended":
          state.currentTurn = "idle";
          state.turnCount += 1;
          break;
        case "agent_speech_started":
          state.currentTurn = "agent";
          break;
        case "agent_speech_ended":
          state.currentTurn = "idle";
          state.turnCount += 1;
          break;
        case "session_ended":
          state.status = "ended";
          state.durationMs = event.durationMs;
          break;
      }
      state.transcript = transcriptBuilder.getTranscript();
      yield event;
    }
    // The stream finished normally, so no further event can update the
    // state. Do not leave it stuck on "connecting"/"active" when the
    // provider never sent `session_ended`.
    state.status = "ended";
  }
}
135
+ export {
136
+ VoiceSessionManager
137
+ };
@@ -0,0 +1,5 @@
1
+ // src/docs/conversational.docblock.ts
2
/** Flag exported by the bundled conversational docblock module; this build output only carries the value `true`. */
const conversationalDocblock = true;
3
+ export {
4
+ conversationalDocblock
5
+ };
@@ -0,0 +1,5 @@
1
+ // src/docs/stt.docblock.ts
2
/** Flag exported by the bundled STT docblock module; this build output only carries the value `true`. */
const sttDocblock = true;
3
+ export {
4
+ sttDocblock
5
+ };
@@ -0,0 +1,5 @@
1
+ // src/docs/sync.docblock.ts
2
/** Flag exported by the bundled sync docblock module; this build output only carries the value `true`. */
const syncDocblock = true;
3
+ export {
4
+ syncDocblock
5
+ };
@@ -0,0 +1,5 @@
1
+ // src/docs/tts.docblock.ts
2
/** Flag exported by the bundled TTS docblock module; this build output only carries the value `true`. */
const ttsDocblock = true;
3
+ export {
4
+ ttsDocblock
5
+ };
@@ -0,0 +1,5 @@
1
+ // src/docs/voice.docblock.ts
2
/** Flag exported by the bundled voice docblock module; this build output only carries the value `true`. */
const voiceDocblock = true;
3
+ export {
4
+ voiceDocblock
5
+ };
@@ -0,0 +1,91 @@
1
+ // src/i18n/catalogs/en.ts
2
+ import { defineTranslation } from "@contractspec/lib.contracts-spec/translations";
3
/**
 * English ("en") message catalog for the voice package, registered under the
 * key `voice.messages`.
 *
 * Keys are grouped by prefix: `prompt.*` (LLM prompts), `pace.*` (pacing
 * descriptions), `stt.*` (speech-to-text strings) and `conv.*`
 * (conversational session strings). Values containing `{name}` tokens list
 * those tokens under `placeholders`.
 */
const enMessages = defineTranslation({
  meta: {
    key: "voice.messages",
    version: "1.0.0",
    domain: "voice",
    description: "All user-facing, LLM-facing, and developer-facing strings for the voice package",
    owners: ["platform"],
    stability: "experimental"
  },
  locale: "en",
  fallback: "en",
  messages: {
    "prompt.tts.system": {
      // Multi-line prompt: the line breaks are part of the value.
      value: `You are a voice narration script writer.
Analyze the content and produce a narration script with pacing directives.
Return JSON with segments, each having sceneId, text, and contentType.`,
      description: "TTS system prompt for LLM-enhanced script generation"
    },
    "prompt.pace.sceneMatched": {
      value: "Match voice pacing to scene durations. Adjust rate and emphasis per segment to fit the video timeline.",
      description: "Prompt for scene-matched pacing strategy"
    },
    "prompt.emphasis.system": {
      value: "You are a voice director. For each segment, determine the optimal emphasis, tone, and speaking rate.",
      description: "Emphasis planner LLM system prompt"
    },
    "pace.intro.description": {
      value: "Authoritative opening at a measured pace",
      description: "Description for intro pacing"
    },
    "pace.problem.description": {
      value: "Urgent emphasis on the challenge",
      description: "Description for problem pacing"
    },
    "pace.solution.description": {
      value: "Calm, clear delivery of the solution",
      description: "Description for solution pacing"
    },
    "pace.metric.description": {
      value: "Excited emphasis on key results",
      description: "Description for metric pacing"
    },
    "pace.cta.description": {
      value: "Authoritative call to action",
      description: "Description for CTA pacing"
    },
    "pace.transition.description": {
      value: "Quick neutral transition",
      description: "Description for transition pacing"
    },
    "stt.transcribing": {
      value: "Transcribing audio...",
      description: "Status message during transcription"
    },
    "stt.diarization.speaker": {
      value: "Speaker {index}",
      description: "Default speaker label",
      placeholders: [{ name: "index", type: "number" }]
    },
    "stt.subtitle.timestamp": {
      value: "{start} --> {end}",
      description: "Subtitle timestamp format",
      placeholders: [
        { name: "start", type: "string" },
        { name: "end", type: "string" }
      ]
    },
    "conv.session.started": {
      value: "Voice session started",
      description: "Session start notification"
    },
    "conv.turn.user": {
      value: "User is speaking",
      description: "User turn indicator"
    },
    "conv.turn.agent": {
      value: "Agent is responding",
      description: "Agent turn indicator"
    },
    "conv.session.ended": {
      value: "Voice session ended. Duration: {durationMs}ms",
      description: "Session end notification",
      placeholders: [{ name: "durationMs", type: "number" }]
    }
  }
});
89
+ export {
90
+ enMessages
91
+ };