@pie-players/pie-section-player 0.3.17 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,442 @@
1
/** Local alias for `Object.defineProperty`. */
const k = Object.defineProperty;

/**
 * Store `value` under `key` on `target`.
 *
 * When `key` is already reachable on `target` (own or inherited) the value is
 * installed as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is performed.
 *
 * @param {object} target - Object receiving the property.
 * @param {string|symbol} key - Property key.
 * @param {*} value - Value to store.
 * @returns {*} `target` on the defineProperty path, `value` on the assignment path.
 */
const A = (target, key, value) => {
  if (key in target) {
    return k(target, key, {
      enumerable: true,
      configurable: true,
      writable: true,
      value,
    });
  }
  target[key] = value;
  return value;
};

/**
 * Field-initialisation helper used by the class constructors in this module.
 * Non-symbol keys are coerced to strings before being handed to `A`.
 *
 * @param {object} target - Instance being initialised.
 * @param {*} key - Field name (symbols are passed through untouched).
 * @param {*} [value] - Initial value; `undefined` when omitted.
 * @returns {*} Whatever `A` returns.
 */
const h = (target, key, value) => {
  const normalizedKey = typeof key === "symbol" ? key : key + "";
  return A(target, normalizedKey, value);
};
2
/**
 * Extract the optional telemetry callback from a provider config.
 *
 * @param {object} i - Provider config; `providerOptions.__pieTelemetry` is read.
 * @returns {Function|undefined} The callback when it is a function, else `undefined`.
 */
const b = (i) => {
  const options =
    i.providerOptions && typeof i.providerOptions === "object" ? i.providerOptions : {};
  const reporter = options.__pieTelemetry;
  return typeof reporter === "function" ? reporter : undefined;
};

/** Per-transport limit, looked up by transport id ("pie" | "custom"). */
const R = {
  pie: 3e3,
  custom: 3e3,
};

/**
 * Strip every trailing slash from an endpoint string.
 *
 * @param {string} i - Endpoint URL.
 * @returns {string} The endpoint without trailing `/` characters.
 */
const f = (i) => i.replace(/\/+$/, "");

/**
 * Build the voices-listing URL for a config.
 *
 * @param {object} i - Provider config (`apiEndpoint`, optional `provider`).
 * @returns {string} `<endpoint>/<provider>/voices` for polly/google, else `<endpoint>/voices`.
 */
const I = (i) => {
  const base = f(i.apiEndpoint);
  const provider = (i.provider || "").toLowerCase();
  if (provider === "polly" || provider === "google") {
    return `${base}/${provider}/voices`;
  }
  return `${base}/voices`;
};

/**
 * Resolve which transport adapter a config selects.
 * An explicit `transportMode` wins; otherwise `provider === "custom"` selects "custom".
 *
 * @param {object} i - Provider config.
 * @returns {"pie"|"custom"} Transport id.
 */
const T = (i) => {
  if (i.transportMode === "custom") return "custom";
  if (i.transportMode === "pie") return "pie";
  return i.provider === "custom" ? "custom" : "pie";
};

/**
 * Resolve the endpoint mode: the configured `endpointMode`, or the transport default.
 *
 * @param {object} i - Provider config.
 * @param {string} t - Transport id.
 * @returns {string} Configured mode, else "rootPost" for custom / "synthesizePath" otherwise.
 */
const S = (i, t) => {
  if (i.endpointMode) return i.endpointMode;
  return t === "custom" ? "rootPost" : "synthesizePath";
};

/**
 * Resolve the endpoint validation mode: the configured value, or the transport default.
 *
 * @param {object} i - Provider config.
 * @param {string} t - Transport id.
 * @returns {string} Configured mode, else "none" for custom / "voices" otherwise.
 */
const P = (i, t) => {
  if (i.endpointValidationMode) return i.endpointValidationMode;
  return t === "custom" ? "none" : "voices";
};

/**
 * Map a config to the custom transport's `speedRate` value.
 * A string `providerOptions.speedRate` is used verbatim; otherwise the numeric
 * `rate` (default 1) is bucketed into "slow" / "medium" / "fast".
 *
 * @param {object} i - Provider config.
 * @returns {string} Speed rate label.
 */
const M = (i) => {
  const options = i.providerOptions || {};
  if (typeof options.speedRate === "string") {
    return options.speedRate;
  }
  const rate = Number(i.rate ?? 1);
  // Non-finite rates fall into the "slow" bucket together with rates <= 0.95.
  if (!Number.isFinite(rate) || rate <= 0.95) return "slow";
  if (rate >= 1.5) return "fast";
  return "medium";
};

/**
 * Parse newline-delimited JSON speech marks.
 *
 * Each non-blank line is parsed independently; lines that fail to parse are
 * skipped. Missing `start` values continue from a running cursor and missing
 * `end` values are derived from the value's length (minimum 1).
 *
 * @param {string} i - Raw newline-delimited JSON text.
 * @returns {Array<{time:number,type:string,start:number,end:number,value:string}>} Normalised marks.
 */
const E = (i) => {
  const marks = [];
  let cursor = 0;
  const lines = i
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean);
  for (const line of lines) {
    try {
      const raw = JSON.parse(line);
      const type = typeof raw.type === "string" ? raw.type : "word";
      const time = Number.isFinite(raw.time) ? raw.time : 0;
      const value = typeof raw.value === "string" ? raw.value : "";
      const explicitStart = Number.isFinite(raw.start) ? raw.start : null;
      const explicitEnd = Number.isFinite(raw.end) ? raw.end : null;
      const start = explicitStart ?? cursor;
      const fallbackLength = Math.max(1, value.length || String(raw.value || "").length);
      const end = explicitEnd ?? start + fallbackLength;
      // Leave a one-character gap after each mark for the next implicit start.
      cursor = Math.max(end + 1, cursor);
      marks.push({ time, type, start, end, value });
    } catch {
      // Best effort: a malformed line must not discard the remaining marks.
    }
  }
  return marks;
};

/** Adapter for the "pie" transport: JSON body in, base64 audio plus inline speech marks out. */
const C = {
  id: "pie",
  /**
   * @param {object} i - Provider config.
   * @returns {string} The endpoint itself in "rootPost" mode, else `<endpoint>/synthesize`.
   */
  resolveSynthesisUrl: (i) => {
    const mode = S(i, "pie");
    const base = f(i.apiEndpoint);
    return mode === "rootPost" ? base : `${base}/synthesize`;
  },
  /**
   * @param {string} i - Text to synthesize.
   * @param {object} t - Provider config.
   * @returns {object} Request payload; options that fail validation are sent as `undefined`.
   */
  buildRequestBody: (i, t) => {
    const options = t.providerOptions || {};
    let engine;
    if (typeof t.engine === "string") {
      engine = t.engine;
    } else if (typeof options.engine === "string") {
      engine = options.engine;
    }
    const sampleRate = Number.isFinite(options.sampleRate) ? options.sampleRate : undefined;
    const format = ["mp3", "ogg", "pcm"].includes(options.format) ? options.format : undefined;
    const speechMarkTypes = Array.isArray(options.speechMarkTypes)
      ? options.speechMarkTypes.filter(
          (kind) => kind === "word" || kind === "sentence" || kind === "ssml",
        )
      : undefined;
    return {
      text: i,
      provider: t.provider || "polly",
      voice: t.voice,
      language: t.language,
      rate: t.rate,
      engine,
      sampleRate,
      format,
      speechMarkTypes,
      includeSpeechMarks: true,
    };
  },
  /**
   * @param {{json: Function}} i - Fetch response.
   * @returns {Promise<object>} Base64 audio descriptor and speech marks (`[]` when not an array).
   */
  parseResponse: async (i) => {
    const payload = await i.json();
    return {
      audio: {
        kind: "base64",
        data: payload.audio,
        contentType: payload.contentType,
      },
      speechMarks: Array.isArray(payload.speechMarks) ? payload.speechMarks : [],
    };
  },
};

/** Adapter for the "custom" transport: audio and word marks are returned as URLs. */
const z = {
  id: "custom",
  /**
   * @param {object} i - Provider config.
   * @returns {string} `<endpoint>/synthesize` in "synthesizePath" mode, else the endpoint itself.
   */
  resolveSynthesisUrl: (i) => {
    const mode = S(i, "custom");
    const base = f(i.apiEndpoint);
    return mode === "synthesizePath" ? `${base}/synthesize` : base;
  },
  /**
   * @param {string} i - Text to synthesize.
   * @param {object} t - Provider config.
   * @returns {{text:string,speedRate:string,lang_id:string,cache:boolean}} Request payload.
   */
  buildRequestBody: (i, t) => {
    const options = t.providerOptions || {};
    const langId = typeof options.lang_id === "string" ? options.lang_id : t.language || "en-US";
    const cache = typeof options.cache === "boolean" ? options.cache : true;
    return {
      text: i,
      speedRate: M(t),
      lang_id: langId,
      cache,
    };
  },
  /**
   * Read the synthesis response and, when it names a word-marks asset, download
   * and parse that asset.
   *
   * Speech marks are optional: a non-OK status, a network failure or an
   * unreadable body all yield an empty list instead of failing the synthesis.
   * An aborted request is still propagated so cancellation is not masked.
   *
   * @param {{json: Function}} i - Fetch response of the synthesis call.
   * @param {object} t - Provider config (`includeAuthOnAssetFetch` is read).
   * @param {object} e - Headers sent with the synthesis request.
   * @param {AbortSignal} [o] - Abort signal forwarded to the asset fetch.
   * @returns {Promise<object>} URL audio descriptor and parsed speech marks.
   */
  parseResponse: async (i, t, e, o) => {
    const payload = await i.json();
    const assetHeaders = {};
    if (t.includeAuthOnAssetFetch) {
      // Forward only the Authorization header, whatever its casing.
      for (const [name, value] of Object.entries(e)) {
        if (name.toLowerCase() === "authorization") {
          assetHeaders[name] = value;
        }
      }
    }
    let speechMarks = [];
    if (typeof payload.word === "string" && payload.word.length > 0) {
      try {
        const marksResponse = await fetch(payload.word, {
          headers: assetHeaders,
          signal: o,
        });
        if (marksResponse.ok) {
          speechMarks = E(await marksResponse.text());
        }
      } catch (err) {
        if (o?.aborted) {
          throw err;
        }
        console.warn("[ServerTTSProvider] speech marks download failed:", err);
      }
    }
    return {
      audio: {
        kind: "url",
        url: payload.audioContent,
      },
      speechMarks,
    };
  },
};

/** Transport id to adapter lookup. */
const B = {
  pie: C,
  custom: z,
};
110
/**
 * Playback implementation returned by the server TTS provider.
 *
 * Sends text to the configured backend through a transport adapter, plays the
 * resulting audio with an `Audio` element and, when `onWordBoundary` is set,
 * reports word boundaries derived from the returned speech marks.
 */
class O {
  /**
   * @param {object} t - Provider config (endpoint, auth, rate, volume, ...).
   * @param {object} e - Transport adapter exposing `resolveSynthesisUrl`,
   *   `buildRequestBody` and `parseResponse`.
   */
  constructor(t, e) {
    h(this, "config");
    h(this, "adapter");
    h(this, "currentAudio", null);
    h(this, "pausedState", false);
    h(this, "wordTimings", []);
    h(this, "highlightInterval", null);
    h(this, "intentionallyStopped", false);
    h(this, "activeSynthesisController", null);
    h(this, "synthesisRunId", 0);
    h(this, "telemetryReporter");
    h(this, "onWordBoundary");
    this.config = t;
    this.adapter = e;
    this.telemetryReporter = b(t);
  }

  /**
   * Invoke the optional telemetry callback; callback failures are logged and never propagated.
   *
   * @param {string} t - Event name.
   * @param {object} e - Event payload.
   */
  async emitTelemetry(t, e) {
    try {
      await this.telemetryReporter?.(t, e);
    } catch (err) {
      console.warn("[ServerTTSProvider] telemetry callback failed:", err);
    }
  }

  /**
   * Synthesize `t` and play it, cancelling any synthesis or playback in flight.
   *
   * @param {string} t - Text to speak.
   * @returns {Promise<void>} Resolves when playback ends, or when this playback
   *   was stopped or superseded. Rejects when synthesis fails or is aborted, or
   *   when the audio element fails for a reason other than being stopped.
   */
  async speak(t) {
    this.stop();
    this.intentionallyStopped = false;
    const runId = ++this.synthesisRunId;
    const controller = new AbortController();
    this.activeSynthesisController = controller;
    const { audioUrl, wordTimings } = await this.synthesizeSpeech(t, controller.signal, runId);
    if (runId !== this.synthesisRunId) {
      // A newer speak()/stop() happened while synthesizing; drop this result.
      URL.revokeObjectURL(audioUrl);
      return;
    }
    const rate = this.config.rate || 1;
    // NOTE(review): timings are scaled by the configured rate, but the highlight
    // loop compares them with `currentTime`, and updateSettings() does not
    // rescale them when the rate changes mid-playback - confirm this is intended.
    this.wordTimings = wordTimings.map((timing) => ({
      ...timing,
      time: timing.time / rate,
    }));
    return new Promise((resolve, reject) => {
      const audio = new Audio(audioUrl);
      this.currentAudio = audio;
      if (this.config.rate) {
        audio.playbackRate = Math.max(0.25, Math.min(4, this.config.rate));
      }
      if (this.config.volume !== undefined) {
        audio.volume = Math.max(0, Math.min(1, this.config.volume));
      }
      // Media events are delivered asynchronously, so a handler may run after
      // stop() or a newer speak() replaced this element. Shared state is only
      // touched while this element is still the active one.
      const isCurrent = () => this.currentAudio === audio;
      const release = () => {
        URL.revokeObjectURL(audioUrl);
        if (isCurrent()) {
          this.stopWordHighlighting();
          this.currentAudio = null;
          this.wordTimings = [];
          this.pausedState = false;
        }
      };
      audio.onplay = () => {
        if (!isCurrent()) {
          return;
        }
        this.pausedState = false;
        if (this.onWordBoundary && this.wordTimings.length > 0) {
          this.startWordHighlighting();
        }
      };
      audio.onended = () => {
        release();
        resolve();
      };
      audio.onerror = () => {
        // stop() detaches the element before its error event fires, and the
        // shared flag may already have been reset by the next speak().
        const wasStopped = this.intentionallyStopped || !isCurrent();
        release();
        if (wasStopped) {
          resolve();
        } else {
          reject(new Error("Failed to play audio from server"));
        }
      };
      audio.onpause = () => {
        // Ignore the pause events that accompany stop() and natural end of
        // playback; neither leaves the provider in a resumable paused state.
        if (!isCurrent() || audio.ended) {
          return;
        }
        this.stopWordHighlighting();
        this.pausedState = true;
      };
      audio.play().catch((err) => {
        // A non-abort failure means this element will never play, so release it.
        if (isCurrent() && err?.name !== "AbortError") {
          release();
        }
        reject(err);
      });
    });
  }

  /**
   * Call server API to synthesize speech
   *
   * @param {string} t - Text to synthesize.
   * @param {AbortSignal} e - Signal used to cancel the network requests.
   * @param {number} o - Run id captured by the caller; the result is discarded
   *   when it no longer matches `synthesisRunId`.
   * @returns {Promise<{audioUrl: string, wordTimings: Array<object>}>} Object URL
   *   of the audio and the parsed word timings.
   * @throws {Error} On network failure, non-OK responses, or when superseded.
   */
  async synthesizeSpeech(t, e, o) {
    const startedAt = Date.now();
    // Every telemetry event of this call shares the same envelope.
    const report = (eventName, operation, extra = {}) =>
      this.emitTelemetry(eventName, {
        toolId: "tts",
        backend: this.config.provider || "server",
        operation,
        ...extra,
      });
    const describe = (err) => (err instanceof Error ? err.message : String(err));

    await report("pie-tool-backend-call-start", "synthesize-speech");
    const headers = {
      "Content-Type": "application/json",
      ...this.config.headers,
    };
    if (this.config.authToken) {
      headers.Authorization = `Bearer ${this.config.authToken}`;
    }
    const url = this.adapter.resolveSynthesisUrl(this.config);
    const body = this.adapter.buildRequestBody(t, this.config);

    let response;
    try {
      response = await fetch(url, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
        signal: e,
      });
    } catch (err) {
      await report("pie-tool-backend-call-error", "synthesize-speech", {
        duration: Date.now() - startedAt,
        errorType: "TTSBackendNetworkError",
        message: describe(err),
      });
      throw err;
    }

    if (!response.ok) {
      // The error body is optional and may be absent, non-JSON or `null`.
      const details = (await response.json().catch(() => ({}))) || {};
      const message =
        details.message || details.error?.message || `Server returned ${response.status}`;
      await report("pie-tool-backend-call-error", "synthesize-speech", {
        duration: Date.now() - startedAt,
        statusCode: response.status,
        errorType: "TTSBackendRequestError",
        message,
      });
      throw new Error(message);
    }

    const parsed = await this.adapter.parseResponse(response, this.config, headers, e);
    if (o !== this.synthesisRunId || e.aborted) {
      throw new Error("Synthesis superseded by a newer request");
    }

    let audioBlob;
    if (parsed.audio.kind === "base64") {
      audioBlob = this.base64ToBlob(parsed.audio.data, parsed.audio.contentType);
    } else {
      const assetUrl = parsed.audio.url;
      const assetStartedAt = Date.now();
      await report("pie-tool-backend-call-start", "fetch-synthesized-audio-asset");
      const assetHeaders = {};
      if (this.config.includeAuthOnAssetFetch && this.config.authToken) {
        assetHeaders.Authorization = `Bearer ${this.config.authToken}`;
      }
      let assetResponse;
      try {
        assetResponse = await fetch(assetUrl, {
          headers: assetHeaders,
          signal: e,
        });
      } catch (err) {
        await report("pie-tool-backend-call-error", "fetch-synthesized-audio-asset", {
          duration: Date.now() - assetStartedAt,
          errorType: "TTSAssetNetworkError",
          message: describe(err),
        });
        throw err;
      }
      if (!assetResponse.ok) {
        await report("pie-tool-backend-call-error", "fetch-synthesized-audio-asset", {
          duration: Date.now() - assetStartedAt,
          statusCode: assetResponse.status,
          errorType: "TTSAssetFetchError",
          message: `Failed to download synthesized audio (${assetResponse.status})`,
        });
        throw new Error(`Failed to download synthesized audio (${assetResponse.status})`);
      }
      audioBlob = await assetResponse.blob();
      await report("pie-tool-backend-call-success", "fetch-synthesized-audio-asset", {
        duration: Date.now() - assetStartedAt,
      });
    }

    const audioUrl = URL.createObjectURL(audioBlob);
    const wordTimings = this.parseSpeechMarks(parsed.speechMarks);
    await report("pie-tool-backend-call-success", "synthesize-speech", {
      duration: Date.now() - startedAt,
    });
    return { audioUrl, wordTimings };
  }

  /**
   * Convert base64 to Blob
   *
   * @param {string} t - Base64-encoded payload.
   * @param {string} e - MIME type assigned to the blob.
   * @returns {Blob} Blob holding the decoded bytes.
   */
  base64ToBlob(t, e) {
    const binary = atob(t);
    // atob yields one character per byte, so each char code is the byte value.
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
    return new Blob([bytes], { type: e });
  }

  /**
   * Parse speech marks into word timings
   *
   * @param {Array<object>} t - Speech marks; only entries with `type === "word"` are kept.
   * @returns {Array<{time:number,wordIndex:number,charIndex:number,length:number}>} Word timings.
   */
  parseSpeechMarks(t) {
    return t
      .filter((mark) => mark.type === "word")
      .map((mark, index) => ({
        time: mark.time,
        wordIndex: index,
        charIndex: mark.start,
        length: mark.end - mark.start,
      }));
  }

  /**
   * Start word highlighting synchronized with audio playback
   *
   * Polls the audio position every 50 ms and reports at most one new word per
   * tick. Highlighting starts at the word under the current playback position,
   * so resuming after a pause does not replay the words already spoken.
   */
  startWordHighlighting() {
    this.stopWordHighlighting();
    if (!this.currentAudio || !this.onWordBoundary || this.wordTimings.length === 0) {
      console.log("[ServerTTSProvider] Cannot start highlighting:", {
        hasAudio: !!this.currentAudio,
        hasCallback: !!this.onWordBoundary,
        wordTimingsCount: this.wordTimings.length,
      });
      return;
    }
    console.log("[ServerTTSProvider] Starting word highlighting with", this.wordTimings.length, "word timings");
    console.log("[ServerTTSProvider] Playback rate:", this.currentAudio.playbackRate);
    console.log("[ServerTTSProvider] First 3 timings:", this.wordTimings.slice(0, 3));

    // Treat every word before the current one as already reported.
    const startMs = this.currentAudio.currentTime * 1e3;
    let lastReported = -1;
    for (let i = 0; i < this.wordTimings.length && this.wordTimings[i].time <= startMs; i++) {
      lastReported = i - 1;
    }

    this.highlightInterval = window.setInterval(() => {
      if (!this.currentAudio) {
        this.stopWordHighlighting();
        return;
      }
      const positionMs = this.currentAudio.currentTime * 1e3;
      for (let i = 0; i < this.wordTimings.length; i++) {
        const timing = this.wordTimings[i];
        if (positionMs >= timing.time && i > lastReported) {
          if (this.onWordBoundary) {
            console.log("[ServerTTSProvider] Highlighting word at charIndex:", timing.charIndex, "length:", timing.length, "time:", timing.time, "currentTime:", positionMs);
            this.onWordBoundary("", timing.charIndex, timing.length);
          }
          lastReported = i;
          break;
        }
      }
    }, 50);
  }

  /**
   * Stop word highlighting
   */
  stopWordHighlighting() {
    if (this.highlightInterval !== null) {
      clearInterval(this.highlightInterval);
      this.highlightInterval = null;
    }
  }

  /** Pause the active playback, if any. */
  pause() {
    if (this.currentAudio && !this.pausedState) {
      this.currentAudio.pause();
      this.stopWordHighlighting();
      this.pausedState = true;
    }
  }

  /** Resume a paused playback and restart word highlighting when applicable. */
  resume() {
    if (!this.currentAudio || !this.pausedState) {
      return;
    }
    const audio = this.currentAudio;
    Promise.resolve(audio.play()).catch((err) => {
      console.warn("[ServerTTSProvider] resume failed:", err);
      // Playback did not restart; fall back to the paused state.
      if (this.currentAudio === audio && audio.paused) {
        this.stopWordHighlighting();
        this.pausedState = true;
      }
    });
    this.pausedState = false;
    if (this.onWordBoundary && this.wordTimings.length > 0) {
      this.startWordHighlighting();
    }
  }

  /** Cancel any synthesis in flight and tear down the active playback. */
  stop() {
    // Bumping the run id invalidates results of a synthesis still in flight.
    this.synthesisRunId += 1;
    if (this.activeSynthesisController) {
      this.activeSynthesisController.abort();
      this.activeSynthesisController = null;
    }
    this.stopWordHighlighting();
    if (this.currentAudio) {
      this.intentionallyStopped = true;
      this.currentAudio.pause();
      if (this.currentAudio.src) {
        URL.revokeObjectURL(this.currentAudio.src);
      }
      this.currentAudio.src = "";
      this.currentAudio = null;
    }
    this.pausedState = false;
    this.wordTimings = [];
  }

  /** @returns {boolean} True while an audio element is active and not paused. */
  isPlaying() {
    return this.currentAudio !== null && !this.pausedState;
  }

  /** @returns {boolean} True while playback is paused. */
  isPaused() {
    return this.pausedState;
  }

  /**
   * Update settings dynamically (rate, pitch, voice)
   * Note: Voice changes require resynthesis, so voice updates are stored but
   * take effect on the next speak() call. Rate can be applied to current playback.
   *
   * @param {{rate?: number, pitch?: number, voice?: string}} t - Settings to update.
   */
  updateSettings(t) {
    if (t.rate !== undefined) {
      this.config.rate = t.rate;
      if (this.currentAudio) {
        this.currentAudio.playbackRate = Math.max(0.25, Math.min(4, t.rate));
      }
    }
    if (t.pitch !== undefined) {
      this.config.pitch = t.pitch;
    }
    if (t.voice !== undefined) {
      this.config.voice = t.voice;
    }
  }
}
330
/**
 * Factory/descriptor for the server-backed TTS provider.
 *
 * Holds the active config and transport adapter, optionally probes the
 * endpoint during initialization and hands out the playback implementation.
 */
class W {
  constructor() {
    h(this, "providerId", "server-tts");
    h(this, "providerName", "Server TTS");
    h(this, "version", "1.0.0");
    h(this, "config", null);
    h(this, "adapter", null);
    h(this, "telemetryReporter");
  }

  /**
   * Forward an event to the optional telemetry callback. A failing callback is
   * logged as a warning and otherwise ignored.
   *
   * @param {string} t - Event name.
   * @param {object} e - Event payload.
   */
  async emitTelemetry(t, e) {
    try {
      await this.telemetryReporter?.(t, e);
    } catch (failure) {
      console.warn("[ServerTTSProvider] telemetry callback failed:", failure);
    }
  }

  /**
   * Initialize the server TTS provider.
   *
   * No request is issued unless `validateEndpoint` is set in the config, in
   * which case the endpoint is probed first and telemetry events are emitted
   * around the probe.
   *
   * @param {object} t - Provider config; `apiEndpoint` is mandatory.
   * @returns {Promise<object>} The playback implementation bound to the config.
   * @throws {Error} When `apiEndpoint` is missing or the endpoint probe fails.
   */
  async initialize(t) {
    const settings = t;
    if (!settings.apiEndpoint) {
      throw new Error("apiEndpoint is required for ServerTTSProvider");
    }
    this.config = settings;
    this.telemetryReporter = b(settings);
    const transport = T(settings);
    this.adapter = B[transport];

    if (settings.validateEndpoint) {
      const probeStartedAt = Date.now();
      await this.emitTelemetry("pie-tool-backend-call-start", {
        toolId: "tts",
        backend: settings.provider || "server",
        operation: "validate-endpoint",
      });
      const reachable = await this.testAPIAvailability();
      if (!reachable) {
        await this.emitTelemetry("pie-tool-backend-call-error", {
          toolId: "tts",
          backend: settings.provider || "server",
          operation: "validate-endpoint",
          duration: Date.now() - probeStartedAt,
          errorType: "TTSEndpointValidationError",
          message: `Server TTS API not available at ${settings.apiEndpoint}`,
        });
        throw new Error(`Server TTS API not available at ${settings.apiEndpoint}`);
      }
      await this.emitTelemetry("pie-tool-backend-call-success", {
        toolId: "tts",
        backend: settings.provider || "server",
        operation: "validate-endpoint",
        duration: Date.now() - probeStartedAt,
      });
    }

    return new O(settings, this.adapter);
  }

  /**
   * Probe the configured endpoint, giving up after 5 seconds.
   *
   * Validation mode "none" succeeds without a request, "voices" issues a GET
   * to the voices URL, and any other mode issues OPTIONS to the synthesis URL.
   * A 405 response counts as available.
   *
   * @returns {Promise<boolean>} True when the endpoint is considered available;
   *   false when not initialized or on any failure.
   */
  async testAPIAvailability() {
    if (!this.config || !this.adapter) {
      return false;
    }
    try {
      const probeHeaders = { ...this.config.headers };
      if (this.config.authToken) {
        probeHeaders.Authorization = `Bearer ${this.config.authToken}`;
      }
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), 5e3);
      const mode = P(this.config, this.adapter.id);
      if (mode === "none") {
        clearTimeout(timer);
        return true;
      }
      const probesVoices = mode === "voices";
      const probeUrl = probesVoices
        ? I(this.config)
        : this.adapter.resolveSynthesisUrl(this.config);
      const probeMethod = probesVoices ? "GET" : "OPTIONS";
      try {
        const reply = await fetch(probeUrl, {
          method: probeMethod,
          headers: probeHeaders,
          signal: controller.signal,
        });
        clearTimeout(timer);
        return reply.ok || reply.status === 405;
      } catch {
        clearTimeout(timer);
        return false;
      }
    } catch {
      return false;
    }
  }

  /**
   * @param {string} t - Feature name.
   * @returns {boolean} True for pause, resume, wordBoundary, voiceSelection and
   *   rateControl; false for pitchControl and anything unrecognised.
   */
  supportsFeature(t) {
    const supported = ["pause", "resume", "wordBoundary", "voiceSelection", "rateControl"];
    return supported.includes(t);
  }

  /**
   * Describe the provider's capabilities. The text-length limit is looked up
   * by the active transport ("pie" when no config is set).
   *
   * @returns {object} Capability flags plus `maxTextLength`.
   */
  getCapabilities() {
    const transport = this.config ? T(this.config) : "pie";
    return {
      supportsPause: true,
      supportsResume: true,
      // Word boundaries come from the speech marks returned by the server.
      supportsWordBoundary: true,
      supportsVoiceSelection: true,
      supportsRateControl: true,
      // Depends on server provider
      supportsPitchControl: false,
      maxTextLength: R[transport],
    };
  }

  /** Drop the stored config, adapter and telemetry callback. */
  destroy() {
    this.config = null;
    this.adapter = null;
    this.telemetryReporter = undefined;
  }
}
440
+ export {
441
+ W as ServerTTSProvider
442
+ };