@pie-players/pie-tool-annotation-toolbar 0.3.7 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
// Bundler-emitted helpers that emulate class-field initialisation for the
// classes in this module.

// Shorthand for Object.defineProperty.
var v = Object.defineProperty;

// Sets `key` on `target`. When the key is already reachable on the target
// (own or inherited) it is (re)defined as a plain enumerable, configurable,
// writable data property; otherwise a normal assignment is used.
var T = (target, key, value) => {
  if (key in target) {
    return v(target, key, {
      enumerable: true,
      configurable: true,
      writable: true,
      value
    });
  }
  return target[key] = value;
};

// Public-field initialiser: non-symbol keys are coerced to strings before
// delegating to T.
var d = (target, key, value) => {
  const normalizedKey = typeof key !== "symbol" ? key + "" : key;
  return T(target, normalizedKey, value);
};
/** Maximum text length advertised by getCapabilities(), keyed by transport mode. */
const w = {
  pie: 3e3,
  custom: 3e3
};

/** Removes trailing slashes so a path segment can be appended to an endpoint. */
const f = (i) => i.replace(/\/+$/, "");

/**
 * Resolves the transport mode for a config: an explicit `transportMode` of
 * "custom" or "pie" wins, then `provider === "custom"`, otherwise "pie".
 */
const m = (i) => {
  if (i.transportMode === "custom" || i.transportMode === "pie") {
    return i.transportMode;
  }
  return i.provider === "custom" ? "custom" : "pie";
};

/**
 * Resolves how the synthesis URL is built: the configured `endpointMode`, or
 * "rootPost" for the custom transport and "synthesizePath" otherwise.
 */
const y = (i, t) => i.endpointMode || (t === "custom" ? "rootPost" : "synthesizePath");

/**
 * Resolves how the endpoint is validated: the configured
 * `endpointValidationMode`, or "none" for the custom transport and "voices"
 * otherwise.
 */
const S = (i, t) => i.endpointValidationMode || (t === "custom" ? "none" : "voices");

/**
 * Maps the config to the custom transport's `speedRate` value. A string
 * `providerOptions.speedRate` is passed through untouched; otherwise the
 * numeric `rate` (default 1) is bucketed into "slow" (non-finite or <= 0.95),
 * "fast" (>= 1.5) or "medium".
 */
const b = (i) => {
  const options = i.providerOptions || {};
  if (typeof options.speedRate === "string") {
    return options.speedRate;
  }
  const rate = Number(i.rate ?? 1);
  if (!Number.isFinite(rate) || rate <= 0.95) {
    return "slow";
  }
  return rate >= 1.5 ? "fast" : "medium";
};

/**
 * Parses newline-delimited JSON speech marks into `{ time, type, start, end,
 * value }` records. Missing `start`/`end` offsets are synthesised from a
 * running cursor so consecutive marks never overlap.
 */
const A = (i) => {
  const marks = [];
  let cursor = 0;
  const lines = i.split("\n").map((line) => line.trim()).filter(Boolean);
  for (const line of lines) {
    try {
      const raw = JSON.parse(line);
      const type = typeof raw.type === "string" ? raw.type : "word";
      const time = typeof raw.time === "number" && Number.isFinite(raw.time) ? raw.time : 0;
      const value = typeof raw.value === "string" ? raw.value : "";
      const explicitStart = typeof raw.start === "number" && Number.isFinite(raw.start) ? raw.start : null;
      const explicitEnd = typeof raw.end === "number" && Number.isFinite(raw.end) ? raw.end : null;
      const start = explicitStart ?? cursor;
      const end = explicitEnd ?? start + Math.max(1, value.length || String(raw.value || "").length);
      cursor = Math.max(end + 1, cursor);
      marks.push({ time, type, start, end, value });
    } catch {
      // Deliberately best-effort: a malformed line is skipped so the
      // remaining marks are still usable.
    }
  }
  return marks;
};

/** Adapter for the "pie" transport: JSON request, base64 audio in the JSON response. */
const k = {
  id: "pie",
  /** Returns the endpoint itself in "rootPost" mode, else `<endpoint>/synthesize`. */
  resolveSynthesisUrl: (i) => {
    const mode = y(i, "pie");
    const base = f(i.apiEndpoint);
    return mode === "rootPost" ? base : `${base}/synthesize`;
  },
  /** Builds the request payload; optional fields that fail validation are sent as undefined. */
  buildRequestBody: (i, t) => {
    const options = t.providerOptions || {};
    let engine;
    if (typeof t.engine === "string") {
      engine = t.engine;
    } else if (typeof options.engine === "string") {
      engine = options.engine;
    }
    const sampleRate = typeof options.sampleRate === "number" && Number.isFinite(options.sampleRate) ? options.sampleRate : void 0;
    const format = options.format === "mp3" || options.format === "ogg" || options.format === "pcm" ? options.format : void 0;
    const speechMarkTypes = Array.isArray(options.speechMarkTypes) ? options.speechMarkTypes.filter((kind) => kind === "word" || kind === "sentence" || kind === "ssml") : void 0;
    return {
      text: i,
      provider: t.provider || "polly",
      voice: t.voice,
      language: t.language,
      rate: t.rate,
      engine,
      sampleRate,
      format,
      speechMarkTypes,
      includeSpeechMarks: !0
    };
  },
  /**
   * Reads the JSON response into `{ audio, speechMarks }`.
   * @throws {Error} when the response carries no base64 audio string.
   */
  parseResponse: async (i) => {
    const payload = await i.json();
    // Fail here with a clear message instead of letting the base64 decoder
    // choke on `undefined` later.
    if (!payload || typeof payload.audio !== "string" || payload.audio.length === 0) {
      throw new Error("Server TTS response did not include base64 audio data");
    }
    return {
      audio: {
        kind: "base64",
        data: payload.audio,
        contentType: payload.contentType
      },
      speechMarks: Array.isArray(payload.speechMarks) ? payload.speechMarks : []
    };
  }
};

/** Adapter for the "custom" transport: the response points at audio and speech-mark URLs. */
const R = {
  id: "custom",
  /** Returns `<endpoint>/synthesize` in "synthesizePath" mode, else the endpoint itself. */
  resolveSynthesisUrl: (i) => {
    const mode = y(i, "custom");
    const base = f(i.apiEndpoint);
    return mode === "synthesizePath" ? `${base}/synthesize` : base;
  },
  /** Builds the request payload (`text`, `speedRate`, `lang_id`, `cache`). */
  buildRequestBody: (i, t) => {
    const options = t.providerOptions || {};
    const langId = typeof options.lang_id === "string" ? options.lang_id : t.language || "en-US";
    const cache = typeof options.cache === "boolean" ? options.cache : !0;
    return {
      text: i,
      speedRate: b(t),
      lang_id: langId,
      cache
    };
  },
  /**
   * Reads the JSON response and, when it names a speech-mark file, downloads
   * and parses it. Speech marks are optional: a failed download yields an
   * empty list rather than failing synthesis, but aborts still propagate.
   * @param i fetch-style response exposing `json()`
   * @param t provider config (`includeAuthOnAssetFetch` is read)
   * @param e headers of the synthesis request; only Authorization is forwarded
   * @param s optional AbortSignal for the speech-mark download
   * @throws {Error} when the response carries no audio URL.
   */
  parseResponse: async (i, t, e, s) => {
    const payload = await i.json();
    // An empty or missing URL would otherwise be fetched as a bogus relative URL.
    if (!payload || typeof payload.audioContent !== "string" || payload.audioContent.length === 0) {
      throw new Error("Custom TTS response did not include an audio URL");
    }
    const assetHeaders = {};
    if (t.includeAuthOnAssetFetch) {
      for (const [name, value] of Object.entries(e)) {
        if (name.toLowerCase() === "authorization") {
          assetHeaders[name] = value;
        }
      }
    }
    let speechMarks = [];
    if (typeof payload.word === "string" && payload.word.length > 0) {
      try {
        const marksResponse = await fetch(payload.word, {
          headers: assetHeaders,
          signal: s
        });
        if (marksResponse.ok) {
          speechMarks = A(await marksResponse.text());
        }
      } catch (err) {
        // Cancellation must reach the caller; any other failure only costs
        // word highlighting, matching the tolerated non-OK response above.
        if (s?.aborted || err?.name === "AbortError") {
          throw err;
        }
      }
    }
    return {
      audio: {
        kind: "url",
        url: payload.audioContent
      },
      speechMarks
    };
  }
};

/** Transport adapters keyed by the transport mode returned from `m`. */
const M = {
  pie: k,
  custom: R
};
/**
 * Playback session for server-synthesized speech. Each speak() call asks the
 * adapter to synthesize audio, plays it through an Audio element and, when
 * `onWordBoundary` is set and word timings are available, reports word
 * boundaries while the audio plays.
 */
class I {
  /**
   * @param t provider config (endpoint, headers, authToken, rate, volume, ...)
   * @param e transport adapter exposing resolveSynthesisUrl, buildRequestBody and parseResponse
   */
  constructor(t, e) {
    d(this, "config");
    d(this, "adapter");
    d(this, "currentAudio", null);
    d(this, "pausedState", !1);
    d(this, "wordTimings", []);
    d(this, "highlightInterval", null);
    d(this, "intentionallyStopped", !1);
    d(this, "activeSynthesisController", null);
    d(this, "synthesisRunId", 0);
    d(this, "onWordBoundary");
    // Settles the promise returned by the speak() call that is currently
    // playing, so stop() can resolve it instead of leaving it pending.
    d(this, "settlePlayback", null);
    this.config = t;
    this.adapter = e;
  }

  /**
   * Synthesizes `t` and plays it, cancelling any previous request or playback.
   * @returns a promise that resolves when playback ends or is stopped, and
   *   rejects when synthesis fails, is superseded, or playback errors.
   */
  async speak(t) {
    this.stop();
    this.intentionallyStopped = !1;
    const runId = ++this.synthesisRunId;
    const controller = new AbortController();
    this.activeSynthesisController = controller;
    const { audioUrl, wordTimings } = await this.synthesizeSpeech(t, controller.signal, runId);
    if (runId !== this.synthesisRunId) {
      URL.revokeObjectURL(audioUrl);
      return;
    }
    // Timings are kept on the media timeline: the highlighter compares them
    // with `currentTime`, which already advances at `playbackRate`, so they
    // must not be rescaled by the rate.
    this.wordTimings = wordTimings;
    return new Promise((resolve, reject) => {
      const audio = new Audio(audioUrl);
      this.currentAudio = audio;
      if (this.config.rate) {
        audio.playbackRate = Math.max(0.25, Math.min(4, this.config.rate));
      }
      if (this.config.volume !== void 0) {
        audio.volume = Math.max(0, Math.min(1, this.config.volume));
      }
      // Single teardown path shared by ended/error/stop. Shared state is only
      // touched while this element is still the active one, so a late event
      // from a discarded element cannot clobber a newer playback.
      const finish = (error) => {
        audio.onplay = audio.onended = audio.onerror = audio.onpause = null;
        URL.revokeObjectURL(audioUrl);
        if (this.currentAudio === audio) {
          this.stopWordHighlighting();
          this.currentAudio = null;
          this.wordTimings = [];
          // The `pause` event fires just before `ended`; clear the flag so
          // isPaused() is not stuck at true after playback completes.
          this.pausedState = !1;
        }
        if (this.settlePlayback === finish) {
          this.settlePlayback = null;
        }
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      };
      this.settlePlayback = finish;
      audio.onplay = () => {
        this.pausedState = !1;
        if (this.onWordBoundary && this.wordTimings.length > 0) {
          this.startWordHighlighting();
        }
      };
      audio.onended = () => finish();
      audio.onerror = () => finish(this.intentionallyStopped ? void 0 : new Error("Failed to play audio from server"));
      audio.onpause = () => {
        this.stopWordHighlighting();
        this.pausedState = !0;
      };
      // A rejected play() (e.g. autoplay policy) rejects speak() but leaves
      // the element in place so a later resume() can retry.
      audio.play().catch(reject);
    });
  }

  /**
   * Call server API to synthesize speech
   * @param t text to synthesize
   * @param e AbortSignal for every request made here
   * @param s run id captured by speak(); a mismatch means the request was superseded
   * @returns `{ audioUrl, wordTimings }` where audioUrl is an object URL the caller must revoke
   * @throws {Error} on a non-OK response, a superseded/aborted run, or a failed audio download
   */
  async synthesizeSpeech(t, e, s) {
    const headers = {
      "Content-Type": "application/json",
      ...this.config.headers
    };
    if (this.config.authToken) {
      headers.Authorization = `Bearer ${this.config.authToken}`;
    }
    const url = this.adapter.resolveSynthesisUrl(this.config);
    const body = this.adapter.buildRequestBody(t, this.config);
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: e
    });
    if (!response.ok) {
      // The body may be non-JSON or literally `null`; fall back to the status.
      const details = await response.json().catch(() => ({}));
      const message = details?.message || details?.error?.message || `Server returned ${response.status}`;
      throw new Error(message);
    }
    const parsed = await this.adapter.parseResponse(response, this.config, headers, e);
    if (s !== this.synthesisRunId || e.aborted) {
      throw new Error("Synthesis superseded by a newer request");
    }
    let blob;
    if (parsed.audio.kind === "base64") {
      blob = this.base64ToBlob(parsed.audio.data, parsed.audio.contentType);
    } else {
      const assetHeaders = {};
      if (this.config.includeAuthOnAssetFetch && this.config.authToken) {
        assetHeaders.Authorization = `Bearer ${this.config.authToken}`;
      }
      const download = await fetch(parsed.audio.url, {
        headers: assetHeaders,
        signal: e
      });
      if (!download.ok) {
        throw new Error(`Failed to download synthesized audio (${download.status})`);
      }
      blob = await download.blob();
    }
    // Parse the marks before creating the object URL so a parsing failure
    // cannot leak an unrevoked URL.
    const wordTimings = this.parseSpeechMarks(parsed.speechMarks);
    return { audioUrl: URL.createObjectURL(blob), wordTimings };
  }

  /**
   * Convert base64 to Blob
   * @param t base64-encoded bytes
   * @param e MIME type for the resulting Blob
   */
  base64ToBlob(t, e) {
    // atob yields one character per byte, so each char code is the byte value.
    const bytes = Uint8Array.from(atob(t), (ch) => ch.charCodeAt(0));
    return new Blob([bytes], { type: e });
  }

  /**
   * Parse speech marks into word timings
   * Only marks of type "word" are kept; `wordIndex` counts within that subset.
   */
  parseSpeechMarks(t) {
    return t.filter((mark) => mark.type === "word").map((mark, index) => ({
      time: mark.time,
      wordIndex: index,
      charIndex: mark.start,
      length: mark.end - mark.start
    }));
  }

  /**
   * Start word highlighting synchronized with audio playback
   * Polls the audio position every 50 ms and reports at most one new word per tick.
   */
  startWordHighlighting() {
    this.stopWordHighlighting();
    if (!this.currentAudio || !this.onWordBoundary || this.wordTimings.length === 0) {
      console.log("[ServerTTSProvider] Cannot start highlighting:", {
        hasAudio: !!this.currentAudio,
        hasCallback: !!this.onWordBoundary,
        wordTimingsCount: this.wordTimings.length
      });
      return;
    }
    console.log("[ServerTTSProvider] Starting word highlighting with", this.wordTimings.length, "word timings");
    console.log("[ServerTTSProvider] Playback rate:", this.currentAudio.playbackRate);
    console.log("[ServerTTSProvider] First 3 timings:", this.wordTimings.slice(0, 3));
    let lastIndex = -1;
    this.highlightInterval = window.setInterval(() => {
      if (!this.currentAudio) {
        this.stopWordHighlighting();
        return;
      }
      const positionMs = this.currentAudio.currentTime * 1e3;
      // Words up to lastIndex were already reported, so resume after them.
      for (let index = lastIndex + 1; index < this.wordTimings.length; index++) {
        const timing = this.wordTimings[index];
        if (positionMs >= timing.time) {
          if (this.onWordBoundary) {
            console.log("[ServerTTSProvider] Highlighting word at charIndex:", timing.charIndex, "length:", timing.length, "time:", timing.time, "currentTime:", positionMs);
            this.onWordBoundary("", timing.charIndex, timing.length);
          }
          lastIndex = index;
          break;
        }
      }
    }, 50);
  }

  /**
   * Stop word highlighting
   */
  stopWordHighlighting() {
    if (this.highlightInterval !== null) {
      clearInterval(this.highlightInterval);
      this.highlightInterval = null;
    }
  }

  /** Pauses the active playback, if any, and suspends highlighting. */
  pause() {
    if (this.currentAudio && !this.pausedState) {
      this.currentAudio.pause();
      this.stopWordHighlighting();
      this.pausedState = !0;
    }
  }

  /** Resumes paused playback; if the browser refuses to play, the paused state is restored. */
  resume() {
    const audio = this.currentAudio;
    if (!audio || !this.pausedState) {
      return;
    }
    const playback = audio.play();
    this.pausedState = !1;
    if (this.onWordBoundary && this.wordTimings.length > 0) {
      this.startWordHighlighting();
    }
    // play() returns a promise in current browsers; handle its rejection so
    // it does not become an unhandled rejection.
    if (playback && typeof playback.catch === "function") {
      playback.catch(() => {
        if (this.currentAudio === audio) {
          this.stopWordHighlighting();
          this.pausedState = !0;
        }
      });
    }
  }

  /**
   * Cancels any in-flight synthesis and playback, releases the audio
   * resources and resolves the pending speak() promise.
   */
  stop() {
    this.synthesisRunId += 1;
    if (this.activeSynthesisController) {
      this.activeSynthesisController.abort();
      this.activeSynthesisController = null;
    }
    this.stopWordHighlighting();
    const audio = this.currentAudio;
    if (audio) {
      this.intentionallyStopped = !0;
      // Detach first: pause() and clearing `src` queue `pause`/`error` events
      // that would otherwise run after this method returns and overwrite the
      // state reset below.
      audio.onplay = audio.onended = audio.onerror = audio.onpause = null;
      audio.pause();
      if (audio.src) {
        URL.revokeObjectURL(audio.src);
      }
      audio.src = "";
      this.currentAudio = null;
    }
    this.pausedState = !1;
    this.wordTimings = [];
    const settle = this.settlePlayback;
    if (settle) {
      this.settlePlayback = null;
      settle();
    }
  }

  /** @returns true while an audio element exists and is not paused. */
  isPlaying() {
    return this.currentAudio !== null && !this.pausedState;
  }

  /** @returns true while playback is paused. */
  isPaused() {
    return this.pausedState;
  }

  /**
   * Update settings dynamically (rate, pitch, voice)
   * Note: Voice changes require resynthesis, so voice updates are stored but
   * take effect on the next speak() call. Rate can be applied to current playback.
   */
  updateSettings(t) {
    if (t.rate !== void 0) {
      this.config.rate = t.rate;
      if (this.currentAudio) {
        this.currentAudio.playbackRate = Math.max(0.25, Math.min(4, t.rate));
      }
    }
    if (t.pitch !== void 0) {
      this.config.pitch = t.pitch;
    }
    if (t.voice !== void 0) {
      this.config.voice = t.voice;
    }
  }
}
/**
 * Factory/descriptor for the server-backed TTS provider. initialize() picks
 * the transport adapter for a config and returns a playback session.
 */
class B {
  constructor() {
    d(this, "providerId", "server-tts");
    d(this, "providerName", "Server TTS");
    d(this, "version", "1.0.0");
    d(this, "config", null);
    d(this, "adapter", null);
  }

  /**
   * Initialize the server TTS provider.
   *
   * This is designed to be fast by default (no API calls).
   * Set validateEndpoint: true in config to test API availability during initialization.
   *
   * @performance Default: <10ms, With validation: 100-500ms
   * @throws {Error} when `apiEndpoint` is missing or endpoint validation fails
   */
  async initialize(t) {
    const config = t;
    if (!config.apiEndpoint) {
      throw new Error("apiEndpoint is required for ServerTTSProvider");
    }
    this.config = config;
    this.adapter = M[m(config)];
    if (config.validateEndpoint && !(await this.testAPIAvailability())) {
      throw new Error(`Server TTS API not available at ${config.apiEndpoint}`);
    }
    return new I(config, this.adapter);
  }

  /**
   * Test if API endpoint is available (with timeout).
   *
   * Validation mode "none" succeeds without a request; "voices" issues
   * `GET <endpoint>/voices`; any other mode sends `OPTIONS` to the synthesis
   * URL. A 405 response counts as available. Requests are aborted after 5 s.
   *
   * @performance 100-500ms depending on network
   * @returns true when the endpoint answered acceptably, false on any failure
   */
  async testAPIAvailability() {
    if (!this.config || !this.adapter) {
      return !1;
    }
    let timer;
    try {
      const mode = S(this.config, this.adapter.id);
      if (mode === "none") {
        return !0;
      }
      const headers = { ...this.config.headers };
      if (this.config.authToken) {
        headers.Authorization = `Bearer ${this.config.authToken}`;
      }
      const probesVoices = mode === "voices";
      const url = probesVoices ? `${f(this.config.apiEndpoint)}/voices` : this.adapter.resolveSynthesisUrl(this.config);
      // Start the timeout only once a request is certain, and clear it in
      // `finally` so no exit path leaves the timer running.
      const controller = new AbortController();
      timer = setTimeout(() => controller.abort(), 5e3);
      const response = await fetch(url, {
        method: probesVoices ? "GET" : "OPTIONS",
        headers,
        signal: controller.signal
      });
      return response.ok || response.status === 405;
    } catch {
      // Any failure (bad config, network error, timeout abort) means "not available".
      return !1;
    } finally {
      clearTimeout(timer);
    }
  }

  /** @returns whether the named feature is supported; unknown names yield false. */
  supportsFeature(t) {
    switch (t) {
      case "pause":
      case "resume":
      case "wordBoundary":
      case "voiceSelection":
      case "rateControl":
        return !0;
      case "pitchControl":
        return !1;
      default:
        return !1;
    }
  }

  /** @returns the capability flags plus the max text length for the active transport ("pie" before initialize()). */
  getCapabilities() {
    const transport = this.config ? m(this.config) : "pie";
    return {
      supportsPause: !0,
      supportsResume: !0,
      supportsWordBoundary: !0,
      // ✅ Via speech marks from server
      supportsVoiceSelection: !0,
      supportsRateControl: !0,
      supportsPitchControl: !1,
      // Depends on server provider
      maxTextLength: w[transport]
    };
  }

  /** Drops the stored config and adapter. */
  destroy() {
    this.config = null;
    this.adapter = null;
  }
}
352
+ export {
353
+ B as ServerTTSProvider
354
+ };