@pie-players/pie-tool-ruler 0.3.17 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,460 @@
1
// Bundler-emitted helpers for class-field initialization: `a(this, "name", value)`.
// When the key already exists (including via the prototype chain) the property is
// redefined with defineProperty so an inherited accessor cannot intercept the
// write; otherwise a plain assignment suffices.
var k = Object.defineProperty;
var A = (target, key, value) => {
  if (key in target) {
    return k(target, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return target[key] = value;
};
var a = (target, key, value) => {
  // Symbols pass through untouched; every other key is coerced to a string.
  const normalized = typeof key != "symbol" ? key + "" : key;
  return A(target, normalized, value);
};
4
+ const b = (r) => {
5
+ const e = (r.providerOptions && typeof r.providerOptions == "object" ? r.providerOptions : {}).__pieTelemetry;
6
+ return typeof e == "function" ? e : void 0;
7
+ }, R = {
8
+ pie: 3e3,
9
+ custom: 3e3
10
+ }, v = (r) => r.replace(/\/+$/, ""), I = (r) => {
11
+ const t = v(r.apiEndpoint), e = (r.provider || "").toLowerCase();
12
+ return e === "polly" || e === "google" ? `${t}/${e}/voices` : `${t}/voices`;
13
+ }, w = (r) => r.transportMode === "custom" ? "custom" : r.transportMode === "pie" ? "pie" : r.provider === "custom" ? "custom" : "pie", S = (r, t) => r.endpointMode ? r.endpointMode : t === "custom" ? "rootPost" : "synthesizePath", M = (r, t) => r.endpointValidationMode ? r.endpointValidationMode : t === "custom" ? "none" : "voices", E = (r) => {
14
+ const t = r.providerOptions || {};
15
+ if (typeof t.speedRate == "string")
16
+ return t.speedRate;
17
+ const e = Number(r.rate ?? 1);
18
+ return !Number.isFinite(e) || e <= 0.95 ? "slow" : e >= 1.5 ? "fast" : "medium";
19
+ }, P = (r) => {
20
+ const t = [];
21
+ let e = 0;
22
+ const o = r.split(`
23
+ `).map((i) => i.trim()).filter(Boolean);
24
+ for (const i of o)
25
+ try {
26
+ const s = JSON.parse(i), d = typeof s.type == "string" ? s.type : "word", n = typeof s.time == "number" && Number.isFinite(s.time) ? s.time : 0, c = typeof s.value == "string" ? s.value : "", l = typeof s.start == "number" && Number.isFinite(s.start) ? s.start : null, u = typeof s.end == "number" && Number.isFinite(s.end) ? s.end : null, m = l ?? e, y = u ?? m + Math.max(1, c.length || String(s.value || "").length);
27
+ e = Math.max(y + 1, e), t.push({
28
+ time: n,
29
+ type: d,
30
+ start: m,
31
+ end: y,
32
+ value: c
33
+ });
34
+ } catch {
35
+ }
36
+ return t;
37
+ }, O = {
38
+ id: "pie",
39
+ resolveSynthesisUrl: (r) => {
40
+ const t = S(r, "pie"), e = v(r.apiEndpoint);
41
+ return t === "rootPost" ? e : `${e}/synthesize`;
42
+ },
43
+ buildRequestBody: (r, t) => {
44
+ const e = t.providerOptions || {}, o = typeof t.engine == "string" ? t.engine : typeof e.engine == "string" ? e.engine : void 0, i = typeof e.sampleRate == "number" && Number.isFinite(e.sampleRate) ? e.sampleRate : void 0, s = e.format === "mp3" || e.format === "ogg" || e.format === "pcm" ? e.format : void 0, d = Array.isArray(e.speechMarkTypes) ? e.speechMarkTypes.filter((n) => n === "word" || n === "sentence" || n === "ssml") : void 0;
45
+ return {
46
+ text: r,
47
+ provider: t.provider || "polly",
48
+ voice: t.voice,
49
+ language: t.language,
50
+ rate: t.rate,
51
+ engine: o,
52
+ sampleRate: i,
53
+ format: s,
54
+ speechMarkTypes: d,
55
+ includeSpeechMarks: !0
56
+ };
57
+ },
58
+ parseResponse: async (r) => {
59
+ const t = await r.json();
60
+ return {
61
+ audio: {
62
+ kind: "base64",
63
+ data: t.audio,
64
+ contentType: t.contentType
65
+ },
66
+ speechMarks: Array.isArray(t.speechMarks) ? t.speechMarks : []
67
+ };
68
+ }
69
+ }, C = {
70
+ id: "custom",
71
+ resolveSynthesisUrl: (r) => {
72
+ const t = S(r, "custom"), e = v(r.apiEndpoint);
73
+ return t === "synthesizePath" ? `${e}/synthesize` : e;
74
+ },
75
+ buildRequestBody: (r, t) => {
76
+ const e = t.providerOptions || {}, o = typeof e.lang_id == "string" ? e.lang_id : t.language || "en-US", i = typeof e.cache == "boolean" ? e.cache : !0;
77
+ return {
78
+ text: r,
79
+ speedRate: E(t),
80
+ lang_id: o,
81
+ cache: i
82
+ };
83
+ },
84
+ parseResponse: async (r, t, e, o) => {
85
+ const i = await r.json(), s = {};
86
+ if (t.includeAuthOnAssetFetch)
87
+ for (const [n, c] of Object.entries(e))
88
+ n.toLowerCase() === "authorization" && (s[n] = c);
89
+ let d = [];
90
+ if (typeof i.word == "string" && i.word.length > 0) {
91
+ const n = await fetch(i.word, {
92
+ headers: s,
93
+ signal: o
94
+ });
95
+ if (n.ok) {
96
+ const c = await n.text();
97
+ d = P(c);
98
+ }
99
+ }
100
+ return {
101
+ audio: {
102
+ kind: "url",
103
+ url: i.audioContent
104
+ },
105
+ speechMarks: d
106
+ };
107
+ }
108
+ }, B = {
109
+ pie: O,
110
+ custom: C
111
+ };
112
// Playback engine returned by ServerTTSProvider.initialize() (minified name `z`).
// Synthesizes text through the configured transport adapter, plays the result
// via an HTMLAudioElement, and drives word-boundary highlighting from the
// server-provided speech marks. `a` is the bundler's class-field helper and
// `b` extracts the optional telemetry callback from the config.
class z {
  constructor(t, e) {
    // t: provider config (apiEndpoint, rate, volume, headers, authToken, ...).
    // e: transport adapter object ("pie" or "custom" — module-level O / C).
    a(this, "config");
    a(this, "adapter");
    // Active HTMLAudioElement, or null when idle.
    a(this, "currentAudio", null);
    a(this, "pausedState", !1);
    // Word timings (ms), already rescaled to the configured playback rate.
    a(this, "wordTimings", []);
    // setInterval handle driving highlighting, or null when inactive.
    a(this, "highlightInterval", null);
    // Distinguishes a deliberate stop() from a genuine playback error in onerror.
    a(this, "intentionallyStopped", !1);
    // AbortController for the in-flight synthesis/asset fetches.
    a(this, "activeSynthesisController", null);
    // Monotonically increasing run token; stale synthesis runs bail out.
    a(this, "synthesisRunId", 0);
    a(this, "telemetryReporter");
    // Optional (text, charIndex, length) callback invoked per highlighted word.
    a(this, "onWordBoundary");
    this.config = t, this.adapter = e, this.telemetryReporter = b(t);
  }
  /**
   * Best-effort telemetry emission. Reporter failures are logged and swallowed
   * so telemetry can never break playback.
   */
  async emitTelemetry(t, e) {
    try {
      await this.telemetryReporter?.(t, e);
    } catch (o) {
      console.warn("[ServerTTSProvider] telemetry callback failed:", o);
    }
  }
  /**
   * Synthesize `t` and play it. Resolves when playback finishes (or was
   * intentionally stopped); rejects if playback itself fails.
   */
  async speak(t) {
    // Cancel any previous run, then claim a fresh run id for this request.
    this.stop(), this.intentionallyStopped = !1;
    const e = ++this.synthesisRunId, o = new AbortController();
    this.activeSynthesisController = o;
    const { audioUrl: i, wordTimings: s } = await this.synthesizeSpeech(t, o.signal, e);
    // A newer speak()/stop() superseded us while synthesizing: discard quietly.
    if (e !== this.synthesisRunId) {
      URL.revokeObjectURL(i);
      return;
    }
    // Speech-mark times assume 1x speed; rescale to the configured rate so the
    // highlight loop can compare against audio.currentTime directly.
    const d = this.config.rate || 1;
    return this.wordTimings = s.map((n) => ({
      ...n,
      time: n.time / d
    })), new Promise((n, c) => {
      const l = new Audio(i);
      // playbackRate is clamped to the browser-safe 0.25–4 range, volume to 0–1.
      // onerror: stop() clears src which can fire onerror — treat a deliberate
      // stop as a clean resolution rather than a failure.
      this.currentAudio = l, this.config.rate && (l.playbackRate = Math.max(0.25, Math.min(4, this.config.rate))), this.config.volume !== void 0 && (l.volume = Math.max(0, Math.min(1, this.config.volume))), l.onplay = () => {
        this.pausedState = !1, this.onWordBoundary && this.wordTimings.length > 0 && this.startWordHighlighting();
      }, l.onended = () => {
        this.stopWordHighlighting(), URL.revokeObjectURL(i), this.currentAudio = null, this.wordTimings = [], n();
      }, l.onerror = (u) => {
        this.stopWordHighlighting(), URL.revokeObjectURL(i), this.currentAudio = null, this.wordTimings = [], this.intentionallyStopped ? n() : c(new Error("Failed to play audio from server"));
      }, l.onpause = () => {
        this.stopWordHighlighting(), this.pausedState = !0;
      }, l.play().catch(c);
    });
  }
  /**
   * Call the server API to synthesize speech.
   *
   * @param t text to synthesize
   * @param e AbortSignal tied to this run
   * @param o run id claimed by the caller; a mismatch with synthesisRunId
   *          means a newer request superseded this one
   * @returns { audioUrl, wordTimings } — audioUrl is an object URL the caller
   *          must eventually revoke
   * @throws on network failure, non-OK responses, or supersession; every error
   *         path also emits a backend-call-error telemetry event
   */
  async synthesizeSpeech(t, e, o) {
    const i = Date.now();
    await this.emitTelemetry("pie-tool-backend-call-start", {
      toolId: "tts",
      backend: this.config.provider || "server",
      operation: "synthesize-speech"
    });
    // Base headers; config.headers may add/override, auth token is appended last.
    const s = {
      "Content-Type": "application/json",
      ...this.config.headers
    };
    this.config.authToken && (s.Authorization = `Bearer ${this.config.authToken}`);
    // The IIFE isolates the fetch so network errors can be reported before rethrow.
    const d = this.adapter.resolveSynthesisUrl(this.config), n = this.adapter.buildRequestBody(t, this.config), c = await (async () => {
      try {
        return await fetch(d, {
          method: "POST",
          headers: s,
          body: JSON.stringify(n),
          signal: e
        });
      } catch (h) {
        throw await this.emitTelemetry("pie-tool-backend-call-error", {
          toolId: "tts",
          backend: this.config.provider || "server",
          operation: "synthesize-speech",
          duration: Date.now() - i,
          errorType: "TTSBackendNetworkError",
          message: h instanceof Error ? h.message : String(h)
        }), h;
      }
    })();
    if (!c.ok) {
      // Prefer a server-supplied message; fall back to the HTTP status.
      const h = await c.json().catch(() => ({})), p = h.message || h.error?.message || `Server returned ${c.status}`;
      throw await this.emitTelemetry("pie-tool-backend-call-error", {
        toolId: "tts",
        backend: this.config.provider || "server",
        operation: "synthesize-speech",
        duration: Date.now() - i,
        statusCode: c.status,
        errorType: "TTSBackendRequestError",
        message: p
      }), new Error(p);
    }
    const l = await this.adapter.parseResponse(c, this.config, s, e);
    // Re-check after the (possibly slow) parse: bail if superseded or aborted.
    if (o !== this.synthesisRunId || e.aborted)
      throw new Error("Synthesis superseded by a newer request");
    let u;
    if (l.audio.kind === "base64")
      u = this.base64ToBlob(l.audio.data, l.audio.contentType);
    else {
      // "url" audio: download the asset as a second, separately-telemetered call.
      const h = l.audio.url, p = Date.now();
      await this.emitTelemetry("pie-tool-backend-call-start", {
        toolId: "tts",
        backend: this.config.provider || "server",
        operation: "fetch-synthesized-audio-asset"
      });
      // Auth is forwarded to the asset host only when explicitly opted in.
      const T = {};
      this.config.includeAuthOnAssetFetch && this.config.authToken && (T.Authorization = `Bearer ${this.config.authToken}`);
      const g = await (async () => {
        try {
          return await fetch(h, {
            headers: T,
            signal: e
          });
        } catch (f) {
          throw await this.emitTelemetry("pie-tool-backend-call-error", {
            toolId: "tts",
            backend: this.config.provider || "server",
            operation: "fetch-synthesized-audio-asset",
            duration: Date.now() - p,
            errorType: "TTSAssetNetworkError",
            message: f instanceof Error ? f.message : String(f)
          }), f;
        }
      })();
      if (!g.ok)
        throw await this.emitTelemetry("pie-tool-backend-call-error", {
          toolId: "tts",
          backend: this.config.provider || "server",
          operation: "fetch-synthesized-audio-asset",
          duration: Date.now() - p,
          statusCode: g.status,
          errorType: "TTSAssetFetchError",
          message: `Failed to download synthesized audio (${g.status})`
        }), new Error(`Failed to download synthesized audio (${g.status})`);
      u = await g.blob(), await this.emitTelemetry("pie-tool-backend-call-success", {
        toolId: "tts",
        backend: this.config.provider || "server",
        operation: "fetch-synthesized-audio-asset",
        duration: Date.now() - p
      });
    }
    const m = URL.createObjectURL(u), y = this.parseSpeechMarks(l.speechMarks);
    return await this.emitTelemetry("pie-tool-backend-call-success", {
      toolId: "tts",
      backend: this.config.provider || "server",
      operation: "synthesize-speech",
      duration: Date.now() - i
    }), { audioUrl: m, wordTimings: y };
  }
  /**
   * Convert a base64 string to a Blob with the given content type.
   */
  base64ToBlob(t, e) {
    const o = atob(t), i = new Array(o.length);
    for (let d = 0; d < o.length; d++)
      i[d] = o.charCodeAt(d);
    const s = new Uint8Array(i);
    return new Blob([s], { type: e });
  }
  /**
   * Parse speech marks into word timings. Only "word" marks are kept;
   * sentence/ssml marks are dropped. `length` is derived from end - start.
   */
  parseSpeechMarks(t) {
    return t.filter((e) => e.type === "word").map((e, o) => ({
      time: e.time,
      wordIndex: o,
      charIndex: e.start,
      length: e.end - e.start
    }));
  }
  /**
   * Start word highlighting synchronized with audio playback. Polls
   * currentTime every 50 ms and fires onWordBoundary once per word, in order.
   * NOTE(review): the console.log calls below are debug logging left enabled
   * in the published build — consider gating or removing them.
   */
  startWordHighlighting() {
    if (this.stopWordHighlighting(), !this.currentAudio || !this.onWordBoundary || this.wordTimings.length === 0) {
      console.log("[ServerTTSProvider] Cannot start highlighting:", {
        hasAudio: !!this.currentAudio,
        hasCallback: !!this.onWordBoundary,
        wordTimingsCount: this.wordTimings.length
      });
      return;
    }
    console.log("[ServerTTSProvider] Starting word highlighting with", this.wordTimings.length, "word timings"), console.log("[ServerTTSProvider] Playback rate:", this.currentAudio.playbackRate), console.log("[ServerTTSProvider] First 3 timings:", this.wordTimings.slice(0, 3));
    // t tracks the last highlighted index so each word fires at most once.
    let t = -1;
    this.highlightInterval = window.setInterval(() => {
      if (!this.currentAudio) {
        this.stopWordHighlighting();
        return;
      }
      // currentTime is seconds; timings are milliseconds.
      const e = this.currentAudio.currentTime * 1e3;
      for (let o = 0; o < this.wordTimings.length; o++) {
        const i = this.wordTimings[o];
        if (e >= i.time && o > t) {
          // The first argument (word text) is not available from timings, so "".
          this.onWordBoundary && (console.log("[ServerTTSProvider] Highlighting word at charIndex:", i.charIndex, "length:", i.length, "time:", i.time, "currentTime:", e), this.onWordBoundary("", i.charIndex, i.length)), t = o;
          break;
        }
      }
    }, 50);
  }
  /**
   * Stop word highlighting (idempotent).
   */
  stopWordHighlighting() {
    this.highlightInterval !== null && (clearInterval(this.highlightInterval), this.highlightInterval = null);
  }
  // Pause playback; highlighting is suspended and pausedState is set.
  pause() {
    this.currentAudio && !this.pausedState && (this.currentAudio.pause(), this.stopWordHighlighting(), this.pausedState = !0);
  }
  // Resume a paused playback and restart highlighting if a callback is set.
  // NOTE(review): play() returns a promise that is not awaited or caught here;
  // a rejected resume (e.g. autoplay policy) would surface as an unhandled
  // rejection — confirm whether that is acceptable.
  resume() {
    this.currentAudio && this.pausedState && (this.currentAudio.play(), this.pausedState = !1, this.onWordBoundary && this.wordTimings.length > 0 && this.startWordHighlighting());
  }
  // Hard stop: invalidate the run id, abort in-flight fetches, tear down the
  // audio element (revoking its object URL), and clear all playback state.
  stop() {
    this.synthesisRunId += 1, this.activeSynthesisController && (this.activeSynthesisController.abort(), this.activeSynthesisController = null), this.stopWordHighlighting(), this.currentAudio && (this.intentionallyStopped = !0, this.currentAudio.pause(), this.currentAudio.src && URL.revokeObjectURL(this.currentAudio.src), this.currentAudio.src = "", this.currentAudio = null), this.pausedState = !1, this.wordTimings = [];
  }
  // True while audio exists and is not paused.
  isPlaying() {
    return this.currentAudio !== null && !this.pausedState;
  }
  isPaused() {
    return this.pausedState;
  }
  /**
   * Update settings dynamically (rate, pitch, voice).
   * Note: Voice changes require resynthesis, so voice updates are stored but
   * take effect on the next speak() call. Rate is applied to current playback
   * immediately (clamped to 0.25–4); pitch is stored only.
   */
  updateSettings(t) {
    t.rate !== void 0 && (this.config.rate = t.rate, this.currentAudio && (this.currentAudio.playbackRate = Math.max(0.25, Math.min(4, t.rate)))), t.pitch !== void 0 && (this.config.pitch = t.pitch), t.voice !== void 0 && (this.config.voice = t.voice);
  }
}
343
// Public provider factory, exported below as ServerTTSProvider (minified name
// `W`). Validates the config, selects a transport adapter from the module-level
// registry `B`, optionally probes the endpoint, and returns a `z` player.
class W {
  constructor() {
    a(this, "providerId", "server-tts");
    a(this, "providerName", "Server TTS");
    a(this, "version", "1.0.0");
    // Set by initialize(); cleared by destroy().
    a(this, "config", null);
    a(this, "adapter", null);
    a(this, "telemetryReporter");
  }
  /**
   * Best-effort telemetry emission; reporter failures are logged, never thrown.
   */
  async emitTelemetry(t, e) {
    try {
      await this.telemetryReporter?.(t, e);
    } catch (o) {
      console.warn("[ServerTTSProvider] telemetry callback failed:", o);
    }
  }
  /**
   * Initialize the server TTS provider.
   *
   * This is designed to be fast by default (no API calls).
   * Set validateEndpoint: true in config to test API availability during
   * initialization; validation failures emit telemetry and throw.
   *
   * @param t provider config; apiEndpoint is mandatory
   * @returns a new player instance bound to this config and adapter
   * @throws Error when apiEndpoint is missing or endpoint validation fails
   * @performance Default: <10ms, With validation: 100-500ms
   */
  async initialize(t) {
    const e = t;
    if (!e.apiEndpoint)
      throw new Error("apiEndpoint is required for ServerTTSProvider");
    this.config = e, this.telemetryReporter = b(e);
    // Pick the transport ("pie" or "custom") and its adapter.
    const o = w(e);
    if (this.adapter = B[o], e.validateEndpoint) {
      const i = Date.now();
      if (await this.emitTelemetry("pie-tool-backend-call-start", {
        toolId: "tts",
        backend: e.provider || "server",
        operation: "validate-endpoint"
      }), !await this.testAPIAvailability())
        throw await this.emitTelemetry("pie-tool-backend-call-error", {
          toolId: "tts",
          backend: e.provider || "server",
          operation: "validate-endpoint",
          duration: Date.now() - i,
          errorType: "TTSEndpointValidationError",
          message: `Server TTS API not available at ${e.apiEndpoint}`
        }), new Error(`Server TTS API not available at ${e.apiEndpoint}`);
      await this.emitTelemetry("pie-tool-backend-call-success", {
        toolId: "tts",
        backend: e.provider || "server",
        operation: "validate-endpoint",
        duration: Date.now() - i
      });
    }
    return new z(e, this.adapter);
  }
  /**
   * Test if the API endpoint is available (with a 5s abort timeout).
   * Validation mode "none" short-circuits to true; "voices" issues a GET to the
   * voices URL, otherwise an OPTIONS to the synthesis URL. A 405 (method not
   * allowed) still counts as "available". Never throws — returns false instead.
   *
   * @performance 100-500ms depending on network
   */
  async testAPIAvailability() {
    if (!this.config || !this.adapter)
      return !1;
    try {
      const t = { ...this.config.headers };
      this.config.authToken && (t.Authorization = `Bearer ${this.config.authToken}`);
      const e = new AbortController(), o = setTimeout(() => e.abort(), 5e3), i = M(this.config, this.adapter.id);
      if (i === "none")
        return clearTimeout(o), !0;
      const s = i === "voices" ? I(this.config) : this.adapter.resolveSynthesisUrl(this.config), d = i === "voices" ? "GET" : "OPTIONS";
      try {
        const n = await fetch(s, {
          method: d,
          headers: t,
          signal: e.signal
        });
        return clearTimeout(o), n.ok || n.status === 405;
      } catch {
        // Network error or abort: report unavailable, clearing the timer.
        return clearTimeout(o), !1;
      }
    } catch {
      return !1;
    }
  }
  // Static feature matrix; pitch control is not supported by this provider.
  supportsFeature(t) {
    switch (t) {
      case "pause":
      case "resume":
      case "wordBoundary":
      case "voiceSelection":
      case "rateControl":
        return !0;
      case "pitchControl":
        return !1;
      default:
        return !1;
    }
  }
  // Capability summary; maxTextLength comes from the per-transport table R.
  getCapabilities() {
    const t = this.config ? w(this.config) : "pie";
    return {
      supportsPause: !0,
      supportsResume: !0,
      supportsWordBoundary: !0,
      // Via speech marks from server
      supportsVoiceSelection: !0,
      supportsRateControl: !0,
      supportsPitchControl: !1,
      // Depends on server provider
      maxTextLength: R[t]
    };
  }
  // Drop references so the provider can be garbage-collected / re-initialized.
  destroy() {
    this.config = null, this.adapter = null, this.telemetryReporter = void 0;
  }
}
// Public module surface: the provider factory under its documented name.
export {
  W as ServerTTSProvider
};