speech-to-speech 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,27 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  var vadWeb = require('@ricky0123/vad-web');
4
- var piperTts = require('@realtimex/piper-tts-web');
5
-
6
- function _interopNamespace(e) {
7
- if (e && e.__esModule) return e;
8
- var n = Object.create(null);
9
- if (e) {
10
- Object.keys(e).forEach(function (k) {
11
- if (k !== 'default') {
12
- var d = Object.getOwnPropertyDescriptor(e, k);
13
- Object.defineProperty(n, k, d.get ? d : {
14
- enumerable: true,
15
- get: function () { return e[k]; }
16
- });
17
- }
18
- });
19
- }
20
- n.default = e;
21
- return Object.freeze(n);
22
- }
23
-
24
- var piperTts__namespace = /*#__PURE__*/_interopNamespace(piperTts);
4
+ var piperTtsWeb = require('@realtimex/piper-tts-web');
25
5
 
26
6
  // src/stt/reset-stt-logic.ts
27
7
  var ResetSTTLogic = class {
@@ -557,17 +537,101 @@ var sharedAudioPlayer = {
557
537
  /** Close */
558
538
  close: () => AudioPlayer.reset()
559
539
  };
540
+
541
// src/tts/wasm-cache.ts
/** Name of the Cache API bucket that holds the Piper phonemizer assets. */
var CACHE_NAME = "stt-tts-lib-piper-wasm-v1";
/** Default base URL; ".data" and ".wasm" are appended to it to form the asset URLs. */
var DEFAULT_CDN_BASE = "https://cdn.jsdelivr.net/npm/@diffusionstudio/piper-wasm@1.0.0/build/piper_phonemize";
/**
 * Memo of asset lookups, keyed by CDN base.
 * Each value is the (possibly still pending) promise of `{ piperData, piperWasm }`.
 * Storing the promise rather than the settled value lets concurrent callers
 * share a single download instead of each creating its own blob URLs.
 */
var assetUrlPromisesByBase = new Map();

/**
 * Report whether the Cache API global (`caches`) exists in this environment.
 * @returns {boolean}
 */
function isCacheAvailable() {
  return typeof caches !== "undefined";
}

/**
 * Revoke `url` if it is a blob URL; any other value is ignored.
 * Revocation is best-effort: a failure is logged, never thrown.
 * @param {unknown} url
 */
function revokeIfBlobUrl(url) {
  if (typeof url !== "string" || !url.startsWith("blob:")) return;
  try {
    URL.revokeObjectURL(url);
  } catch (err) {
    console.warn("[wasm-cache] Failed to revoke blob URL:", err);
  }
}

/**
 * Return a blob URL for `url`, reading the body from `cache` when an entry
 * exists and otherwise fetching it and storing a copy in `cache` first.
 * @param {Cache} cache - An opened Cache API bucket.
 * @param {string} url - Absolute URL of the asset.
 * @returns {Promise<string>} Blob URL for the asset body.
 * @throws {Error} When the network response is not OK (`response.ok` is false).
 */
async function fetchAndCache(cache, url) {
  const existing = await cache.match(url);
  if (existing) {
    const cachedBlob = await existing.blob();
    return URL.createObjectURL(cachedBlob);
  }
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status}`);
  }
  // A response body can be consumed once, so the cache gets its own clone.
  const responseForCache = response.clone();
  await cache.put(url, responseForCache);
  const blob = await response.blob();
  return URL.createObjectURL(blob);
}

/**
 * Resolve the asset URLs for one CDN base. Never rejects: any failure is
 * logged and the plain CDN URLs are returned instead.
 * @param {string} cdnBase
 * @returns {Promise<{ piperData: string, piperWasm: string }>}
 */
async function loadWasmAssetUrls(cdnBase) {
  const dataUrl = `${cdnBase}.data`;
  const wasmUrl = `${cdnBase}.wasm`;
  if (!isCacheAvailable()) {
    console.log("[wasm-cache] Cache API unavailable \u2014 using CDN URLs directly");
    return { piperData: dataUrl, piperWasm: wasmUrl };
  }
  try {
    const cache = await caches.open(CACHE_NAME);
    // allSettled (not all) so that, when one asset fails, the blob URL that
    // the other asset may already have produced can be revoked, not leaked.
    const outcomes = await Promise.allSettled([
      fetchAndCache(cache, dataUrl),
      fetchAndCache(cache, wasmUrl)
    ]);
    const failure = outcomes.find((outcome) => outcome.status === "rejected");
    if (failure) {
      for (const outcome of outcomes) {
        if (outcome.status === "fulfilled") revokeIfBlobUrl(outcome.value);
      }
      throw failure.reason;
    }
    const [piperData, piperWasm] = outcomes.map((outcome) => outcome.value);
    console.log("[wasm-cache] WASM assets served from Cache API (blob URLs)");
    return { piperData, piperWasm };
  } catch (err) {
    console.warn("[wasm-cache] Caching failed, falling back to CDN URLs:", err);
    return { piperData: dataUrl, piperWasm: wasmUrl };
  }
}

/**
 * Make sure the Piper `.data` / `.wasm` assets for `cdnBase` are stored in the
 * Cache API and return URLs for them.
 *
 * The result is memoized per `cdnBase`; repeated and concurrent calls with the
 * same base resolve to the same object. When the Cache API is unavailable or
 * caching fails, the returned URLs are the plain CDN URLs.
 *
 * @param {string} [cdnBase=DEFAULT_CDN_BASE] - Base URL without extension.
 * @returns {Promise<{ piperData: string, piperWasm: string }>}
 */
async function ensureWasmCached(cdnBase = DEFAULT_CDN_BASE) {
  let pending = assetUrlPromisesByBase.get(cdnBase);
  if (!pending) {
    pending = loadWasmAssetUrls(cdnBase);
    assetUrlPromisesByBase.set(cdnBase, pending);
  }
  return pending;
}

/**
 * Check whether both assets for `cdnBase` have entries in the Cache API bucket.
 * @param {string} [cdnBase=DEFAULT_CDN_BASE] - Base URL without extension.
 * @returns {Promise<boolean>} `false` when the Cache API is unavailable or the lookup throws.
 */
async function isWasmCached(cdnBase = DEFAULT_CDN_BASE) {
  if (!isCacheAvailable()) return false;
  try {
    const cache = await caches.open(CACHE_NAME);
    const [data, wasm] = await Promise.all([
      cache.match(`${cdnBase}.data`),
      cache.match(`${cdnBase}.wasm`)
    ]);
    return !!data && !!wasm;
  } catch {
    return false;
  }
}

/**
 * Drop every memoized lookup, revoke the blob URLs they produced, and delete
 * the Cache API bucket. Failures are logged, never thrown.
 * @returns {Promise<void>}
 */
async function clearWasmCache() {
  const pendingLookups = [...assetUrlPromisesByBase.values()];
  assetUrlPromisesByBase.clear();
  for (const pending of pendingLookups) {
    // loadWasmAssetUrls never rejects, so this await cannot throw. Waiting for
    // in-flight lookups also means their blob URLs get revoked here.
    const urls = await pending;
    revokeIfBlobUrl(urls.piperData);
    revokeIfBlobUrl(urls.piperWasm);
  }
  if (!isCacheAvailable()) return;
  try {
    await caches.delete(CACHE_NAME);
    console.log("[wasm-cache] Cache cleared");
  } catch (err) {
    console.warn("[wasm-cache] Failed to clear cache:", err);
  }
}
620
+
621
+ // src/tts/piper-synthesizer.ts
560
622
  var DEFAULT_VOICE_ID = "en_US-hfc_female-medium";
561
623
  var TTSLogic = class {
562
624
  constructor(config = {}) {
563
625
  this.ready = false;
564
626
  this.voiceLoaded = false;
565
627
  this.warmUp = true;
628
+ this.ttsSession = null;
566
629
  this.config = {
567
630
  voiceId: DEFAULT_VOICE_ID,
568
631
  sampleRate: 22050,
569
632
  useSharedAudioPlayer: true,
570
633
  warmUp: true,
634
+ enableWasmCache: true,
571
635
  ...config
572
636
  };
573
637
  this.useSharedPlayer = this.config.useSharedAudioPlayer !== false;
@@ -594,10 +658,11 @@ var TTSLogic = class {
594
658
  throw new Error("Voice not loaded. Call initialize() first.");
595
659
  }
596
660
  try {
597
- await piperTts__namespace.predict({
598
- text,
599
- voiceId: this.config.voiceId
600
- });
661
+ if (this.ttsSession) {
662
+ await this.ttsSession.predict(text);
663
+ } else {
664
+ throw new Error("TtsSession not created yet");
665
+ }
601
666
  console.log("\u2713 Piper synthesizer warmed up");
602
667
  return { synthesized: true };
603
668
  } catch (error) {
@@ -612,11 +677,30 @@ var TTSLogic = class {
612
677
  try {
613
678
  const voiceId = this.config.voiceId;
614
679
  console.log("\u{1F4CD} Loading Piper voice:", voiceId);
615
- const storedVoices = await piperTts__namespace.stored();
680
+ let wasmPaths;
681
+ if (this.config.wasmPaths) {
682
+ console.log("\u{1F4E6} Using custom WASM paths");
683
+ wasmPaths = {
684
+ onnxWasm: this.config.wasmPaths.onnxWasm ?? piperTtsWeb.TtsSession.WASM_LOCATIONS.onnxWasm,
685
+ piperData: this.config.wasmPaths.piperData,
686
+ piperWasm: this.config.wasmPaths.piperWasm
687
+ };
688
+ } else if (this.config.enableWasmCache !== false) {
689
+ console.log("\u{1F4E6} Caching WASM assets via Cache API...");
690
+ const cached = await ensureWasmCached(piperTtsWeb.WASM_BASE);
691
+ wasmPaths = {
692
+ onnxWasm: piperTtsWeb.TtsSession.WASM_LOCATIONS.onnxWasm,
693
+ piperData: cached.piperData,
694
+ piperWasm: cached.piperWasm
695
+ };
696
+ } else {
697
+ wasmPaths = { ...piperTtsWeb.TtsSession.WASM_LOCATIONS };
698
+ }
699
+ const storedVoices = await piperTtsWeb.stored();
616
700
  const alreadyCached = Array.isArray(storedVoices) ? storedVoices.includes(voiceId) : false;
617
701
  if (!alreadyCached) {
618
702
  console.log("\u2B07\uFE0F Downloading voice model...");
619
- await piperTts__namespace.download(voiceId, (progress) => {
703
+ await piperTtsWeb.download(voiceId, (progress) => {
620
704
  if (progress?.total) {
621
705
  const pct = Math.round(progress.loaded * 100 / progress.total);
622
706
  console.log(`\u2B07\uFE0F Downloading: ${pct}%`);
@@ -625,6 +709,12 @@ var TTSLogic = class {
625
709
  } else {
626
710
  console.log("\u2713 Voice found in cache");
627
711
  }
712
+ this.ttsSession = new piperTtsWeb.TtsSession({
713
+ voiceId,
714
+ wasmPaths,
715
+ logger: (msg) => console.log(`[piper] ${msg}`)
716
+ });
717
+ await this.ttsSession.waitReady;
628
718
  this.voiceLoaded = true;
629
719
  if (this.config.warmUp) {
630
720
  const { synthesized } = await this.warmup();
@@ -660,10 +750,7 @@ var TTSLogic = class {
660
750
  throw new Error("No text provided for synthesis");
661
751
  }
662
752
  try {
663
- const wavBlob = await piperTts__namespace.predict({
664
- text: trimmed,
665
- voiceId: this.config.voiceId
666
- });
753
+ const wavBlob = await this.ttsSession.predict(trimmed);
667
754
  const arrayBuffer = await wavBlob.arrayBuffer();
668
755
  const audioContext = new (window.AudioContext || window.webkitAudioContext)();
669
756
  const decodedBuffer = await audioContext.decodeAudioData(arrayBuffer);
@@ -690,10 +777,7 @@ var TTSLogic = class {
690
777
  if (!trimmed) {
691
778
  throw new Error("No text provided for synthesis");
692
779
  }
693
- return piperTts__namespace.predict({
694
- text: trimmed,
695
- voiceId: this.config.voiceId
696
- });
780
+ return this.ttsSession.predict(trimmed);
697
781
  }
698
782
  /**
699
783
  * Synthesize text and add to queue (uses shared player by default)
@@ -717,6 +801,7 @@ var TTSLogic = class {
717
801
  async dispose() {
718
802
  this.ready = false;
719
803
  this.voiceLoaded = false;
804
+ this.ttsSession = null;
720
805
  }
721
806
  };
722
807
  function textToPhonemes(_text) {
@@ -1822,17 +1907,20 @@ exports.STTLogic = STTLogic;
1822
1907
  exports.SimpleQueue = SimpleQueue;
1823
1908
  exports.TTSLogic = TTSLogic;
1824
1909
  exports.VADController = VADController;
1910
+ exports.clearWasmCache = clearWasmCache;
1825
1911
  exports.configureFillerManager = configureFillerManager;
1826
1912
  exports.createAudioPlayer = createAudioPlayer;
1827
1913
  exports.createOrtEnvironment = createOrtEnvironment;
1828
1914
  exports.emitSentence = emitSentence;
1829
1915
  exports.ensureOrtReady = ensureOrtReady;
1830
1916
  exports.ensureVoiceLoaded = ensureVoiceLoaded;
1917
+ exports.ensureWasmCached = ensureWasmCached;
1831
1918
  exports.getAsyncIterator = getAsyncIterator;
1832
1919
  exports.getBackendLabel = getBackendLabel;
1833
1920
  exports.getFillerManager = getFillerManager;
1834
1921
  exports.handleChunk = handleChunk;
1835
1922
  exports.isCorruptModelError = isCorruptModelError;
1923
+ exports.isWasmCached = isWasmCached;
1836
1924
  exports.nextBoundaryIndex = nextBoundaryIndex;
1837
1925
  exports.playerWorker = playerWorker;
1838
1926
  exports.preparePiperVoice = preparePiperVoice;