react-native-executorch 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/android/src/main/cpp/CMakeLists.txt +2 -1
  2. package/common/rnexecutorch/data_processing/Numerical.cpp +27 -19
  3. package/common/rnexecutorch/data_processing/Numerical.h +53 -4
  4. package/common/rnexecutorch/data_processing/dsp.cpp +1 -1
  5. package/common/rnexecutorch/data_processing/dsp.h +1 -1
  6. package/common/rnexecutorch/data_processing/gzip.cpp +47 -0
  7. package/common/rnexecutorch/data_processing/gzip.h +7 -0
  8. package/common/rnexecutorch/host_objects/ModelHostObject.h +24 -0
  9. package/common/rnexecutorch/metaprogramming/TypeConcepts.h +21 -1
  10. package/common/rnexecutorch/models/BaseModel.cpp +3 -2
  11. package/common/rnexecutorch/models/BaseModel.h +3 -2
  12. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +100 -39
  13. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +43 -21
  14. package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +307 -0
  15. package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +61 -0
  16. package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp +80 -0
  17. package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h +27 -0
  18. package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp +96 -0
  19. package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h +36 -0
  20. package/common/rnexecutorch/models/speech_to_text/types/DecodingOptions.h +15 -0
  21. package/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h +12 -0
  22. package/common/rnexecutorch/models/speech_to_text/types/ProcessResult.h +12 -0
  23. package/common/rnexecutorch/models/speech_to_text/types/Segment.h +14 -0
  24. package/common/rnexecutorch/models/speech_to_text/types/Word.h +13 -0
  25. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +75 -53
  26. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  27. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +5 -5
  28. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +7 -12
  29. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  30. package/lib/typescript/types/stt.d.ts +0 -9
  31. package/lib/typescript/types/stt.d.ts.map +1 -1
  32. package/package.json +1 -1
  33. package/react-native-executorch.podspec +2 -0
  34. package/src/modules/natural_language_processing/SpeechToTextModule.ts +118 -54
  35. package/src/types/stt.ts +0 -12
  36. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +0 -21
  37. package/common/rnexecutorch/models/EncoderDecoderBase.h +0 -31
  38. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +0 -27
  39. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +0 -50
  40. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +0 -25
  41. package/lib/module/utils/SpeechToTextModule/ASR.js +0 -191
  42. package/lib/module/utils/SpeechToTextModule/ASR.js.map +0 -1
  43. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +0 -73
  44. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +0 -1
  45. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +0 -56
  46. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +0 -1
  47. package/lib/module/utils/stt.js +0 -22
  48. package/lib/module/utils/stt.js.map +0 -1
  49. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +0 -27
  50. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +0 -1
  51. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +0 -23
  52. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +0 -1
  53. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +0 -13
  54. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +0 -1
  55. package/lib/typescript/utils/stt.d.ts +0 -2
  56. package/lib/typescript/utils/stt.d.ts.map +0 -1
  57. package/src/utils/SpeechToTextModule/ASR.ts +0 -303
  58. package/src/utils/SpeechToTextModule/OnlineProcessor.ts +0 -87
  59. package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +0 -79
  60. package/src/utils/stt.ts +0 -28
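The changed-files list shows the streaming ASR pipeline moving from TypeScript (src/utils/SpeechToTextModule/*) into C++ (speech_to_text/asr and speech_to_text/stream), with the JS SpeechToTextModule now delegating to a JSI host object. For orientation, a rough TypeScript sketch of that native surface as it can be inferred from the calls made in the diffs below; the interface name and the exact return representations are assumptions, not declarations shipped by the package:

    // Hypothetical shape of the object returned by global.loadSpeechToText(...),
    // reconstructed only from how SpeechToTextModule calls it in this diff.
    interface NativeSpeechToText {
      // The JS layer wraps these results in Float32Array, so the native side
      // presumably returns an ArrayBuffer or array-like of floats.
      encode(waveform: Float32Array): Promise<ArrayBuffer | number[]>;
      decode(tokens: Int32Array, encoderOutput: Float32Array): Promise<ArrayBuffer | number[]>;
      transcribe(waveform: Float32Array, language: string): Promise<string>;
      // Delivers partial results via the callback until isDone is true.
      stream(
        callback: (committed: string, nonCommitted: string, isDone: boolean) => void,
        language: string
      ): Promise<void>;
      streamInsert(waveform: Float32Array): Promise<void>;
      streamStop(): Promise<void>;
    }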
package/lib/module/modules/natural_language_processing/SpeechToTextModule.js CHANGED
@@ -1,71 +1,98 @@
  "use strict";

- import { ASR } from '../../utils/SpeechToTextModule/ASR';
- import { OnlineASRProcessor } from '../../utils/SpeechToTextModule/OnlineProcessor';
+ import { Logger } from '../../common/Logger';
+ import { ResourceFetcher } from '../../utils/ResourceFetcher';
  export class SpeechToTextModule {
- asr = new ASR();
- processor = new OnlineASRProcessor(this.asr);
- isStreaming = false;
- readyToProcess = false;
- minAudioSamples = 1 * 16000; // 1 second
-
  async load(model, onDownloadProgressCallback = () => {}) {
  this.modelConfig = model;
- return this.asr.load(model, onDownloadProgressCallback);
+ const tokenizerLoadPromise = ResourceFetcher.fetch(undefined, model.tokenizerSource);
+ const encoderDecoderPromise = ResourceFetcher.fetch(onDownloadProgressCallback, model.encoderSource, model.decoderSource);
+ const [tokenizerSources, encoderDecoderResults] = await Promise.all([tokenizerLoadPromise, encoderDecoderPromise]);
+ const encoderSource = encoderDecoderResults?.[0];
+ const decoderSource = encoderDecoderResults?.[1];
+ if (!encoderSource || !decoderSource || !tokenizerSources) {
+ throw new Error('Download interrupted.');
+ }
+ this.nativeModule = await global.loadSpeechToText(encoderSource, decoderSource, tokenizerSources[0]);
  }
  async encode(waveform) {
- return this.asr.encode(waveform);
+ if (Array.isArray(waveform)) {
+ Logger.info('Passing waveform as number[] is deprecated, use Float32Array instead');
+ waveform = new Float32Array(waveform);
+ }
+ return new Float32Array(await this.nativeModule.encode(waveform));
  }
- async decode(tokens) {
- return this.asr.decode(tokens);
+ async decode(tokens, encoderOutput) {
+ if (Array.isArray(tokens)) {
+ Logger.info('Passing tokens as number[] is deprecated, use Int32Array instead');
+ tokens = new Int32Array(tokens);
+ }
+ if (Array.isArray(encoderOutput)) {
+ Logger.info('Passing encoderOutput as number[] is deprecated, use Float32Array instead');
+ encoderOutput = new Float32Array(encoderOutput);
+ }
+ return new Float32Array(await this.nativeModule.decode(tokens, encoderOutput));
  }
  async transcribe(waveform, options = {}) {
  this.validateOptions(options);
- const segments = await this.asr.transcribe(waveform, options);
- let transcription = '';
- for (const segment of segments) {
- for (const word of segment.words) {
- transcription += ` ${word.word}`;
- }
+ if (Array.isArray(waveform)) {
+ Logger.info('Passing waveform as number[] is deprecated, use Float32Array instead');
+ waveform = new Float32Array(waveform);
  }
- return transcription.trim();
+ return this.nativeModule.transcribe(waveform, options.language || '');
  }
  async *stream(options = {}) {
- if (this.isStreaming) {
- throw new Error('Streaming is already in progress');
- }
  this.validateOptions(options);
- this.resetStreamState();
- this.isStreaming = true;
- while (this.isStreaming) {
- if (!this.readyToProcess || this.processor.audioBuffer.length < this.minAudioSamples) {
- await new Promise(resolve => setTimeout(resolve, 100));
+ const queue = [];
+ let waiter = null;
+ let finished = false;
+ let error;
+ const wake = () => {
+ waiter?.();
+ waiter = null;
+ };
+ (async () => {
+ try {
+ await this.nativeModule.stream((committed, nonCommitted, isDone) => {
+ queue.push({
+ committed,
+ nonCommitted
+ });
+ if (isDone) {
+ finished = true;
+ }
+ wake();
+ }, options.language || '');
+ finished = true;
+ wake();
+ } catch (e) {
+ error = e;
+ finished = true;
+ wake();
+ }
+ })();
+ while (true) {
+ if (queue.length > 0) {
+ yield queue.shift();
+ if (finished && queue.length === 0) {
+ return;
+ }
  continue;
  }
- const {
- committed,
- nonCommitted
- } = await this.processor.processIter(options);
- yield {
- committed,
- nonCommitted
- };
- this.readyToProcess = false;
+ if (error) throw error;
+ if (finished) return;
+ await new Promise(r => waiter = r);
  }
- const {
- committed
- } = await this.processor.finish();
- yield {
- committed,
- nonCommitted: ''
- };
  }
- streamStop() {
- this.isStreaming = false;
+ async streamInsert(waveform) {
+ if (Array.isArray(waveform)) {
+ Logger.info('Passing waveform as number[] is deprecated, use Float32Array instead');
+ waveform = new Float32Array(waveform);
+ }
+ return this.nativeModule.streamInsert(waveform);
  }
- streamInsert(waveform) {
- this.processor.insertAudioChunk(waveform);
- this.readyToProcess = true;
+ async streamStop() {
+ return this.nativeModule.streamStop();
  }
  validateOptions(options) {
  if (!this.modelConfig.isMultilingual && options.language) {
@@ -75,10 +102,5 @@ export class SpeechToTextModule {
  throw new Error('Model is multilingual, provide a language');
  }
  }
- resetStreamState() {
- this.isStreaming = false;
- this.readyToProcess = false;
- this.processor = new OnlineASRProcessor(this.asr);
- }
  }
  //# sourceMappingURL=SpeechToTextModule.js.map
package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"names":["ASR","OnlineASRProcessor","SpeechToTextModule","asr","processor","isStreaming","readyToProcess","minAudioSamples","load","model","onDownloadProgressCallback","modelConfig","encode","waveform","decode","tokens","transcribe","options","validateOptions","segments","transcription","segment","word","words","trim","stream","Error","resetStreamState","audioBuffer","length","Promise","resolve","setTimeout","committed","nonCommitted","processIter","finish","streamStop","streamInsert","insertAudioChunk","isMultilingual","language"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AACA,SAASA,GAAG,QAAQ,oCAAoC;AACxD,SAASC,kBAAkB,QAAQ,gDAAgD;AAEnF,OAAO,MAAMC,kBAAkB,CAAC;EAEtBC,GAAG,GAAQ,IAAIH,GAAG,CAAC,CAAC;EAEpBI,SAAS,GAAuB,IAAIH,kBAAkB,CAAC,IAAI,CAACE,GAAG,CAAC;EAChEE,WAAW,GAAG,KAAK;EACnBC,cAAc,GAAG,KAAK;EACtBC,eAAe,GAAW,CAAC,GAAG,KAAK,CAAC,CAAC;;EAE7C,MAAaC,IAAIA,CACfC,KAA8B,EAC9BC,0BAAsD,GAAGA,CAAA,KAAM,CAAC,CAAC,EACjE;IACA,IAAI,CAACC,WAAW,GAAGF,KAAK;IACxB,OAAO,IAAI,CAACN,GAAG,CAACK,IAAI,CAACC,KAAK,EAAEC,0BAA0B,CAAC;EACzD;EAEA,MAAaE,MAAMA,CAACC,QAAsB,EAAiB;IACzD,OAAO,IAAI,CAACV,GAAG,CAACS,MAAM,CAACC,QAAQ,CAAC;EAClC;EAEA,MAAaC,MAAMA,CAACC,MAAgB,EAAyB;IAC3D,OAAO,IAAI,CAACZ,GAAG,CAACW,MAAM,CAACC,MAAM,CAAC;EAChC;EAEA,MAAaC,UAAUA,CACrBH,QAAkB,EAClBI,OAAwB,GAAG,CAAC,CAAC,EACZ;IACjB,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,MAAME,QAAQ,GAAG,MAAM,IAAI,CAAChB,GAAG,CAACa,UAAU,CAACH,QAAQ,EAAEI,OAAO,CAAC;IAE7D,IAAIG,aAAa,GAAG,EAAE;IACtB,KAAK,MAAMC,OAAO,IAAIF,QAAQ,EAAE;MAC9B,KAAK,MAAMG,IAAI,IAAID,OAAO,CAACE,KAAK,EAAE;QAChCH,aAAa,IAAI,IAAIE,IAAI,CAACA,IAAI,EAAE;MAClC;IACF;IAEA,OAAOF,aAAa,CAACI,IAAI,CAAC,CAAC;EAC7B;EAEA,OAAcC,MAAMA,CAACR,OAAwB,GAAG,CAAC,CAAC,EAAE;IAClD,IAAI,IAAI,CAACZ,WAAW,EAAE;MACpB,MAAM,IAAIqB,KAAK,CAAC,kCAAkC,CAAC;IACrD;IACA,IAAI,CAACR,eAAe,CAACD,OAAO,CAAC;IAC7B,IAAI,CAACU,gBAAgB,CAAC,CAAC;IAEvB,IAAI,CAACtB,WAAW,GAAG,IAAI;IACvB,OAAO,IAAI,CAACA,WAAW,EAAE;MACvB,IACE,CAAC,IAAI,CAACC,cAAc,IACpB,IAAI,CAACF,SAAS,CAACwB,WAAW,CAACC,MAAM,GAAG,IAAI,CAACtB,eAAe,EACxD;QACA,MAAM,IAAIuB,OAAO,CAAEC,OAAO,IAAKC,UAAU,CAACD,OAAO,EAAE,GAAG,CAAC,CAAC;QACxD;MACF;MAEA,MAAM;QAAEE,SAAS;QAAEC;MAAa,CAAC,GAC/B,MAAM,IAAI,CAAC9B,SAAS,CAAC+B,WAAW,CAAClB,OAAO,CAAC;MAC3C,MAAM;QAAEgB,SAAS;QAAEC;MAAa,CAAC;MACjC,IAAI,CAAC5B,cAAc,GAAG,KAAK;IAC7B;IAEA,MAAM;MAAE2B;IAAU,CAAC,GAAG,MAAM,IAAI,CAAC7B,SAAS,CAACgC,MAAM,CAAC,CAAC;IACnD,MAAM;MAAEH,SAAS;MAAEC,YAAY,EAAE;IAAG,CAAC;EACvC;EAEOG,UAAUA,CAAA,EAAG;IAClB,IAAI,CAAChC,WAAW,GAAG,KAAK;EAC1B;EAEOiC,YAAYA,CAACzB,QAAkB,EAAE;IACtC,IAAI,CAACT,SAAS,CAACmC,gBAAgB,CAAC1B,QAAQ,CAAC;IACzC,IAAI,CAACP,cAAc,GAAG,IAAI;EAC5B;EAEQY,eAAeA,CAACD,OAAwB,EAAE;IAChD,IAAI,CAAC,IAAI,CAACN,WAAW,CAAC6B,cAAc,IAAIvB,OAAO,CAACwB,QAAQ,EAAE;MACxD,MAAM,IAAIf,KAAK,CAAC,gDAAgD,CAAC;IACnE;IACA,IAAI,IAAI,CAACf,WAAW,CAAC6B,cAAc,IAAI,CAACvB,OAAO,CAACwB,QAAQ,EAAE;MACxD,MAAM,IAAIf,KAAK,CAAC,2CAA2C,CAAC;IAC9D;EACF;EAEQC,gBAAgBA,CAAA,EAAG;IACzB,IAAI,CAACtB,WAAW,GAAG,KAAK;IACxB,IAAI,CAACC,cAAc,GAAG,KAAK;IAC3B,IAAI,CAACF,SAAS,GAAG,IAAIH,kBAAkB,CAAC,IAAI,CAACE,GAAG,CAAC;EACnD;AACF","ignoreList":[]}
+ {"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","language","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;EAK9B,MAAaC,IAAIA,CACfC,KAA8B,EAC9BC,0BAAsD,GAAGA,CAAA,KAAM,CAAC,CAAC,EACjE;IACA,IAAI,CAACC,WAAW,GAAGF,KAAK;IAExB,MAAMG,oBAAoB,GAAGN,eAAe,CAACO,KAAK,CAChDC,SAAS,EACTL,KAAK,CAACM,eACR,CAAC;IACD,MAAMC,qBAAqB,GAAGV,eAAe,CAACO,KAAK,CACjDH,0BAA0B,EAC1BD,KAAK,CAACQ,aAAa,EACnBR,KAAK,CAACS,aACR,CAAC;IACD,MAAM,CAACC,gBAAgB,EAAEC,qBAAqB,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CAClEV,oBAAoB,EACpBI,qBAAqB,CACtB,CAAC;IACF,MAAMC,aAAa,GAAGG,qBAAqB,GAAG,CAAC,CAAC;IAChD,MAAMF,aAAa,GAAGE,qBAAqB,GAAG,CAAC,CAAC;IAChD,IAAI,CAACH,aAAa,IAAI,CAACC,aAAa,IAAI,CAACC,gBAAgB,EAAE;MACzD,MAAM,IAAII,KAAK,CAAC,uBAAuB,CAAC;IAC1C;IACA,IAAI,CAACC,YAAY,GAAG,MAAMC,MAAM,CAACC,gBAAgB,CAC/CT,aAAa,EACbC,aAAa,EACbC,gBAAgB,CAAC,CAAC,CACpB,CAAC;EACH;EAEA,MAAaQ,MAAMA,CACjBC,QAAiC,EACV;IACvB,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3BvB,MAAM,CAAC0B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAII,YAAY,CAAC,MAAM,IAAI,CAACR,YAAY,CAACG,MAAM,CAACC,QAAQ,CAAC,CAAC;EACnE;EAEA,MAAaK,MAAMA,CACjBC,MAA6B,EAC7BC,aAAsC,EACf;IACvB,IAAIN,KAAK,CAACC,OAAO,CAACI,MAAM,CAAC,EAAE;MACzB7B,MAAM,CAAC0B,IAAI,CACT,kEACF,CAAC;MACDG,MAAM,GAAG,IAAIE,UAAU,CAACF,MAAM,CAAC;IACjC;IACA,IAAIL,KAAK,CAACC,OAAO,CAACK,aAAa,CAAC,EAAE;MAChC9B,MAAM,CAAC0B,IAAI,CACT,2EACF,CAAC;MACDI,aAAa,GAAG,IAAIH,YAAY,CAACG,aAAa,CAAC;IACjD;IACA,OAAO,IAAIH,YAAY,CACrB,MAAM,IAAI,CAACR,YAAY,CAACS,MAAM,CAACC,MAAM,EAAEC,aAAa,CACtD,CAAC;EACH;EAEA,MAAaE,UAAUA,CACrBT,QAAiC,EACjCU,OAAwB,GAAG,CAAC,CAAC,EACZ;IACjB,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,IAAIT,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3BvB,MAAM,CAAC0B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IAEA,OAAO,IAAI,CAACJ,YAAY,CAACa,UAAU,CAACT,QAAQ,EAAEU,OAAO,CAACE,QAAQ,IAAI,EAAE,CAAC;EACvE;EAEA,OAAcC,MAAMA,CAClBH,OAAwB,GAAG,CAAC,CAAC,EACgC;IAC7D,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,MAAMI,KAAoD,GAAG,EAAE;IAC/D,IAAIC,MAA2B,GAAG,IAAI;IACtC,IAAIC,QAAQ,GAAG,KAAK;IACpB,IAAIC,KAAc;IAElB,MAAMC,IAAI,GAAGA,CAAA,KAAM;MACjBH,MAAM,GAAG,CAAC;MACVA,MAAM,GAAG,IAAI;IACf,CAAC;IAED,CAAC,YAAY;MACX,IAAI;QACF,MAAM,IAAI,CAACnB,YAAY,CAACiB,MAAM,CAC5B,CAACM,SAAiB,EAAEC,YAAoB,EAAEC,MAAe,KAAK;UAC5DP,KAAK,CAACQ,IAAI,CAAC;YAAEH,SAAS;YAAEC;UAAa,CAAC,CAAC;UACvC,IAAIC,MAAM,EAAE;YACVL,QAAQ,GAAG,IAAI;UACjB;UACAE,IAAI,CAAC,CAAC;QACR,CAAC,EACDR,OAAO,CAACE,QAAQ,IAAI,EACtB,CAAC;QACDI,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR,CAAC,CAAC,OAAOK,CAAC,EAAE;QACVN,KAAK,GAAGM,CAAC;QACTP,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR;IACF,CAAC,EAAE,CAAC;IAEJ,OAAO,IAAI,EAAE;MACX,IAAIJ,KAAK,CAACU,MAAM,GAAG,CAAC,EAAE;QACpB,MAAMV,KAAK,CAACW,KAAK,CAAC,CAAE;QACpB,IAAIT,QAAQ,IAAIF,KAAK,CAACU,MAAM,KAAK,CAAC,EAAE;UAClC;QACF;QACA;MACF;MACA,IAAIP,KAAK,EAAE,MAAMA,KAAK;MACtB,IAAID,QA
AQ,EAAE;MACd,MAAM,IAAIvB,OAAO,CAAQiC,CAAC,IAAMX,MAAM,GAAGW,CAAE,CAAC;IAC9C;EACF;EAEA,MAAaC,YAAYA,CAAC3B,QAAiC,EAAiB;IAC1E,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3BvB,MAAM,CAAC0B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAI,CAACJ,YAAY,CAAC+B,YAAY,CAAC3B,QAAQ,CAAC;EACjD;EAEA,MAAa4B,UAAUA,CAAA,EAAkB;IACvC,OAAO,IAAI,CAAChC,YAAY,CAACgC,UAAU,CAAC,CAAC;EACvC;EAEQjB,eAAeA,CAACD,OAAwB,EAAE;IAChD,IAAI,CAAC,IAAI,CAAC3B,WAAW,CAAC8C,cAAc,IAAInB,OAAO,CAACE,QAAQ,EAAE;MACxD,MAAM,IAAIjB,KAAK,CAAC,gDAAgD,CAAC;IACnE;IACA,IAAI,IAAI,CAACZ,WAAW,CAAC8C,cAAc,IAAI,CAACnB,OAAO,CAACE,QAAQ,EAAE;MACxD,MAAM,IAAIjB,KAAK,CAAC,2CAA2C,CAAC;IAC9D;EACF;AACF","ignoreList":[]}
package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts CHANGED
@@ -9,11 +9,11 @@ export declare const useSpeechToText: ({ model, preventLoad, }: {
  downloadProgress: number;
  committedTranscription: string;
  nonCommittedTranscription: string;
- encode: (waveform: Float32Array<ArrayBufferLike>) => Promise<void>;
- decode: (tokens: number[]) => Promise<Float32Array<ArrayBufferLike>>;
- transcribe: (waveform: number[], options?: import("../../types/stt").DecodingOptions | undefined) => Promise<string>;
+ encode: (waveform: number[] | Float32Array<ArrayBufferLike>) => Promise<Float32Array<ArrayBufferLike>>;
+ decode: (tokens: number[] | Int32Array<ArrayBufferLike>, encoderOutput: number[] | Float32Array<ArrayBufferLike>) => Promise<Float32Array<ArrayBufferLike>>;
+ transcribe: (waveform: number[] | Float32Array<ArrayBufferLike>, options?: import("../../types/stt").DecodingOptions | undefined) => Promise<string>;
  stream: () => Promise<string>;
- streamStop: () => void;
- streamInsert: (waveform: number[]) => void;
+ streamStop: () => Promise<void>;
+ streamInsert: (waveform: number[] | Float32Array<ArrayBufferLike>) => Promise<void>;
  };
  //# sourceMappingURL=useSpeechToText.d.ts.map
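For context, a minimal sketch of consuming the updated hook surface shown above. Only the returned fields come from the typings in this diff; the component itself, the shape of the `model` prop, and the omission of audio capture and of starting the stream are illustrative assumptions:

    import React from 'react';
    import { Button, Text, View } from 'react-native';
    import { useSpeechToText } from 'react-native-executorch';

    // `model` is a SpeechToTextModelConfig (encoderSource, decoderSource,
    // tokenizerSource, isMultilingual); pass whichever config your app uses.
    export function TranscriptionView({ model }: { model: any }) {
      const {
        downloadProgress,
        committedTranscription,
        nonCommittedTranscription,
        streamStop,
      } = useSpeechToText({ model });

      // Starting the stream and feeding audio (stream() / streamInsert()) is
      // omitted here; this only renders the streaming transcription state.
      return (
        <View>
          <Text>Download progress: {downloadProgress}</Text>
          {/* Committed text is final; non-committed text may still change. */}
          <Text>{committedTranscription}</Text>
          <Text style={{ opacity: 0.5 }}>{nonCommittedTranscription}</Text>
          <Button title="Stop" onPress={() => { streamStop(); }} />
        </View>
      );
    }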
package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts CHANGED
@@ -1,22 +1,17 @@
  import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
  export declare class SpeechToTextModule {
+ private nativeModule;
  private modelConfig;
- private asr;
- private processor;
- private isStreaming;
- private readyToProcess;
- private minAudioSamples;
  load(model: SpeechToTextModelConfig, onDownloadProgressCallback?: (progress: number) => void): Promise<void>;
- encode(waveform: Float32Array): Promise<void>;
- decode(tokens: number[]): Promise<Float32Array>;
- transcribe(waveform: number[], options?: DecodingOptions): Promise<string>;
+ encode(waveform: Float32Array | number[]): Promise<Float32Array>;
+ decode(tokens: Int32Array | number[], encoderOutput: Float32Array | number[]): Promise<Float32Array>;
+ transcribe(waveform: Float32Array | number[], options?: DecodingOptions): Promise<string>;
  stream(options?: DecodingOptions): AsyncGenerator<{
  committed: string;
  nonCommitted: string;
- }, void, unknown>;
- streamStop(): void;
- streamInsert(waveform: number[]): void;
+ }>;
+ streamInsert(waveform: Float32Array | number[]): Promise<void>;
+ streamStop(): Promise<void>;
  private validateOptions;
- private resetStreamState;
  }
  //# sourceMappingURL=SpeechToTextModule.d.ts.map
package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAI3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,WAAW,CAA2B;IAC9C,OAAO,CAAC,GAAG,CAAkB;IAE7B,OAAO,CAAC,SAAS,CAAwD;IACzE,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,eAAe,CAAqB;IAE/B,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IAMtD,MAAM,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7C,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC;IAI/C,UAAU,CACrB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;IAeJ,MAAM,CAAC,OAAO,GAAE,eAAoB;;;;IA2B3C,UAAU;IAIV,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE;IAKtC,OAAO,CAAC,eAAe;IASvB,OAAO,CAAC,gBAAgB;CAKzB"}
+ {"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;IAEjC,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IA6BtD,MAAM,CACjB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAChC,OAAO,CAAC,YAAY,CAAC;IAUX,MAAM,CACjB,MAAM,EAAE,UAAU,GAAG,MAAM,EAAE,EAC7B,aAAa,EAAE,YAAY,GAAG,MAAM,EAAE,GACrC,OAAO,CAAC,YAAY,CAAC;IAkBX,UAAU,CACrB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,EACjC,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;IAaJ,MAAM,CAClB,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAgDjD,YAAY,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxC,OAAO,CAAC,eAAe;CAQxB"}
package/lib/typescript/types/stt.d.ts CHANGED
@@ -1,13 +1,4 @@
  import { ResourceSource } from './common';
- export type WordTuple = [number, number, string];
- export interface WordObject {
- start: number;
- end: number;
- word: string;
- }
- export interface Segment {
- words: WordObject[];
- }
  export type SpeechToTextLanguage = 'af' | 'sq' | 'ar' | 'hy' | 'az' | 'eu' | 'be' | 'bn' | 'bs' | 'bg' | 'my' | 'ca' | 'zh' | 'hr' | 'cs' | 'da' | 'nl' | 'et' | 'en' | 'fi' | 'fr' | 'gl' | 'ka' | 'de' | 'el' | 'gu' | 'ht' | 'he' | 'hi' | 'hu' | 'is' | 'id' | 'it' | 'ja' | 'kn' | 'kk' | 'km' | 'ko' | 'lo' | 'lv' | 'lt' | 'mk' | 'mg' | 'ms' | 'ml' | 'mt' | 'mr' | 'ne' | 'no' | 'fa' | 'pl' | 'pt' | 'pa' | 'ro' | 'ru' | 'sr' | 'si' | 'sk' | 'sl' | 'es' | 'su' | 'sw' | 'sv' | 'tl' | 'tg' | 'ta' | 'te' | 'th' | 'tr' | 'uk' | 'ur' | 'uz' | 'vi' | 'cy' | 'yi';
  export interface DecodingOptions {
  language?: SpeechToTextLanguage;
package/lib/typescript/types/stt.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../src/types/stt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE1C,MAAM,MAAM,SAAS,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAEjD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAGD,MAAM,MAAM,oBAAoB,GAC5B,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,CAAC;AAET,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,oBAAoB,CAAC;CACjC;AAED,MAAM,WAAW,uBAAuB;IACtC,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,cAAc,CAAC;IAC9B,aAAa,EAAE,cAAc,CAAC;IAC9B,eAAe,EAAE,cAAc,CAAC;CACjC"}
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../src/types/stt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAG1C,MAAM,MAAM,oBAAoB,GAC5B,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,CAAC;AAET,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,oBAAoB,CAAC;CACjC;AAED,MAAM,WAAW,uBAAuB;IACtC,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,cAAc,CAAC;IAC9B,aAAa,EAAE,cAAc,CAAC;IAC9B,eAAe,EAAE,cAAc,CAAC;CACjC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "react-native-executorch",
- "version": "0.5.3",
+ "version": "0.5.4",
  "description": "An easy way to run AI models in React Native with ExecuTorch",
  "source": "./src/index.ts",
  "main": "./lib/module/index.js",
package/react-native-executorch.podspec CHANGED
@@ -75,6 +75,8 @@ Pod::Spec.new do |s|
  "common/**/*.{cpp,c,h,hpp}",
  ]

+ s.libraries = "z"
+
  # Exclude file with tests to not introduce gtest dependency.
  # Do not include the headers from common/rnexecutorch/jsi/ as source files.
  # Xcode/Cocoapods leaks them to other pods that an app also depends on, so if
package/src/modules/natural_language_processing/SpeechToTextModule.ts CHANGED
@@ -1,84 +1,154 @@
+ import { Logger } from '../../common/Logger';
  import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
- import { ASR } from '../../utils/SpeechToTextModule/ASR';
- import { OnlineASRProcessor } from '../../utils/SpeechToTextModule/OnlineProcessor';
+ import { ResourceFetcher } from '../../utils/ResourceFetcher';

  export class SpeechToTextModule {
- private modelConfig!: SpeechToTextModelConfig;
- private asr: ASR = new ASR();
+ private nativeModule: any;

- private processor: OnlineASRProcessor = new OnlineASRProcessor(this.asr);
- private isStreaming = false;
- private readyToProcess = false;
- private minAudioSamples: number = 1 * 16000; // 1 second
+ private modelConfig!: SpeechToTextModelConfig;

  public async load(
  model: SpeechToTextModelConfig,
  onDownloadProgressCallback: (progress: number) => void = () => {}
  ) {
  this.modelConfig = model;
- return this.asr.load(model, onDownloadProgressCallback);
+
+ const tokenizerLoadPromise = ResourceFetcher.fetch(
+ undefined,
+ model.tokenizerSource
+ );
+ const encoderDecoderPromise = ResourceFetcher.fetch(
+ onDownloadProgressCallback,
+ model.encoderSource,
+ model.decoderSource
+ );
+ const [tokenizerSources, encoderDecoderResults] = await Promise.all([
+ tokenizerLoadPromise,
+ encoderDecoderPromise,
+ ]);
+ const encoderSource = encoderDecoderResults?.[0];
+ const decoderSource = encoderDecoderResults?.[1];
+ if (!encoderSource || !decoderSource || !tokenizerSources) {
+ throw new Error('Download interrupted.');
+ }
+ this.nativeModule = await global.loadSpeechToText(
+ encoderSource,
+ decoderSource,
+ tokenizerSources[0]!
+ );
  }

- public async encode(waveform: Float32Array): Promise<void> {
- return this.asr.encode(waveform);
+ public async encode(
+ waveform: Float32Array | number[]
+ ): Promise<Float32Array> {
+ if (Array.isArray(waveform)) {
+ Logger.info(
+ 'Passing waveform as number[] is deprecated, use Float32Array instead'
+ );
+ waveform = new Float32Array(waveform);
+ }
+ return new Float32Array(await this.nativeModule.encode(waveform));
  }

- public async decode(tokens: number[]): Promise<Float32Array> {
- return this.asr.decode(tokens);
+ public async decode(
+ tokens: Int32Array | number[],
+ encoderOutput: Float32Array | number[]
+ ): Promise<Float32Array> {
+ if (Array.isArray(tokens)) {
+ Logger.info(
+ 'Passing tokens as number[] is deprecated, use Int32Array instead'
+ );
+ tokens = new Int32Array(tokens);
+ }
+ if (Array.isArray(encoderOutput)) {
+ Logger.info(
+ 'Passing encoderOutput as number[] is deprecated, use Float32Array instead'
+ );
+ encoderOutput = new Float32Array(encoderOutput);
+ }
+ return new Float32Array(
+ await this.nativeModule.decode(tokens, encoderOutput)
+ );
  }

  public async transcribe(
- waveform: number[],
+ waveform: Float32Array | number[],
  options: DecodingOptions = {}
  ): Promise<string> {
  this.validateOptions(options);

- const segments = await this.asr.transcribe(waveform, options);
-
- let transcription = '';
- for (const segment of segments) {
- for (const word of segment.words) {
- transcription += ` ${word.word}`;
- }
+ if (Array.isArray(waveform)) {
+ Logger.info(
+ 'Passing waveform as number[] is deprecated, use Float32Array instead'
+ );
+ waveform = new Float32Array(waveform);
  }

- return transcription.trim();
+ return this.nativeModule.transcribe(waveform, options.language || '');
  }

- public async *stream(options: DecodingOptions = {}) {
- if (this.isStreaming) {
- throw new Error('Streaming is already in progress');
- }
+ public async *stream(
+ options: DecodingOptions = {}
+ ): AsyncGenerator<{ committed: string; nonCommitted: string }> {
  this.validateOptions(options);
- this.resetStreamState();
-
- this.isStreaming = true;
- while (this.isStreaming) {
- if (
- !this.readyToProcess ||
- this.processor.audioBuffer.length < this.minAudioSamples
- ) {
- await new Promise((resolve) => setTimeout(resolve, 100));
+
+ const queue: { committed: string; nonCommitted: string }[] = [];
+ let waiter: (() => void) | null = null;
+ let finished = false;
+ let error: unknown;
+
+ const wake = () => {
+ waiter?.();
+ waiter = null;
+ };
+
+ (async () => {
+ try {
+ await this.nativeModule.stream(
+ (committed: string, nonCommitted: string, isDone: boolean) => {
+ queue.push({ committed, nonCommitted });
+ if (isDone) {
+ finished = true;
+ }
+ wake();
+ },
+ options.language || ''
+ );
+ finished = true;
+ wake();
+ } catch (e) {
+ error = e;
+ finished = true;
+ wake();
+ }
+ })();
+
+ while (true) {
+ if (queue.length > 0) {
+ yield queue.shift()!;
+ if (finished && queue.length === 0) {
+ return;
+ }
  continue;
  }
-
- const { committed, nonCommitted } =
- await this.processor.processIter(options);
- yield { committed, nonCommitted };
- this.readyToProcess = false;
+ if (error) throw error;
+ if (finished) return;
+ await new Promise<void>((r) => (waiter = r));
  }
-
- const { committed } = await this.processor.finish();
- yield { committed, nonCommitted: '' };
  }

- public streamStop() {
- this.isStreaming = false;
+ public async streamInsert(waveform: Float32Array | number[]): Promise<void> {
+ if (Array.isArray(waveform)) {
+ Logger.info(
+ 'Passing waveform as number[] is deprecated, use Float32Array instead'
+ );
+ waveform = new Float32Array(waveform);
+ }
+ return this.nativeModule.streamInsert(waveform);
  }

- public streamInsert(waveform: number[]) {
- this.processor.insertAudioChunk(waveform);
- this.readyToProcess = true;
+ public async streamStop(): Promise<void> {
+ return this.nativeModule.streamStop();
  }

  private validateOptions(options: DecodingOptions) {
@@ -89,10 +159,4 @@ export class SpeechToTextModule {
  throw new Error('Model is multilingual, provide a language');
  }
  }
-
- private resetStreamState() {
- this.isStreaming = false;
- this.readyToProcess = false;
- this.processor = new OnlineASRProcessor(this.asr);
- }
  }
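Taken together, the refactored module exposes roughly the following call pattern. A minimal sketch, assuming the class is exported from the package root, that `modelConfig` carries the encoderSource/decoderSource/tokenizerSource/isMultilingual fields referenced above, and that some audio source yields Float32Array chunks (chunk size and sample rate are not dictated by this diff); the comment on streamStop() reflects a reading of the diff, not documented behavior:

    import { SpeechToTextModule } from 'react-native-executorch';

    async function runStreamingTranscription(
      modelConfig: any, // SpeechToTextModelConfig: encoderSource, decoderSource, tokenizerSource, isMultilingual
      audioChunks: AsyncIterable<Float32Array>
    ) {
      const stt = new SpeechToTextModule();
      await stt.load(modelConfig, (progress) => console.log('download', progress));

      // Feed audio in the background; the native side buffers it.
      (async () => {
        for await (const chunk of audioChunks) {
          await stt.streamInsert(chunk);
        }
        await stt.streamStop(); // presumably signals end of input so stream() can finish
      })();

      // Consume committed / non-committed partial results as they arrive.
      for await (const { committed, nonCommitted } of stt.stream()) {
        console.log('committed:', committed, 'pending:', nonCommitted);
      }

      // One-shot transcription now also accepts Float32Array (number[] is deprecated):
      // const text = await stt.transcribe(fullWaveform);
    }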
package/src/types/stt.ts CHANGED
@@ -1,17 +1,5 @@
  import { ResourceSource } from './common';

- export type WordTuple = [number, number, string];
-
- export interface WordObject {
- start: number;
- end: number;
- word: string;
- }
-
- export interface Segment {
- words: WordObject[];
- }
-
  // Languages supported by whisper (not whisper.en)
  export type SpeechToTextLanguage =
  | 'af'
package/common/rnexecutorch/models/EncoderDecoderBase.cpp DELETED
@@ -1,21 +0,0 @@
- #include <rnexecutorch/models/EncoderDecoderBase.h>
-
- namespace rnexecutorch::models {
-
- EncoderDecoderBase::EncoderDecoderBase(
- const std::string &encoderPath, const std::string &decoderPath,
- std::shared_ptr<react::CallInvoker> callInvoker)
- : callInvoker(callInvoker),
- encoder_(std::make_unique<BaseModel>(encoderPath, callInvoker)),
- decoder_(std::make_unique<BaseModel>(decoderPath, callInvoker)) {};
-
- size_t EncoderDecoderBase::getMemoryLowerBound() const noexcept {
- return encoder_->getMemoryLowerBound() + decoder_->getMemoryLowerBound();
- }
-
- void EncoderDecoderBase::unload() noexcept {
- encoder_.reset(nullptr);
- decoder_.reset(nullptr);
- }
-
- } // namespace rnexecutorch::models
package/common/rnexecutorch/models/EncoderDecoderBase.h DELETED
@@ -1,31 +0,0 @@
- #pragma once
-
- #include <ReactCommon/CallInvoker.h>
- #include <memory>
- #include <rnexecutorch/models/BaseModel.h>
- #include <string>
-
- namespace rnexecutorch::models {
-
- using namespace facebook;
- using executorch::aten::Tensor;
- using executorch::runtime::EValue;
-
- class EncoderDecoderBase {
- public:
- explicit EncoderDecoderBase(const std::string &encoderPath,
- const std::string &decoderPath,
- std::shared_ptr<react::CallInvoker> callInvoker);
- size_t getMemoryLowerBound() const noexcept;
- void unload() noexcept;
-
- protected:
- std::shared_ptr<react::CallInvoker> callInvoker;
- std::unique_ptr<BaseModel> encoder_;
- std::unique_ptr<BaseModel> decoder_;
-
- private:
- size_t memorySizeLowerBound;
- };
-
- } // namespace rnexecutorch::models
package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h DELETED
@@ -1,27 +0,0 @@
- #pragma once
-
- #include "executorch/extension/tensor/tensor_ptr.h"
- #include <rnexecutorch/host_objects/JSTensorViewOut.h>
- #include <span>
- #include <vector>
-
- namespace rnexecutorch::models::speech_to_text {
-
- using TensorPtr = ::executorch::extension::TensorPtr;
-
- class SpeechToTextStrategy {
- public:
- virtual ~SpeechToTextStrategy() = default;
-
- virtual TensorPtr prepareAudioInput(std::span<float> waveform) = 0;
-
- virtual TensorPtr
- prepareTokenInput(const std::vector<int64_t> &prevTokens) = 0;
-
- virtual std::string getDecoderMethod() const = 0;
-
- virtual std::shared_ptr<OwningArrayBuffer> extractOutputToken(
- const executorch::aten::Tensor &decoderOutputTensor) const = 0;
- };
-
- } // namespace rnexecutorch::models::speech_to_text
package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp DELETED
@@ -1,50 +0,0 @@
- #include "executorch/extension/tensor/tensor_ptr.h"
- #include "rnexecutorch/data_processing/dsp.h"
- #include <rnexecutorch/models/speech_to_text/WhisperStrategy.h>
-
- namespace rnexecutorch::models::speech_to_text {
-
- using namespace ::executorch::extension;
- using namespace ::executorch::aten;
-
- TensorPtr WhisperStrategy::prepareAudioInput(std::span<float> waveform) {
- constexpr auto fftWindowSize = 512;
- constexpr auto stftHopLength = 160;
- constexpr auto innerDim = 256;
- preprocessedData =
- dsp::stftFromWaveform(waveform, fftWindowSize, stftHopLength);
- const auto numFrames = preprocessedData.size() / innerDim;
- std::vector<int32_t> inputShape = {static_cast<int32_t>(numFrames), innerDim};
- return make_tensor_ptr(std::move(inputShape), std::move(preprocessedData));
- }
-
- TensorPtr
- WhisperStrategy::prepareTokenInput(const std::vector<int64_t> &prevTokens) {
- tokens32.clear();
- tokens32.reserve(prevTokens.size());
- for (auto token : prevTokens) {
- tokens32.push_back(static_cast<int32_t>(token));
- }
- auto tensorSizes = {1, static_cast<int32_t>(tokens32.size())};
- return make_tensor_ptr(std::move(tensorSizes), std::move(tokens32));
- }
-
- std::shared_ptr<OwningArrayBuffer> WhisperStrategy::extractOutputToken(
- const executorch::aten::Tensor &decoderOutputTensor) const {
- const auto innerDim = decoderOutputTensor.size(1);
- const auto dictSize = decoderOutputTensor.size(2);
- auto outputNumel = decoderOutputTensor.numel();
- auto dataPtr =
- static_cast<const float *>(decoderOutputTensor.const_data_ptr()) +
- (innerDim - 1) * dictSize;
-
- std::span<const float> modelOutput(dataPtr, outputNumel / innerDim);
- auto createBuffer = [](const auto &data, size_t size) {
- auto buffer = std::make_shared<OwningArrayBuffer>(size);
- std::memcpy(buffer->data(), data, size);
- return buffer;
- };
- return createBuffer(modelOutput.data(), modelOutput.size_bytes());
- }
-
- } // namespace rnexecutorch::models::speech_to_text