react-native-executorch 0.5.1-rc.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +132 -0
  2. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +4 -10
  3. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -1
  4. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +3 -2
  5. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +16 -4
  6. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +2 -2
  7. package/ios/RnExecutorch.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  8. package/ios/RnExecutorch.xcodeproj/project.xcworkspace/xcuserdata/jakubchmura.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  9. package/ios/RnExecutorch.xcodeproj/xcuserdata/jakubchmura.xcuserdatad/xcschemes/xcschememanagement.plist +14 -0
  10. package/lib/module/constants/modelUrls.js +61 -36
  11. package/lib/module/constants/modelUrls.js.map +1 -1
  12. package/lib/module/constants/ocr/models.js +1 -1
  13. package/lib/module/hooks/natural_language_processing/useSpeechToText.js +71 -34
  14. package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
  15. package/lib/module/index.js +2 -3
  16. package/lib/module/index.js.map +1 -1
  17. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +72 -31
  18. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  19. package/lib/module/types/stt.js +1 -85
  20. package/lib/module/types/stt.js.map +1 -1
  21. package/lib/module/utils/SpeechToTextModule/ASR.js +191 -0
  22. package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -0
  23. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +73 -0
  24. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +1 -0
  25. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +56 -0
  26. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +1 -0
  27. package/lib/tsconfig.tsbuildinfo +1 -0
  28. package/lib/typescript/constants/modelUrls.d.ts +24 -7
  29. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  30. package/lib/typescript/constants/ocr/models.d.ts +126 -126
  31. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -24
  32. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
  33. package/lib/typescript/index.d.ts +2 -3
  34. package/lib/typescript/index.d.ts.map +1 -1
  35. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +19 -22
  36. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  37. package/lib/typescript/types/stt.d.ts +17 -91
  38. package/lib/typescript/types/stt.d.ts.map +1 -1
  39. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +27 -0
  40. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +1 -0
  41. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +23 -0
  42. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +1 -0
  43. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +13 -0
  44. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +1 -0
  45. package/package.json +5 -3
  46. package/src/constants/modelUrls.ts +70 -37
  47. package/src/constants/ocr/models.ts +1 -1
  48. package/src/hooks/natural_language_processing/useSpeechToText.ts +87 -92
  49. package/src/index.ts +6 -8
  50. package/src/modules/natural_language_processing/SpeechToTextModule.ts +81 -69
  51. package/src/types/stt.ts +97 -92
  52. package/src/utils/SpeechToTextModule/ASR.ts +303 -0
  53. package/src/utils/SpeechToTextModule/OnlineProcessor.ts +87 -0
  54. package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +79 -0
  55. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/jakubchmura.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  56. package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.cpp +0 -31
  57. package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.h +0 -21
  58. package/lib/common/Logger.d.ts +0 -8
  59. package/lib/common/Logger.js +0 -19
  60. package/lib/constants/modelUrls.d.ts +0 -89
  61. package/lib/constants/modelUrls.js +0 -116
  62. package/lib/constants/sttDefaults.js +0 -66
  63. package/lib/controllers/LLMController.js +0 -210
  64. package/lib/controllers/OCRController.js +0 -65
  65. package/lib/controllers/SpeechToTextController.d.ts +0 -52
  66. package/lib/controllers/SpeechToTextController.js +0 -343
  67. package/lib/hooks/natural_language_processing/useSpeechToText.js +0 -44
  68. package/lib/index.d.ts +0 -50
  69. package/lib/index.js +0 -59
  70. package/lib/module/constants/sttDefaults.js +0 -74
  71. package/lib/module/constants/sttDefaults.js.map +0 -1
  72. package/lib/module/controllers/SpeechToTextController.js +0 -320
  73. package/lib/module/controllers/SpeechToTextController.js.map +0 -1
  74. package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +0 -14
  75. package/lib/modules/natural_language_processing/SpeechToTextModule.js +0 -30
  76. package/lib/modules/natural_language_processing/TokenizerModule.js +0 -29
  77. package/lib/native/RnExecutorchModules.d.ts +0 -3
  78. package/lib/native/RnExecutorchModules.js +0 -16
  79. package/lib/typescript/constants/sttDefaults.d.ts +0 -29
  80. package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
  81. package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -57
  82. package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
  83. package/lib/utils/ResourceFetcherUtils.js +0 -119
  84. package/lib/utils/llm.js +0 -72
  85. package/src/constants/sttDefaults.ts +0 -82
  86. package/src/controllers/SpeechToTextController.ts +0 -471
  87. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/norbertklockiewicz.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  88. /package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/xcuserdata/{norbertklockiewicz.xcuserdatad → jakubchmura.xcuserdatad}/xcschemes/xcschememanagement.plist +0 -0
@@ -1,343 +0,0 @@
1
- import { HAMMING_DIST_THRESHOLD, MODEL_CONFIGS, SECOND, MODES, NUM_TOKENS_TO_TRIM, STREAMING_ACTION, } from '../constants/sttDefaults';
2
- import { TokenizerModule } from '../modules/natural_language_processing/TokenizerModule';
3
- import { ResourceFetcher } from '../utils/ResourceFetcher';
4
- import { longCommonInfPref } from '../utils/stt';
5
- import { ETError, getError } from '../Error';
6
- import { Logger } from '../common/Logger';
7
- export class SpeechToTextController {
8
- speechToTextNativeModule;
9
- sequence = [];
10
- isReady = false;
11
- isGenerating = false;
12
- tokenizerModule;
13
- overlapSeconds;
14
- windowSize;
15
- chunks = [];
16
- seqs = [];
17
- prevSeq = [];
18
- waveform = [];
19
- numOfChunks = 0;
20
- streaming = false;
21
- // User callbacks
22
- decodedTranscribeCallback;
23
- modelDownloadProgressCallback;
24
- isReadyCallback;
25
- isGeneratingCallback;
26
- onErrorCallback;
27
- config;
28
- constructor({ transcribeCallback, modelDownloadProgressCallback, isReadyCallback, isGeneratingCallback, onErrorCallback, overlapSeconds, windowSize, streamingConfig, }) {
29
- this.tokenizerModule = new TokenizerModule();
30
- this.decodedTranscribeCallback = async (seq) => transcribeCallback(await this.tokenIdsToText(seq));
31
- this.modelDownloadProgressCallback = modelDownloadProgressCallback;
32
- this.isReadyCallback = (isReady) => {
33
- this.isReady = isReady;
34
- isReadyCallback?.(isReady);
35
- };
36
- this.isGeneratingCallback = (isGenerating) => {
37
- this.isGenerating = isGenerating;
38
- isGeneratingCallback?.(isGenerating);
39
- };
40
- this.onErrorCallback = (error) => {
41
- if (onErrorCallback) {
42
- onErrorCallback(error ? new Error(getError(error)) : undefined);
43
- return;
44
- }
45
- else {
46
- throw new Error(getError(error));
47
- }
48
- };
49
- this.configureStreaming(overlapSeconds, windowSize, streamingConfig || 'balanced');
50
- }
51
- async loadModel(modelName, encoderSource, decoderSource, tokenizerSource) {
52
- this.onErrorCallback(undefined);
53
- this.isReadyCallback(false);
54
- this.config = MODEL_CONFIGS[modelName];
55
- try {
56
- await this.tokenizerModule.load(tokenizerSource || this.config.tokenizer.source);
57
- const paths = await ResourceFetcher.fetch(this.modelDownloadProgressCallback, encoderSource || this.config.sources.encoder, decoderSource || this.config.sources.decoder);
58
- if (paths === null || paths.length < 2) {
59
- throw new Error('Download interrupted.');
60
- }
61
- [encoderSource, decoderSource] = paths;
62
- }
63
- catch (e) {
64
- this.onErrorCallback(e);
65
- return;
66
- }
67
- if (modelName === 'whisperMultilingual') {
68
- // The underlying native class is instantiated based on the name of the model. There is no need to
69
- // create a separate class for multilingual version of Whisper, since it is the same. We just need
70
- // the distinction here, in TS, for start tokens and such. If we introduce
71
- // more versions of Whisper, such as the small one, this should be refactored.
72
- modelName = 'whisper';
73
- }
74
- try {
75
- const nativeSpeechToText = await global.loadSpeechToText(encoderSource, decoderSource, modelName);
76
- this.speechToTextNativeModule = nativeSpeechToText;
77
- this.isReadyCallback(true);
78
- }
79
- catch (e) {
80
- this.onErrorCallback(e);
81
- }
82
- }
83
- configureStreaming(overlapSeconds, windowSize, streamingConfig) {
84
- if (streamingConfig) {
85
- this.windowSize = MODES[streamingConfig].windowSize * SECOND;
86
- this.overlapSeconds = MODES[streamingConfig].overlapSeconds * SECOND;
87
- }
88
- if (streamingConfig && (windowSize || overlapSeconds)) {
89
- Logger.warn(`windowSize and overlapSeconds overrides values from streamingConfig ${streamingConfig}.`);
90
- }
91
- this.windowSize = (windowSize || 0) * SECOND || this.windowSize;
92
- this.overlapSeconds = (overlapSeconds || 0) * SECOND || this.overlapSeconds;
93
- if (2 * this.overlapSeconds + this.windowSize >= 30 * SECOND) {
94
- Logger.warn(`Invalid values for overlapSeconds and/or windowSize provided. Expected windowSize + 2 * overlapSeconds (== ${this.windowSize + 2 * this.overlapSeconds}) <= 30. Setting windowSize to ${30 * SECOND - 2 * this.overlapSeconds}.`);
95
- this.windowSize = 30 * SECOND - 2 * this.overlapSeconds;
96
- }
97
- }
98
- chunkWaveform() {
99
- this.numOfChunks = Math.ceil(this.waveform.length / this.windowSize);
100
- for (let i = 0; i < this.numOfChunks; i++) {
101
- let chunk = [];
102
- const left = Math.max(this.windowSize * i - this.overlapSeconds, 0);
103
- const right = Math.min(this.windowSize * (i + 1) + this.overlapSeconds, this.waveform.length);
104
- chunk = this.waveform.slice(left, right);
105
- this.chunks.push(chunk);
106
- }
107
- }
108
- resetState() {
109
- this.sequence = [];
110
- this.seqs = [];
111
- this.waveform = [];
112
- this.prevSeq = [];
113
- this.chunks = [];
114
- this.decodedTranscribeCallback([]);
115
- this.onErrorCallback(undefined);
116
- }
117
- expectedChunkLength() {
118
- //only first chunk can be of shorter length, for first chunk there are no seqs decoded
119
- return this.seqs.length
120
- ? this.windowSize + 2 * this.overlapSeconds
121
- : this.windowSize + this.overlapSeconds;
122
- }
123
- async getStartingTokenIds(audioLanguage) {
124
- // We need different starting token ids based on the multilingualism of the model.
125
- // The eng version only needs BOS token, while the multilingual one needs:
126
- // [BOS, LANG, TRANSCRIBE]. Optionally we should also set notimestamps token, as timestamps
127
- // is not yet supported.
128
- if (!audioLanguage) {
129
- return [this.config.tokenizer.bos];
130
- }
131
- // FIXME: I should use .getTokenId for the BOS as well, should remove it from config
132
- const langTokenId = await this.tokenizerModule.tokenToId(`<|${audioLanguage}|>`);
133
- const transcribeTokenId = await this.tokenizerModule.tokenToId('<|transcribe|>');
134
- const noTimestampsTokenId = await this.tokenizerModule.tokenToId('<|notimestamps|>');
135
- const startingTokenIds = [
136
- this.config.tokenizer.bos,
137
- langTokenId,
138
- transcribeTokenId,
139
- noTimestampsTokenId,
140
- ];
141
- return startingTokenIds;
142
- }
143
- async decodeChunk(chunk, audioLanguage) {
144
- const seq = await this.getStartingTokenIds(audioLanguage);
145
- let prevSeqTokenIdx = 0;
146
- this.prevSeq = this.sequence.slice();
147
- try {
148
- await this.encode(new Float32Array(chunk));
149
- }
150
- catch (error) {
151
- this.onErrorCallback(new Error(getError(error) + ' encoding error'));
152
- return [];
153
- }
154
- let lastToken = seq.at(-1);
155
- while (lastToken !== this.config.tokenizer.eos) {
156
- try {
157
- lastToken = await this.decode(seq);
158
- }
159
- catch (error) {
160
- this.onErrorCallback(new Error(getError(error) + ' decoding error'));
161
- return [...seq, this.config.tokenizer.eos];
162
- }
163
- seq.push(lastToken);
164
- if (this.seqs.length > 0 &&
165
- seq.length < this.seqs.at(-1).length &&
166
- seq.length % 3 !== 0) {
167
- this.prevSeq.push(this.seqs.at(-1)[prevSeqTokenIdx++]);
168
- this.decodedTranscribeCallback(this.prevSeq);
169
- }
170
- }
171
- return seq;
172
- }
173
- async handleOverlaps(seqs) {
174
- const maxInd = longCommonInfPref(seqs.at(-2), seqs.at(-1), HAMMING_DIST_THRESHOLD);
175
- this.sequence = [...this.sequence, ...seqs.at(-2).slice(0, maxInd)];
176
- this.decodedTranscribeCallback(this.sequence);
177
- return this.sequence.slice();
178
- }
179
- trimLeft(numOfTokensToTrim) {
180
- const idx = this.seqs.length - 1;
181
- if (this.seqs[idx][0] === this.config.tokenizer.bos) {
182
- this.seqs[idx] = this.seqs[idx].slice(numOfTokensToTrim);
183
- }
184
- }
185
- trimRight(numOfTokensToTrim) {
186
- const idx = this.seqs.length - 2;
187
- if (this.seqs[idx].at(-1) === this.config.tokenizer.eos) {
188
- this.seqs[idx] = this.seqs[idx].slice(0, -numOfTokensToTrim);
189
- }
190
- }
191
- // since we are calling this every time (except first) after a new seq is pushed to this.seqs
192
- // we can only trim left the last seq and trim right the second to last seq
193
- async trimSequences(audioLanguage) {
194
- const numSpecialTokens = (await this.getStartingTokenIds(audioLanguage))
195
- .length;
196
- this.trimLeft(numSpecialTokens + NUM_TOKENS_TO_TRIM);
197
- this.trimRight(numSpecialTokens + NUM_TOKENS_TO_TRIM);
198
- }
199
- // if last chunk is too short combine it with second to last to improve quality
200
- validateAndFixLastChunk() {
201
- if (this.chunks.length < 2)
202
- return;
203
- const lastChunkLength = this.chunks.at(-1).length / SECOND;
204
- const secondToLastChunkLength = this.chunks.at(-2).length / SECOND;
205
- if (lastChunkLength < 5 && secondToLastChunkLength + lastChunkLength < 30) {
206
- this.chunks[this.chunks.length - 2] = [
207
- ...this.chunks.at(-2).slice(0, -this.overlapSeconds * 2),
208
- ...this.chunks.at(-1),
209
- ];
210
- this.chunks = this.chunks.slice(0, -1);
211
- }
212
- }
213
- async tokenIdsToText(tokenIds) {
214
- try {
215
- return await this.tokenizerModule.decode(tokenIds, true);
216
- }
217
- catch (e) {
218
- this.onErrorCallback(new Error(`An error has occurred when decoding the token ids: ${e}`));
219
- return '';
220
- }
221
- }
222
- async transcribe(waveform, audioLanguage) {
223
- try {
224
- if (!this.isReady)
225
- throw Error(getError(ETError.ModuleNotLoaded));
226
- if (this.isGenerating || this.streaming)
227
- throw Error(getError(ETError.ModelGenerating));
228
- if (!!audioLanguage !== this.config.isMultilingual)
229
- throw new Error(getError(ETError.MultilingualConfiguration));
230
- }
231
- catch (e) {
232
- this.onErrorCallback(e);
233
- return '';
234
- }
235
- // Making sure that the error is not set when we get there
236
- this.isGeneratingCallback(true);
237
- this.resetState();
238
- this.waveform = waveform;
239
- this.chunkWaveform();
240
- this.validateAndFixLastChunk();
241
- for (let chunkId = 0; chunkId < this.chunks.length; chunkId++) {
242
- const seq = await this.decodeChunk(this.chunks.at(chunkId), audioLanguage);
243
- // whole audio is inside one chunk, no processing required
244
- if (this.chunks.length === 1) {
245
- this.sequence = seq;
246
- this.decodedTranscribeCallback(seq);
247
- break;
248
- }
249
- this.seqs.push(seq);
250
- if (this.seqs.length < 2)
251
- continue;
252
- // Remove starting tokenIds and some additional ones
253
- await this.trimSequences(audioLanguage);
254
- this.prevSeq = await this.handleOverlaps(this.seqs);
255
- // last sequence processed
256
- // overlaps are already handled, so just append the last seq
257
- if (this.seqs.length === this.chunks.length) {
258
- this.sequence = [...this.sequence, ...this.seqs.at(-1)];
259
- this.decodedTranscribeCallback(this.sequence);
260
- this.prevSeq = this.sequence;
261
- }
262
- }
263
- const decodedText = await this.tokenIdsToText(this.sequence);
264
- this.isGeneratingCallback(false);
265
- return decodedText;
266
- }
267
- async streamingTranscribe(streamAction, waveform, audioLanguage) {
268
- try {
269
- if (!this.isReady)
270
- throw Error(getError(ETError.ModuleNotLoaded));
271
- if (!!audioLanguage !== this.config.isMultilingual)
272
- throw new Error(getError(ETError.MultilingualConfiguration));
273
- if (streamAction === STREAMING_ACTION.START &&
274
- !this.streaming &&
275
- this.isGenerating)
276
- throw Error(getError(ETError.ModelGenerating));
277
- if (streamAction === STREAMING_ACTION.START && this.streaming)
278
- throw Error(getError(ETError.ModelGenerating));
279
- if (streamAction === STREAMING_ACTION.DATA && !this.streaming)
280
- throw Error(getError(ETError.StreamingNotStarted));
281
- if (streamAction === STREAMING_ACTION.STOP && !this.streaming)
282
- throw Error(getError(ETError.StreamingNotStarted));
283
- if (streamAction === STREAMING_ACTION.DATA && !waveform)
284
- throw new Error(getError(ETError.MissingDataChunk));
285
- }
286
- catch (e) {
287
- this.onErrorCallback(e);
288
- return '';
289
- }
290
- if (streamAction === STREAMING_ACTION.START) {
291
- this.resetState();
292
- this.streaming = true;
293
- this.isGeneratingCallback(true);
294
- }
295
- this.waveform = [...this.waveform, ...(waveform || [])];
296
- // while buffer has at least required size get chunk and decode
297
- while (this.waveform.length >= this.expectedChunkLength()) {
298
- const chunk = this.waveform.slice(0, this.windowSize +
299
- this.overlapSeconds * (1 + Number(this.seqs.length > 0)));
300
- this.chunks = [chunk]; //save last chunk for STREAMING_ACTION.STOP
301
- this.waveform = this.waveform.slice(this.windowSize - this.overlapSeconds * Number(this.seqs.length === 0));
302
- const seq = await this.decodeChunk(chunk, audioLanguage);
303
- this.seqs.push(seq);
304
- if (this.seqs.length < 2)
305
- continue;
306
- await this.trimSequences(audioLanguage);
307
- await this.handleOverlaps(this.seqs);
308
- }
309
- // got final package, process all remaining waveform data
310
- // since we run the loop above the waveform has at most one chunk in it
311
- if (streamAction === STREAMING_ACTION.STOP) {
312
- // pad remaining waveform data with previous chunk to this.windowSize + 2 * this.overlapSeconds
313
- const chunk = this.chunks.length
314
- ? [
315
- ...this.chunks[0].slice(0, this.windowSize),
316
- ...this.waveform,
317
- ].slice(-this.windowSize - 2 * this.overlapSeconds)
318
- : this.waveform;
319
- this.waveform = [];
320
- const seq = await this.decodeChunk(chunk, audioLanguage);
321
- this.seqs.push(seq);
322
- if (this.seqs.length === 1) {
323
- this.sequence = this.seqs[0];
324
- }
325
- else {
326
- await this.trimSequences(audioLanguage);
327
- await this.handleOverlaps(this.seqs);
328
- this.sequence = [...this.sequence, ...this.seqs.at(-1)];
329
- }
330
- this.decodedTranscribeCallback(this.sequence);
331
- this.isGeneratingCallback(false);
332
- this.streaming = false;
333
- }
334
- const decodedText = await this.tokenIdsToText(this.sequence);
335
- return decodedText;
336
- }
337
- async encode(waveform) {
338
- return await this.speechToTextNativeModule.encode(waveform);
339
- }
340
- async decode(seq) {
341
- return await this.speechToTextNativeModule.decode(seq);
342
- }
343
- }
@@ -1,44 +0,0 @@
1
- import { useEffect, useMemo, useState } from 'react';
2
- import { SpeechToTextController } from '../../controllers/SpeechToTextController';
3
- export const useSpeechToText = ({ modelName, encoderSource, decoderSource, tokenizerSource, overlapSeconds, windowSize, streamingConfig, preventLoad = false, }) => {
4
- const [sequence, setSequence] = useState('');
5
- const [isReady, setIsReady] = useState(false);
6
- const [downloadProgress, setDownloadProgress] = useState(0);
7
- const [isGenerating, setIsGenerating] = useState(false);
8
- const [error, setError] = useState();
9
- const model = useMemo(() => new SpeechToTextController({
10
- transcribeCallback: setSequence,
11
- isReadyCallback: setIsReady,
12
- isGeneratingCallback: setIsGenerating,
13
- onErrorCallback: setError,
14
- modelDownloadProgressCallback: setDownloadProgress,
15
- }), []);
16
- useEffect(() => {
17
- model.configureStreaming(overlapSeconds, windowSize, streamingConfig);
18
- }, [model, overlapSeconds, windowSize, streamingConfig]);
19
- useEffect(() => {
20
- const loadModel = async () => {
21
- await model.loadModel(modelName, encoderSource, decoderSource, tokenizerSource);
22
- };
23
- if (!preventLoad) {
24
- loadModel();
25
- }
26
- }, [
27
- model,
28
- modelName,
29
- encoderSource,
30
- decoderSource,
31
- tokenizerSource,
32
- preventLoad,
33
- ]);
34
- return {
35
- isReady,
36
- isGenerating,
37
- downloadProgress,
38
- configureStreaming: model.configureStreaming,
39
- sequence,
40
- error,
41
- transcribe: (waveform, audioLanguage) => model.transcribe(waveform, audioLanguage),
42
- streamingTranscribe: (streamAction, waveform, audioLanguage) => model.streamingTranscribe(streamAction, waveform, audioLanguage),
43
- };
44
- };
package/lib/index.d.ts DELETED
@@ -1,50 +0,0 @@
1
- import { SpeechToTextLanguage } from './types/stt';
2
- declare global {
3
- var loadStyleTransfer: (source: string) => any;
4
- var loadImageSegmentation: (source: string) => any;
5
- var loadClassification: (source: string) => any;
6
- var loadObjectDetection: (source: string) => any;
7
- var loadExecutorchModule: (source: string) => any;
8
- var loadTokenizerModule: (source: string) => any;
9
- var loadImageEmbeddings: (source: string) => any;
10
- var loadTextEmbeddings: (modelSource: string, tokenizerSource: string) => any;
11
- var loadLLM: (modelSource: string, tokenizerSource: string) => any;
12
- var loadSpeechToText: (encoderSource: string, decoderSource: string, modelName: string) => any;
13
- var loadOCR: (detectorSource: string, recognizerLarge: string, recognizerMedium: string, recognizerSmall: string, symbols: string) => any;
14
- var loadVerticalOCR: (detectorLarge: string, detectorNarrow: string, recognizer: string, symbols: string, independentCharacters?: boolean) => any;
15
- }
16
- export * from './hooks/computer_vision/useClassification';
17
- export * from './hooks/computer_vision/useObjectDetection';
18
- export * from './hooks/computer_vision/useStyleTransfer';
19
- export * from './hooks/computer_vision/useImageSegmentation';
20
- export * from './hooks/computer_vision/useOCR';
21
- export * from './hooks/computer_vision/useVerticalOCR';
22
- export * from './hooks/computer_vision/useImageEmbeddings';
23
- export * from './hooks/natural_language_processing/useLLM';
24
- export * from './hooks/natural_language_processing/useSpeechToText';
25
- export * from './hooks/natural_language_processing/useTextEmbeddings';
26
- export * from './hooks/natural_language_processing/useTokenizer';
27
- export * from './hooks/general/useExecutorchModule';
28
- export * from './modules/computer_vision/ClassificationModule';
29
- export * from './modules/computer_vision/ObjectDetectionModule';
30
- export * from './modules/computer_vision/StyleTransferModule';
31
- export * from './modules/computer_vision/ImageSegmentationModule';
32
- export * from './modules/computer_vision/OCRModule';
33
- export * from './modules/computer_vision/VerticalOCRModule';
34
- export * from './modules/general/ExecutorchModule';
35
- export * from './modules/computer_vision/ImageEmbeddingsModule';
36
- export * from './modules/natural_language_processing/LLMModule';
37
- export * from './modules/natural_language_processing/SpeechToTextModule';
38
- export * from './modules/natural_language_processing/TextEmbeddingsModule';
39
- export * from './modules/natural_language_processing/TokenizerModule';
40
- export * from './utils/ResourceFetcher';
41
- export * from './utils/llm';
42
- export * from './types/objectDetection';
43
- export * from './types/ocr';
44
- export * from './types/imageSegmentation';
45
- export * from './types/llm';
46
- export { SpeechToTextLanguage };
47
- export * from './constants/modelUrls';
48
- export * from './constants/ocr/models';
49
- export * from './constants/llmDefaults';
50
- export { STREAMING_ACTION, MODES } from './constants/sttDefaults';
package/lib/index.js DELETED
@@ -1,59 +0,0 @@
1
- import { SpeechToTextLanguage } from './types/stt';
2
- import { ETInstallerNativeModule } from './native/RnExecutorchModules';
3
- // eslint-disable no-var
4
- if (global.loadStyleTransfer == null ||
5
- global.loadImageSegmentation == null ||
6
- global.loadExecutorchModule == null ||
7
- global.loadClassification == null ||
8
- global.loadObjectDetection == null ||
9
- global.loadTokenizerModule == null ||
10
- global.loadTextEmbeddings == null ||
11
- global.loadImageEmbeddings == null ||
12
- global.loadLLM == null ||
13
- global.loadSpeechToText == null ||
14
- global.loadOCR == null) {
15
- if (!ETInstallerNativeModule) {
16
- throw new Error(`Failed to install react-native-executorch: The native module could not be found.`);
17
- }
18
- ETInstallerNativeModule.install();
19
- }
20
- // hooks
21
- export * from './hooks/computer_vision/useClassification';
22
- export * from './hooks/computer_vision/useObjectDetection';
23
- export * from './hooks/computer_vision/useStyleTransfer';
24
- export * from './hooks/computer_vision/useImageSegmentation';
25
- export * from './hooks/computer_vision/useOCR';
26
- export * from './hooks/computer_vision/useVerticalOCR';
27
- export * from './hooks/computer_vision/useImageEmbeddings';
28
- export * from './hooks/natural_language_processing/useLLM';
29
- export * from './hooks/natural_language_processing/useSpeechToText';
30
- export * from './hooks/natural_language_processing/useTextEmbeddings';
31
- export * from './hooks/natural_language_processing/useTokenizer';
32
- export * from './hooks/general/useExecutorchModule';
33
- // modules
34
- export * from './modules/computer_vision/ClassificationModule';
35
- export * from './modules/computer_vision/ObjectDetectionModule';
36
- export * from './modules/computer_vision/StyleTransferModule';
37
- export * from './modules/computer_vision/ImageSegmentationModule';
38
- export * from './modules/computer_vision/OCRModule';
39
- export * from './modules/computer_vision/VerticalOCRModule';
40
- export * from './modules/general/ExecutorchModule';
41
- export * from './modules/computer_vision/ImageEmbeddingsModule';
42
- export * from './modules/natural_language_processing/LLMModule';
43
- export * from './modules/natural_language_processing/SpeechToTextModule';
44
- export * from './modules/natural_language_processing/TextEmbeddingsModule';
45
- export * from './modules/natural_language_processing/TokenizerModule';
46
- // utils
47
- export * from './utils/ResourceFetcher';
48
- export * from './utils/llm';
49
- // types
50
- export * from './types/objectDetection';
51
- export * from './types/ocr';
52
- export * from './types/imageSegmentation';
53
- export * from './types/llm';
54
- export { SpeechToTextLanguage };
55
- // constants
56
- export * from './constants/modelUrls';
57
- export * from './constants/ocr/models';
58
- export * from './constants/llmDefaults';
59
- export { STREAMING_ACTION, MODES } from './constants/sttDefaults';
@@ -1,74 +0,0 @@
1
- "use strict";
2
-
3
- import { MOONSHINE_TINY, WHISPER_TINY, WHISPER_TINY_MULTILINGUAL } from './modelUrls';
4
- import { AvailableModels } from '../types/stt';
5
- export const SAMPLE_RATE = 16_000;
6
- export const SECOND = SAMPLE_RATE;
7
- export const HAMMING_DIST_THRESHOLD = 1;
8
- const whisperTinyModelConfig = {
9
- sources: {
10
- encoder: WHISPER_TINY.encoderSource,
11
- decoder: WHISPER_TINY.decoderSource
12
- },
13
- tokenizer: {
14
- source: WHISPER_TINY.tokenizerSource,
15
- bos: 50257,
16
- // FIXME: this is a placeholder and needs to be changed
17
- eos: 50256 // FIXME: this is a placeholder and needs to be changed
18
- },
19
- isMultilingual: false
20
- };
21
- const moonshineTinyModelConfig = {
22
- sources: {
23
- encoder: MOONSHINE_TINY.encoderSource,
24
- decoder: MOONSHINE_TINY.decoderSource
25
- },
26
- tokenizer: {
27
- source: MOONSHINE_TINY.tokenizerSource,
28
- bos: 1,
29
- // FIXME: this is a placeholder and needs to be changed
30
- eos: 2 // FIXME: this is a placeholder and needs to be changed
31
- },
32
- isMultilingual: false
33
- };
34
- const whisperTinyMultilingualModelConfig = {
35
- sources: {
36
- encoder: WHISPER_TINY_MULTILINGUAL.encoderSource,
37
- decoder: WHISPER_TINY_MULTILINGUAL.decoderSource
38
- },
39
- tokenizer: {
40
- source: WHISPER_TINY_MULTILINGUAL.tokenizerSource,
41
- bos: 50258,
42
- // FIXME: this is a placeholder and needs to be changed
43
- eos: 50257 // FIXME: this is a placeholder and needs to be changed
44
- },
45
- isMultilingual: true
46
- };
47
- export const MODEL_CONFIGS = {
48
- moonshine: moonshineTinyModelConfig,
49
- whisper: whisperTinyModelConfig,
50
- whisperMultilingual: whisperTinyMultilingualModelConfig
51
- };
52
- export const MODES = {
53
- fast: {
54
- windowSize: 5,
55
- overlapSeconds: 1.2
56
- },
57
- balanced: {
58
- windowSize: 12,
59
- overlapSeconds: 2
60
- },
61
- quality: {
62
- windowSize: 24,
63
- overlapSeconds: 3
64
- }
65
- };
66
- export const NUM_TOKENS_TO_TRIM = 3;
67
- export let STREAMING_ACTION = /*#__PURE__*/function (STREAMING_ACTION) {
68
- STREAMING_ACTION[STREAMING_ACTION["START"] = 0] = "START";
69
- STREAMING_ACTION[STREAMING_ACTION["DATA"] = 1] = "DATA";
70
- STREAMING_ACTION[STREAMING_ACTION["STOP"] = 2] = "STOP";
71
- return STREAMING_ACTION;
72
- }({});
73
- export { AvailableModels };
74
- //# sourceMappingURL=sttDefaults.js.map
@@ -1 +0,0 @@
1
- {"version":3,"names":["MOONSHINE_TINY","WHISPER_TINY","WHISPER_TINY_MULTILINGUAL","AvailableModels","SAMPLE_RATE","SECOND","HAMMING_DIST_THRESHOLD","whisperTinyModelConfig","sources","encoder","encoderSource","decoder","decoderSource","tokenizer","source","tokenizerSource","bos","eos","isMultilingual","moonshineTinyModelConfig","whisperTinyMultilingualModelConfig","MODEL_CONFIGS","moonshine","whisper","whisperMultilingual","MODES","fast","windowSize","overlapSeconds","balanced","quality","NUM_TOKENS_TO_TRIM","STREAMING_ACTION"],"sourceRoot":"../../../src","sources":["constants/sttDefaults.ts"],"mappings":";;AAAA,SACEA,cAAc,EACdC,YAAY,EACZC,yBAAyB,QACpB,aAAa;AACpB,SAASC,eAAe,QAAqB,cAAc;AAE3D,OAAO,MAAMC,WAAW,GAAG,MAAM;AACjC,OAAO,MAAMC,MAAM,GAAGD,WAAW;AACjC,OAAO,MAAME,sBAAsB,GAAG,CAAC;AAEvC,MAAMC,sBAAsB,GAAG;EAC7BC,OAAO,EAAE;IACPC,OAAO,EAAER,YAAY,CAACS,aAAa;IACnCC,OAAO,EAAEV,YAAY,CAACW;EACxB,CAAC;EACDC,SAAS,EAAE;IACTC,MAAM,EAAEb,YAAY,CAACc,eAAe;IACpCC,GAAG,EAAE,KAAK;IAAE;IACZC,GAAG,EAAE,KAAK,CAAE;EACd,CAAC;EACDC,cAAc,EAAE;AAClB,CAAC;AAED,MAAMC,wBAAwB,GAAG;EAC/BX,OAAO,EAAE;IACPC,OAAO,EAAET,cAAc,CAACU,aAAa;IACrCC,OAAO,EAAEX,cAAc,CAACY;EAC1B,CAAC;EACDC,SAAS,EAAE;IACTC,MAAM,EAAEd,cAAc,CAACe,eAAe;IACtCC,GAAG,EAAE,CAAC;IAAE;IACRC,GAAG,EAAE,CAAC,CAAE;EACV,CAAC;EACDC,cAAc,EAAE;AAClB,CAAC;AAED,MAAME,kCAAkC,GAAG;EACzCZ,OAAO,EAAE;IACPC,OAAO,EAAEP,yBAAyB,CAACQ,aAAa;IAChDC,OAAO,EAAET,yBAAyB,CAACU;EACrC,CAAC;EACDC,SAAS,EAAE;IACTC,MAAM,EAAEZ,yBAAyB,CAACa,eAAe;IACjDC,GAAG,EAAE,KAAK;IAAE;IACZC,GAAG,EAAE,KAAK,CAAE;EACd,CAAC;EACDC,cAAc,EAAE;AAClB,CAAC;AAED,OAAO,MAAMG,aAEZ,GAAG;EACFC,SAAS,EAAEH,wBAAwB;EACnCI,OAAO,EAAEhB,sBAAsB;EAC/BiB,mBAAmB,EAAEJ;AACvB,CAAC;AAED,OAAO,MAAMK,KAAK,GAAG;EACnBC,IAAI,EAAE;IACJC,UAAU,EAAE,CAAC;IACbC,cAAc,EAAE;EAClB,CAAC;EACDC,QAAQ,EAAE;IACRF,UAAU,EAAE,EAAE;IACdC,cAAc,EAAE;EAClB,CAAC;EACDE,OAAO,EAAE;IACPH,UAAU,EAAE,EAAE;IACdC,cAAc,EAAE;EAClB;AACF,CAAC;AAED,OAAO,MAAMG,kBAAkB,GAAG,CAAC;AAEnC,WAAYC,gBAAgB,0BAAhBA,gBAAgB;EAAhBA,gBAAgB,CAAhBA,gBAAgB;EAAhBA,gBAAgB,C
AAhBA,gBAAgB;EAAhBA,gBAAgB,CAAhBA,gBAAgB;EAAA,OAAhBA,gBAAgB;AAAA;AAM5B,SAAS7B,eAAe","ignoreList":[]}