react-native-executorch 0.5.1-rc.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +132 -0
  2. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +4 -10
  3. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -1
  4. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +3 -2
  5. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +16 -4
  6. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +2 -2
  7. package/ios/RnExecutorch.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  8. package/ios/RnExecutorch.xcodeproj/project.xcworkspace/xcuserdata/jakubchmura.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  9. package/ios/RnExecutorch.xcodeproj/xcuserdata/jakubchmura.xcuserdatad/xcschemes/xcschememanagement.plist +14 -0
  10. package/lib/module/constants/modelUrls.js +61 -36
  11. package/lib/module/constants/modelUrls.js.map +1 -1
  12. package/lib/module/constants/ocr/models.js +1 -1
  13. package/lib/module/hooks/natural_language_processing/useSpeechToText.js +71 -34
  14. package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
  15. package/lib/module/index.js +2 -3
  16. package/lib/module/index.js.map +1 -1
  17. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +72 -31
  18. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  19. package/lib/module/types/stt.js +1 -85
  20. package/lib/module/types/stt.js.map +1 -1
  21. package/lib/module/utils/SpeechToTextModule/ASR.js +191 -0
  22. package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -0
  23. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +73 -0
  24. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +1 -0
  25. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +56 -0
  26. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +1 -0
  27. package/lib/tsconfig.tsbuildinfo +1 -0
  28. package/lib/typescript/constants/modelUrls.d.ts +24 -7
  29. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  30. package/lib/typescript/constants/ocr/models.d.ts +126 -126
  31. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -24
  32. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
  33. package/lib/typescript/index.d.ts +2 -3
  34. package/lib/typescript/index.d.ts.map +1 -1
  35. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +19 -22
  36. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  37. package/lib/typescript/types/stt.d.ts +17 -91
  38. package/lib/typescript/types/stt.d.ts.map +1 -1
  39. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +27 -0
  40. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +1 -0
  41. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +23 -0
  42. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +1 -0
  43. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +13 -0
  44. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +1 -0
  45. package/package.json +5 -3
  46. package/src/constants/modelUrls.ts +70 -37
  47. package/src/constants/ocr/models.ts +1 -1
  48. package/src/hooks/natural_language_processing/useSpeechToText.ts +87 -92
  49. package/src/index.ts +6 -8
  50. package/src/modules/natural_language_processing/SpeechToTextModule.ts +81 -69
  51. package/src/types/stt.ts +97 -92
  52. package/src/utils/SpeechToTextModule/ASR.ts +303 -0
  53. package/src/utils/SpeechToTextModule/OnlineProcessor.ts +87 -0
  54. package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +79 -0
  55. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/jakubchmura.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  56. package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.cpp +0 -31
  57. package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.h +0 -21
  58. package/lib/common/Logger.d.ts +0 -8
  59. package/lib/common/Logger.js +0 -19
  60. package/lib/constants/modelUrls.d.ts +0 -89
  61. package/lib/constants/modelUrls.js +0 -116
  62. package/lib/constants/sttDefaults.js +0 -66
  63. package/lib/controllers/LLMController.js +0 -210
  64. package/lib/controllers/OCRController.js +0 -65
  65. package/lib/controllers/SpeechToTextController.d.ts +0 -52
  66. package/lib/controllers/SpeechToTextController.js +0 -343
  67. package/lib/hooks/natural_language_processing/useSpeechToText.js +0 -44
  68. package/lib/index.d.ts +0 -50
  69. package/lib/index.js +0 -59
  70. package/lib/module/constants/sttDefaults.js +0 -74
  71. package/lib/module/constants/sttDefaults.js.map +0 -1
  72. package/lib/module/controllers/SpeechToTextController.js +0 -320
  73. package/lib/module/controllers/SpeechToTextController.js.map +0 -1
  74. package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +0 -14
  75. package/lib/modules/natural_language_processing/SpeechToTextModule.js +0 -30
  76. package/lib/modules/natural_language_processing/TokenizerModule.js +0 -29
  77. package/lib/native/RnExecutorchModules.d.ts +0 -3
  78. package/lib/native/RnExecutorchModules.js +0 -16
  79. package/lib/typescript/constants/sttDefaults.d.ts +0 -29
  80. package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
  81. package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -57
  82. package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
  83. package/lib/utils/ResourceFetcherUtils.js +0 -119
  84. package/lib/utils/llm.js +0 -72
  85. package/src/constants/sttDefaults.ts +0 -82
  86. package/src/controllers/SpeechToTextController.ts +0 -471
  87. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/norbertklockiewicz.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  88. /package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/xcuserdata/{norbertklockiewicz.xcuserdatad → jakubchmura.xcuserdatad}/xcschemes/xcschememanagement.plist +0 -0
@@ -1,28 +1,19 @@
1
- import { SpeechToTextController } from '../../controllers/SpeechToTextController';
2
- import { ResourceSource } from '../../types/common';
3
- import { STREAMING_ACTION } from '../../constants/sttDefaults';
4
- import { AvailableModels, SpeechToTextLanguage } from '../../types/stt';
5
- interface SpeechToTextModule {
1
+ import { SpeechToTextModelConfig } from '../../types/stt';
2
+ export declare const useSpeechToText: ({ model, preventLoad, }: {
3
+ model: SpeechToTextModelConfig;
4
+ preventLoad?: boolean;
5
+ }) => {
6
+ error: string | null;
6
7
  isReady: boolean;
7
8
  isGenerating: boolean;
8
- sequence: string;
9
9
  downloadProgress: number;
10
- configureStreaming: SpeechToTextController['configureStreaming'];
11
- error: Error | undefined;
12
- transcribe: (input: number[], audioLanguage?: SpeechToTextLanguage) => ReturnType<SpeechToTextController['transcribe']>;
13
- streamingTranscribe: (streamAction: STREAMING_ACTION, input?: number[], audioLanguage?: SpeechToTextLanguage) => ReturnType<SpeechToTextController['streamingTranscribe']>;
14
- }
15
- export declare const useSpeechToText: ({ model, overlapSeconds, windowSize, streamingConfig, preventLoad, }: {
16
- model: {
17
- modelName: AvailableModels;
18
- encoderSource: ResourceSource;
19
- decoderSource: ResourceSource;
20
- tokenizerSource: ResourceSource;
21
- };
22
- overlapSeconds?: ConstructorParameters<typeof SpeechToTextController>["0"]["overlapSeconds"];
23
- windowSize?: ConstructorParameters<typeof SpeechToTextController>["0"]["windowSize"];
24
- streamingConfig?: ConstructorParameters<typeof SpeechToTextController>["0"]["streamingConfig"];
25
- preventLoad?: boolean;
26
- }) => SpeechToTextModule;
27
- export {};
10
+ committedTranscription: string;
11
+ nonCommittedTranscription: string;
12
+ encode: (waveform: Float32Array<ArrayBufferLike>) => Promise<void>;
13
+ decode: (tokens: number[]) => Promise<Float32Array<ArrayBufferLike>>;
14
+ transcribe: (waveform: number[], options?: import("../../types/stt").DecodingOptions | undefined) => Promise<string>;
15
+ stream: () => Promise<string>;
16
+ streamStop: () => void;
17
+ streamInsert: (waveform: number[]) => void;
18
+ };
28
19
  //# sourceMappingURL=useSpeechToText.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"useSpeechToText.d.ts","sourceRoot":"","sources":["../../../../src/hooks/natural_language_processing/useSpeechToText.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAExE,UAAU,kBAAkB;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,OAAO,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,gBAAgB,EAAE,MAAM,CAAC;IACzB,kBAAkB,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IACjE,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC;IACzB,UAAU,EAAE,CACV,KAAK,EAAE,MAAM,EAAE,EACf,aAAa,CAAC,EAAE,oBAAoB,KACjC,UAAU,CAAC,sBAAsB,CAAC,YAAY,CAAC,CAAC,CAAC;IACtD,mBAAmB,EAAE,CACnB,YAAY,EAAE,gBAAgB,EAC9B,KAAK,CAAC,EAAE,MAAM,EAAE,EAChB,aAAa,CAAC,EAAE,oBAAoB,KACjC,UAAU,CAAC,sBAAsB,CAAC,qBAAqB,CAAC,CAAC,CAAC;CAChE;AAED,eAAO,MAAM,eAAe,GAAI,sEAM7B;IACD,KAAK,EAAE;QACL,SAAS,EAAE,eAAe,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;QAC9B,aAAa,EAAE,cAAc,CAAC;QAC9B,eAAe,EAAE,cAAc,CAAC;KACjC,CAAC;IACF,cAAc,CAAC,EAAE,qBAAqB,CACpC,OAAO,sBAAsB,CAC9B,CAAC,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC;IACzB,UAAU,CAAC,EAAE,qBAAqB,CAChC,OAAO,sBAAsB,CAC9B,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC;IACrB,eAAe,CAAC,EAAE,qBAAqB,CACrC,OAAO,sBAAsB,CAC9B,CAAC,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC;IAC1B,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB,KAAG,kBAoEH,CAAC"}
1
+ {"version":3,"file":"useSpeechToText.d.ts","sourceRoot":"","sources":["../../../../src/hooks/natural_language_processing/useSpeechToText.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAE1D,eAAO,MAAM,eAAe,GAAI,yBAG7B;IACD,KAAK,EAAE,uBAAuB,CAAC;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;;;;;;;;;;;;;CAmGA,CAAC"}
@@ -1,4 +1,3 @@
1
- import { SpeechToTextLanguage } from './types/stt';
2
1
  declare global {
3
2
  var loadStyleTransfer: (source: string) => any;
4
3
  var loadImageSegmentation: (source: string) => any;
@@ -42,9 +41,9 @@ export * from './types/objectDetection';
42
41
  export * from './types/ocr';
43
42
  export * from './types/imageSegmentation';
44
43
  export * from './types/llm';
45
- export { SpeechToTextLanguage };
44
+ export * from './types/common';
45
+ export { SpeechToTextLanguage, SpeechToTextModelConfig, DecodingOptions, } from './types/stt';
46
46
  export * from './constants/modelUrls';
47
47
  export * from './constants/ocr/models';
48
48
  export * from './constants/llmDefaults';
49
- export { STREAMING_ACTION, MODES, AvailableModels, } from './constants/sttDefaults';
50
49
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAKnD,OAAO,CAAC,MAAM,CAAC;IACb,IAAI,iBAAiB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IAC/C,IAAI,qBAAqB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACnD,IAAI,kBAAkB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IAChD,IAAI,mBAAmB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACjD,IAAI,oBAAoB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IAClD,IAAI,mBAAmB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACjD,IAAI,mBAAmB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACjD,IAAI,kBAAkB,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM,KAAK,GAAG,CAAC;IAC9E,IAAI,gBAAgB,EAAE,CACpB,aAAa,EAAE,MAAM,EACrB,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,KACd,GAAG,CAAC;IACT,IAAI,OAAO,EAAE,CACX,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM,EACvB,gBAAgB,EAAE,MAAM,EACxB,eAAe,EAAE,MAAM,EACvB,OAAO,EAAE,MAAM,KACZ,GAAG,CAAC;IACT,IAAI,eAAe,EAAE,CACnB,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,EACtB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,EACf,qBAAqB,CAAC,EAAE,OAAO,KAC5B,GAAG,CAAC;CACV;AAyBD,cAAc,2CAA2C,CAAC;AAC1D,cAAc,4CAA4C,CAAC;AAC3D,cAAc,0CAA0C,CAAC;AACzD,cAAc,8CAA8C,CAAC;AAC7D,cAAc,gCAAgC,CAAC;AAC/C,cAAc,wCAAwC,CAAC;AACvD,cAAc,4CAA4C,CAAC;AAE3D,cAAc,4CAA4C,CAAC;AAC3D,cAAc,qDAAqD,CAAC;AACpE,cAAc,uDAAuD,CAAC;AACtE,cAAc,kDAAkD,CAAC;AAEjE,cAAc,qCAAqC,CAAC;AAGpD,cAAc,gDAAgD,CAAC;AAC/D,cAAc,iDAAiD,CAAC;AAChE,cAAc,+CAA+C,CAAC;AAC9D,cAAc,mDAAmD,CAAC;AAClE,cAAc,qCAAqC,CAAC;AACpD,cAAc,6CAA6C,CAAC;AAC5D,cAAc,oCAAoC,CAAC;AACnD,cAAc,iDAAiD,CAAC;AAEhE,cAAc,iDAAiD,CAAC;AAChE,cAAc,0DAA0D,CAAC;AACzE,cAAc,4DAA4D,CAAC;AAC3E,cAAc,uDAAuD,CAAC;AAGtE,cAAc,yBAAyB,CAAC;AACxC,cAAc,aAAa,CAAC;AAG5B,cAAc,yBAAyB,CAAC;AACxC,cAAc,aAAa,CAAC;AAC5B,cAAc,2BAA2B,CAAC;AAC1C,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,oBAAoB,EAAE,CAAC;AAGhC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,OAAO,EACL,gBAAgB,EAChB,KAAK,EACL,eAAe,GAChB,MAAM,yBAAyB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,CAAC,MAAM,CAAC;IACb,IAAI,iBAAiB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IAC/C,IAAI,qBAAqB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACnD,IAAI,kBAAkB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IAChD,IAAI,mBAAmB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACjD,IAAI,oBAAoB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IAClD,IAAI,mBAAmB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACjD,IAAI,mBAAmB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,GAAG,CAAC;IACjD,IAAI,kBAAkB,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM,KAAK,GAAG,CAAC;IAC9E,IAAI,gBAAgB,EAAE,CACpB,aAAa,EAAE,MAAM,EACrB,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,KACd,GAAG,CAAC;IACT,IAAI,OAAO,EAAE,CACX,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM,EACvB,gBAAgB,EAAE,MAAM,EACxB,eAAe,EAAE,MAAM,EACvB,OAAO,EAAE,MAAM,KACZ,GAAG,CAAC;IACT,IAAI,eAAe,EAAE,CACnB,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,EACtB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,EACf,qBAAqB,CAAC,EAAE,OAAO,KAC5B,GAAG,CAAC;CACV;AAyBD,cAAc,2CAA2C,CAAC;AAC1D,cAAc,4CAA4C,CAAC;AAC3D,cAAc,0CAA0C,CAAC;AACzD,cAAc,8CAA8C,CAAC;AAC7D,cAAc,gCAAgC,CAAC;AAC/C,cAAc,wCAAwC,CAAC;AACvD,cAAc,4CAA4C,CAAC;AAE3D,cAAc,4CAA4C,CAAC;AAC3D,cAAc,qDAAqD,CAAC;AACpE,cAAc,uDAAuD,CAAC;AACtE,cAAc,kDAAkD,CAAC;AAEjE,cAAc,qCAAqC,CAAC;AAGpD,cAAc,gDAAgD,CAAC;AAC/D,cAAc,iDAAiD,CAAC;AAChE,cAAc,+CAA+C,CAAC;AAC9D,cAAc,mDAAmD,CAAC;AAClE,cAAc,qCAAqC,CAAC;AACpD,cAAc,6CAA6C,CAAC;AAC5D,cAAc,oCAAoC,CAAC;AACnD,cAAc,iDAAiD,CAAC;AAEhE,cAAc,iDAAiD,CAAC;AAChE,cAAc,0DAA0D,CAAC;AACzE,cAAc,4DAA4D,CAAC;AAC3E,cAAc,uDAAuD,CAAC;AAGtE,cAAc,yBAAyB,CAAC;AACxC,cAAc,aAAa,CAAC;AAG5B,cAAc,yBAAyB,CAAC;AACxC,cAAc,aAAa,CAAC;AAC5B,cAAc,2BAA2B,CAAC;AAC1C,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC;AAC/B,OAAO,EACL,oBAAoB,EACpB,uBAAuB,EACvB,eAAe,GAChB,MAAM,aAAa,CAAC;AAGrB,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC"}
@@ -1,25 +1,22 @@
1
- import { ResourceSource } from '../../types/common';
2
- import { SpeechToTextController } from '../../controllers/SpeechToTextController';
3
- import { AvailableModels, SpeechToTextLanguage } from '../../types/stt';
4
- import { STREAMING_ACTION } from '../../constants/sttDefaults';
1
+ import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
5
2
  export declare class SpeechToTextModule {
6
- private module;
7
- constructor({ transcribeCallback, overlapSeconds, windowSize, streamingConfig, }?: {
8
- transcribeCallback?: (sequence: string) => void;
9
- overlapSeconds?: ConstructorParameters<typeof SpeechToTextController>['0']['overlapSeconds'];
10
- windowSize?: ConstructorParameters<typeof SpeechToTextController>['0']['windowSize'];
11
- streamingConfig?: ConstructorParameters<typeof SpeechToTextController>['0']['streamingConfig'];
12
- });
13
- load(model: {
14
- modelName: AvailableModels;
15
- encoderSource?: ResourceSource;
16
- decoderSource?: ResourceSource;
17
- tokenizerSource?: ResourceSource;
18
- }, onDownloadProgressCallback?: (progress: number) => void): Promise<void>;
19
- configureStreaming(overlapSeconds: Parameters<SpeechToTextController['configureStreaming']>[0], windowSize: Parameters<SpeechToTextController['configureStreaming']>[1], streamingConfig: Parameters<SpeechToTextController['configureStreaming']>[2]): void;
20
- encode(waveform: Float32Array): Promise<null>;
21
- decode(seq: number[]): Promise<number>;
22
- transcribe(waveform: number[], audioLanguage?: SpeechToTextLanguage): ReturnType<SpeechToTextController['transcribe']>;
23
- streamingTranscribe(streamAction: STREAMING_ACTION, waveform?: number[], audioLanguage?: SpeechToTextLanguage): ReturnType<SpeechToTextController['streamingTranscribe']>;
3
+ private modelConfig;
4
+ private asr;
5
+ private processor;
6
+ private isStreaming;
7
+ private readyToProcess;
8
+ private minAudioSamples;
9
+ load(model: SpeechToTextModelConfig, onDownloadProgressCallback?: (progress: number) => void): Promise<void>;
10
+ encode(waveform: Float32Array): Promise<void>;
11
+ decode(tokens: number[]): Promise<Float32Array>;
12
+ transcribe(waveform: number[], options?: DecodingOptions): Promise<string>;
13
+ stream(options?: DecodingOptions): AsyncGenerator<{
14
+ committed: string;
15
+ nonCommitted: string;
16
+ }, void, unknown>;
17
+ streamStop(): void;
18
+ streamInsert(waveform: number[]): void;
19
+ private validateOptions;
20
+ private resetStreamState;
24
21
  }
25
22
  //# sourceMappingURL=SpeechToTextModule.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,sBAAsB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACxE,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAE/D,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,MAAM,CAAyB;gBAE3B,EACV,kBAAkB,EAClB,cAAc,EACd,UAAU,EACV,eAAe,GAChB,GAAE;QACD,kBAAkB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;QAChD,cAAc,CAAC,EAAE,qBAAqB,CACpC,OAAO,sBAAsB,CAC9B,CAAC,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACzB,UAAU,CAAC,EAAE,qBAAqB,CAChC,OAAO,sBAAsB,CAC9B,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC;QACrB,eAAe,CAAC,EAAE,qBAAqB,CACrC,OAAO,sBAAsB,CAC9B,CAAC,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC;KACtB;IASA,IAAI,CACR,KAAK,EAAE;QACL,SAAS,EAAE,eAAe,CAAC;QAC3B,aAAa,CAAC,EAAE,cAAc,CAAC;QAC/B,aAAa,CAAC,EAAE,cAAc,CAAC;QAC/B,eAAe,CAAC,EAAE,cAAc,CAAC;KAClC,EACD,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IAWnE,kBAAkB,CAChB,cAAc,EAAE,UAAU,CAAC,sBAAsB,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,EAC3E,UAAU,EAAE,UAAU,CAAC,sBAAsB,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,EACvE,eAAe,EAAE,UAAU,CAAC,sBAAsB,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC;IAKxE,MAAM,CAAC,QAAQ,EAAE,YAAY;IAI7B,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE;IAIpB,UAAU,CACd,QAAQ,EAAE,MAAM,EAAE,EAClB,aAAa,CAAC,EAAE,oBAAoB,GACnC,UAAU,CAAC,sBAAsB,CAAC,YAAY,CAAC,CAAC;IAI7C,mBAAmB,CACvB,YAAY,EAAE,gBAAgB,EAC9B,QAAQ,CAAC,EAAE,MAAM,EAAE,EACnB,aAAa,CAAC,EAAE,oBAAoB,GACnC,UAAU,CAAC,sBAAsB,CAAC,qBAAqB,CAAC,CAAC;CAO7D"}
1
+ {"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAI3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,WAAW,CAA2B;IAC9C,OAAO,CAAC,GAAG,CAAkB;IAE7B,OAAO,CAAC,SAAS,CAAwD;IACzE,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,eAAe,CAAqB;IAE/B,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IAMtD,MAAM,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7C,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC;IAI/C,UAAU,CACrB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;IAeJ,MAAM,CAAC,OAAO,GAAE,eAAoB;;;;IA2B3C,UAAU;IAIV,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE;IAKtC,OAAO,CAAC,eAAe;IASvB,OAAO,CAAC,gBAAgB;CAKzB"}
@@ -1,95 +1,21 @@
1
- export interface ModelConfig {
2
- sources: {
3
- encoder: string;
4
- decoder: string;
5
- };
6
- tokenizer: {
7
- source: string;
8
- bos: number;
9
- eos: number;
10
- };
11
- isMultilingual: boolean;
1
+ import { ResourceSource } from './common';
2
+ export type WordTuple = [number, number, string];
3
+ export interface WordObject {
4
+ start: number;
5
+ end: number;
6
+ word: string;
7
+ }
8
+ export interface Segment {
9
+ words: WordObject[];
12
10
  }
13
- export declare enum SpeechToTextLanguage {
14
- Afrikaans = "af",
15
- Albanian = "sq",
16
- Arabic = "ar",
17
- Armenian = "hy",
18
- Azerbaijani = "az",
19
- Basque = "eu",
20
- Belarusian = "be",
21
- Bengali = "bn",
22
- Bosnian = "bs",
23
- Bulgarian = "bg",
24
- Burmese = "my",
25
- Catalan = "ca",
26
- Chinese = "zh",
27
- Croatian = "hr",
28
- Czech = "cs",
29
- Danish = "da",
30
- Dutch = "nl",
31
- Estonian = "et",
32
- English = "en",
33
- Finnish = "fi",
34
- French = "fr",
35
- Galician = "gl",
36
- Georgian = "ka",
37
- German = "de",
38
- Greek = "el",
39
- Gujarati = "gu",
40
- HaitianCreole = "ht",
41
- Hebrew = "he",
42
- Hindi = "hi",
43
- Hungarian = "hu",
44
- Icelandic = "is",
45
- Indonesian = "id",
46
- Italian = "it",
47
- Japanese = "ja",
48
- Kannada = "kn",
49
- Kazakh = "kk",
50
- Khmer = "km",
51
- Korean = "ko",
52
- Lao = "lo",
53
- Latvian = "lv",
54
- Lithuanian = "lt",
55
- Macedonian = "mk",
56
- Malagasy = "mg",
57
- Malay = "ms",
58
- Malayalam = "ml",
59
- Maltese = "mt",
60
- Marathi = "mr",
61
- Nepali = "ne",
62
- Norwegian = "no",
63
- Persian = "fa",
64
- Polish = "pl",
65
- Portuguese = "pt",
66
- Punjabi = "pa",
67
- Romanian = "ro",
68
- Russian = "ru",
69
- Serbian = "sr",
70
- Sinhala = "si",
71
- Slovak = "sk",
72
- Slovenian = "sl",
73
- Spanish = "es",
74
- Sundanese = "su",
75
- Swahili = "sw",
76
- Swedish = "sv",
77
- Tagalog = "tl",
78
- Tajik = "tg",
79
- Tamil = "ta",
80
- Telugu = "te",
81
- Thai = "th",
82
- Turkish = "tr",
83
- Ukrainian = "uk",
84
- Urdu = "ur",
85
- Uzbek = "uz",
86
- Vietnamese = "vi",
87
- Welsh = "cy",
88
- Yiddish = "yi"
11
+ export type SpeechToTextLanguage = 'af' | 'sq' | 'ar' | 'hy' | 'az' | 'eu' | 'be' | 'bn' | 'bs' | 'bg' | 'my' | 'ca' | 'zh' | 'hr' | 'cs' | 'da' | 'nl' | 'et' | 'en' | 'fi' | 'fr' | 'gl' | 'ka' | 'de' | 'el' | 'gu' | 'ht' | 'he' | 'hi' | 'hu' | 'is' | 'id' | 'it' | 'ja' | 'kn' | 'kk' | 'km' | 'ko' | 'lo' | 'lv' | 'lt' | 'mk' | 'mg' | 'ms' | 'ml' | 'mt' | 'mr' | 'ne' | 'no' | 'fa' | 'pl' | 'pt' | 'pa' | 'ro' | 'ru' | 'sr' | 'si' | 'sk' | 'sl' | 'es' | 'su' | 'sw' | 'sv' | 'tl' | 'tg' | 'ta' | 'te' | 'th' | 'tr' | 'uk' | 'ur' | 'uz' | 'vi' | 'cy' | 'yi';
12
+ export interface DecodingOptions {
13
+ language?: SpeechToTextLanguage;
89
14
  }
90
- export declare enum AvailableModels {
91
- WHISPER = "whisper",
92
- MOONSHINE = "moonshine",
93
- WHISPER_MULTILINGUAL = "whisperMultilingual"
15
+ export interface SpeechToTextModelConfig {
16
+ isMultilingual: boolean;
17
+ encoderSource: ResourceSource;
18
+ decoderSource: ResourceSource;
19
+ tokenizerSource: ResourceSource;
94
20
  }
95
21
  //# sourceMappingURL=stt.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../src/types/stt.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE;QACP,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;IACF,SAAS,EAAE;QACT,MAAM,EAAE,MAAM,CAAC;QACf,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;IACF,cAAc,EAAE,OAAO,CAAC;CACzB;AAGD,oBAAY,oBAAoB;IAC9B,SAAS,OAAO;IAChB,QAAQ,OAAO;IACf,MAAM,OAAO;IACb,QAAQ,OAAO;IACf,WAAW,OAAO;IAClB,MAAM,OAAO;IACb,UAAU,OAAO;IACjB,OAAO,OAAO;IACd,OAAO,OAAO;IACd,SAAS,OAAO;IAChB,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,QAAQ,OAAO;IACf,KAAK,OAAO;IACZ,MAAM,OAAO;IACb,KAAK,OAAO;IACZ,QAAQ,OAAO;IACf,OAAO,OAAO;IACd,OAAO,OAAO;IACd,MAAM,OAAO;IACb,QAAQ,OAAO;IACf,QAAQ,OAAO;IACf,MAAM,OAAO;IACb,KAAK,OAAO;IACZ,QAAQ,OAAO;IACf,aAAa,OAAO;IACpB,MAAM,OAAO;IACb,KAAK,OAAO;IACZ,SAAS,OAAO;IAChB,SAAS,OAAO;IAChB,UAAU,OAAO;IACjB,OAAO,OAAO;IACd,QAAQ,OAAO;IACf,OAAO,OAAO;IACd,MAAM,OAAO;IACb,KAAK,OAAO;IACZ,MAAM,OAAO;IACb,GAAG,OAAO;IACV,OAAO,OAAO;IACd,UAAU,OAAO;IACjB,UAAU,OAAO;IACjB,QAAQ,OAAO;IACf,KAAK,OAAO;IACZ,SAAS,OAAO;IAChB,OAAO,OAAO;IACd,OAAO,OAAO;IACd,MAAM,OAAO;IACb,SAAS,OAAO;IAChB,OAAO,OAAO;IACd,MAAM,OAAO;IACb,UAAU,OAAO;IACjB,OAAO,OAAO;IACd,QAAQ,OAAO;IACf,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,MAAM,OAAO;IACb,SAAS,OAAO;IAChB,OAAO,OAAO;IACd,SAAS,OAAO;IAChB,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,KAAK,OAAO;IACZ,KAAK,OAAO;IACZ,MAAM,OAAO;IACb,IAAI,OAAO;IACX,OAAO,OAAO;IACd,SAAS,OAAO;IAChB,IAAI,OAAO;IACX,KAAK,OAAO;IACZ,UAAU,OAAO;IACjB,KAAK,OAAO;IACZ,OAAO,OAAO;CACf;AAED,oBAAY,eAAe;IACzB,OAAO,YAAY;IACnB,SAAS,cAAc;IACvB,oBAAoB,wBAAwB;CAC7C"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../src/types/stt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE1C,MAAM,MAAM,SAAS,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAEjD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAGD,MAAM,MAAM,oBAAoB,GAC5B,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,CAAC;AAET,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,oBAAoB,CAAC;CACjC;AAED,MAAM,WAAW,uBAAuB;IACtC,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,cAAc,CAAC;IAC9B,aAAa,EAAE,cAAc,CAAC;IAC9B,eAAe,EAAE,cAAc,CAAC;CACjC"}
@@ -0,0 +1,27 @@
1
+ import { DecodingOptions, Segment, SpeechToTextModelConfig, WordTuple } from '../../types/stt';
2
+ export declare class ASR {
3
+ private nativeModule;
4
+ private tokenizerModule;
5
+ private timePrecision;
6
+ private maxDecodeLength;
7
+ private chunkSize;
8
+ private minChunkSamples;
9
+ private samplingRate;
10
+ private startOfTranscriptToken;
11
+ private endOfTextToken;
12
+ private timestampBeginToken;
13
+ load(model: SpeechToTextModelConfig, onDownloadProgressCallback: (progress: number) => void): Promise<void>;
14
+ private getInitialSequence;
15
+ private generate;
16
+ private softmaxWithTemperature;
17
+ private sampleFromDistribution;
18
+ private generateWithFallback;
19
+ private calculateWordLevelTimestamps;
20
+ private estimateWordTimestampsLinear;
21
+ transcribe(audio: number[], options: DecodingOptions): Promise<Segment[]>;
22
+ tsWords(segments: Segment[]): WordTuple[];
23
+ segmentsEndTs(res: Segment[]): number[];
24
+ encode(waveform: Float32Array): Promise<void>;
25
+ decode(tokens: number[]): Promise<Float32Array>;
26
+ }
27
+ //# sourceMappingURL=ASR.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ASR.d.ts","sourceRoot":"","sources":["../../../../src/utils/SpeechToTextModule/ASR.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,eAAe,EACf,OAAO,EACP,uBAAuB,EAEvB,SAAS,EACV,MAAM,iBAAiB,CAAC;AAGzB,qBAAa,GAAG;IACd,OAAO,CAAC,YAAY,CAAM;IAC1B,OAAO,CAAC,eAAe,CAA0C;IAEjE,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,SAAS,CAAc;IAC/B,OAAO,CAAC,eAAe,CAAqB;IAC5C,OAAO,CAAC,YAAY,CAAiB;IAErC,OAAO,CAAC,sBAAsB,CAAU;IACxC,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,mBAAmB,CAAU;IAExB,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI;YA8B1C,kBAAkB;YAgBlB,QAAQ;IAoCtB,OAAO,CAAC,sBAAsB;IAO9B,OAAO,CAAC,sBAAsB;YAYhB,oBAAoB;YA4BpB,4BAA4B;YA4D5B,4BAA4B;IAkC7B,UAAU,CACrB,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,OAAO,EAAE,CAAC;IA2Bd,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,SAAS,EAAE;IAUzC,aAAa,CAAC,GAAG,EAAE,OAAO,EAAE;IAItB,MAAM,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7C,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC;CAG7D"}
@@ -0,0 +1,23 @@
1
+ import { DecodingOptions } from '../../types/stt';
2
+ import { ASR } from './ASR';
3
+ export declare class OnlineASRProcessor {
4
+ private asr;
5
+ private samplingRate;
6
+ audioBuffer: number[];
7
+ private transcriptBuffer;
8
+ private bufferTimeOffset;
9
+ private committed;
10
+ constructor(asr: ASR);
11
+ insertAudioChunk(audio: number[]): void;
12
+ processIter(options: DecodingOptions): Promise<{
13
+ committed: string;
14
+ nonCommitted: string;
15
+ }>;
16
+ private chunkCompletedSegment;
17
+ private chunkAt;
18
+ finish(): Promise<{
19
+ committed: string;
20
+ }>;
21
+ private toFlush;
22
+ }
23
+ //# sourceMappingURL=OnlineProcessor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"OnlineProcessor.d.ts","sourceRoot":"","sources":["../../../../src/utils/SpeechToTextModule/OnlineProcessor.ts"],"names":[],"mappings":"AAEA,OAAO,EAAa,eAAe,EAAW,MAAM,iBAAiB,CAAC;AACtE,OAAO,EAAE,GAAG,EAAE,MAAM,OAAO,CAAC;AAG5B,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,GAAG,CAAM;IAEjB,OAAO,CAAC,YAAY,CAAiB;IAC9B,WAAW,EAAE,MAAM,EAAE,CAAM;IAClC,OAAO,CAAC,gBAAgB,CAA4C;IACpE,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,SAAS,CAAmB;gBAExB,GAAG,EAAE,GAAG;IAIb,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE;IAI1B,WAAW,CAAC,OAAO,EAAE,eAAe;;;;IAoBjD,OAAO,CAAC,qBAAqB;IAqB7B,OAAO,CAAC,OAAO;IASF,MAAM;;;IAOnB,OAAO,CAAC,OAAO;CAMhB"}
@@ -0,0 +1,13 @@
1
+ import { WordTuple } from '../../types/stt';
2
+ export declare class HypothesisBuffer {
3
+ private committedInBuffer;
4
+ private buffer;
5
+ private new;
6
+ private lastCommittedTime;
7
+ lastCommittedWord: string | null;
8
+ insert(newWords: WordTuple[], offset: number): void;
9
+ flush(): WordTuple[];
10
+ popCommitted(time: number): void;
11
+ complete(): WordTuple[];
12
+ }
13
+ //# sourceMappingURL=hypothesisBuffer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hypothesisBuffer.d.ts","sourceRoot":"","sources":["../../../../src/utils/SpeechToTextModule/hypothesisBuffer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,iBAAiB,CAAmB;IAC5C,OAAO,CAAC,MAAM,CAAmB;IACjC,OAAO,CAAC,GAAG,CAAmB;IAE9B,OAAO,CAAC,iBAAiB,CAAa;IAC/B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAQ;IAExC,MAAM,CAAC,QAAQ,EAAE,SAAS,EAAE,EAAE,MAAM,EAAE,MAAM;IAuC5C,KAAK,IAAI,SAAS,EAAE;IAkBpB,YAAY,CAAC,IAAI,EAAE,MAAM;IAMzB,QAAQ,IAAI,SAAS,EAAE;CAG/B"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-executorch",
3
- "version": "0.5.1-rc.0",
3
+ "version": "0.5.1",
4
4
  "description": "An easy way to run AI models in React Native with ExecuTorch",
5
5
  "source": "./src/index.ts",
6
6
  "main": "./lib/module/index.js",
@@ -31,10 +31,12 @@
31
31
  ],
32
32
  "scripts": {
33
33
  "example": "yarn workspace react-native-executorch-example",
34
- "typecheck": "tsc",
34
+ "typecheck": "tsc --noEmit",
35
35
  "lint": "eslint \"**/*.{js,ts,tsx}\"",
36
36
  "clean": "del-cli android/build example/android/build example/android/app/build example/ios/build lib",
37
- "prepare": "bob build"
37
+ "prepare": "bob build",
38
+ "prepack": "cp ../../README.md ./README.md",
39
+ "postpack": "rm ./README.md"
38
40
  },
39
41
  "keywords": [
40
42
  "react-native",
@@ -1,10 +1,8 @@
1
1
  import { Platform } from 'react-native';
2
- import { AvailableModels } from '../types/stt';
3
2
 
4
3
  const URL_PREFIX =
5
4
  'https://huggingface.co/software-mansion/react-native-executorch';
6
- const VERSION_TAG = 'resolve/v0.4.0';
7
- const NEXT_VERSION_TAG = 'resolve/v0.5.0';
5
+ const VERSION_TAG = 'resolve/v0.5.0';
8
6
 
9
7
  // LLMs
10
8
 
@@ -308,35 +306,70 @@ export const STYLE_TRANSFER_UDNIE = {
308
306
  };
309
307
 
310
308
  // S2T
311
- const MOONSHINE_TINY_DECODER_MODEL = `${URL_PREFIX}-moonshine-tiny/${VERSION_TAG}/xnnpack/moonshine_tiny_xnnpack_decoder.pte`;
312
- const MOONSHINE_TINY_ENCODER_MODEL = `${URL_PREFIX}-moonshine-tiny/${VERSION_TAG}/xnnpack/moonshine_tiny_xnnpack_encoder.pte`;
313
- const MOONSHINE_TOKENIZER = `${URL_PREFIX}-moonshine-tiny/${VERSION_TAG}/moonshine_tiny_tokenizer.json`;
314
- const WHISPER_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/whisper_tokenizer.json`;
315
- const WHISPER_TINY_DECODER_MODEL = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_xnnpack_decoder.pte`;
316
- const WHISPER_TINY_ENCODER_MODEL = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_xnnpack_encoder.pte`;
317
- const WHISPER_TINY_MULTILINGUAL_ENCODER_MODEL = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/xnnpack_whisper_encoder.pte`;
318
- const WHISPER_TINY_MULTILINGUAL_DECODER_MODEL = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/xnnpack_whisper_decoder.pte`;
319
- const WHISPER_TINY_MULTILINGUAL_TOKENIZER = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/tokenizer.json`;
320
-
321
- export const MOONSHINE_TINY = {
322
- modelName: AvailableModels.MOONSHINE,
323
- decoderSource: MOONSHINE_TINY_DECODER_MODEL,
324
- encoderSource: MOONSHINE_TINY_ENCODER_MODEL,
325
- tokenizerSource: MOONSHINE_TOKENIZER,
309
+ const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/tokenizer.json`;
310
+ const WHISPER_TINY_EN_ENCODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_encoder_xnnpack.pte`;
311
+ const WHISPER_TINY_EN_DECODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_decoder_xnnpack.pte`;
312
+
313
+ const WHISPER_BASE_EN_TOKENIZER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/tokenizer.json`;
314
+ const WHISPER_BASE_EN_ENCODER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_encoder_xnnpack.pte`;
315
+ const WHISPER_BASE_EN_DECODER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_decoder_xnnpack.pte`;
316
+
317
+ const WHISPER_SMALL_EN_TOKENIZER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/tokenizer.json`;
318
+ const WHISPER_SMALL_EN_ENCODER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/xnnpack/whisper_small_en_encoder_xnnpack.pte`;
319
+ const WHISPER_SMALL_EN_DECODER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/xnnpack/whisper_small_en_decoder_xnnpack.pte`;
320
+
321
+ const WHISPER_TINY_TOKENIZER = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/tokenizer.json`;
322
+ const WHISPER_TINY_ENCODER_MODEL = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/whisper_tiny_encoder_xnnpack.pte`;
323
+ const WHISPER_TINY_DECODER_MODEL = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/whisper_tiny_decoder_xnnpack.pte`;
324
+
325
+ const WHISPER_BASE_TOKENIZER = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/tokenizer.json`;
326
+ const WHISPER_BASE_ENCODER_MODEL = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/xnnpack/whisper_base_encoder_xnnpack.pte`;
327
+ const WHISPER_BASE_DECODER_MODEL = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/xnnpack/whisper_base_decoder_xnnpack.pte`;
328
+
329
+ const WHISPER_SMALL_TOKENIZER = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/tokenizer.json`;
330
+ const WHISPER_SMALL_ENCODER_MODEL = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/xnnpack/whisper_small_encoder_xnnpack.pte`;
331
+ const WHISPER_SMALL_DECODER_MODEL = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/xnnpack/whisper_small_decoder_xnnpack.pte`;
332
+
333
+ export const WHISPER_TINY_EN = {
334
+ isMultilingual: false,
335
+ encoderSource: WHISPER_TINY_EN_ENCODER,
336
+ decoderSource: WHISPER_TINY_EN_DECODER,
337
+ tokenizerSource: WHISPER_TINY_EN_TOKENIZER,
338
+ };
339
+
340
+ export const WHISPER_BASE_EN = {
341
+ isMultilingual: false,
342
+ encoderSource: WHISPER_BASE_EN_ENCODER,
343
+ decoderSource: WHISPER_BASE_EN_DECODER,
344
+ tokenizerSource: WHISPER_BASE_EN_TOKENIZER,
345
+ };
346
+
347
+ export const WHISPER_SMALL_EN = {
348
+ isMultilingual: false,
349
+ encoderSource: WHISPER_SMALL_EN_ENCODER,
350
+ decoderSource: WHISPER_SMALL_EN_DECODER,
351
+ tokenizerSource: WHISPER_SMALL_EN_TOKENIZER,
326
352
  };
327
353
 
328
354
  export const WHISPER_TINY = {
329
- modelName: AvailableModels.WHISPER,
330
- decoderSource: WHISPER_TINY_DECODER_MODEL,
355
+ isMultilingual: true,
331
356
  encoderSource: WHISPER_TINY_ENCODER_MODEL,
332
- tokenizerSource: WHISPER_TOKENIZER,
357
+ decoderSource: WHISPER_TINY_DECODER_MODEL,
358
+ tokenizerSource: WHISPER_TINY_TOKENIZER,
359
+ };
360
+
361
+ export const WHISPER_BASE = {
362
+ isMultilingual: true,
363
+ encoderSource: WHISPER_BASE_ENCODER_MODEL,
364
+ decoderSource: WHISPER_BASE_DECODER_MODEL,
365
+ tokenizerSource: WHISPER_BASE_TOKENIZER,
333
366
  };
334
367
 
335
- export const WHISPER_TINY_MULTILINGUAL = {
336
- modelName: AvailableModels.WHISPER_MULTILINGUAL,
337
- decoderSource: WHISPER_TINY_MULTILINGUAL_DECODER_MODEL,
338
- encoderSource: WHISPER_TINY_MULTILINGUAL_ENCODER_MODEL,
339
- tokenizerSource: WHISPER_TINY_MULTILINGUAL_TOKENIZER,
368
+ export const WHISPER_SMALL = {
369
+ isMultilingual: true,
370
+ encoderSource: WHISPER_SMALL_ENCODER_MODEL,
371
+ decoderSource: WHISPER_SMALL_DECODER_MODEL,
372
+ tokenizerSource: WHISPER_SMALL_TOKENIZER,
340
373
  };
341
374
 
342
375
  // Image segmentation
@@ -347,23 +380,23 @@ export const DEEPLAB_V3_RESNET50 = {
347
380
  };
348
381
 
349
382
  // Image Embeddings
350
- const CLIP_VIT_BASE_PATCH32_IMAGE_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/clip-vit-base-patch32-vision_xnnpack.pte`;
383
+ const CLIP_VIT_BASE_PATCH32_IMAGE_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/clip-vit-base-patch32-vision_xnnpack.pte`;
351
384
 
352
385
  export const CLIP_VIT_BASE_PATCH32_IMAGE = {
353
386
  modelSource: CLIP_VIT_BASE_PATCH32_IMAGE_MODEL,
354
387
  };
355
388
 
356
389
  // Text Embeddings
357
- const ALL_MINILM_L6_V2_MODEL = `${URL_PREFIX}-all-MiniLM-L6-v2/${NEXT_VERSION_TAG}/all-MiniLM-L6-v2_xnnpack.pte`;
358
- const ALL_MINILM_L6_V2_TOKENIZER = `${URL_PREFIX}-all-MiniLM-L6-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
359
- const ALL_MPNET_BASE_V2_MODEL = `${URL_PREFIX}-all-mpnet-base-v2/${NEXT_VERSION_TAG}/all-mpnet-base-v2_xnnpack.pte`;
360
- const ALL_MPNET_BASE_V2_TOKENIZER = `${URL_PREFIX}-all-mpnet-base-v2/${NEXT_VERSION_TAG}/tokenizer.json`;
361
- const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${NEXT_VERSION_TAG}/multi-qa-MiniLM-L6-cos-v1_xnnpack.pte`;
362
- const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${NEXT_VERSION_TAG}/tokenizer.json`;
363
- const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${NEXT_VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
364
- const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${NEXT_VERSION_TAG}/tokenizer.json`;
365
- const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/clip-vit-base-patch32-text_xnnpack.pte`;
366
- const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/tokenizer.json`;
390
+ const ALL_MINILM_L6_V2_MODEL = `${URL_PREFIX}-all-MiniLM-L6-v2/${VERSION_TAG}/all-MiniLM-L6-v2_xnnpack.pte`;
391
+ const ALL_MINILM_L6_V2_TOKENIZER = `${URL_PREFIX}-all-MiniLM-L6-v2/${VERSION_TAG}/tokenizer.json`;
392
+ const ALL_MPNET_BASE_V2_MODEL = `${URL_PREFIX}-all-mpnet-base-v2/${VERSION_TAG}/all-mpnet-base-v2_xnnpack.pte`;
393
+ const ALL_MPNET_BASE_V2_TOKENIZER = `${URL_PREFIX}-all-mpnet-base-v2/${VERSION_TAG}/tokenizer.json`;
394
+ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/multi-qa-MiniLM-L6-cos-v1_xnnpack.pte`;
395
+ const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`;
396
+ const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
397
+ const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
398
+ const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/clip-vit-base-patch32-text_xnnpack.pte`;
399
+ const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`;
367
400
 
368
401
  export const ALL_MINILM_L6_V2 = {
369
402
  modelSource: ALL_MINILM_L6_V2_MODEL,
@@ -2,7 +2,7 @@ import { alphabets, symbols } from './symbols';
2
2
 
3
3
  const URL_PREFIX =
4
4
  'https://huggingface.co/software-mansion/react-native-executorch';
5
- const VERSION_TAG = 'resolve/v0.4.0';
5
+ const VERSION_TAG = 'resolve/v0.5.0';
6
6
 
7
7
  const DETECTOR_CRAFT_1280_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack/xnnpack_craft_1280.pte`;
8
8
  const DETECTOR_CRAFT_800_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack/xnnpack_craft_800.pte`;