react-native-executorch 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/android/libs/classes.jar +0 -0
  2. package/common/rnexecutorch/host_objects/JsiConversions.h +43 -0
  3. package/common/rnexecutorch/models/llm/LLM.cpp +55 -42
  4. package/common/rnexecutorch/models/llm/LLM.h +4 -3
  5. package/common/rnexecutorch/models/llm/Types.h +23 -0
  6. package/common/runner/base_llm_runner.cpp +10 -3
  7. package/common/runner/base_llm_runner.h +1 -0
  8. package/common/runner/constants.h +15 -1
  9. package/common/runner/encoders/audio_encoder.cpp +111 -0
  10. package/common/runner/encoders/audio_encoder.h +40 -0
  11. package/common/runner/encoders/vision_encoder.cpp +0 -1
  12. package/common/runner/irunner.h +5 -0
  13. package/common/runner/multimodal_decoder_runner.h +50 -1
  14. package/common/runner/multimodal_input.h +16 -1
  15. package/common/runner/multimodal_prefiller.cpp +374 -64
  16. package/common/runner/multimodal_prefiller.h +57 -6
  17. package/common/runner/multimodal_runner.cpp +19 -12
  18. package/common/runner/multimodal_runner.h +1 -1
  19. package/common/runner/sampler.cpp +111 -35
  20. package/common/runner/sampler.h +13 -5
  21. package/common/runner/text_decoder_runner.cpp +1 -4
  22. package/common/runner/text_decoder_runner.h +3 -2
  23. package/common/runner/text_prefiller.cpp +8 -8
  24. package/common/runner/text_prefiller.h +8 -1
  25. package/common/runner/text_runner.cpp +35 -9
  26. package/common/runner/text_token_generator.h +2 -3
  27. package/common/runner/util.h +0 -1
  28. package/lib/module/constants/llmDefaults.js +1 -1
  29. package/lib/module/constants/llmDefaults.js.map +1 -1
  30. package/lib/module/constants/modelRegistry.js +33 -2
  31. package/lib/module/constants/modelRegistry.js.map +1 -1
  32. package/lib/module/constants/modelUrls.js +43 -6
  33. package/lib/module/constants/modelUrls.js.map +1 -1
  34. package/lib/module/controllers/LLMController.js +69 -20
  35. package/lib/module/controllers/LLMController.js.map +1 -1
  36. package/lib/module/hooks/natural_language_processing/useLLM.js +1 -5
  37. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  38. package/lib/module/modules/natural_language_processing/LLMModule.js +12 -7
  39. package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
  40. package/lib/module/types/llm.js +11 -0
  41. package/lib/module/types/llm.js.map +1 -1
  42. package/lib/typescript/constants/llmDefaults.d.ts +1 -1
  43. package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
  44. package/lib/typescript/constants/modelRegistry.d.ts +28 -1
  45. package/lib/typescript/constants/modelRegistry.d.ts.map +1 -1
  46. package/lib/typescript/constants/modelUrls.d.ts +40 -12
  47. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  48. package/lib/typescript/controllers/LLMController.d.ts +7 -9
  49. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  50. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +6 -3
  51. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
  52. package/lib/typescript/types/llm.d.ts +63 -36
  53. package/lib/typescript/types/llm.d.ts.map +1 -1
  54. package/package.json +1 -1
  55. package/react-native-executorch.podspec +6 -0
  56. package/src/constants/llmDefaults.ts +1 -1
  57. package/src/constants/modelRegistry.ts +34 -2
  58. package/src/constants/modelUrls.ts +47 -6
  59. package/src/controllers/LLMController.ts +89 -40
  60. package/src/hooks/natural_language_processing/useLLM.ts +5 -6
  61. package/src/modules/natural_language_processing/LLMModule.ts +19 -8
  62. package/src/types/llm.ts +64 -34
  63. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  64. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  65. package/third-party/include/executorch/ExecuTorch.h +2 -0
  66. package/third-party/include/executorch/ExecuTorchModule.h +46 -0
  67. package/third-party/include/executorch/extension/data_loader/buffer_data_loader.h +4 -3
  68. package/third-party/include/executorch/extension/data_loader/mman.h +46 -0
  69. package/third-party/include/executorch/extension/data_loader/mmap_data_loader.h +4 -0
  70. package/third-party/include/executorch/extension/data_loader/shared_ptr_data_loader.h +7 -3
  71. package/third-party/include/executorch/extension/module/module.h +47 -8
  72. package/third-party/include/executorch/extension/tensor/tensor_ptr.h +17 -5
  73. package/third-party/include/executorch/kernels/optimized/Functions.h +12 -0
  74. package/third-party/include/executorch/kernels/optimized/NativeFunctions.h +4 -0
  75. package/third-party/include/executorch/kernels/portable/Functions.h +18 -0
  76. package/third-party/include/executorch/kernels/portable/NativeFunctions.h +6 -0
  77. package/third-party/include/executorch/runtime/backend/backend_options_map.h +37 -0
  78. package/third-party/include/executorch/runtime/core/array_ref.h +3 -1
  79. package/third-party/include/executorch/runtime/core/error.h +1 -0
  80. package/third-party/include/executorch/runtime/core/evalue.h +256 -9
  81. package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +24 -0
  82. package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +9 -6
  83. package/third-party/include/executorch/runtime/core/portable_type/device.h +3 -4
  84. package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +31 -1
  85. package/third-party/include/executorch/runtime/executor/method.h +9 -3
  86. package/third-party/include/executorch/runtime/executor/method_meta.h +14 -0
  87. package/third-party/include/executorch/runtime/executor/platform_memory_allocator.h +12 -2
  88. package/third-party/include/executorch/runtime/executor/program.h +3 -1
  89. package/third-party/include/executorch/runtime/executor/tensor_parser.h +5 -1
  90. package/third-party/include/executorch/runtime/kernel/operator_registry.h +9 -0
  91. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  92. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  93. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/mlx.metallib +0 -0
  94. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  95. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  96. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/mlx.metallib +0 -0
@@ -4,56 +4,76 @@ import { ResourceSource } from './common';
4
4
  * Capabilities a multimodal LLM can have.
5
5
  * @category Types
6
6
  */
7
- export type LLMCapability = 'vision';
7
+ export type LLMCapability = 'vision' | 'audio';
8
8
  /**
9
9
  * Derives the media argument shape for `sendMessage` from a capabilities tuple.
10
10
  * @category Types
11
11
  */
12
- export type MediaArg<C extends readonly LLMCapability[]> = 'vision' extends C[number] ? {
12
+ export type MediaArg<C extends readonly LLMCapability[]> = ('vision' extends C[number] ? {
13
13
  imagePath?: string;
14
- } : object;
14
+ } : object) & ('audio' extends C[number] ? {
15
+ audioBuffer?: Float32Array;
16
+ } : object);
15
17
  /**
16
18
  * Union of all built-in LLM model names.
17
19
  * @category Types
18
20
  */
19
- export type LLMModelName = 'llama-3.2-3b' | 'llama-3.2-3b-qlora' | 'llama-3.2-3b-spinquant' | 'llama-3.2-1b' | 'llama-3.2-1b-qlora' | 'llama-3.2-1b-spinquant' | 'qwen3-0.6b' | 'qwen3-0.6b-quantized' | 'qwen3-1.7b' | 'qwen3-1.7b-quantized' | 'qwen3-4b' | 'qwen3-4b-quantized' | 'hammer2.1-0.5b' | 'hammer2.1-0.5b-quantized' | 'hammer2.1-1.5b' | 'hammer2.1-1.5b-quantized' | 'hammer2.1-3b' | 'hammer2.1-3b-quantized' | 'smollm2.1-135m' | 'smollm2.1-135m-quantized' | 'smollm2.1-360m' | 'smollm2.1-360m-quantized' | 'smollm2.1-1.7b' | 'smollm2.1-1.7b-quantized' | 'qwen2.5-0.5b' | 'qwen2.5-0.5b-quantized' | 'qwen2.5-1.5b' | 'qwen2.5-1.5b-quantized' | 'qwen2.5-3b' | 'qwen2.5-3b-quantized' | 'phi-4-mini-4b' | 'phi-4-mini-4b-quantized' | 'lfm2.5-350m' | 'lfm2.5-350m-quantized' | 'lfm2.5-1.2b-instruct' | 'lfm2.5-1.2b-instruct-quantized' | 'lfm2.5-vl-1.6b-quantized' | 'lfm2.5-vl-450m-quantized' | 'qwen3.5-0.8b-quantized' | 'qwen3.5-2b-quantized' | 'bielik-v3.0-1.5b' | 'bielik-v3.0-1.5b-quantized';
21
+ export type LLMModelName = 'gemma4-e2b' | 'gemma4-e2b-multimodal' | 'llama-3.2-3b' | 'llama-3.2-3b-qlora' | 'llama-3.2-3b-spinquant' | 'llama-3.2-1b' | 'llama-3.2-1b-qlora' | 'llama-3.2-1b-spinquant' | 'qwen3-0.6b' | 'qwen3-0.6b-quantized' | 'qwen3-1.7b' | 'qwen3-1.7b-quantized' | 'qwen3-4b' | 'qwen3-4b-quantized' | 'hammer2.1-0.5b' | 'hammer2.1-0.5b-quantized' | 'hammer2.1-1.5b' | 'hammer2.1-1.5b-quantized' | 'hammer2.1-3b' | 'hammer2.1-3b-quantized' | 'smollm2.1-135m' | 'smollm2.1-135m-quantized' | 'smollm2.1-360m' | 'smollm2.1-360m-quantized' | 'smollm2.1-1.7b' | 'smollm2.1-1.7b-quantized' | 'qwen2.5-0.5b' | 'qwen2.5-0.5b-quantized' | 'qwen2.5-1.5b' | 'qwen2.5-1.5b-quantized' | 'qwen2.5-3b' | 'qwen2.5-3b-quantized' | 'phi-4-mini-4b' | 'phi-4-mini-4b-quantized' | 'lfm2.5-350m' | 'lfm2.5-350m-quantized' | 'lfm2.5-1.2b-instruct' | 'lfm2.5-1.2b-instruct-quantized' | 'lfm2.5-vl-1.6b-quantized' | 'lfm2.5-vl-450m-quantized' | 'qwen3.5-0.8b-quantized' | 'qwen3.5-2b-quantized' | 'bielik-v3.0-1.5b' | 'bielik-v3.0-1.5b-quantized';
22
+ /**
23
+ * Audio soft-token expansion constants for audio_encoder.
24
+ * @category Types
25
+ */
26
+ export interface AudioConfig {
27
+ samplesPerBlock: number;
28
+ tokensPerBlock: number;
29
+ }
30
+ /**
31
+ * Properties defining LLMModel.
32
+ * @category Types
33
+ */
34
+ export interface LLMModel {
35
+ /**
36
+ * The built-in model name (e.g. `'llama-3.2-3b'`). Used for telemetry and hook reload triggers.
37
+ * Pass one of the pre-built LLM constants (e.g. `LLAMA3_2_3B`) to populate all required fields.
38
+ */
39
+ modelName: LLMModelName;
40
+ /**
41
+ * `ResourceSource` that specifies the location of the model binary.
42
+ */
43
+ modelSource: ResourceSource;
44
+ /**
45
+ * `ResourceSource` pointing to the JSON file which contains the tokenizer.
46
+ */
47
+ tokenizerSource: ResourceSource;
48
+ /**
49
+ * `ResourceSource` pointing to the JSON file which contains the tokenizer config.
50
+ */
51
+ tokenizerConfigSource: ResourceSource;
52
+ /**
53
+ * Optional list of modality capabilities the model supports.
54
+ * Determines the type of the `media` argument in `sendMessage`.
55
+ * Example: `['vision']` enables `sendMessage(text, { imagePath })`.
56
+ */
57
+ capabilities?: readonly LLMCapability[];
58
+ /**
59
+ * Recommended default generation settings, typically copied from the
60
+ * upstream `generation_config.json` or the model card. Applied automatically
61
+ * after the native module loads and before any user `configure()` call,
62
+ * so callers only need to override the values they want to change.
63
+ */
64
+ generationConfig?: GenerationConfig;
65
+ /**
66
+ * Defines config for audio input modality for multimodal LLMs.
67
+ * `capabilities` must include 'audio'.
68
+ */
69
+ audioConfig?: AudioConfig;
70
+ }
20
71
  /**
21
72
  * Properties for initializing and configuring a Large Language Model (LLM) instance.
22
73
  * @category Types
23
74
  */
24
75
  export interface LLMProps {
25
- model: {
26
- /**
27
- * The built-in model name (e.g. `'llama-3.2-3b'`). Used for telemetry and hook reload triggers.
28
- * Pass one of the pre-built LLM constants (e.g. `LLAMA3_2_3B`) to populate all required fields.
29
- */
30
- modelName: LLMModelName;
31
- /**
32
- * `ResourceSource` that specifies the location of the model binary.
33
- */
34
- modelSource: ResourceSource;
35
- /**
36
- * `ResourceSource` pointing to the JSON file which contains the tokenizer.
37
- */
38
- tokenizerSource: ResourceSource;
39
- /**
40
- * `ResourceSource` pointing to the JSON file which contains the tokenizer config.
41
- */
42
- tokenizerConfigSource: ResourceSource;
43
- /**
44
- * Optional list of modality capabilities the model supports.
45
- * Determines the type of the `media` argument in `sendMessage`.
46
- * Example: `['vision']` enables `sendMessage(text, { imagePath })`.
47
- */
48
- capabilities?: readonly LLMCapability[];
49
- /**
50
- * Recommended default generation settings, typically copied from the
51
- * upstream `generation_config.json` or the model card. Applied automatically
52
- * after the native module loads and before any user `configure()` call,
53
- * so callers only need to override the values they want to change.
54
- */
55
- generationConfig?: GenerationConfig;
56
- };
76
+ model: LLMModel;
57
77
  /**
58
78
  * Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
59
79
  */
@@ -224,6 +244,12 @@ export interface Message {
224
244
  * controller normalizes the path before passing it to native code.
225
245
  */
226
246
  mediaPath?: string;
247
+ /**
248
+ * Optional fp32 mono 16 kHz PCM buffer. Only valid on `user` messages for
249
+ * models with the `'audio'` capability. The controller forwards it to the
250
+ * native `generateMultimodal` path.
251
+ */
252
+ audioWaveform?: Float32Array;
227
253
  }
228
254
  /**
229
255
  * Represents a tool call made by the model.
@@ -309,6 +335,7 @@ export interface ContextStrategy {
309
335
  export declare const SPECIAL_TOKENS: {
310
336
  BOS_TOKEN: string;
311
337
  EOS_TOKEN: string;
338
+ EOT_TOKEN: string;
312
339
  UNK_TOKEN: string;
313
340
  SEP_TOKEN: string;
314
341
  PAD_TOKEN: string;
@@ -1 +1 @@
1
- {"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/types/llm.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE1C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,CAAC;AAErC;;;GAGG;AACH,MAAM,MAAM,QAAQ,CAAC,CAAC,SAAS,SAAS,aAAa,EAAE,IACrD,QAAQ,SAAS,CAAC,CAAC,MAAM,CAAC,GAAG;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAAC;AAE/D;;;GAGG;AACH,MAAM,MAAM,YAAY,GACpB,cAAc,GACd,oBAAoB,GACpB,wBAAwB,GACxB,cAAc,GACd,oBAAoB,GACpB,wBAAwB,GACxB,YAAY,GACZ,sBAAsB,GACtB,YAAY,GACZ,sBAAsB,GACtB,UAAU,GACV,oBAAoB,GACpB,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,cAAc,GACd,wBAAwB,GACxB,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,cAAc,GACd,wBAAwB,GACxB,cAAc,GACd,wBAAwB,GACxB,YAAY,GACZ,sBAAsB,GACtB,eAAe,GACf,yBAAyB,GACzB,aAAa,GACb,uBAAuB,GACvB,sBAAsB,GACtB,gCAAgC,GAChC,0BAA0B,GAC1B,0BAA0B,GAC1B,wBAAwB,GACxB,sBAAsB,GACtB,kBAAkB,GAClB,4BAA4B,CAAC;AAEjC;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE;QACL;;;WAGG;QACH,SAAS,EAAE,YAAY,CAAC;QACxB;;WAEG;QACH,WAAW,EAAE,cAAc,CAAC;QAC5B;;WAEG;QACH,eAAe,EAAE,cAAc,CAAC;QAChC;;WAEG;QACH,qBAAqB,EAAE,cAAc,CAAC;QACtC;;;;WAIG;QACH,YAAY,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;QACxC;;;;;WAKG;QACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;KACrC,CAAC;IACF;;OAEG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,cAAc,EAAE,OAAO,EAAE,CAAC;IAE1B;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC;IAEjB;;OAEG;IACH,YAAY,EAAE,OAAO,CAAC;IAEtB;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IAEzB;;OAEG;IACH,KAAK,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAEhC;;;;OAIG;IACH,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,WAAW,EAAE,gBAAgB,EAAE,EAAE,SAAS,KAAK,IAAI,CAAC;IAE9E;;;OAGG;IACH,sBAAsB,EAAE,MAAM,MAAM,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,KAAK,CAAC,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IACtE;;;OAGG;IACH,kBAAkB,EAAE,MAAM,MAAM,CAAC;IACjC;;;OAGG;IACH,mBAAmB,EAAE,MAAM,MAAM,CAAC;IAElC;;;OAGG;IACH,aAAa,EAAE,CAAC,K
AAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAEvC;;OAEG;IACH,SAAS,EAAE,MAAM,IAAI,CAAC;CACvB;AAED;;;;GAIG;AACH,MAAM,WAAW,iBAAiB,CAChC,CAAC,SAAS,SAAS,aAAa,EAAE,GAAG,SAAS,aAAa,EAAE,CAC7D,SAAQ,WAAW;IACnB;;;;;;;OAOG;IACH,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACxE;AAED;;;;GAIG;AACH,MAAM,WAAW,OAAQ,SAAQ,WAAW;IAC1C;;;;;OAKG;IACH,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB;;;;;;;;OAQG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAEjC;;;;;;;;OAQG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;;;;;;;;;OAcG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,CAAC;AAE7B;;;;;;GAMG;AACH,MAAM,WAAW,UAAU;IACzB,qBAAqB,EAAE,OAAO,EAAE,CAAC;IACjC,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,eAAe,CAAC;CAClC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,OAAO,EAAE,CAAC;IACjB,mBAAmB,EAAE,CAAC,IAAI,EAAE,QAAQ,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAChE,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sCAAsC;IACtC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B;;;;;;;OAOG;IACH,YAAY,CACV,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,OAAO,EAAE,EAClB,gBAAgB,EAAE,MAAM,EACxB,aAAa,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,MAAM,GAC7C,OAAO,EAAE,CAAC;CACd;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;;;;;;;;CAQ1B,CAAC"}
1
+ {"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/types/llm.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE1C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE/C;;;GAGG;AACH,MAAM,MAAM,QAAQ,CAAC,CAAC,SAAS,SAAS,aAAa,EAAE,IACrD,CAAC,QAAQ,SAAS,CAAC,CAAC,MAAM,CAAC,GAAG;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAAC,GAC5D,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,GAAG;IAAE,WAAW,CAAC,EAAE,YAAY,CAAA;CAAE,GAAG,MAAM,CAAC,CAAC;AAE1E;;;GAGG;AACH,MAAM,MAAM,YAAY,GACpB,YAAY,GACZ,uBAAuB,GACvB,cAAc,GACd,oBAAoB,GACpB,wBAAwB,GACxB,cAAc,GACd,oBAAoB,GACpB,wBAAwB,GACxB,YAAY,GACZ,sBAAsB,GACtB,YAAY,GACZ,sBAAsB,GACtB,UAAU,GACV,oBAAoB,GACpB,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,cAAc,GACd,wBAAwB,GACxB,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,cAAc,GACd,wBAAwB,GACxB,cAAc,GACd,wBAAwB,GACxB,YAAY,GACZ,sBAAsB,GACtB,eAAe,GACf,yBAAyB,GACzB,aAAa,GACb,uBAAuB,GACvB,sBAAsB,GACtB,gCAAgC,GAChC,0BAA0B,GAC1B,0BAA0B,GAC1B,wBAAwB,GACxB,sBAAsB,GACtB,kBAAkB,GAClB,4BAA4B,CAAC;AAEjC;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB;;;OAGG;IACH,SAAS,EAAE,YAAY,CAAC;IACxB;;OAEG;IACH,WAAW,EAAE,cAAc,CAAC;IAC5B;;OAEG;IACH,eAAe,EAAE,cAAc,CAAC;IAChC;;OAEG;IACH,qBAAqB,EAAE,cAAc,CAAC;IACtC;;;;OAIG;IACH,YAAY,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IACxC;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;;OAGG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,QAAQ,CAAC;IAChB;;OAEG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,cAAc,EAAE,OAAO,EAAE,CAAC;IAE1B;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC;IAEjB;;OAEG;IACH,YAAY,EAAE,OAAO,CAAC;IAEtB;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IAEzB;;OAEG;IACH,KAAK,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAEhC;;;;OAIG;IACH,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,WAAW,EAAE,gBAAgB,EAAE
,EAAE,SAAS,KAAK,IAAI,CAAC;IAE9E;;;OAGG;IACH,sBAAsB,EAAE,MAAM,MAAM,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,KAAK,CAAC,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IACtE;;;OAGG;IACH,kBAAkB,EAAE,MAAM,MAAM,CAAC;IACjC;;;OAGG;IACH,mBAAmB,EAAE,MAAM,MAAM,CAAC;IAElC;;;OAGG;IACH,aAAa,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAEvC;;OAEG;IACH,SAAS,EAAE,MAAM,IAAI,CAAC;CACvB;AAED;;;;GAIG;AACH,MAAM,WAAW,iBAAiB,CAChC,CAAC,SAAS,SAAS,aAAa,EAAE,GAAG,SAAS,aAAa,EAAE,CAC7D,SAAQ,WAAW;IACnB;;;;;;;OAOG;IACH,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACxE;AAED;;;;GAIG;AACH,MAAM,WAAW,OAAQ,SAAQ,WAAW;IAC1C;;;;;OAKG;IACH,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB;;;;;;;;OAQG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAEjC;;;;;;;;OAQG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;;;;;;;;;OAcG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,aAAa,CAAC,EAAE,YAAY,CAAC;CAC9B;AAED;;;;;GAKG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,CAAC;AAE7B;;;;;;GAMG;AACH,MAAM,WAAW,UAAU;IACzB,qBAAqB,EAAE,OAAO,EAAE,CAAC;IACjC,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,eAAe,CAAC;CAClC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,OAAO,EAAE,CAAC;IACjB,mBAAmB,EAAE,CAAC,IAAI,EAAE,QAAQ,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAChE,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sCAAsC;IACtC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B;;;;;;;OAOG;IACH,YAAY,CACV,YA
AY,EAAE,MAAM,EACpB,OAAO,EAAE,OAAO,EAAE,EAClB,gBAAgB,EAAE,MAAM,EACxB,aAAa,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,MAAM,GAC7C,OAAO,EAAE,CAAC;CACd;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;;;;;;;;;CAS1B,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-executorch",
3
- "version": "0.9.0",
3
+ "version": "0.9.1",
4
4
  "description": "An easy way to run AI models in React Native with ExecuTorch",
5
5
  "source": "./src/index.ts",
6
6
  "main": "./lib/module/index.js",
@@ -62,6 +62,12 @@ Pod::Spec.new do |s|
62
62
 
63
63
  s.libraries = "z"
64
64
  s.ios.vendored_frameworks = "third-party/ios/ExecutorchLib.xcframework"
65
+
66
+ # NOTE: mlx.metallib (the MLX GPU kernels) is bundled INSIDE
67
+ # ExecutorchLib.framework, colocated with the binary that contains the MLX
68
+ # code. MLX's runtime loader resolves the metallib relative to that binary
69
+ # (via dladdr), so it must live next to it in the framework — not at the app
70
+ # bundle root.
65
71
  # Exclude file with tests to not introduce gtest dependency.
66
72
  # Do not include the headers from common/rnexecutorch/jsi/ as source files.
67
73
  # Xcode/Cocoapods leaks them to other pods that an app also depends on, so if
@@ -6,7 +6,7 @@ import { SlidingWindowContextStrategy } from '../utils/llms/context_strategy';
6
6
  * @category Utilities - LLM
7
7
  */
8
8
  export const DEFAULT_SYSTEM_PROMPT =
9
- "You are a knowledgeable, efficient, and direct AI assistant. Provide concise answers, focusing on the key information needed. Offer suggestions tactfully when appropriate to improve outcomes. Engage in productive collaboration with the user. Don't return too much text.";
9
+ "You are a knowledgeable, efficient, and direct AI assistant. Provide concise answers, focusing on the key information needed. Offer suggestions tactfully when appropriate to improve outcomes. Engage in productive collaboration with the user. Don't return too much text. If provided with audio samples treat it with at most importance";
10
10
 
11
11
  /**
12
12
  * Generates a default structured output prompt based on the provided JSON schema.
@@ -38,7 +38,7 @@ import { RnExecutorchErrorCode } from '../errors/ErrorCodes';
38
38
  * compile-time error.
39
39
  * @category Utils
40
40
  */
41
- export type Backend = 'xnnpack' | 'coreml' | 'vulkan' | 'qnn';
41
+ export type Backend = 'xnnpack' | 'coreml' | 'vulkan' | 'qnn' | 'mlx';
42
42
 
43
43
  /**
44
44
  * Options for a `models` accessor call.
@@ -78,7 +78,7 @@ type ConfigOf<V> = Extract<
78
78
  >;
79
79
  type BackendsOf<V> = Extract<keyof V, Backend>;
80
80
 
81
- const BACKEND_ORDER: Backend[] = ['xnnpack', 'coreml', 'vulkan', 'qnn'];
81
+ const BACKEND_ORDER: Backend[] = ['xnnpack', 'coreml', 'mlx', 'vulkan', 'qnn'];
82
82
 
83
83
  function firstBackend(variants: AnyVariantMap): Backend {
84
84
  for (const b of BACKEND_ORDER) {
@@ -181,6 +181,33 @@ function tts<C extends TextToSpeechModelConfig>(c: C): () => C {
181
181
  // Per-backend variant maps for models that ship more than one backend.
182
182
  // ─────────────────────────────────────────────────────────────────────────────
183
183
 
184
+ const GEMMA4_E2B_VARIANTS = {
185
+ mlx: {
186
+ base: {
187
+ modelName: 'gemma4-e2b' as const,
188
+ modelSource: M.GEMMA4_E2B_MLX_MODEL,
189
+ tokenizerSource: M.GEMMA4_E2B_TOKENIZER,
190
+ tokenizerConfigSource: M.GEMMA4_E2B_TOKENIZER_CONFIG,
191
+ },
192
+ },
193
+ xnnpack: {
194
+ base: {
195
+ modelName: 'gemma4-e2b' as const,
196
+ modelSource: M.GEMMA4_E2B_XNNPACK_MODEL,
197
+ tokenizerSource: M.GEMMA4_E2B_TOKENIZER,
198
+ tokenizerConfigSource: M.GEMMA4_E2B_TOKENIZER_CONFIG,
199
+ },
200
+ },
201
+ vulkan: {
202
+ base: {
203
+ modelName: 'gemma4-e2b' as const,
204
+ modelSource: M.GEMMA4_E2B_VULKAN_MODEL,
205
+ tokenizerSource: M.GEMMA4_E2B_TOKENIZER,
206
+ tokenizerConfigSource: M.GEMMA4_E2B_TOKENIZER_CONFIG,
207
+ },
208
+ },
209
+ };
210
+
184
211
  const EFFICIENTNET_V2_S_VARIANTS = {
185
212
  xnnpack: {
186
213
  base: {
@@ -496,10 +523,15 @@ export const models = {
496
523
  M.LFM2_5_1_2B_INSTRUCT_QUANTIZED
497
524
  ),
498
525
  bielik_v3_0_1_5b: pair(M.BIELIK_V3_0_1_5B, M.BIELIK_V3_0_1_5B_QUANTIZED),
526
+ gemma4_e2b: variant(GEMMA4_E2B_VARIANTS, {
527
+ ios: 'mlx',
528
+ android: 'vulkan',
529
+ }),
499
530
  // Multimodal LLMs — same hook/module as plain LLMs, listed here so users
500
531
  // pick a model by capability ("LLM") rather than by modality.
501
532
  lfm2_5_vl_1_6b: base(M.LFM2_5_VL_1_6B_QUANTIZED),
502
533
  lfm2_5_vl_450m: base(M.LFM2_5_VL_450M_QUANTIZED),
534
+ gemma4_e2b_multimodal: base(M.GEMMA4_E2B_MM),
503
535
  },
504
536
  classification: {
505
537
  efficientnet_v2_s: variant(EFFICIENTNET_V2_S_VARIANTS),
@@ -125,6 +125,47 @@ export const QWEN3_0_6B_QUANTIZED = {
125
125
  generationConfig: QWEN3_GENERATION_CONFIG,
126
126
  } as const;
127
127
 
128
+ // GEMMA 4 — separate HF repo; tokenizer files live at the e2b root and are
129
+ // shared by all backend variants.
130
+ const GEMMA4_E2B_PREFIX = `${URL_PREFIX}-gemma-4/${VERSION_TAG}/e2b`;
131
+ export const GEMMA4_E2B_MLX_MODEL = `${GEMMA4_E2B_PREFIX}/mlx/gemma4_e2b_mlx_int4.pte`;
132
+ export const GEMMA4_E2B_XNNPACK_MODEL = `${GEMMA4_E2B_PREFIX}/xnnpack/gemma_4_e2b_xnnpack_8da4w.pte`;
133
+ export const GEMMA4_E2B_VULKAN_MODEL = `${GEMMA4_E2B_PREFIX}/vulkan/gemma_4_e2b_vulkan_8da4w.pte`;
134
+ export const GEMMA4_E2B_TOKENIZER = `${GEMMA4_E2B_PREFIX}/tokenizer.json`;
135
+ export const GEMMA4_E2B_TOKENIZER_CONFIG = `${GEMMA4_E2B_PREFIX}/tokenizer_config.json`;
136
+
137
+ const GEMMA4_E2B_MODEL =
138
+ Platform.OS === `android` ? GEMMA4_E2B_VULKAN_MODEL : GEMMA4_E2B_MLX_MODEL;
139
+
140
+ const GEMMA4_E2B_MLX_MM = `${URL_PREFIX}-gemma-4-multimodal/${VERSION_TAG}/e2b/mlx/gemma4_e2b_mlx_int4.pte`;
141
+ const GEMMA4_E2B_VULKAN_MM = `${URL_PREFIX}-gemma-4-multimodal/${VERSION_TAG}/e2b/vulkan/gemma_4_e2b_vulkan_8da4w.pte`;
142
+
143
+ /**
144
+ * @category Models - LLM
145
+ */
146
+ export const GEMMA4_E2B = {
147
+ modelName: 'gemma4-e2b',
148
+ modelSource: GEMMA4_E2B_MODEL,
149
+ tokenizerSource: GEMMA4_E2B_TOKENIZER,
150
+ tokenizerConfigSource: GEMMA4_E2B_TOKENIZER_CONFIG,
151
+ } as const;
152
+
153
+ /**
154
+ * @category Models - LLM Multimodal
155
+ */
156
+ export const GEMMA4_E2B_MM = {
157
+ modelName: 'gemma4-e2b-multimodal',
158
+ modelSource:
159
+ Platform.OS === `android` ? GEMMA4_E2B_VULKAN_MM : GEMMA4_E2B_MLX_MM,
160
+ tokenizerSource: GEMMA4_E2B_TOKENIZER,
161
+ tokenizerConfigSource: GEMMA4_E2B_TOKENIZER_CONFIG,
162
+ capabilities: ['vision', 'audio'],
163
+ audioConfig: {
164
+ samplesPerBlock: 7680,
165
+ tokensPerBlock: 12,
166
+ },
167
+ } as const;
168
+
128
169
  /**
129
170
  * @category Models - LLM
130
171
  */
@@ -816,27 +857,27 @@ export const STYLE_TRANSFER_UDNIE_QUANTIZED = {
816
857
  // S2T
817
858
  export const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/tokenizer.json`;
818
859
  export const WHISPER_TINY_EN_MODEL_XNNPACK = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_xnnpack_fp32.pte`;
819
- export const WHISPER_TINY_EN_MODEL_COREML = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/coreml/whisper_tiny_en_coreml_fp32.pte`;
860
+ export const WHISPER_TINY_EN_MODEL_COREML = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/coreml/whisper_tiny_en_coreml_fp16.pte`;
820
861
 
821
862
  export const WHISPER_BASE_EN_TOKENIZER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/tokenizer.json`;
822
863
  export const WHISPER_BASE_EN_MODEL_XNNPACK = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_xnnpack_fp32.pte`;
823
- export const WHISPER_BASE_EN_MODEL_COREML = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/coreml/whisper_base_en_coreml_fp32.pte`;
864
+ export const WHISPER_BASE_EN_MODEL_COREML = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/coreml/whisper_base_en_coreml_fp16.pte`;
824
865
 
825
866
  export const WHISPER_SMALL_EN_TOKENIZER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/tokenizer.json`;
826
867
  export const WHISPER_SMALL_EN_MODEL_XNNPACK = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/xnnpack/whisper_small_en_xnnpack_fp32.pte`;
827
- export const WHISPER_SMALL_EN_MODEL_COREML = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/coreml/whisper_small_en_coreml_fp32.pte`;
868
+ export const WHISPER_SMALL_EN_MODEL_COREML = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/coreml/whisper_small_en_coreml_fp16.pte`;
828
869
 
829
870
  export const WHISPER_TINY_TOKENIZER = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/tokenizer.json`;
830
871
  export const WHISPER_TINY_MODEL_XNNPACK = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/whisper_tiny_xnnpack_fp32.pte`;
831
- export const WHISPER_TINY_MODEL_COREML = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/coreml/whisper_tiny_coreml_fp32.pte`;
872
+ export const WHISPER_TINY_MODEL_COREML = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/coreml/whisper_tiny_coreml_fp16.pte`;
832
873
 
833
874
  export const WHISPER_BASE_TOKENIZER = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/tokenizer.json`;
834
875
  export const WHISPER_BASE_MODEL_XNNPACK = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/xnnpack/whisper_base_xnnpack_fp32.pte`;
835
- export const WHISPER_BASE_MODEL_COREML = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/coreml/whisper_base_coreml_fp32.pte`;
876
+ export const WHISPER_BASE_MODEL_COREML = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/coreml/whisper_base_coreml_fp16.pte`;
836
877
 
837
878
  export const WHISPER_SMALL_TOKENIZER = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/tokenizer.json`;
838
879
  export const WHISPER_SMALL_MODEL_XNNPACK = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/xnnpack/whisper_small_xnnpack_fp32.pte`;
839
- export const WHISPER_SMALL_MODEL_COREML = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/coreml/whisper_small_coreml_fp32.pte`;
880
+ export const WHISPER_SMALL_MODEL_COREML = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/coreml/whisper_small_coreml_fp16.pte`;
840
881
 
841
882
  /**
842
883
  * @category Models - Speech To Text
@@ -1,11 +1,11 @@
1
- import { ResourceSource } from '../types/common';
2
1
  import { ResourceFetcher } from '../utils/ResourceFetcher';
3
2
  import { Template } from '@huggingface/jinja';
4
3
  import { DEFAULT_CHAT_CONFIG } from '../constants/llmDefaults';
5
4
  import {
5
+ AudioConfig,
6
6
  ChatConfig,
7
7
  GenerationConfig,
8
- LLMCapability,
8
+ LLMModel,
9
9
  LLMTool,
10
10
  Message,
11
11
  SPECIAL_TOKENS,
@@ -30,6 +30,7 @@ export class LLMController {
30
30
  private messageHistoryCallback: (messageHistory: Message[]) => void;
31
31
  private isReadyCallback: (isReady: boolean) => void;
32
32
  private isGeneratingCallback: (isGenerating: boolean) => void;
33
+ private audioConfig: AudioConfig | undefined;
33
34
 
34
35
  constructor({
35
36
  tokenCallback,
@@ -72,18 +73,10 @@ export class LLMController {
72
73
  }
73
74
 
74
75
  public async load({
75
- modelSource,
76
- tokenizerSource,
77
- tokenizerConfigSource,
78
- capabilities,
79
- defaultGenerationConfig,
76
+ model,
80
77
  onDownloadProgressCallback,
81
78
  }: {
82
- modelSource: ResourceSource;
83
- tokenizerSource: ResourceSource;
84
- tokenizerConfigSource: ResourceSource;
85
- capabilities?: readonly LLMCapability[];
86
- defaultGenerationConfig?: GenerationConfig;
79
+ model: LLMModel;
87
80
  onDownloadProgressCallback?: (downloadProgress: number) => void;
88
81
  }) {
89
82
  // reset inner state when loading new model
@@ -94,13 +87,13 @@ export class LLMController {
94
87
  try {
95
88
  const tokenizersPromise = ResourceFetcher.fetch(
96
89
  undefined,
97
- tokenizerSource,
98
- tokenizerConfigSource
90
+ model.tokenizerSource,
91
+ model.tokenizerConfigSource
99
92
  );
100
93
 
101
94
  const modelPromise = ResourceFetcher.fetch(
102
95
  onDownloadProgressCallback,
103
- modelSource
96
+ model.modelSource
104
97
  );
105
98
 
106
99
  const [tokenizersResults, modelResult] = await Promise.all([
@@ -124,16 +117,18 @@ export class LLMController {
124
117
  this.nativeModule.unload();
125
118
  }
126
119
 
120
+ this.audioConfig = model.audioConfig;
121
+
127
122
  this.nativeModule = await global.loadLLM(
128
123
  modelPath,
129
124
  tokenizerPath,
130
- capabilities ?? []
125
+ model.capabilities ?? []
131
126
  );
132
- if (defaultGenerationConfig) {
127
+ if (model.generationConfig) {
133
128
  // Apply model-specific recommended sampling defaults before flipping
134
129
  // isReady so callers that react to it see the right config on first
135
130
  // send. User-provided `configure()` calls still override these.
136
- this.applyGenerationConfig(defaultGenerationConfig);
131
+ this.applyGenerationConfig(model.generationConfig);
137
132
  }
138
133
  this.isReadyCallback(true);
139
134
  this.onToken = (data: string) => {
@@ -236,6 +231,17 @@ export class LLMController {
236
231
  return token;
237
232
  }
238
233
 
234
+ private getAudioToken(): string {
235
+ const token = this.tokenizerConfig.audio_token;
236
+ if (!token) {
237
+ throw new RnExecutorchError(
238
+ RnExecutorchErrorCode.InvalidConfig,
239
+ "Tokenizer config is missing 'audio_token'. Audio-capable models require tokenizerConfigSource with an 'audio_token' field."
240
+ );
241
+ }
242
+ return token;
243
+ }
244
+
239
245
  private filterSpecialTokens(text: string): string {
240
246
  let filtered = text;
241
247
  if (
@@ -244,6 +250,12 @@ export class LLMController {
244
250
  ) {
245
251
  filtered = filtered.replaceAll(this.tokenizerConfig.eos_token, '');
246
252
  }
253
+ if (
254
+ SPECIAL_TOKENS.EOT_TOKEN in this.tokenizerConfig &&
255
+ this.tokenizerConfig.eot_token
256
+ ) {
257
+ filtered = filtered.replaceAll(this.tokenizerConfig.eot_token, '');
258
+ }
247
259
  if (
248
260
  SPECIAL_TOKENS.PAD_TOKEN in this.tokenizerConfig &&
249
261
  this.tokenizerConfig.pad_token
@@ -269,25 +281,37 @@ export class LLMController {
269
281
  this.isGeneratingCallback(false);
270
282
  }
271
283
 
272
- public async forward(input: string, imagePaths?: string[]): Promise<string> {
284
+ public async forward(
285
+ input: string,
286
+ imagePaths?: string[],
287
+ audioWaveforms?: Float32Array[]
288
+ ): Promise<string> {
273
289
  if (!this._isReady) {
274
290
  throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded);
275
291
  }
276
292
  if (this._isGenerating) {
277
293
  throw new RnExecutorchError(RnExecutorchErrorCode.ModelGenerating);
278
294
  }
295
+ const hasImages = !!imagePaths && imagePaths.length > 0;
296
+ const hasAudio = !!audioWaveforms && audioWaveforms.length > 0;
279
297
  try {
280
298
  this.isGeneratingCallback(true);
281
299
  this.nativeModule.reset();
282
- const response =
283
- imagePaths && imagePaths.length > 0
284
- ? await this.nativeModule.generateMultimodal(
285
- input,
286
- imagePaths.map(normalizeImagePath),
287
- this.getImageToken(),
288
- this.onToken
289
- )
290
- : await this.nativeModule.generate(input, this.onToken);
300
+ let response: string;
301
+ if (hasImages || hasAudio) {
302
+ response = await this.nativeModule.generateMultimodal(
303
+ input,
304
+ this.onToken,
305
+ {
306
+ imagePaths: hasImages ? imagePaths!.map(normalizeImagePath) : null,
307
+ imageToken: hasImages ? this.getImageToken() : null,
308
+ audioWaveforms: hasAudio ? audioWaveforms! : null,
309
+ audioToken: hasAudio ? this.getAudioToken() : null,
310
+ }
311
+ );
312
+ } else {
313
+ response = await this.nativeModule.generate(input, this.onToken);
314
+ }
291
315
  return this.filterSpecialTokens(response);
292
316
  } catch (e) {
293
317
  throw parseUnknownError(e);
@@ -355,7 +379,9 @@ export class LLMController {
355
379
  const imagePaths = messages
356
380
  .filter((m) => m.mediaPath)
357
381
  .map((m) => m.mediaPath!);
358
-
382
+ const audioWaveforms = messages
383
+ .filter((m) => m.audioWaveform)
384
+ .map((m) => m.audioWaveform!);
359
385
  const renderedChat: string = this.applyChatTemplate(
360
386
  messages,
361
387
  this.tokenizerConfig,
@@ -365,19 +391,22 @@ export class LLMController {
365
391
 
366
392
  return await this.forward(
367
393
  renderedChat,
368
- imagePaths.length > 0 ? imagePaths : undefined
394
+ imagePaths.length > 0 ? imagePaths : undefined,
395
+ audioWaveforms.length > 0 ? audioWaveforms : undefined
369
396
  );
370
397
  }
371
398
 
372
399
  public async sendMessage(
373
400
  message: string,
374
- media?: { imagePath?: string }
401
+ media?: { imagePath?: string; audioBuffer?: Float32Array }
375
402
  ): Promise<string> {
376
403
  const mediaPath = media?.imagePath;
404
+ const audioBuffer = media?.audioBuffer;
377
405
  const newMessage: Message = {
378
406
  content: message,
379
407
  role: 'user',
380
408
  ...(mediaPath ? { mediaPath } : {}),
409
+ ...(audioBuffer ? { audioWaveform: audioBuffer } : {}),
381
410
  };
382
411
  const updatedHistory = [...this._messageHistory, newMessage];
383
412
  this.messageHistoryCallback(updatedHistory);
@@ -392,7 +421,22 @@ export class LLMController {
392
421
  );
393
422
  const textTokens = this.nativeModule.countTextTokens(rendered);
394
423
  const imageCount = messages.filter((m) => m.mediaPath).length;
395
- return textTokens + imageCount * (visualTokenCount - 1);
424
+ // Audio soft-token expansion: audio_encoder pads samples to
425
+ // multiples of this.audioConfig.samplesPerBlock (7680 @ 16 kHz) and emits
426
+ // this.audioConfig.tokensPerBlock (~12) soft tokens per padded block. The
427
+ // rendered template only contributes 1 token for the audio placeholder,
428
+ // so add (expansion - 1) per audio message to match prefill consumption.
429
+ const audioTokenExpansion = messages.reduce((acc, m) => {
430
+ if (!m.audioWaveform) return acc;
431
+ const kBlocks = Math.max(
432
+ 1,
433
+ Math.ceil(m.audioWaveform.length / this.audioConfig!.samplesPerBlock)
434
+ );
435
+ return acc + (this.audioConfig!.tokensPerBlock * kBlocks - 1);
436
+ }, 0);
437
+ return (
438
+ textTokens + imageCount * (visualTokenCount - 1) + audioTokenExpansion
439
+ );
396
440
  };
397
441
  const maxContextLength = this.nativeModule.getMaxContextLength();
398
442
  const messageHistoryWithPrompt =
@@ -497,12 +541,17 @@ function normalizeImagePath(path: string): string {
497
541
  * @returns Messages with image-bearing turns rewritten to structured content.
498
542
  */
499
543
  function messagesForChatTemplate(messages: Message[]): any[] {
500
- return messages.map((m) =>
501
- m.mediaPath && typeof m.content === 'string'
502
- ? {
503
- ...m,
504
- content: [{ type: 'image' }, { type: 'text', text: m.content }],
505
- }
506
- : m
507
- );
544
+ return messages.map((m) => {
545
+ if (typeof m.content !== 'string') return m;
546
+ const hasImage = !!m.mediaPath;
547
+ const hasAudio = !!m.audioWaveform;
548
+ if (!hasImage && !hasAudio) return m;
549
+ const parts: any[] = [];
550
+ if (hasImage) parts.push({ type: 'image' });
551
+ if (hasAudio) parts.push({ type: 'audio' });
552
+ parts.push({ type: 'text', text: m.content });
553
+ // Drop the Float32Array on the clone only — passing it into the Jinja
554
+ // template engine slows render past 3s. Don't mutate m;
555
+ return { ...m, content: parts, audioWaveform: undefined };
556
+ });
508
557
  }
@@ -58,11 +58,7 @@ export function useLLM({
58
58
  (async () => {
59
59
  try {
60
60
  await controllerInstance.load({
61
- modelSource: model.modelSource,
62
- tokenizerSource: model.tokenizerSource,
63
- tokenizerConfigSource: model.tokenizerConfigSource!,
64
- capabilities: model.capabilities,
65
- defaultGenerationConfig: model.generationConfig,
61
+ model: model,
66
62
  onDownloadProgressCallback: setDownloadProgress,
67
63
  });
68
64
  } catch (e) {
@@ -106,7 +102,10 @@ export function useLLM({
106
102
  );
107
103
 
108
104
  const sendMessage = useCallback(
109
- (message: string, media?: { imagePath?: string }) => {
105
+ (
106
+ message: string,
107
+ media?: { imagePath?: string; audioBuffer?: Float32Array }
108
+ ) => {
110
109
  setResponse('');
111
110
  return controllerInstance.sendMessage(message, media);
112
111
  },