react-native-executorch 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/libs/classes.jar +0 -0
- package/common/rnexecutorch/host_objects/JsiConversions.h +43 -0
- package/common/rnexecutorch/models/llm/LLM.cpp +55 -42
- package/common/rnexecutorch/models/llm/LLM.h +4 -3
- package/common/rnexecutorch/models/llm/Types.h +23 -0
- package/common/runner/base_llm_runner.cpp +10 -3
- package/common/runner/base_llm_runner.h +1 -0
- package/common/runner/constants.h +15 -1
- package/common/runner/encoders/audio_encoder.cpp +111 -0
- package/common/runner/encoders/audio_encoder.h +40 -0
- package/common/runner/encoders/vision_encoder.cpp +0 -1
- package/common/runner/irunner.h +5 -0
- package/common/runner/multimodal_decoder_runner.h +50 -1
- package/common/runner/multimodal_input.h +16 -1
- package/common/runner/multimodal_prefiller.cpp +374 -64
- package/common/runner/multimodal_prefiller.h +57 -6
- package/common/runner/multimodal_runner.cpp +19 -12
- package/common/runner/multimodal_runner.h +1 -1
- package/common/runner/sampler.cpp +111 -35
- package/common/runner/sampler.h +13 -5
- package/common/runner/text_decoder_runner.cpp +1 -4
- package/common/runner/text_decoder_runner.h +3 -2
- package/common/runner/text_prefiller.cpp +8 -8
- package/common/runner/text_prefiller.h +8 -1
- package/common/runner/text_runner.cpp +35 -9
- package/common/runner/text_token_generator.h +2 -3
- package/common/runner/util.h +0 -1
- package/lib/module/constants/llmDefaults.js +1 -1
- package/lib/module/constants/llmDefaults.js.map +1 -1
- package/lib/module/constants/modelRegistry.js +33 -2
- package/lib/module/constants/modelRegistry.js.map +1 -1
- package/lib/module/constants/modelUrls.js +43 -6
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/controllers/LLMController.js +69 -20
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +1 -5
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/modules/natural_language_processing/LLMModule.js +12 -7
- package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
- package/lib/module/types/llm.js +11 -0
- package/lib/module/types/llm.js.map +1 -1
- package/lib/typescript/constants/llmDefaults.d.ts +1 -1
- package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
- package/lib/typescript/constants/modelRegistry.d.ts +28 -1
- package/lib/typescript/constants/modelRegistry.d.ts.map +1 -1
- package/lib/typescript/constants/modelUrls.d.ts +40 -12
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts +7 -9
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +6 -3
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
- package/lib/typescript/types/llm.d.ts +63 -36
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/package.json +1 -1
- package/react-native-executorch.podspec +6 -0
- package/src/constants/llmDefaults.ts +1 -1
- package/src/constants/modelRegistry.ts +34 -2
- package/src/constants/modelUrls.ts +47 -6
- package/src/controllers/LLMController.ts +89 -40
- package/src/hooks/natural_language_processing/useLLM.ts +5 -6
- package/src/modules/natural_language_processing/LLMModule.ts +19 -8
- package/src/types/llm.ts +64 -34
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/include/executorch/ExecuTorch.h +2 -0
- package/third-party/include/executorch/ExecuTorchModule.h +46 -0
- package/third-party/include/executorch/extension/data_loader/buffer_data_loader.h +4 -3
- package/third-party/include/executorch/extension/data_loader/mman.h +46 -0
- package/third-party/include/executorch/extension/data_loader/mmap_data_loader.h +4 -0
- package/third-party/include/executorch/extension/data_loader/shared_ptr_data_loader.h +7 -3
- package/third-party/include/executorch/extension/module/module.h +47 -8
- package/third-party/include/executorch/extension/tensor/tensor_ptr.h +17 -5
- package/third-party/include/executorch/kernels/optimized/Functions.h +12 -0
- package/third-party/include/executorch/kernels/optimized/NativeFunctions.h +4 -0
- package/third-party/include/executorch/kernels/portable/Functions.h +18 -0
- package/third-party/include/executorch/kernels/portable/NativeFunctions.h +6 -0
- package/third-party/include/executorch/runtime/backend/backend_options_map.h +37 -0
- package/third-party/include/executorch/runtime/core/array_ref.h +3 -1
- package/third-party/include/executorch/runtime/core/error.h +1 -0
- package/third-party/include/executorch/runtime/core/evalue.h +256 -9
- package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +24 -0
- package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +9 -6
- package/third-party/include/executorch/runtime/core/portable_type/device.h +3 -4
- package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +31 -1
- package/third-party/include/executorch/runtime/executor/method.h +9 -3
- package/third-party/include/executorch/runtime/executor/method_meta.h +14 -0
- package/third-party/include/executorch/runtime/executor/platform_memory_allocator.h +12 -2
- package/third-party/include/executorch/runtime/executor/program.h +3 -1
- package/third-party/include/executorch/runtime/executor/tensor_parser.h +5 -1
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +9 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/mlx.metallib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/mlx.metallib +0 -0
|
@@ -4,56 +4,76 @@ import { ResourceSource } from './common';
|
|
|
4
4
|
* Capabilities a multimodal LLM can have.
|
|
5
5
|
* @category Types
|
|
6
6
|
*/
|
|
7
|
-
export type LLMCapability = 'vision';
|
|
7
|
+
export type LLMCapability = 'vision' | 'audio';
|
|
8
8
|
/**
|
|
9
9
|
* Derives the media argument shape for `sendMessage` from a capabilities tuple.
|
|
10
10
|
* @category Types
|
|
11
11
|
*/
|
|
12
|
-
export type MediaArg<C extends readonly LLMCapability[]> = 'vision' extends C[number] ? {
|
|
12
|
+
export type MediaArg<C extends readonly LLMCapability[]> = ('vision' extends C[number] ? {
|
|
13
13
|
imagePath?: string;
|
|
14
|
-
} : object
|
|
14
|
+
} : object) & ('audio' extends C[number] ? {
|
|
15
|
+
audioBuffer?: Float32Array;
|
|
16
|
+
} : object);
|
|
15
17
|
/**
|
|
16
18
|
* Union of all built-in LLM model names.
|
|
17
19
|
* @category Types
|
|
18
20
|
*/
|
|
19
|
-
export type LLMModelName = 'llama-3.2-3b' | 'llama-3.2-3b-qlora' | 'llama-3.2-3b-spinquant' | 'llama-3.2-1b' | 'llama-3.2-1b-qlora' | 'llama-3.2-1b-spinquant' | 'qwen3-0.6b' | 'qwen3-0.6b-quantized' | 'qwen3-1.7b' | 'qwen3-1.7b-quantized' | 'qwen3-4b' | 'qwen3-4b-quantized' | 'hammer2.1-0.5b' | 'hammer2.1-0.5b-quantized' | 'hammer2.1-1.5b' | 'hammer2.1-1.5b-quantized' | 'hammer2.1-3b' | 'hammer2.1-3b-quantized' | 'smollm2.1-135m' | 'smollm2.1-135m-quantized' | 'smollm2.1-360m' | 'smollm2.1-360m-quantized' | 'smollm2.1-1.7b' | 'smollm2.1-1.7b-quantized' | 'qwen2.5-0.5b' | 'qwen2.5-0.5b-quantized' | 'qwen2.5-1.5b' | 'qwen2.5-1.5b-quantized' | 'qwen2.5-3b' | 'qwen2.5-3b-quantized' | 'phi-4-mini-4b' | 'phi-4-mini-4b-quantized' | 'lfm2.5-350m' | 'lfm2.5-350m-quantized' | 'lfm2.5-1.2b-instruct' | 'lfm2.5-1.2b-instruct-quantized' | 'lfm2.5-vl-1.6b-quantized' | 'lfm2.5-vl-450m-quantized' | 'qwen3.5-0.8b-quantized' | 'qwen3.5-2b-quantized' | 'bielik-v3.0-1.5b' | 'bielik-v3.0-1.5b-quantized';
|
|
21
|
+
export type LLMModelName = 'gemma4-e2b' | 'gemma4-e2b-multimodal' | 'llama-3.2-3b' | 'llama-3.2-3b-qlora' | 'llama-3.2-3b-spinquant' | 'llama-3.2-1b' | 'llama-3.2-1b-qlora' | 'llama-3.2-1b-spinquant' | 'qwen3-0.6b' | 'qwen3-0.6b-quantized' | 'qwen3-1.7b' | 'qwen3-1.7b-quantized' | 'qwen3-4b' | 'qwen3-4b-quantized' | 'hammer2.1-0.5b' | 'hammer2.1-0.5b-quantized' | 'hammer2.1-1.5b' | 'hammer2.1-1.5b-quantized' | 'hammer2.1-3b' | 'hammer2.1-3b-quantized' | 'smollm2.1-135m' | 'smollm2.1-135m-quantized' | 'smollm2.1-360m' | 'smollm2.1-360m-quantized' | 'smollm2.1-1.7b' | 'smollm2.1-1.7b-quantized' | 'qwen2.5-0.5b' | 'qwen2.5-0.5b-quantized' | 'qwen2.5-1.5b' | 'qwen2.5-1.5b-quantized' | 'qwen2.5-3b' | 'qwen2.5-3b-quantized' | 'phi-4-mini-4b' | 'phi-4-mini-4b-quantized' | 'lfm2.5-350m' | 'lfm2.5-350m-quantized' | 'lfm2.5-1.2b-instruct' | 'lfm2.5-1.2b-instruct-quantized' | 'lfm2.5-vl-1.6b-quantized' | 'lfm2.5-vl-450m-quantized' | 'qwen3.5-0.8b-quantized' | 'qwen3.5-2b-quantized' | 'bielik-v3.0-1.5b' | 'bielik-v3.0-1.5b-quantized';
|
|
22
|
+
/**
|
|
23
|
+
* Audio soft-token expansion constants for audio_encoder.
|
|
24
|
+
* @category Types
|
|
25
|
+
*/
|
|
26
|
+
export interface AudioConfig {
|
|
27
|
+
samplesPerBlock: number;
|
|
28
|
+
tokensPerBlock: number;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Properties defining LLMModel.
|
|
32
|
+
* @category Types
|
|
33
|
+
*/
|
|
34
|
+
export interface LLMModel {
|
|
35
|
+
/**
|
|
36
|
+
* The built-in model name (e.g. `'llama-3.2-3b'`). Used for telemetry and hook reload triggers.
|
|
37
|
+
* Pass one of the pre-built LLM constants (e.g. `LLAMA3_2_3B`) to populate all required fields.
|
|
38
|
+
*/
|
|
39
|
+
modelName: LLMModelName;
|
|
40
|
+
/**
|
|
41
|
+
* `ResourceSource` that specifies the location of the model binary.
|
|
42
|
+
*/
|
|
43
|
+
modelSource: ResourceSource;
|
|
44
|
+
/**
|
|
45
|
+
* `ResourceSource` pointing to the JSON file which contains the tokenizer.
|
|
46
|
+
*/
|
|
47
|
+
tokenizerSource: ResourceSource;
|
|
48
|
+
/**
|
|
49
|
+
* `ResourceSource` pointing to the JSON file which contains the tokenizer config.
|
|
50
|
+
*/
|
|
51
|
+
tokenizerConfigSource: ResourceSource;
|
|
52
|
+
/**
|
|
53
|
+
* Optional list of modality capabilities the model supports.
|
|
54
|
+
* Determines the type of the `media` argument in `sendMessage`.
|
|
55
|
+
* Example: `['vision']` enables `sendMessage(text, { imagePath })`.
|
|
56
|
+
*/
|
|
57
|
+
capabilities?: readonly LLMCapability[];
|
|
58
|
+
/**
|
|
59
|
+
* Recommended default generation settings, typically copied from the
|
|
60
|
+
* upstream `generation_config.json` or the model card. Applied automatically
|
|
61
|
+
* after the native module loads and before any user `configure()` call,
|
|
62
|
+
* so callers only need to override the values they want to change.
|
|
63
|
+
*/
|
|
64
|
+
generationConfig?: GenerationConfig;
|
|
65
|
+
/**
|
|
66
|
+
* Defines config for audio input modality for multimodal LLMs.
|
|
67
|
+
* `capabilities` must include 'audio'.
|
|
68
|
+
*/
|
|
69
|
+
audioConfig?: AudioConfig;
|
|
70
|
+
}
|
|
20
71
|
/**
|
|
21
72
|
* Properties for initializing and configuring a Large Language Model (LLM) instance.
|
|
22
73
|
* @category Types
|
|
23
74
|
*/
|
|
24
75
|
export interface LLMProps {
|
|
25
|
-
model:
|
|
26
|
-
/**
|
|
27
|
-
* The built-in model name (e.g. `'llama-3.2-3b'`). Used for telemetry and hook reload triggers.
|
|
28
|
-
* Pass one of the pre-built LLM constants (e.g. `LLAMA3_2_3B`) to populate all required fields.
|
|
29
|
-
*/
|
|
30
|
-
modelName: LLMModelName;
|
|
31
|
-
/**
|
|
32
|
-
* `ResourceSource` that specifies the location of the model binary.
|
|
33
|
-
*/
|
|
34
|
-
modelSource: ResourceSource;
|
|
35
|
-
/**
|
|
36
|
-
* `ResourceSource` pointing to the JSON file which contains the tokenizer.
|
|
37
|
-
*/
|
|
38
|
-
tokenizerSource: ResourceSource;
|
|
39
|
-
/**
|
|
40
|
-
* `ResourceSource` pointing to the JSON file which contains the tokenizer config.
|
|
41
|
-
*/
|
|
42
|
-
tokenizerConfigSource: ResourceSource;
|
|
43
|
-
/**
|
|
44
|
-
* Optional list of modality capabilities the model supports.
|
|
45
|
-
* Determines the type of the `media` argument in `sendMessage`.
|
|
46
|
-
* Example: `['vision']` enables `sendMessage(text, { imagePath })`.
|
|
47
|
-
*/
|
|
48
|
-
capabilities?: readonly LLMCapability[];
|
|
49
|
-
/**
|
|
50
|
-
* Recommended default generation settings, typically copied from the
|
|
51
|
-
* upstream `generation_config.json` or the model card. Applied automatically
|
|
52
|
-
* after the native module loads and before any user `configure()` call,
|
|
53
|
-
* so callers only need to override the values they want to change.
|
|
54
|
-
*/
|
|
55
|
-
generationConfig?: GenerationConfig;
|
|
56
|
-
};
|
|
76
|
+
model: LLMModel;
|
|
57
77
|
/**
|
|
58
78
|
* Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
|
|
59
79
|
*/
|
|
@@ -224,6 +244,12 @@ export interface Message {
|
|
|
224
244
|
* controller normalizes the path before passing it to native code.
|
|
225
245
|
*/
|
|
226
246
|
mediaPath?: string;
|
|
247
|
+
/**
|
|
248
|
+
* Optional fp32 mono 16 kHz PCM buffer. Only valid on `user` messages for
|
|
249
|
+
* models with the `'audio'` capability. The controller forwards it to the
|
|
250
|
+
* native `generateMultimodal` path.
|
|
251
|
+
*/
|
|
252
|
+
audioWaveform?: Float32Array;
|
|
227
253
|
}
|
|
228
254
|
/**
|
|
229
255
|
* Represents a tool call made by the model.
|
|
@@ -309,6 +335,7 @@ export interface ContextStrategy {
|
|
|
309
335
|
export declare const SPECIAL_TOKENS: {
|
|
310
336
|
BOS_TOKEN: string;
|
|
311
337
|
EOS_TOKEN: string;
|
|
338
|
+
EOT_TOKEN: string;
|
|
312
339
|
UNK_TOKEN: string;
|
|
313
340
|
SEP_TOKEN: string;
|
|
314
341
|
PAD_TOKEN: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/types/llm.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE1C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/types/llm.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE1C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE/C;;;GAGG;AACH,MAAM,MAAM,QAAQ,CAAC,CAAC,SAAS,SAAS,aAAa,EAAE,IACrD,CAAC,QAAQ,SAAS,CAAC,CAAC,MAAM,CAAC,GAAG;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAAC,GAC5D,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,GAAG;IAAE,WAAW,CAAC,EAAE,YAAY,CAAA;CAAE,GAAG,MAAM,CAAC,CAAC;AAE1E;;;GAGG;AACH,MAAM,MAAM,YAAY,GACpB,YAAY,GACZ,uBAAuB,GACvB,cAAc,GACd,oBAAoB,GACpB,wBAAwB,GACxB,cAAc,GACd,oBAAoB,GACpB,wBAAwB,GACxB,YAAY,GACZ,sBAAsB,GACtB,YAAY,GACZ,sBAAsB,GACtB,UAAU,GACV,oBAAoB,GACpB,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,cAAc,GACd,wBAAwB,GACxB,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,gBAAgB,GAChB,0BAA0B,GAC1B,cAAc,GACd,wBAAwB,GACxB,cAAc,GACd,wBAAwB,GACxB,YAAY,GACZ,sBAAsB,GACtB,eAAe,GACf,yBAAyB,GACzB,aAAa,GACb,uBAAuB,GACvB,sBAAsB,GACtB,gCAAgC,GAChC,0BAA0B,GAC1B,0BAA0B,GAC1B,wBAAwB,GACxB,sBAAsB,GACtB,kBAAkB,GAClB,4BAA4B,CAAC;AAEjC;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB;;;OAGG;IACH,SAAS,EAAE,YAAY,CAAC;IACxB;;OAEG;IACH,WAAW,EAAE,cAAc,CAAC;IAC5B;;OAEG;IACH,eAAe,EAAE,cAAc,CAAC;IAChC;;OAEG;IACH,qBAAqB,EAAE,cAAc,CAAC;IACtC;;;;OAIG;IACH,YAAY,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IACxC;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;;OAGG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,QAAQ,CAAC;IAChB;;OAEG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,cAAc,EAAE,OAAO,EAAE,CAAC;IAE1B;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC;IAEjB;;OAEG;IACH,YAAY,EAAE,OAAO,CAAC;IAEtB;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IAEzB;;OAEG;IACH,KAAK,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAEhC;;;;OAIG;IACH,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,WAAW,EAAE,gBAAgB,EAAE,EAAE,SAAS,KAAK,IAAI,CAAC;IAE9E;;;OAGG;IACH,sBAAsB,EAAE,MAAM,MAAM,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,KAAK,CAAC,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IACtE;;;OAGG;IACH,kBAAkB,EAAE,MAAM,MAAM,CAAC;IACjC;;;OAGG;IACH,mBAAmB,EAAE,MAAM,MAAM,CAAC;IAElC;;;OAGG;IACH,aAAa,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAEvC;;OAEG;IACH,SAAS,EAAE,MAAM,IAAI,CAAC;CACvB;AAED;;;;GAIG;AACH,MAAM,WAAW,iBAAiB,CAChC,CAAC,SAAS,SAAS,aAAa,EAAE,GAAG,SAAS,aAAa,EAAE,CAC7D,SAAQ,WAAW;IACnB;;;;;;;OAOG;IACH,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACxE;AAED;;;;GAIG;AACH,MAAM,WAAW,OAAQ,SAAQ,WAAW;IAC1C;;;;;OAKG;IACH,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB;;;;;;;;OAQG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAEjC;;;;;;;;OAQG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;;;;;;;;;OAcG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,aAAa,CAAC,EAAE,YAAY,CAAC;CAC9B;AAED;;;;;GAKG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,CAAC;AAE7B;;;;;;GAMG;AACH,MAAM,WAAW,UAAU;IACzB,qBAAqB,EAAE,OAAO,EAAE,CAAC;IACjC,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,eAAe,CAAC;CAClC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,OAAO,EAAE,CAAC;IACjB,mBAAmB,EAAE,CAAC,IAAI,EAAE,QAAQ,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAChE,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sCAAsC;IACtC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B;;;;;;;OAOG;IACH,YAAY,CACV,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,OAAO,EAAE,EAClB,gBAAgB,EAAE,MAAM,EACxB,aAAa,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,MAAM,GAC7C,OAAO,EAAE,CAAC;CACd;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;;;;;;;;;CAS1B,CAAC"}
|
package/package.json
CHANGED
|
@@ -62,6 +62,12 @@ Pod::Spec.new do |s|
|
|
|
62
62
|
|
|
63
63
|
s.libraries = "z"
|
|
64
64
|
s.ios.vendored_frameworks = "third-party/ios/ExecutorchLib.xcframework"
|
|
65
|
+
|
|
66
|
+
# NOTE: mlx.metallib (the MLX GPU kernels) is bundled INSIDE
|
|
67
|
+
# ExecutorchLib.framework, colocated with the binary that contains the MLX
|
|
68
|
+
# code. MLX's runtime loader resolves the metallib relative to that binary
|
|
69
|
+
# (via dladdr), so it must live next to it in the framework — not at the app
|
|
70
|
+
# bundle root.
|
|
65
71
|
# Exclude file with tests to not introduce gtest dependency.
|
|
66
72
|
# Do not include the headers from common/rnexecutorch/jsi/ as source files.
|
|
67
73
|
# Xcode/Cocoapods leaks them to other pods that an app also depends on, so if
|
|
@@ -6,7 +6,7 @@ import { SlidingWindowContextStrategy } from '../utils/llms/context_strategy';
|
|
|
6
6
|
* @category Utilities - LLM
|
|
7
7
|
*/
|
|
8
8
|
export const DEFAULT_SYSTEM_PROMPT =
|
|
9
|
-
"You are a knowledgeable, efficient, and direct AI assistant. Provide concise answers, focusing on the key information needed. Offer suggestions tactfully when appropriate to improve outcomes. Engage in productive collaboration with the user. Don't return too much text.";
|
|
9
|
+
"You are a knowledgeable, efficient, and direct AI assistant. Provide concise answers, focusing on the key information needed. Offer suggestions tactfully when appropriate to improve outcomes. Engage in productive collaboration with the user. Don't return too much text. If provided with audio samples treat it with at most importance";
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
12
|
* Generates a default structured output prompt based on the provided JSON schema.
|
|
@@ -38,7 +38,7 @@ import { RnExecutorchErrorCode } from '../errors/ErrorCodes';
|
|
|
38
38
|
* compile-time error.
|
|
39
39
|
* @category Utils
|
|
40
40
|
*/
|
|
41
|
-
export type Backend = 'xnnpack' | 'coreml' | 'vulkan' | 'qnn';
|
|
41
|
+
export type Backend = 'xnnpack' | 'coreml' | 'vulkan' | 'qnn' | 'mlx';
|
|
42
42
|
|
|
43
43
|
/**
|
|
44
44
|
* Options for a `models` accessor call.
|
|
@@ -78,7 +78,7 @@ type ConfigOf<V> = Extract<
|
|
|
78
78
|
>;
|
|
79
79
|
type BackendsOf<V> = Extract<keyof V, Backend>;
|
|
80
80
|
|
|
81
|
-
const BACKEND_ORDER: Backend[] = ['xnnpack', 'coreml', 'vulkan', 'qnn'];
|
|
81
|
+
const BACKEND_ORDER: Backend[] = ['xnnpack', 'coreml', 'mlx', 'vulkan', 'qnn'];
|
|
82
82
|
|
|
83
83
|
function firstBackend(variants: AnyVariantMap): Backend {
|
|
84
84
|
for (const b of BACKEND_ORDER) {
|
|
@@ -181,6 +181,33 @@ function tts<C extends TextToSpeechModelConfig>(c: C): () => C {
|
|
|
181
181
|
// Per-backend variant maps for models that ship more than one backend.
|
|
182
182
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
183
183
|
|
|
184
|
+
const GEMMA4_E2B_VARIANTS = {
|
|
185
|
+
mlx: {
|
|
186
|
+
base: {
|
|
187
|
+
modelName: 'gemma4-e2b' as const,
|
|
188
|
+
modelSource: M.GEMMA4_E2B_MLX_MODEL,
|
|
189
|
+
tokenizerSource: M.GEMMA4_E2B_TOKENIZER,
|
|
190
|
+
tokenizerConfigSource: M.GEMMA4_E2B_TOKENIZER_CONFIG,
|
|
191
|
+
},
|
|
192
|
+
},
|
|
193
|
+
xnnpack: {
|
|
194
|
+
base: {
|
|
195
|
+
modelName: 'gemma4-e2b' as const,
|
|
196
|
+
modelSource: M.GEMMA4_E2B_XNNPACK_MODEL,
|
|
197
|
+
tokenizerSource: M.GEMMA4_E2B_TOKENIZER,
|
|
198
|
+
tokenizerConfigSource: M.GEMMA4_E2B_TOKENIZER_CONFIG,
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
vulkan: {
|
|
202
|
+
base: {
|
|
203
|
+
modelName: 'gemma4-e2b' as const,
|
|
204
|
+
modelSource: M.GEMMA4_E2B_VULKAN_MODEL,
|
|
205
|
+
tokenizerSource: M.GEMMA4_E2B_TOKENIZER,
|
|
206
|
+
tokenizerConfigSource: M.GEMMA4_E2B_TOKENIZER_CONFIG,
|
|
207
|
+
},
|
|
208
|
+
},
|
|
209
|
+
};
|
|
210
|
+
|
|
184
211
|
const EFFICIENTNET_V2_S_VARIANTS = {
|
|
185
212
|
xnnpack: {
|
|
186
213
|
base: {
|
|
@@ -496,10 +523,15 @@ export const models = {
|
|
|
496
523
|
M.LFM2_5_1_2B_INSTRUCT_QUANTIZED
|
|
497
524
|
),
|
|
498
525
|
bielik_v3_0_1_5b: pair(M.BIELIK_V3_0_1_5B, M.BIELIK_V3_0_1_5B_QUANTIZED),
|
|
526
|
+
gemma4_e2b: variant(GEMMA4_E2B_VARIANTS, {
|
|
527
|
+
ios: 'mlx',
|
|
528
|
+
android: 'vulkan',
|
|
529
|
+
}),
|
|
499
530
|
// Multimodal LLMs — same hook/module as plain LLMs, listed here so users
|
|
500
531
|
// pick a model by capability ("LLM") rather than by modality.
|
|
501
532
|
lfm2_5_vl_1_6b: base(M.LFM2_5_VL_1_6B_QUANTIZED),
|
|
502
533
|
lfm2_5_vl_450m: base(M.LFM2_5_VL_450M_QUANTIZED),
|
|
534
|
+
gemma4_e2b_multimodal: base(M.GEMMA4_E2B_MM),
|
|
503
535
|
},
|
|
504
536
|
classification: {
|
|
505
537
|
efficientnet_v2_s: variant(EFFICIENTNET_V2_S_VARIANTS),
|
|
@@ -125,6 +125,47 @@ export const QWEN3_0_6B_QUANTIZED = {
|
|
|
125
125
|
generationConfig: QWEN3_GENERATION_CONFIG,
|
|
126
126
|
} as const;
|
|
127
127
|
|
|
128
|
+
// GEMMA 4 — separate HF repo; tokenizer files live at the e2b root and are
|
|
129
|
+
// shared by all backend variants.
|
|
130
|
+
const GEMMA4_E2B_PREFIX = `${URL_PREFIX}-gemma-4/${VERSION_TAG}/e2b`;
|
|
131
|
+
export const GEMMA4_E2B_MLX_MODEL = `${GEMMA4_E2B_PREFIX}/mlx/gemma4_e2b_mlx_int4.pte`;
|
|
132
|
+
export const GEMMA4_E2B_XNNPACK_MODEL = `${GEMMA4_E2B_PREFIX}/xnnpack/gemma_4_e2b_xnnpack_8da4w.pte`;
|
|
133
|
+
export const GEMMA4_E2B_VULKAN_MODEL = `${GEMMA4_E2B_PREFIX}/vulkan/gemma_4_e2b_vulkan_8da4w.pte`;
|
|
134
|
+
export const GEMMA4_E2B_TOKENIZER = `${GEMMA4_E2B_PREFIX}/tokenizer.json`;
|
|
135
|
+
export const GEMMA4_E2B_TOKENIZER_CONFIG = `${GEMMA4_E2B_PREFIX}/tokenizer_config.json`;
|
|
136
|
+
|
|
137
|
+
const GEMMA4_E2B_MODEL =
|
|
138
|
+
Platform.OS === `android` ? GEMMA4_E2B_VULKAN_MODEL : GEMMA4_E2B_MLX_MODEL;
|
|
139
|
+
|
|
140
|
+
const GEMMA4_E2B_MLX_MM = `${URL_PREFIX}-gemma-4-multimodal/${VERSION_TAG}/e2b/mlx/gemma4_e2b_mlx_int4.pte`;
|
|
141
|
+
const GEMMA4_E2B_VULKAN_MM = `${URL_PREFIX}-gemma-4-multimodal/${VERSION_TAG}/e2b/vulkan/gemma_4_e2b_vulkan_8da4w.pte`;
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* @category Models - LLM
|
|
145
|
+
*/
|
|
146
|
+
export const GEMMA4_E2B = {
|
|
147
|
+
modelName: 'gemma4-e2b',
|
|
148
|
+
modelSource: GEMMA4_E2B_MODEL,
|
|
149
|
+
tokenizerSource: GEMMA4_E2B_TOKENIZER,
|
|
150
|
+
tokenizerConfigSource: GEMMA4_E2B_TOKENIZER_CONFIG,
|
|
151
|
+
} as const;
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* @category Models - LLM Multimodal
|
|
155
|
+
*/
|
|
156
|
+
export const GEMMA4_E2B_MM = {
|
|
157
|
+
modelName: 'gemma4-e2b-multimodal',
|
|
158
|
+
modelSource:
|
|
159
|
+
Platform.OS === `android` ? GEMMA4_E2B_VULKAN_MM : GEMMA4_E2B_MLX_MM,
|
|
160
|
+
tokenizerSource: GEMMA4_E2B_TOKENIZER,
|
|
161
|
+
tokenizerConfigSource: GEMMA4_E2B_TOKENIZER_CONFIG,
|
|
162
|
+
capabilities: ['vision', 'audio'],
|
|
163
|
+
audioConfig: {
|
|
164
|
+
samplesPerBlock: 7680,
|
|
165
|
+
tokensPerBlock: 12,
|
|
166
|
+
},
|
|
167
|
+
} as const;
|
|
168
|
+
|
|
128
169
|
/**
|
|
129
170
|
* @category Models - LLM
|
|
130
171
|
*/
|
|
@@ -816,27 +857,27 @@ export const STYLE_TRANSFER_UDNIE_QUANTIZED = {
|
|
|
816
857
|
// S2T
|
|
817
858
|
export const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/tokenizer.json`;
|
|
818
859
|
export const WHISPER_TINY_EN_MODEL_XNNPACK = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_xnnpack_fp32.pte`;
|
|
819
|
-
export const WHISPER_TINY_EN_MODEL_COREML = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/coreml/
|
|
860
|
+
export const WHISPER_TINY_EN_MODEL_COREML = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/coreml/whisper_tiny_en_coreml_fp16.pte`;
|
|
820
861
|
|
|
821
862
|
export const WHISPER_BASE_EN_TOKENIZER = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/tokenizer.json`;
|
|
822
863
|
export const WHISPER_BASE_EN_MODEL_XNNPACK = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/xnnpack/whisper_base_en_xnnpack_fp32.pte`;
|
|
823
|
-
export const WHISPER_BASE_EN_MODEL_COREML = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/coreml/
|
|
864
|
+
export const WHISPER_BASE_EN_MODEL_COREML = `${URL_PREFIX}-whisper-base.en/${VERSION_TAG}/coreml/whisper_base_en_coreml_fp16.pte`;
|
|
824
865
|
|
|
825
866
|
export const WHISPER_SMALL_EN_TOKENIZER = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/tokenizer.json`;
|
|
826
867
|
export const WHISPER_SMALL_EN_MODEL_XNNPACK = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/xnnpack/whisper_small_en_xnnpack_fp32.pte`;
|
|
827
|
-
export const WHISPER_SMALL_EN_MODEL_COREML = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/coreml/
|
|
868
|
+
export const WHISPER_SMALL_EN_MODEL_COREML = `${URL_PREFIX}-whisper-small.en/${VERSION_TAG}/coreml/whisper_small_en_coreml_fp16.pte`;
|
|
828
869
|
|
|
829
870
|
export const WHISPER_TINY_TOKENIZER = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/tokenizer.json`;
|
|
830
871
|
export const WHISPER_TINY_MODEL_XNNPACK = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/xnnpack/whisper_tiny_xnnpack_fp32.pte`;
|
|
831
|
-
export const WHISPER_TINY_MODEL_COREML = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/coreml/
|
|
872
|
+
export const WHISPER_TINY_MODEL_COREML = `${URL_PREFIX}-whisper-tiny/${VERSION_TAG}/coreml/whisper_tiny_coreml_fp16.pte`;
|
|
832
873
|
|
|
833
874
|
export const WHISPER_BASE_TOKENIZER = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/tokenizer.json`;
|
|
834
875
|
export const WHISPER_BASE_MODEL_XNNPACK = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/xnnpack/whisper_base_xnnpack_fp32.pte`;
|
|
835
|
-
export const WHISPER_BASE_MODEL_COREML = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/coreml/
|
|
876
|
+
export const WHISPER_BASE_MODEL_COREML = `${URL_PREFIX}-whisper-base/${VERSION_TAG}/coreml/whisper_base_coreml_fp16.pte`;
|
|
836
877
|
|
|
837
878
|
export const WHISPER_SMALL_TOKENIZER = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/tokenizer.json`;
|
|
838
879
|
export const WHISPER_SMALL_MODEL_XNNPACK = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/xnnpack/whisper_small_xnnpack_fp32.pte`;
|
|
839
|
-
export const WHISPER_SMALL_MODEL_COREML = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/coreml/
|
|
880
|
+
export const WHISPER_SMALL_MODEL_COREML = `${URL_PREFIX}-whisper-small/${VERSION_TAG}/coreml/whisper_small_coreml_fp16.pte`;
|
|
840
881
|
|
|
841
882
|
/**
|
|
842
883
|
* @category Models - Speech To Text
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { ResourceSource } from '../types/common';
|
|
2
1
|
import { ResourceFetcher } from '../utils/ResourceFetcher';
|
|
3
2
|
import { Template } from '@huggingface/jinja';
|
|
4
3
|
import { DEFAULT_CHAT_CONFIG } from '../constants/llmDefaults';
|
|
5
4
|
import {
|
|
5
|
+
AudioConfig,
|
|
6
6
|
ChatConfig,
|
|
7
7
|
GenerationConfig,
|
|
8
|
-
|
|
8
|
+
LLMModel,
|
|
9
9
|
LLMTool,
|
|
10
10
|
Message,
|
|
11
11
|
SPECIAL_TOKENS,
|
|
@@ -30,6 +30,7 @@ export class LLMController {
|
|
|
30
30
|
private messageHistoryCallback: (messageHistory: Message[]) => void;
|
|
31
31
|
private isReadyCallback: (isReady: boolean) => void;
|
|
32
32
|
private isGeneratingCallback: (isGenerating: boolean) => void;
|
|
33
|
+
private audioConfig: AudioConfig | undefined;
|
|
33
34
|
|
|
34
35
|
constructor({
|
|
35
36
|
tokenCallback,
|
|
@@ -72,18 +73,10 @@ export class LLMController {
|
|
|
72
73
|
}
|
|
73
74
|
|
|
74
75
|
public async load({
|
|
75
|
-
|
|
76
|
-
tokenizerSource,
|
|
77
|
-
tokenizerConfigSource,
|
|
78
|
-
capabilities,
|
|
79
|
-
defaultGenerationConfig,
|
|
76
|
+
model,
|
|
80
77
|
onDownloadProgressCallback,
|
|
81
78
|
}: {
|
|
82
|
-
|
|
83
|
-
tokenizerSource: ResourceSource;
|
|
84
|
-
tokenizerConfigSource: ResourceSource;
|
|
85
|
-
capabilities?: readonly LLMCapability[];
|
|
86
|
-
defaultGenerationConfig?: GenerationConfig;
|
|
79
|
+
model: LLMModel;
|
|
87
80
|
onDownloadProgressCallback?: (downloadProgress: number) => void;
|
|
88
81
|
}) {
|
|
89
82
|
// reset inner state when loading new model
|
|
@@ -94,13 +87,13 @@ export class LLMController {
|
|
|
94
87
|
try {
|
|
95
88
|
const tokenizersPromise = ResourceFetcher.fetch(
|
|
96
89
|
undefined,
|
|
97
|
-
tokenizerSource,
|
|
98
|
-
tokenizerConfigSource
|
|
90
|
+
model.tokenizerSource,
|
|
91
|
+
model.tokenizerConfigSource
|
|
99
92
|
);
|
|
100
93
|
|
|
101
94
|
const modelPromise = ResourceFetcher.fetch(
|
|
102
95
|
onDownloadProgressCallback,
|
|
103
|
-
modelSource
|
|
96
|
+
model.modelSource
|
|
104
97
|
);
|
|
105
98
|
|
|
106
99
|
const [tokenizersResults, modelResult] = await Promise.all([
|
|
@@ -124,16 +117,18 @@ export class LLMController {
|
|
|
124
117
|
this.nativeModule.unload();
|
|
125
118
|
}
|
|
126
119
|
|
|
120
|
+
this.audioConfig = model.audioConfig;
|
|
121
|
+
|
|
127
122
|
this.nativeModule = await global.loadLLM(
|
|
128
123
|
modelPath,
|
|
129
124
|
tokenizerPath,
|
|
130
|
-
capabilities ?? []
|
|
125
|
+
model.capabilities ?? []
|
|
131
126
|
);
|
|
132
|
-
if (
|
|
127
|
+
if (model.generationConfig) {
|
|
133
128
|
// Apply model-specific recommended sampling defaults before flipping
|
|
134
129
|
// isReady so callers that react to it see the right config on first
|
|
135
130
|
// send. User-provided `configure()` calls still override these.
|
|
136
|
-
this.applyGenerationConfig(
|
|
131
|
+
this.applyGenerationConfig(model.generationConfig);
|
|
137
132
|
}
|
|
138
133
|
this.isReadyCallback(true);
|
|
139
134
|
this.onToken = (data: string) => {
|
|
@@ -236,6 +231,17 @@ export class LLMController {
|
|
|
236
231
|
return token;
|
|
237
232
|
}
|
|
238
233
|
|
|
234
|
+
private getAudioToken(): string {
|
|
235
|
+
const token = this.tokenizerConfig.audio_token;
|
|
236
|
+
if (!token) {
|
|
237
|
+
throw new RnExecutorchError(
|
|
238
|
+
RnExecutorchErrorCode.InvalidConfig,
|
|
239
|
+
"Tokenizer config is missing 'audio_token'. Audio-capable models require tokenizerConfigSource with an 'audio_token' field."
|
|
240
|
+
);
|
|
241
|
+
}
|
|
242
|
+
return token;
|
|
243
|
+
}
|
|
244
|
+
|
|
239
245
|
private filterSpecialTokens(text: string): string {
|
|
240
246
|
let filtered = text;
|
|
241
247
|
if (
|
|
@@ -244,6 +250,12 @@ export class LLMController {
|
|
|
244
250
|
) {
|
|
245
251
|
filtered = filtered.replaceAll(this.tokenizerConfig.eos_token, '');
|
|
246
252
|
}
|
|
253
|
+
if (
|
|
254
|
+
SPECIAL_TOKENS.EOT_TOKEN in this.tokenizerConfig &&
|
|
255
|
+
this.tokenizerConfig.eot_token
|
|
256
|
+
) {
|
|
257
|
+
filtered = filtered.replaceAll(this.tokenizerConfig.eot_token, '');
|
|
258
|
+
}
|
|
247
259
|
if (
|
|
248
260
|
SPECIAL_TOKENS.PAD_TOKEN in this.tokenizerConfig &&
|
|
249
261
|
this.tokenizerConfig.pad_token
|
|
@@ -269,25 +281,37 @@ export class LLMController {
|
|
|
269
281
|
this.isGeneratingCallback(false);
|
|
270
282
|
}
|
|
271
283
|
|
|
272
|
-
public async forward(
|
|
284
|
+
public async forward(
|
|
285
|
+
input: string,
|
|
286
|
+
imagePaths?: string[],
|
|
287
|
+
audioWaveforms?: Float32Array[]
|
|
288
|
+
): Promise<string> {
|
|
273
289
|
if (!this._isReady) {
|
|
274
290
|
throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded);
|
|
275
291
|
}
|
|
276
292
|
if (this._isGenerating) {
|
|
277
293
|
throw new RnExecutorchError(RnExecutorchErrorCode.ModelGenerating);
|
|
278
294
|
}
|
|
295
|
+
const hasImages = !!imagePaths && imagePaths.length > 0;
|
|
296
|
+
const hasAudio = !!audioWaveforms && audioWaveforms.length > 0;
|
|
279
297
|
try {
|
|
280
298
|
this.isGeneratingCallback(true);
|
|
281
299
|
this.nativeModule.reset();
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
)
|
|
290
|
-
|
|
300
|
+
let response: string;
|
|
301
|
+
if (hasImages || hasAudio) {
|
|
302
|
+
response = await this.nativeModule.generateMultimodal(
|
|
303
|
+
input,
|
|
304
|
+
this.onToken,
|
|
305
|
+
{
|
|
306
|
+
imagePaths: hasImages ? imagePaths!.map(normalizeImagePath) : null,
|
|
307
|
+
imageToken: hasImages ? this.getImageToken() : null,
|
|
308
|
+
audioWaveforms: hasAudio ? audioWaveforms! : null,
|
|
309
|
+
audioToken: hasAudio ? this.getAudioToken() : null,
|
|
310
|
+
}
|
|
311
|
+
);
|
|
312
|
+
} else {
|
|
313
|
+
response = await this.nativeModule.generate(input, this.onToken);
|
|
314
|
+
}
|
|
291
315
|
return this.filterSpecialTokens(response);
|
|
292
316
|
} catch (e) {
|
|
293
317
|
throw parseUnknownError(e);
|
|
@@ -355,7 +379,9 @@ export class LLMController {
|
|
|
355
379
|
const imagePaths = messages
|
|
356
380
|
.filter((m) => m.mediaPath)
|
|
357
381
|
.map((m) => m.mediaPath!);
|
|
358
|
-
|
|
382
|
+
const audioWaveforms = messages
|
|
383
|
+
.filter((m) => m.audioWaveform)
|
|
384
|
+
.map((m) => m.audioWaveform!);
|
|
359
385
|
const renderedChat: string = this.applyChatTemplate(
|
|
360
386
|
messages,
|
|
361
387
|
this.tokenizerConfig,
|
|
@@ -365,19 +391,22 @@ export class LLMController {
|
|
|
365
391
|
|
|
366
392
|
return await this.forward(
|
|
367
393
|
renderedChat,
|
|
368
|
-
imagePaths.length > 0 ? imagePaths : undefined
|
|
394
|
+
imagePaths.length > 0 ? imagePaths : undefined,
|
|
395
|
+
audioWaveforms.length > 0 ? audioWaveforms : undefined
|
|
369
396
|
);
|
|
370
397
|
}
|
|
371
398
|
|
|
372
399
|
public async sendMessage(
|
|
373
400
|
message: string,
|
|
374
|
-
media?: { imagePath?: string }
|
|
401
|
+
media?: { imagePath?: string; audioBuffer?: Float32Array }
|
|
375
402
|
): Promise<string> {
|
|
376
403
|
const mediaPath = media?.imagePath;
|
|
404
|
+
const audioBuffer = media?.audioBuffer;
|
|
377
405
|
const newMessage: Message = {
|
|
378
406
|
content: message,
|
|
379
407
|
role: 'user',
|
|
380
408
|
...(mediaPath ? { mediaPath } : {}),
|
|
409
|
+
...(audioBuffer ? { audioWaveform: audioBuffer } : {}),
|
|
381
410
|
};
|
|
382
411
|
const updatedHistory = [...this._messageHistory, newMessage];
|
|
383
412
|
this.messageHistoryCallback(updatedHistory);
|
|
@@ -392,7 +421,22 @@ export class LLMController {
|
|
|
392
421
|
);
|
|
393
422
|
const textTokens = this.nativeModule.countTextTokens(rendered);
|
|
394
423
|
const imageCount = messages.filter((m) => m.mediaPath).length;
|
|
395
|
-
|
|
424
|
+
// Audio soft-token expansion: audio_encoder pads samples to
|
|
425
|
+
// multiples of this.audioConfig.samplesPerBlock (7680 @ 16 kHz) and emits
|
|
426
|
+
// this.audioConfig.tokensPerBlock (~12) soft tokens per padded block. The
|
|
427
|
+
// rendered template only contributes 1 token for the audio placeholder,
|
|
428
|
+
// so add (expansion - 1) per audio message to match prefill consumption.
|
|
429
|
+
const audioTokenExpansion = messages.reduce((acc, m) => {
|
|
430
|
+
if (!m.audioWaveform) return acc;
|
|
431
|
+
const kBlocks = Math.max(
|
|
432
|
+
1,
|
|
433
|
+
Math.ceil(m.audioWaveform.length / this.audioConfig!.samplesPerBlock)
|
|
434
|
+
);
|
|
435
|
+
return acc + (this.audioConfig!.tokensPerBlock * kBlocks - 1);
|
|
436
|
+
}, 0);
|
|
437
|
+
return (
|
|
438
|
+
textTokens + imageCount * (visualTokenCount - 1) + audioTokenExpansion
|
|
439
|
+
);
|
|
396
440
|
};
|
|
397
441
|
const maxContextLength = this.nativeModule.getMaxContextLength();
|
|
398
442
|
const messageHistoryWithPrompt =
|
|
@@ -497,12 +541,17 @@ function normalizeImagePath(path: string): string {
|
|
|
497
541
|
* @returns Messages with image-bearing turns rewritten to structured content.
|
|
498
542
|
*/
|
|
499
543
|
function messagesForChatTemplate(messages: Message[]): any[] {
|
|
500
|
-
return messages.map((m) =>
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
544
|
+
return messages.map((m) => {
|
|
545
|
+
if (typeof m.content !== 'string') return m;
|
|
546
|
+
const hasImage = !!m.mediaPath;
|
|
547
|
+
const hasAudio = !!m.audioWaveform;
|
|
548
|
+
if (!hasImage && !hasAudio) return m;
|
|
549
|
+
const parts: any[] = [];
|
|
550
|
+
if (hasImage) parts.push({ type: 'image' });
|
|
551
|
+
if (hasAudio) parts.push({ type: 'audio' });
|
|
552
|
+
parts.push({ type: 'text', text: m.content });
|
|
553
|
+
// Drop the Float32Array on the clone only — passing it into the Jinja
|
|
554
|
+
// template engine slows render past 3s. Don't mutate m;
|
|
555
|
+
return { ...m, content: parts, audioWaveform: undefined };
|
|
556
|
+
});
|
|
508
557
|
}
|
|
@@ -58,11 +58,7 @@ export function useLLM({
|
|
|
58
58
|
(async () => {
|
|
59
59
|
try {
|
|
60
60
|
await controllerInstance.load({
|
|
61
|
-
|
|
62
|
-
tokenizerSource: model.tokenizerSource,
|
|
63
|
-
tokenizerConfigSource: model.tokenizerConfigSource!,
|
|
64
|
-
capabilities: model.capabilities,
|
|
65
|
-
defaultGenerationConfig: model.generationConfig,
|
|
61
|
+
model: model,
|
|
66
62
|
onDownloadProgressCallback: setDownloadProgress,
|
|
67
63
|
});
|
|
68
64
|
} catch (e) {
|
|
@@ -106,7 +102,10 @@ export function useLLM({
|
|
|
106
102
|
);
|
|
107
103
|
|
|
108
104
|
const sendMessage = useCallback(
|
|
109
|
-
(
|
|
105
|
+
(
|
|
106
|
+
message: string,
|
|
107
|
+
media?: { imagePath?: string; audioBuffer?: Float32Array }
|
|
108
|
+
) => {
|
|
110
109
|
setResponse('');
|
|
111
110
|
return controllerInstance.sendMessage(message, media);
|
|
112
111
|
},
|