react-native-executorch 0.9.0 → 0.9.2

This diff shows the changes between publicly available package versions as released to one of the supported registries. It reflects the package contents as they appear in their respective public registries and is provided for informational purposes only.
Files changed (106)
  1. package/android/libs/classes.jar +0 -0
  2. package/common/rnexecutorch/host_objects/JsiConversions.h +43 -0
  3. package/common/rnexecutorch/models/llm/LLM.cpp +55 -42
  4. package/common/rnexecutorch/models/llm/LLM.h +4 -3
  5. package/common/rnexecutorch/models/llm/Types.h +23 -0
  6. package/common/runner/base_llm_runner.cpp +10 -3
  7. package/common/runner/base_llm_runner.h +1 -0
  8. package/common/runner/constants.h +15 -1
  9. package/common/runner/encoders/audio_encoder.cpp +111 -0
  10. package/common/runner/encoders/audio_encoder.h +40 -0
  11. package/common/runner/encoders/vision_encoder.cpp +13 -5
  12. package/common/runner/encoders/vision_encoder.h +15 -2
  13. package/common/runner/irunner.h +5 -0
  14. package/common/runner/multimodal_decoder_runner.h +50 -1
  15. package/common/runner/multimodal_input.h +16 -1
  16. package/common/runner/multimodal_prefiller.cpp +374 -64
  17. package/common/runner/multimodal_prefiller.h +57 -6
  18. package/common/runner/multimodal_runner.cpp +19 -12
  19. package/common/runner/multimodal_runner.h +1 -1
  20. package/common/runner/sampler.cpp +126 -39
  21. package/common/runner/sampler.h +13 -5
  22. package/common/runner/text_decoder_runner.cpp +1 -4
  23. package/common/runner/text_decoder_runner.h +3 -2
  24. package/common/runner/text_prefiller.cpp +8 -8
  25. package/common/runner/text_prefiller.h +8 -1
  26. package/common/runner/text_runner.cpp +35 -9
  27. package/common/runner/text_token_generator.h +2 -3
  28. package/common/runner/util.h +0 -1
  29. package/lib/module/constants/llmDefaults.js +1 -1
  30. package/lib/module/constants/llmDefaults.js.map +1 -1
  31. package/lib/module/constants/modelRegistry.js +62 -3
  32. package/lib/module/constants/modelRegistry.js.map +1 -1
  33. package/lib/module/constants/modelUrls.js +62 -6
  34. package/lib/module/constants/modelUrls.js.map +1 -1
  35. package/lib/module/controllers/LLMController.js +69 -20
  36. package/lib/module/controllers/LLMController.js.map +1 -1
  37. package/lib/module/hooks/natural_language_processing/useLLM.js +1 -5
  38. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  39. package/lib/module/modules/computer_vision/PoseEstimationModule.js +13 -1
  40. package/lib/module/modules/computer_vision/PoseEstimationModule.js.map +1 -1
  41. package/lib/module/modules/natural_language_processing/LLMModule.js +12 -7
  42. package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
  43. package/lib/module/types/llm.js +11 -0
  44. package/lib/module/types/llm.js.map +1 -1
  45. package/lib/module/types/poseEstimation.js.map +1 -1
  46. package/lib/typescript/constants/llmDefaults.d.ts +1 -1
  47. package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
  48. package/lib/typescript/constants/modelRegistry.d.ts +38 -1
  49. package/lib/typescript/constants/modelRegistry.d.ts.map +1 -1
  50. package/lib/typescript/constants/modelUrls.d.ts +52 -12
  51. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  52. package/lib/typescript/controllers/LLMController.d.ts +7 -9
  53. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  54. package/lib/typescript/modules/computer_vision/PoseEstimationModule.d.ts +6 -0
  55. package/lib/typescript/modules/computer_vision/PoseEstimationModule.d.ts.map +1 -1
  56. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +6 -3
  57. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
  58. package/lib/typescript/types/llm.d.ts +63 -36
  59. package/lib/typescript/types/llm.d.ts.map +1 -1
  60. package/lib/typescript/types/poseEstimation.d.ts +3 -0
  61. package/lib/typescript/types/poseEstimation.d.ts.map +1 -1
  62. package/package.json +1 -1
  63. package/react-native-executorch.podspec +6 -0
  64. package/src/constants/llmDefaults.ts +1 -1
  65. package/src/constants/modelRegistry.ts +62 -2
  66. package/src/constants/modelUrls.ts +69 -6
  67. package/src/controllers/LLMController.ts +89 -40
  68. package/src/hooks/natural_language_processing/useLLM.ts +5 -6
  69. package/src/modules/computer_vision/PoseEstimationModule.ts +12 -0
  70. package/src/modules/natural_language_processing/LLMModule.ts +19 -8
  71. package/src/types/llm.ts +64 -34
  72. package/src/types/poseEstimation.ts +10 -4
  73. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  74. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  75. package/third-party/include/executorch/ExecuTorch.h +2 -0
  76. package/third-party/include/executorch/ExecuTorchModule.h +46 -0
  77. package/third-party/include/executorch/extension/data_loader/buffer_data_loader.h +4 -3
  78. package/third-party/include/executorch/extension/data_loader/mman.h +46 -0
  79. package/third-party/include/executorch/extension/data_loader/mmap_data_loader.h +4 -0
  80. package/third-party/include/executorch/extension/data_loader/shared_ptr_data_loader.h +7 -3
  81. package/third-party/include/executorch/extension/module/module.h +47 -8
  82. package/third-party/include/executorch/extension/tensor/tensor_ptr.h +17 -5
  83. package/third-party/include/executorch/kernels/optimized/Functions.h +12 -0
  84. package/third-party/include/executorch/kernels/optimized/NativeFunctions.h +4 -0
  85. package/third-party/include/executorch/kernels/portable/Functions.h +18 -0
  86. package/third-party/include/executorch/kernels/portable/NativeFunctions.h +6 -0
  87. package/third-party/include/executorch/runtime/backend/backend_options_map.h +37 -0
  88. package/third-party/include/executorch/runtime/core/array_ref.h +3 -1
  89. package/third-party/include/executorch/runtime/core/error.h +1 -0
  90. package/third-party/include/executorch/runtime/core/evalue.h +256 -9
  91. package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +24 -0
  92. package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +9 -6
  93. package/third-party/include/executorch/runtime/core/portable_type/device.h +3 -4
  94. package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +31 -1
  95. package/third-party/include/executorch/runtime/executor/method.h +9 -3
  96. package/third-party/include/executorch/runtime/executor/method_meta.h +14 -0
  97. package/third-party/include/executorch/runtime/executor/platform_memory_allocator.h +12 -2
  98. package/third-party/include/executorch/runtime/executor/program.h +3 -1
  99. package/third-party/include/executorch/runtime/executor/tensor_parser.h +5 -1
  100. package/third-party/include/executorch/runtime/kernel/operator_registry.h +9 -0
  101. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  102. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  103. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/mlx.metallib +0 -0
  104. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  105. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  106. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/mlx.metallib +0 -0
package/src/types/llm.ts CHANGED
@@ -5,20 +5,23 @@ import { ResourceSource } from './common';
5
5
  * Capabilities a multimodal LLM can have.
6
6
  * @category Types
7
7
  */
8
- export type LLMCapability = 'vision';
8
+ export type LLMCapability = 'vision' | 'audio';
9
9
 
10
10
  /**
11
11
  * Derives the media argument shape for `sendMessage` from a capabilities tuple.
12
12
  * @category Types
13
13
  */
14
14
  export type MediaArg<C extends readonly LLMCapability[]> =
15
- 'vision' extends C[number] ? { imagePath?: string } : object;
15
+ ('vision' extends C[number] ? { imagePath?: string } : object) &
16
+ ('audio' extends C[number] ? { audioBuffer?: Float32Array } : object);
16
17
 
17
18
  /**
18
19
  * Union of all built-in LLM model names.
19
20
  * @category Types
20
21
  */
21
22
  export type LLMModelName =
23
+ | 'gemma4-e2b'
24
+ | 'gemma4-e2b-multimodal'
22
25
  | 'llama-3.2-3b'
23
26
  | 'llama-3.2-3b-qlora'
24
27
  | 'llama-3.2-3b-spinquant'
@@ -62,43 +65,63 @@ export type LLMModelName =
62
65
  | 'bielik-v3.0-1.5b'
63
66
  | 'bielik-v3.0-1.5b-quantized';
64
67
 
68
+ /**
69
+ * Audio soft-token expansion constants for audio_encoder.
70
+ * @category Types
71
+ */
72
+ export interface AudioConfig {
73
+ samplesPerBlock: number;
74
+ tokensPerBlock: number;
75
+ }
76
+
77
+ /**
78
+ * Properties defining LLMModel.
79
+ * @category Types
80
+ */
81
+ export interface LLMModel {
82
+ /**
83
+ * The built-in model name (e.g. `'llama-3.2-3b'`). Used for telemetry and hook reload triggers.
84
+ * Pass one of the pre-built LLM constants (e.g. `LLAMA3_2_3B`) to populate all required fields.
85
+ */
86
+ modelName: LLMModelName;
87
+ /**
88
+ * `ResourceSource` that specifies the location of the model binary.
89
+ */
90
+ modelSource: ResourceSource;
91
+ /**
92
+ * `ResourceSource` pointing to the JSON file which contains the tokenizer.
93
+ */
94
+ tokenizerSource: ResourceSource;
95
+ /**
96
+ * `ResourceSource` pointing to the JSON file which contains the tokenizer config.
97
+ */
98
+ tokenizerConfigSource: ResourceSource;
99
+ /**
100
+ * Optional list of modality capabilities the model supports.
101
+ * Determines the type of the `media` argument in `sendMessage`.
102
+ * Example: `['vision']` enables `sendMessage(text, { imagePath })`.
103
+ */
104
+ capabilities?: readonly LLMCapability[];
105
+ /**
106
+ * Recommended default generation settings, typically copied from the
107
+ * upstream `generation_config.json` or the model card. Applied automatically
108
+ * after the native module loads and before any user `configure()` call,
109
+ * so callers only need to override the values they want to change.
110
+ */
111
+ generationConfig?: GenerationConfig;
112
+ /**
113
+ * Defines config for audio input modality for multimodal LLMs.
114
+ * `capabilities` must include 'audio'.
115
+ */
116
+ audioConfig?: AudioConfig;
117
+ }
118
+
65
119
  /**
66
120
  * Properties for initializing and configuring a Large Language Model (LLM) instance.
67
121
  * @category Types
68
122
  */
69
123
  export interface LLMProps {
70
- model: {
71
- /**
72
- * The built-in model name (e.g. `'llama-3.2-3b'`). Used for telemetry and hook reload triggers.
73
- * Pass one of the pre-built LLM constants (e.g. `LLAMA3_2_3B`) to populate all required fields.
74
- */
75
- modelName: LLMModelName;
76
- /**
77
- * `ResourceSource` that specifies the location of the model binary.
78
- */
79
- modelSource: ResourceSource;
80
- /**
81
- * `ResourceSource` pointing to the JSON file which contains the tokenizer.
82
- */
83
- tokenizerSource: ResourceSource;
84
- /**
85
- * `ResourceSource` pointing to the JSON file which contains the tokenizer config.
86
- */
87
- tokenizerConfigSource: ResourceSource;
88
- /**
89
- * Optional list of modality capabilities the model supports.
90
- * Determines the type of the `media` argument in `sendMessage`.
91
- * Example: `['vision']` enables `sendMessage(text, { imagePath })`.
92
- */
93
- capabilities?: readonly LLMCapability[];
94
- /**
95
- * Recommended default generation settings, typically copied from the
96
- * upstream `generation_config.json` or the model card. Applied automatically
97
- * after the native module loads and before any user `configure()` call,
98
- * so callers only need to override the values they want to change.
99
- */
100
- generationConfig?: GenerationConfig;
101
- };
124
+ model: LLMModel;
102
125
  /**
103
126
  * Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
104
127
  */
@@ -289,6 +312,12 @@ export interface Message {
289
312
  * controller normalizes the path before passing it to native code.
290
313
  */
291
314
  mediaPath?: string;
315
+ /**
316
+ * Optional fp32 mono 16 kHz PCM buffer. Only valid on `user` messages for
317
+ * models with the `'audio'` capability. The controller forwards it to the
318
+ * native `generateMultimodal` path.
319
+ */
320
+ audioWaveform?: Float32Array;
292
321
  }
293
322
 
294
323
  /**
@@ -386,6 +415,7 @@ export interface ContextStrategy {
386
415
  export const SPECIAL_TOKENS = {
387
416
  BOS_TOKEN: 'bos_token',
388
417
  EOS_TOKEN: 'eos_token',
418
+ EOT_TOKEN: 'eot_token',
389
419
  UNK_TOKEN: 'unk_token',
390
420
  SEP_TOKEN: 'sep_token',
391
421
  PAD_TOKEN: 'pad_token',
@@ -62,10 +62,16 @@ export type PoseEstimationConfig<K extends LabelEnum> = {
62
62
  * Each model name maps to its required fields.
63
63
  * @category Types
64
64
  */
65
- export type PoseEstimationModelSources = {
66
- modelName: 'yolo26n-pose';
67
- modelSource: ResourceSource;
68
- };
65
+ export type PoseEstimationModelSources =
66
+ | {
67
+ modelName: 'yolo26n-pose';
68
+ modelSource: ResourceSource;
69
+ }
70
+ // RF-DETR keypoint preview (BETA) — may be renamed once a stable model ships.
71
+ | {
72
+ modelName: 'rfdetr-keypoint-preview';
73
+ modelSource: ResourceSource;
74
+ };
69
75
 
70
76
  /**
71
77
  * Union of all built-in pose estimation model names.
@@ -6,6 +6,8 @@
6
6
  * LICENSE file in the root directory of this source tree.
7
7
  */
8
8
 
9
+ #import "ExecuTorchBackendOption.h"
10
+ #import "ExecuTorchBackendOptionsMap.h"
9
11
  #import "ExecuTorchError.h"
10
12
  #import "ExecuTorchLog.h"
11
13
  #import "ExecuTorchModule.h"
@@ -6,6 +6,8 @@
6
6
  * LICENSE file in the root directory of this source tree.
7
7
  */
8
8
 
9
+ #import "ExecuTorchBackendOption.h"
10
+ #import "ExecuTorchBackendOptionsMap.h"
9
11
  #import "ExecuTorchValue.h"
10
12
 
11
13
  NS_ASSUME_NONNULL_BEGIN
@@ -198,6 +200,37 @@ NS_SWIFT_NAME(Module)
198
200
  */
199
201
  - (BOOL)load:(NSError **)error;
200
202
 
203
+ /**
204
+ * Loads the module's program with per-delegate backend options.
205
+ *
206
+ * The receiver retains @c options for as long as the underlying program
207
+ * references it (lifetime tracked via ARC).
208
+ *
209
+ * @param options An `ExecuTorchBackendOptionsMap` containing per-delegate
210
+ * load-time configuration, built once via
211
+ * `[ExecuTorchBackendOptionsMap mapWithOptions:error:]`.
212
+ * @param verification The verification level to apply when loading the program.
213
+ * @param error A pointer to an NSError pointer that will be set if an error
214
+ * occurs.
215
+ * @return YES if the program was successfully loaded; otherwise, NO.
216
+ */
217
+ - (BOOL)loadWithOptions:(ExecuTorchBackendOptionsMap *)options
218
+ verification:(ExecuTorchVerification)verification
219
+ error:(NSError **)error NS_REFINED_FOR_SWIFT;
220
+
221
+ /**
222
+ * Loads the module's program with per-delegate backend options using minimal
223
+ * verification.
224
+ *
225
+ * @param options An `ExecuTorchBackendOptionsMap` containing per-delegate
226
+ * load-time configuration.
227
+ * @param error A pointer to an NSError pointer that will be set if an error
228
+ * occurs.
229
+ * @return YES if the program was successfully loaded; otherwise, NO.
230
+ */
231
+ - (BOOL)loadWithOptions:(ExecuTorchBackendOptionsMap *)options
232
+ error:(NSError **)error NS_REFINED_FOR_SWIFT;
233
+
201
234
  /**
202
235
  * Checks if the module is loaded.
203
236
  *
@@ -215,6 +248,19 @@ NS_SWIFT_NAME(Module)
215
248
  - (BOOL)loadMethod:(NSString *)methodName
216
249
  error:(NSError **)error NS_SWIFT_NAME(load(_:));
217
250
 
251
+ /**
252
+ * Loads a specific method from the program with per-delegate backend options.
253
+ *
254
+ * @param methodName A string representing the name of the method to load.
255
+ * @param options An `ExecuTorchBackendOptionsMap` containing per-delegate
256
+ * load-time configuration.
257
+ * @param error A pointer to an NSError pointer that is set if an error occurs.
258
+ * @return YES if the method was successfully loaded; otherwise, NO.
259
+ */
260
+ - (BOOL)loadMethod:(NSString *)methodName
261
+ options:(ExecuTorchBackendOptionsMap *)options
262
+ error:(NSError **)error NS_REFINED_FOR_SWIFT;
263
+
218
264
  /**
219
265
  * Checks if a specific method is loaded.
220
266
  *
@@ -36,9 +36,10 @@ public:
36
36
  ET_UNUSED const DataLoader::SegmentInfo &segment_info) const override {
37
37
  size_t total_size;
38
38
  bool overflow = c10::add_overflows(offset, size, &total_size);
39
- ET_CHECK_OR_RETURN_ERROR(!overflow && total_size <= size_, InvalidArgument,
40
- "offset %zu + size %zu > size_ %zu", offset, size,
41
- size_);
39
+ ET_CHECK_OR_RETURN_ERROR(
40
+ !overflow && total_size <= size_, InvalidArgument,
41
+ "offset %zu + size %zu > size_ %zu, or overflow detected", offset, size,
42
+ size_);
42
43
  return executorch::runtime::FreeableBuffer(data_ + offset, size,
43
44
  /*free_fn=*/nullptr);
44
45
  }
@@ -17,6 +17,7 @@
17
17
 
18
18
  #ifndef _WIN32
19
19
 
20
+ #include <fcntl.h>
20
21
  #include <sys/mman.h>
21
22
  #include <unistd.h>
22
23
 
@@ -41,6 +42,34 @@ ET_INLINE off_t get_mmap_offset(size_t offset) {
41
42
  return static_cast<off_t>(offset);
42
43
  }
43
44
 
45
+ /**
46
+ * Hint the kernel to prefetch pages eagerly and to optimize for sequential
47
+ * reads. Intended to reduce page-fault stutter during model initialization
48
+ * when the caller does not want to mlock the pages into RAM.
49
+ */
50
+ ET_INLINE void madvise_pages_willneed_sequential(void *addr, size_t len) {
51
+ ::madvise(addr, len, MADV_WILLNEED);
52
+ ::madvise(addr, len, MADV_SEQUENTIAL);
53
+ }
54
+
55
+ /**
56
+ * On Apple platforms, schedule kernel read-ahead on the file descriptor itself
57
+ * via fcntl(F_RDADVISE). This is more aggressive than madvise for cold starts:
58
+ * it brings pages into the unified buffer cache so first-touch faults are
59
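+ * serviced from RAM instead of storage. No-op on non-Apple POSIX platforms. NOTE(review): file_size is narrowed with static_cast<int> before being stored in ra_count, so sizes above INT_MAX are not represented correctly — confirm whether the value should be clamped.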
60
+ */
61
+ ET_INLINE void fcntl_rdadvise_apple(int fd, size_t file_size) {
62
+ #if defined(__APPLE__)
63
+ struct radvisory advice;
64
+ advice.ra_offset = 0;
65
+ advice.ra_count = static_cast<int>(file_size);
66
+ ::fcntl(fd, F_RDADVISE, &advice);
67
+ #else
68
+ (void)fd;
69
+ (void)file_size;
70
+ #endif
71
+ }
72
+
44
73
  #else
45
74
 
46
75
  #define NOMINMAX
@@ -78,4 +107,21 @@ ET_INLINE uint64_t get_mmap_offset(size_t offset) {
78
107
  return static_cast<uint64_t>(offset);
79
108
  }
80
109
 
110
+ /**
111
+ * No-op on Windows: there is no direct equivalent to madvise() with
112
+ * MADV_WILLNEED / MADV_SEQUENTIAL, and the existing mman_windows shim does not implement one.
113
+ */
114
+ ET_INLINE void madvise_pages_willneed_sequential(void *addr, size_t len) {
115
+ (void)addr;
116
+ (void)len;
117
+ }
118
+
119
+ /**
120
+ * No-op on Windows: F_RDADVISE is an Apple-specific fcntl command.
121
+ */
122
+ ET_INLINE void fcntl_rdadvise_apple(int fd, size_t file_size) {
123
+ (void)fd;
124
+ (void)file_size;
125
+ }
126
+
81
127
  #endif
@@ -38,6 +38,10 @@ public:
38
38
  UseMlock,
39
39
  /// Call `mlock()` on loaded pages, ignoring errors if it fails.
40
40
  UseMlockIgnoreErrors,
41
+ /// Use madvise() with MADV_WILLNEED and MADV_SEQUENTIAL instead of mlock.
42
+ /// Tells the kernel to prefetch pages eagerly and optimize for
43
+ /// sequential reads, without pinning them in RAM.
44
+ UseMadvise,
41
45
  };
42
46
 
43
47
  /**
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <c10/util/safe_numerics.h>
11
12
  #include <executorch/runtime/core/data_loader.h>
12
13
  #include <executorch/runtime/core/error.h>
13
14
  #include <executorch/runtime/core/result.h>
@@ -32,9 +33,12 @@ public:
32
33
  executorch::runtime::Result<executorch::runtime::FreeableBuffer>
33
34
  load(size_t offset, size_t size,
34
35
  ET_UNUSED const DataLoader::SegmentInfo &segment_info) const override {
35
- ET_CHECK_OR_RETURN_ERROR(offset + size <= size_, InvalidArgument,
36
- "offset %zu + size %zu > size_ %zu", offset, size,
37
- size_);
36
+ size_t total_size;
37
+ bool overflow = c10::add_overflows(offset, size, &total_size);
38
+ ET_CHECK_OR_RETURN_ERROR(
39
+ !overflow && total_size <= size_, InvalidArgument,
40
+ "offset %zu + size %zu > size_ %zu, or overflow detected", offset, size,
41
+ size_);
38
42
  return executorch::runtime::FreeableBuffer(
39
43
  static_cast<uint8_t *>(data_.get()) + offset, size,
40
44
  /*free_fn=*/nullptr);
@@ -14,6 +14,8 @@
14
14
  #include <unordered_set>
15
15
  #include <vector>
16
16
 
17
+ #include <executorch/runtime/backend/backend_options_map.h>
18
+ #include <executorch/runtime/backend/options.h>
17
19
  #include <executorch/runtime/executor/program.h>
18
20
 
19
21
  #ifdef USE_ATEN_LIB
@@ -25,6 +27,7 @@
25
27
  namespace executorch {
26
28
  namespace extension {
27
29
 
30
+ using ET_RUNTIME_NAMESPACE::Kernel;
28
31
  using ET_RUNTIME_NAMESPACE::Method;
29
32
  using ET_RUNTIME_NAMESPACE::MethodMeta;
30
33
  using ET_RUNTIME_NAMESPACE::NamedDataMap;
@@ -51,6 +54,8 @@ public:
51
54
  MmapUseMlock,
52
55
  /// Use memory locking and ignore errors.
53
56
  MmapUseMlockIgnoreErrors,
57
+ /// Use mmap with madvise() MADV_WILLNEED and MADV_SEQUENTIAL hints.
58
+ MmapUseMadvise,
54
59
  };
55
60
 
56
61
  /**
@@ -182,9 +187,18 @@ public:
182
187
  /**
183
188
  * Loads the program with per-delegate runtime options.
184
189
  *
185
- * @param[in] backend_options A LoadBackendOptionsMap containing per-delegate
186
- * load-time configuration options. The caller must ensure this object
187
- * outlives any methods loaded with these options.
190
+ * The Module deep-copies `backend_options` into internal storage, so the
191
+ * caller may release the input (and any backing BackendOption arrays its
192
+ * Spans referenced) immediately after this call returns. Future lazy
193
+ * `load_method` calls (e.g. triggered by `forward`) consume the
194
+ * Module-owned copy.
195
+ *
196
+ * Transactional: on failure, the previously-installed backend options
197
+ * (if any) are left in place; the input is not committed.
198
+ *
199
+ * @param[in] backend_options A LoadBackendOptionsMap containing
200
+ * per-delegate load-time configuration options. Deep-copied into the
201
+ * Module on success; not retained on failure.
188
202
  * @param[in] verification The type of verification to do before returning
189
203
  * success.
190
204
  *
@@ -195,6 +209,21 @@ public:
195
209
  const Program::Verification verification =
196
210
  Program::Verification::Minimal);
197
211
 
212
+ /**
213
+ * Returns the deep-copied LoadBackendOptionsMap most recently installed
214
+ * via `load(LoadBackendOptionsMap, ...)`. The returned reference is owned
215
+ * by the Module and remains valid until the next call to
216
+ * `load(LoadBackendOptionsMap, ...)` or until the Module is destroyed.
217
+ *
218
+ * If `load(LoadBackendOptionsMap, ...)` has never been called, returns a
219
+ * default-constructed (empty, `size() == 0`) map.
220
+ *
221
+ * @returns Const reference to the Module-owned LoadBackendOptionsMap.
222
+ */
223
+ inline const LoadBackendOptionsMap &backend_options() const {
224
+ return backend_options_map_;
225
+ }
226
+
198
227
  /**
199
228
  * Checks if the program is loaded.
200
229
  *
@@ -246,7 +275,8 @@ public:
246
275
  load_method(const std::string &method_name,
247
276
  runtime::HierarchicalAllocator *planned_memory = nullptr,
248
277
  torch::executor::EventTracer *event_tracer = nullptr,
249
- const LoadBackendOptionsMap *backend_options = nullptr);
278
+ const LoadBackendOptionsMap *backend_options = nullptr,
279
+ std::vector<Kernel> kernel_registry = {});
250
280
 
251
281
  ET_DEPRECATED ET_NODISCARD runtime::Error inline load_method(
252
282
  const std::string &method_name,
@@ -294,9 +324,10 @@ public:
294
324
  ET_NODISCARD inline runtime::Error
295
325
  load_forward(runtime::HierarchicalAllocator *planned_memory = nullptr,
296
326
  torch::executor::EventTracer *event_tracer = nullptr,
297
- const LoadBackendOptionsMap *backend_options = nullptr) {
298
- return load_method("forward", planned_memory, event_tracer,
299
- backend_options);
327
+ const LoadBackendOptionsMap *backend_options = nullptr,
328
+ std::vector<Kernel> kernel_registry = {}) {
329
+ return load_method("forward", planned_memory, event_tracer, backend_options,
330
+ std::move(kernel_registry));
300
331
  }
301
332
 
302
333
  ET_DEPRECATED ET_NODISCARD inline runtime::Error
@@ -678,6 +709,7 @@ private:
678
709
  std::unique_ptr<PlannedMemory> planned_memory;
679
710
  std::unique_ptr<runtime::MemoryManager> memory_manager;
680
711
  std::unique_ptr<Method> method;
712
+ std::vector<Kernel> kernel_registry;
681
713
  };
682
714
 
683
715
  std::string file_path_;
@@ -693,7 +725,14 @@ private:
693
725
  std::unique_ptr<NamedDataMap> merged_data_map_;
694
726
  std::vector<std::vector<uint8_t>> shared_arenas_;
695
727
  ET_DEPRECATED std::vector<uint8_t> debug_buffer_;
696
- const LoadBackendOptionsMap *backend_options_ = nullptr;
728
+ // Module-owned deep-copy of the backend options most recently installed
729
+ // via load(LoadBackendOptionsMap, ...). `backend_options_storage_` owns
730
+ // the per-backend BackendOption arrays; `backend_options_map_` is a
731
+ // LoadBackendOptionsMap whose Spans reference those owned arrays. An
732
+ // empty map (`size() == 0`) is observationally indistinguishable from
733
+ // "never set" by downstream consumers, so we don't track that bit.
734
+ std::vector<std::vector<runtime::BackendOption>> backend_options_storage_;
735
+ LoadBackendOptionsMap backend_options_map_;
697
736
  bool share_memory_arenas_;
698
737
 
699
738
  ET_NODISCARD runtime::Error
@@ -14,6 +14,7 @@
14
14
  #include <vector>
15
15
 
16
16
  #include <c10/macros/Macros.h>
17
+ #include <c10/util/safe_numerics.h>
17
18
  #include <executorch/runtime/core/error.h>
18
19
  #include <executorch/runtime/core/exec_aten/exec_aten.h>
19
20
  #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
@@ -105,13 +106,21 @@ make_tensor_ptr(std::vector<executorch::aten::SizesType> sizes,
105
106
  executorch::aten::ScalarType type = deduced_type,
106
107
  executorch::aten::TensorShapeDynamism dynamism =
107
108
  executorch::aten::TensorShapeDynamism::DYNAMIC_BOUND) {
108
- ET_CHECK_MSG(data.size() ==
109
- executorch::aten::compute_numel(sizes.data(), sizes.size()),
109
+ auto numel_result = executorch::aten::safe_numel(sizes.data(), sizes.size());
110
+ ET_CHECK_MSG(numel_result.ok(), "safe_numel failed: %d",
111
+ static_cast<int>(numel_result.error()));
112
+ ET_CHECK_MSG(data.size() == static_cast<size_t>(numel_result.get()),
110
113
  "Data size does not match tensor size.");
111
114
  if (type != deduced_type) {
112
115
  ET_CHECK_MSG(runtime::canCast(deduced_type, type),
113
116
  "Cannot cast deduced type to specified type.");
114
- std::vector<uint8_t> casted_data(data.size() * aten::elementSize(type));
117
+ size_t casted_bytes = 0;
118
+ ET_CHECK_MSG(!c10::mul_overflows(
119
+ data.size(), static_cast<size_t>(aten::elementSize(type)),
120
+ &casted_bytes),
121
+ "casted_data size overflow: %zu elements * %zu bytes/element",
122
+ data.size(), static_cast<size_t>(aten::elementSize(type)));
123
+ std::vector<uint8_t> casted_data(casted_bytes);
115
124
 
116
125
  // Create a minimal context for error handling in ET_SWITCH
117
126
  struct {
@@ -327,8 +336,11 @@ make_tensor_ptr(const executorch::aten::Tensor &tensor,
327
336
  const auto same_rank = sizes.size() == static_cast<size_t>(tensor.dim());
328
337
  const auto same_shape = same_rank && std::equal(sizes.begin(), sizes.end(),
329
338
  tensor.sizes().begin());
330
- const auto element_count =
331
- executorch::aten::compute_numel(sizes.data(), sizes.size());
339
+ auto element_count_result =
340
+ executorch::aten::safe_numel(sizes.data(), sizes.size());
341
+ ET_CHECK_MSG(element_count_result.ok(), "safe_numel failed: %d",
342
+ static_cast<int>(element_count_result.error()));
343
+ const auto element_count = element_count_result.get();
332
344
  const auto parent_element_count = tensor.numel();
333
345
  ET_CHECK_MSG(
334
346
  element_count <= parent_element_count,
@@ -91,6 +91,12 @@ TORCH_API inline torch::executor::Tensor & gelu_outf(torch::executor::KernelRunt
91
91
  }
92
92
 
93
93
 
94
+ // aten::grid_sampler_2d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)
95
+ TORCH_API inline torch::executor::Tensor & grid_sampler_2d_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & input, const torch::executor::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, torch::executor::Tensor & out) {
96
+ return ::torch::executor::native::opt_grid_sampler_2d_out(context, input, grid, interpolation_mode, padding_mode, align_corners, out);
97
+ }
98
+
99
+
94
100
  // aten::le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
95
101
  TORCH_API inline torch::executor::Tensor & le_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, const torch::executor::Scalar & other, torch::executor::Tensor & out) {
96
102
  return ::torch::executor::native::opt_le_scalar_out(context, self, other, out);
@@ -139,6 +145,12 @@ TORCH_API inline torch::executor::Tensor & sub_outf(torch::executor::KernelRunti
139
145
  }
140
146
 
141
147
 
148
+ // aten::sum.IntList_out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
149
+ TORCH_API inline torch::executor::Tensor & sum_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, bool keepdim, torch::executor::optional<torch::executor::ScalarType> dtype, torch::executor::Tensor & out) {
150
+ return ::torch::executor::native::opt_sum_dim_out(context, self, dim, keepdim, dtype, out);
151
+ }
152
+
153
+
142
154
  // aten::sub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)
143
155
  TORCH_API inline torch::executor::Tensor & sub_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, const torch::executor::Scalar & other, const torch::executor::Scalar & alpha, torch::executor::Tensor & out) {
144
156
  return ::torch::executor::native::opt_sub_scalar_out(context, self, other, alpha, out);
@@ -42,6 +42,8 @@ torch::executor::Tensor & opt_exp_out(const torch::executor::Tensor & self, torc
42
42
  torch::executor::Tensor & opt_exp_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::Tensor & out);
43
43
  torch::executor::Tensor & opt_gelu_out(const torch::executor::Tensor & self, torch::executor::string_view approximate, torch::executor::Tensor & out);
44
44
  torch::executor::Tensor & opt_gelu_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::string_view approximate, torch::executor::Tensor & out);
45
+ torch::executor::Tensor & opt_grid_sampler_2d_out(const torch::executor::Tensor & input, const torch::executor::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, torch::executor::Tensor & out);
46
+ torch::executor::Tensor & opt_grid_sampler_2d_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & input, const torch::executor::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, torch::executor::Tensor & out);
45
47
  torch::executor::Tensor & opt_le_scalar_out(const torch::executor::Tensor & self, const torch::executor::Scalar & other, torch::executor::Tensor & out);
46
48
  torch::executor::Tensor & opt_le_scalar_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, const torch::executor::Scalar & other, torch::executor::Tensor & out);
47
49
  torch::executor::Tensor & opt_le_tensor_out(const torch::executor::Tensor & self, const torch::executor::Tensor & other, torch::executor::Tensor & out);
@@ -58,6 +60,8 @@ torch::executor::Tensor & opt_mul_scalar_out(torch::executor::KernelRuntimeConte
58
60
  ::std::tuple<torch::executor::Tensor &,torch::executor::Tensor &,torch::executor::Tensor &> opt_native_layer_norm_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & input, torch::executor::ArrayRef<int64_t> normalized_shape, const torch::executor::optional<torch::executor::Tensor> & weight, const torch::executor::optional<torch::executor::Tensor> & bias, double eps, torch::executor::Tensor & out0, torch::executor::Tensor & out1, torch::executor::Tensor & out2);
59
61
  torch::executor::Tensor & opt_sub_out(const torch::executor::Tensor & self, const torch::executor::Tensor & other, const torch::executor::Scalar & alpha, torch::executor::Tensor & out);
60
62
  torch::executor::Tensor & opt_sub_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, const torch::executor::Tensor & other, const torch::executor::Scalar & alpha, torch::executor::Tensor & out);
63
+ torch::executor::Tensor & opt_sum_dim_out(const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, bool keepdim, torch::executor::optional<torch::executor::ScalarType> dtype, torch::executor::Tensor & out);
64
+ torch::executor::Tensor & opt_sum_dim_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, bool keepdim, torch::executor::optional<torch::executor::ScalarType> dtype, torch::executor::Tensor & out);
61
65
  torch::executor::Tensor & opt_sub_scalar_out(const torch::executor::Tensor & self, const torch::executor::Scalar & other, const torch::executor::Scalar & alpha, torch::executor::Tensor & out);
62
66
  torch::executor::Tensor & opt_sub_scalar_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, const torch::executor::Scalar & other, const torch::executor::Scalar & alpha, torch::executor::Tensor & out);
63
67
  torch::executor::Tensor & opt_where_out(const torch::executor::Tensor & condition, const torch::executor::Tensor & self, const torch::executor::Tensor & other, torch::executor::Tensor & out);
@@ -25,12 +25,24 @@ namespace executor {
25
25
 
26
26
  namespace aten {
27
27
 
28
+ // aten::_adaptive_avg_pool2d.out(Tensor self, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
29
+ TORCH_API inline torch::executor::Tensor & _adaptive_avg_pool2d_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::ArrayRef<int64_t> output_size, torch::executor::Tensor & out) {
30
+ return ::torch::executor::native::_adaptive_avg_pool2d_out(context, self, output_size, out);
31
+ }
32
+
33
+
28
34
  // aten::_cdist_forward.out(Tensor x1, Tensor x2, float p, int? compute_mode, *, Tensor(a!) out) -> Tensor(a!)
29
35
  TORCH_API inline torch::executor::Tensor & _cdist_forward_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & x1, const torch::executor::Tensor & x2, double p, torch::executor::optional<int64_t> compute_mode, torch::executor::Tensor & out) {
30
36
  return ::torch::executor::native::_cdist_forward_out(context, x1, x2, p, compute_mode, out);
31
37
  }
32
38
 
33
39
 
40
+ // aten::_conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
41
+ TORCH_API inline torch::executor::Tensor & _conj_physical_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::Tensor & out) {
42
+ return ::torch::executor::native::_conj_physical_out(context, self, out);
43
+ }
44
+
45
+
34
46
  // aten::_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
35
47
  TORCH_API inline torch::executor::Tensor & _log_softmax_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, int64_t dim, bool half_to_float, torch::executor::Tensor & out) {
36
48
  return ::torch::executor::native::log_softmax_out(context, self, dim, half_to_float, out);
@@ -1201,6 +1213,12 @@ TORCH_API inline torch::executor::Tensor & var_outf(torch::executor::KernelRunti
1201
1213
  }
1202
1214
 
1203
1215
 
1216
+ // aten::var_mean.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
1217
+ TORCH_API inline ::std::tuple<torch::executor::Tensor &,torch::executor::Tensor &> var_mean_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, const torch::executor::optional<torch::executor::Scalar> & correction, bool keepdim, torch::executor::Tensor & out0, torch::executor::Tensor & out1) {
1218
+ return ::torch::executor::native::var_mean_correction_out(context, self, dim, correction, keepdim, out0, out1);
1219
+ }
1220
+
1221
+
1204
1222
  // aten::var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
1205
1223
  TORCH_API inline torch::executor::Tensor & var_outf(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, bool unbiased, bool keepdim, torch::executor::Tensor & out) {
1206
1224
  return ::torch::executor::native::var_out(context, self, dim, unbiased, keepdim, out);
@@ -20,8 +20,12 @@
20
20
  namespace torch {
21
21
  namespace executor {
22
22
  namespace native {
23
+ torch::executor::Tensor & _adaptive_avg_pool2d_out(const torch::executor::Tensor & self, torch::executor::ArrayRef<int64_t> output_size, torch::executor::Tensor & out);
24
+ torch::executor::Tensor & _adaptive_avg_pool2d_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::ArrayRef<int64_t> output_size, torch::executor::Tensor & out);
23
25
  torch::executor::Tensor & _cdist_forward_out(const torch::executor::Tensor & x1, const torch::executor::Tensor & x2, double p, torch::executor::optional<int64_t> compute_mode, torch::executor::Tensor & out);
24
26
  torch::executor::Tensor & _cdist_forward_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & x1, const torch::executor::Tensor & x2, double p, torch::executor::optional<int64_t> compute_mode, torch::executor::Tensor & out);
27
+ torch::executor::Tensor & _conj_physical_out(const torch::executor::Tensor & self, torch::executor::Tensor & out);
28
+ torch::executor::Tensor & _conj_physical_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::Tensor & out);
25
29
  torch::executor::Tensor & log_softmax_out(const torch::executor::Tensor & self, int64_t dim, bool half_to_float, torch::executor::Tensor & out);
26
30
  torch::executor::Tensor & log_softmax_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, int64_t dim, bool half_to_float, torch::executor::Tensor & out);
27
31
  ::std::tuple<torch::executor::Tensor &,torch::executor::Tensor &,torch::executor::Tensor &> _native_batch_norm_legit_out(const torch::executor::Tensor & input, const torch::executor::optional<torch::executor::Tensor> & weight, const torch::executor::optional<torch::executor::Tensor> & bias, torch::executor::Tensor & running_mean, torch::executor::Tensor & running_var, bool training, double momentum, double eps, torch::executor::Tensor & out, torch::executor::Tensor & save_mean, torch::executor::Tensor & save_invstd);
@@ -412,6 +416,8 @@ torch::executor::Tensor & upsample_nearest2d_vec_out(const torch::executor::Tens
412
416
  torch::executor::Tensor & upsample_nearest2d_vec_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & input, torch::executor::optional<torch::executor::ArrayRef<int64_t>> output_size, torch::executor::optional<torch::executor::ArrayRef<double>> scale_factors, torch::executor::Tensor & out);
413
417
  torch::executor::Tensor & var_correction_out(const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, const torch::executor::optional<torch::executor::Scalar> & correction, bool keepdim, torch::executor::Tensor & out);
414
418
  torch::executor::Tensor & var_correction_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, const torch::executor::optional<torch::executor::Scalar> & correction, bool keepdim, torch::executor::Tensor & out);
419
+ ::std::tuple<torch::executor::Tensor &,torch::executor::Tensor &> var_mean_correction_out(const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, const torch::executor::optional<torch::executor::Scalar> & correction, bool keepdim, torch::executor::Tensor & out0, torch::executor::Tensor & out1);
420
+ ::std::tuple<torch::executor::Tensor &,torch::executor::Tensor &> var_mean_correction_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, const torch::executor::optional<torch::executor::Scalar> & correction, bool keepdim, torch::executor::Tensor & out0, torch::executor::Tensor & out1);
415
421
  torch::executor::Tensor & var_out(const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, bool unbiased, bool keepdim, torch::executor::Tensor & out);
416
422
  torch::executor::Tensor & var_out(torch::executor::KernelRuntimeContext & context, const torch::executor::Tensor & self, torch::executor::optional<torch::executor::ArrayRef<int64_t>> dim, bool unbiased, bool keepdim, torch::executor::Tensor & out);
417
423
  torch::executor::Tensor & view_as_real_copy_out(const torch::executor::Tensor & self, torch::executor::Tensor & out);