com.github.asus4.onnxruntime 0.1.13 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/Plugins/Android/onnxruntime-android.aar +0 -0
  2. package/Plugins/Linux/x64/libonnxruntime.so +0 -0
  3. package/Plugins/Windows/x64/onnxruntime.dll +0 -0
  4. package/Plugins/iOS~/onnxruntime.xcframework/Info.plist +8 -8
  5. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/coreml_provider_factory.h +4 -1
  6. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_c_api.h +134 -19
  7. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_cxx_api.h +18 -3
  8. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_cxx_inline.h +68 -15
  9. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_lite_custom_op.h +1119 -0
  10. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_run_options_config_keys.h +19 -0
  11. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_session_options_config_keys.h +32 -9
  12. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Info.plist +2 -2
  13. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/onnxruntime +0 -0
  14. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/coreml_provider_factory.h +4 -1
  15. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_c_api.h +134 -19
  16. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_cxx_api.h +18 -3
  17. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_cxx_inline.h +68 -15
  18. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_lite_custom_op.h +1119 -0
  19. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_run_options_config_keys.h +19 -0
  20. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_session_options_config_keys.h +32 -9
  21. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Info.plist +2 -2
  22. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/onnxruntime +0 -0
  23. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/coreml_provider_factory.h +4 -1
  24. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/onnxruntime_c_api.h +134 -19
  25. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/onnxruntime_cxx_api.h +18 -3
  26. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/onnxruntime_cxx_inline.h +68 -15
  27. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_lite_custom_op.h +1119 -0
  28. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/onnxruntime_run_options_config_keys.h +19 -0
  29. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/onnxruntime_session_options_config_keys.h +32 -9
  30. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Info.plist → Versions/A/Resources/Info.plist} +2 -2
  31. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{onnxruntime → Versions/A/onnxruntime} +0 -0
  32. package/Plugins/macOS/libonnxruntime.dylib +0 -0
  33. package/README.md +8 -8
  34. package/Runtime/AssemblyInfo.shared.cs +1 -11
  35. package/Runtime/NativeMethods.shared.cs +37 -2
  36. package/Runtime/OrtValue.shared.cs +38 -38
  37. package/Runtime/SessionOptions.shared.cs +14 -0
  38. package/Runtime/Training/NativeTrainingMethods.shared.cs +20 -2
  39. package/Runtime/Training/TrainingSession.shared.cs +107 -0
  40. package/package.json +1 -1
  41. /package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/cpu_provider_factory.h +0 -0
  42. /package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/{Headers → Versions/A/Headers}/onnxruntime_float16.h +0 -0
@@ -30,3 +30,22 @@ static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memor
30
30
  // Per default it will be set to '0'
31
31
  // Taking CUDA EP as an example, it omit triggering cudaStreamSynchronize on the compute stream.
32
32
  static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers";
33
+
34
+ // Set HTP performance mode for QNN HTP backend before session run.
35
+ // options for HTP performance mode: "burst", "balanced", "default", "high_performance",
36
+ // "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver",
37
+ // "sustained_high_performance". Default to "default".
38
+ static const char* const kOrtRunOptionsConfigQnnPerfMode = "qnn.htp_perf_mode";
39
+
40
+ // Set HTP performance mode for QNN HTP backend post session run.
41
+ static const char* const kOrtRunOptionsConfigQnnPerfModePostRun = "qnn.htp_perf_mode_post_run";
42
+
43
+ // Set RPC control latency for QNN HTP backend
44
+ static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_control_latency";
45
+
46
+ // Set graph annotation id for CUDA EP. Use with enable_cuda_graph=true.
47
+ // The value should be an integer. If the value is not set, the default value is 0 and
48
+ // ORT session only captures one cuda graph before another capture is requested.
49
+ // If the value is set to -1, cuda graph capture/replay is disabled in that run.
50
+ // User are not expected to set the value to 0 as it is reserved for internal use.
51
+ static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";
@@ -78,21 +78,35 @@ static const char* const kOrtSessionOptionsEnableGeluApproximation = "optimizati
78
78
  static const char* const kOrtSessionOptionsDisableAheadOfTimeFunctionInlining = "session.disable_aot_function_inlining";
79
79
 
80
80
  #ifdef ENABLE_TRAINING
81
- // Specifies a list of op types for memory footprint reduction.
82
- // The value should be a ","-delimited list of pair of
83
- // <subgraph string: optimization strategy: number of subgraph to apply>.
84
- // For example, "Gelu+Cast+:1:0,Dropout+:1:1".
85
- // A valid "subgraph string" should be one subgraph representation output by ORT graph transformations.
86
- // "optimization strategy" currently has valid values: 0 - disabled, 1 - recompute.
87
- // "number of subgraph to apply" is used to control how many subgraphs to apply optimization, to avoid "oversaving"
88
- // the memory.
89
- static const char* const kOrtSessionOptionsMemoryOptimizerEnabler = "optimization.memory_optimizer_config";
81
+ // Specifies a path of the file containing a list of memory optimization configurations.
82
+ // The value should be a string indicating the file path of the config file.
83
+ // The content of the config file is a JSON struct like this:
84
+ // [
85
+ // "Gelu+Cast+:1:0",
86
+ // "Dropout+:1:1"
87
+ // ]
88
+ // Taking the example of "Gelu+Cast+:1:0",
89
+ // > "Gelu+Cast+" is the subgraph string, a valid "subgraph string" should be one subgraph representation
90
+ // output by ORT graph transformations.
91
+ // > "1" is "optimization strategy", valid values: 0 - disabled, 1 - recompute.
92
+ // > "0" is "number of subgraph to apply" which is used to control how many subgraphs to apply optimization,
93
+ // to avoid "oversaving" the memory.
94
+ static const char* const kOrtSessionOptionsMemoryOptimizerApplyConfig = "optimization.memory_optimizer_config";
90
95
 
91
96
  // Specifies the config for detecting subgraphs for memory footprint reduction.
92
97
  // The value should be a string contains int separated using commas. The default value is "0:0".
93
98
  static const char* const kOrtSessionOptionsMemoryOptimizerProbeConfig = "optimization.enable_memory_probe_recompute_config";
94
99
  #endif
95
100
 
101
+ // This setting if set should contain a comma separated list of optimizers names that should be disabled.
102
+ // Optimizers may take time to execute and affect model loading time. If you feel that a specific optimizer
103
+ // does not provider runtime benefits, but affects your model loading time you may disable it using this config
104
+ // entry. This option is not enabled in ORT_MINIMAL_BUILD build.
105
+ // A list of optimizes is available in onnxruntime/core/optimizer/graph_transformer_utils.cc
106
+ //
107
+ // Default is an empty string which means no optimizers are disabled.
108
+ static const char* const kOrtSessionOptionsDisableSpecifiedOptimizers = "optimization.disable_specified_optimizers";
109
+
96
110
  // Enable or disable using device allocator for allocating initialized tensor memory. "1": enable; "0": disable. The default is "0".
97
111
  // Using device allocators means the memory allocation is made using malloc/new.
98
112
  static const char* const kOrtSessionOptionsUseDeviceAllocatorForInitializers = "session.use_device_allocator_for_initializers";
@@ -251,8 +265,17 @@ static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_p
251
265
  // "1": dump the EP context into the Onnx model. (default).
252
266
  static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode";
253
267
 
268
+ // Specify the EPContext node name prefix to make it unique
269
+ // in case user need to merge/connect multiple EPContext nodes in one model
270
+ static const char* const kOrtSessionOptionEpContextNodeNamePrefix = "ep.context_node_name_prefix";
271
+
254
272
  // Gemm fastmath mode provides fp32 gemm acceleration with bfloat16 based matmul.
255
273
  // Option values:
256
274
  // - "0": Gemm FastMath mode is not enabled. [DEFAULT]
257
275
  // - "1": Gemm FastMath mode is enabled.
258
276
  static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas.enable_gemm_fastmath_arm64_bfloat16";
277
+
278
+ // When converting DQ + MatMul -> MatMulNBits, the accuracy level of the MatMulNBits is controlled by this option.
279
+ // Refer to MatMulNBits op schema for more details.
280
+ // If not provided, default is 4.
281
+ static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
@@ -9,9 +9,9 @@
9
9
  <key>CFBundleIdentifier</key>
10
10
  <string>com.microsoft.onnxruntime</string>
11
11
  <key>CFBundleVersion</key>
12
- <string>1.17.1</string>
12
+ <string>1.19.2</string>
13
13
  <key>CFBundleShortVersionString</key>
14
- <string>1.17.1</string>
14
+ <string>1.19.2</string>
15
15
  <key>CFBundleSignature</key>
16
16
  <string>????</string>
17
17
  <key>CFBundlePackageType</key>
@@ -28,9 +28,12 @@ enum COREMLFlags {
28
28
  // dynamic shapes. However, the performance may be negatively impacted if inputs have dynamic shapes.
29
29
  COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008,
30
30
 
31
+ // Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later.
32
+ COREML_FLAG_CREATE_MLPROGRAM = 0x010,
33
+
31
34
  // Keep COREML_FLAG_LAST at the end of the enum definition
32
35
  // And assign the last COREMLFlag to it
33
- COREML_FLAG_LAST = COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES,
36
+ COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM,
34
37
  };
35
38
 
36
39
  #ifdef __cplusplus
@@ -38,7 +38,7 @@
38
38
  *
39
39
  * This value is used by some API functions to behave as this version of the header expects.
40
40
  */
41
- #define ORT_API_VERSION 17
41
+ #define ORT_API_VERSION 19
42
42
 
43
43
  #ifdef __cplusplus
44
44
  extern "C" {
@@ -196,7 +196,10 @@ typedef enum ONNXTensorElementDataType {
196
196
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN, // Non-IEEE floating-point format based on IEEE754 single-precision
197
197
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ, // Non-IEEE floating-point format based on IEEE754 single-precision
198
198
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2, // Non-IEEE floating-point format based on IEEE754 single-precision
199
- ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ // Non-IEEE floating-point format based on IEEE754 single-precision
199
+ ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ, // Non-IEEE floating-point format based on IEEE754 single-precision
200
+ // Int4 types were introduced in ONNX 1.16. See https://onnx.ai/onnx/technical/int4.html
201
+ ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4, // maps to a pair of packed uint4 values (size == 1 byte)
202
+ ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4 // maps to a pair of packed int4 values (size == 1 byte)
200
203
  } ONNXTensorElementDataType;
201
204
 
202
205
  // Synced with onnx TypeProto oneof
@@ -267,7 +270,7 @@ typedef enum OrtOpAttrType {
267
270
  //! @}
268
271
  #define ORT_RUNTIME_CLASS(X) \
269
272
  struct Ort##X; \
270
- typedef struct Ort##X Ort##X;
273
+ typedef struct Ort##X Ort##X
271
274
 
272
275
  /** \addtogroup Global
273
276
  * ONNX Runtime C API
@@ -319,6 +322,12 @@ typedef struct OrtAllocator {
319
322
  void*(ORT_API_CALL* Alloc)(struct OrtAllocator* this_, size_t size); ///< Returns a pointer to an allocated block of `size` bytes
320
323
  void(ORT_API_CALL* Free)(struct OrtAllocator* this_, void* p); ///< Free a block of memory previously allocated with OrtAllocator::Alloc
321
324
  const struct OrtMemoryInfo*(ORT_API_CALL* Info)(const struct OrtAllocator* this_); ///< Return a pointer to an ::OrtMemoryInfo that describes this allocator
325
+ /**
326
+ * @brief Optional allocation function to use for memory allocations made during session initialization.
327
+ * Use this function if you want to separate allocations made by ORT during Run() calls from
328
+ * those made during session initialization. This allows for separate memory management strategies for these allocations.
329
+ */
330
+ void*(ORT_API_CALL* Reserve)(struct OrtAllocator* this_, size_t size); ///< Returns a pointer to an allocated block of `size` bytes
322
331
  } OrtAllocator;
323
332
 
324
333
  typedef void(ORT_API_CALL* OrtLoggingFunction)(
@@ -464,13 +473,13 @@ typedef struct OrtCUDAProviderOptions {
464
473
 
465
474
  /** \brief Enable TunableOp for using.
466
475
  * Set it to 1/0 to enable/disable TunableOp. Otherwise, it is disabled by default.
467
- * This option can be overriden by environment variable ORT_CUDA_TUNABLE_OP_ENABLE.
476
+ * This option can be overridden by environment variable ORT_CUDA_TUNABLE_OP_ENABLE.
468
477
  */
469
478
  int tunable_op_enable;
470
479
 
471
480
  /** \brief Enable TunableOp for tuning.
472
481
  * Set it to 1/0 to enable/disable TunableOp tuning. Otherwise, it is disabled by default.
473
- * This option can be overriden by environment variable ORT_CUDA_TUNABLE_OP_TUNING_ENABLE.
482
+ * This option can be overridden by environment variable ORT_CUDA_TUNABLE_OP_TUNING_ENABLE.
474
483
  */
475
484
  int tunable_op_tuning_enable;
476
485
 
@@ -496,6 +505,7 @@ typedef struct OrtROCMProviderOptions {
496
505
  has_user_compute_stream{},
497
506
  user_compute_stream{},
498
507
  default_memory_arena_cfg{},
508
+ enable_hip_graph{false},
499
509
  tunable_op_enable{false},
500
510
  tunable_op_tuning_enable{false},
501
511
  tunable_op_max_tuning_duration_ms{} {}
@@ -548,15 +558,17 @@ typedef struct OrtROCMProviderOptions {
548
558
  */
549
559
  OrtArenaCfg* default_memory_arena_cfg;
550
560
 
561
+ int enable_hip_graph;
562
+
551
563
  /** \brief Enable TunableOp for using.
552
564
  * Set it to 1/0 to enable/disable TunableOp. Otherwise, it is disabled by default.
553
- * This option can be overriden by environment variable ORT_ROCM_TUNABLE_OP_ENABLE.
565
+ * This option can be overridden by environment variable ORT_ROCM_TUNABLE_OP_ENABLE.
554
566
  */
555
567
  int tunable_op_enable;
556
568
 
557
569
  /** \brief Enable TunableOp for tuning.
558
570
  * Set it to 1/0 to enable/disable TunableOp tuning. Otherwise, it is disabled by default.
559
- * This option can be overriden by environment variable ORT_ROCM_TUNABLE_OP_TUNING_ENABLE.
571
+ * This option can be overridden by environment variable ORT_ROCM_TUNABLE_OP_TUNING_ENABLE.
560
572
  */
561
573
  int tunable_op_tuning_enable;
562
574
 
@@ -605,6 +617,10 @@ typedef struct OrtMIGraphXProviderOptions {
605
617
  int migraphx_int8_enable; // MIGraphX INT8 precision. Default 0 = false, nonzero = true
606
618
  int migraphx_use_native_calibration_table; // MIGraphx INT8 cal table. Default 0 = false, noznero = true
607
619
  const char* migraphx_int8_calibration_table_name; // MIGraphx INT8 calibration table name
620
+ int migraphx_save_compiled_model; // migraphx save compiled model. Default 0 = false, noznero = true
621
+ const char* migraphx_save_model_path; // migraphx model path name
622
+ int migraphx_load_compiled_model; // migraphx int8 cal table. Default 0 = false, noznero = true
623
+ const char* migraphx_load_model_path; // migraphx model path name
608
624
  } OrtMIGraphXProviderOptions;
609
625
 
610
626
  /** \brief OpenVINO Provider Options
@@ -1834,14 +1850,30 @@ struct OrtApi {
1834
1850
 
1835
1851
  /** \brief Used for custom operators, get an input of a kernel
1836
1852
  *
1837
- * \see ::OrtCustomOp
1853
+ * The function attempts fetches the input of the kernel. If the input is optional
1854
+ * and not present, the function returns success and out is set to nullptr.
1855
+ *
1856
+ * \param[in] context ::OrtKernelContext instance
1857
+ * \param[in] index See KernelContext_GetInputCount for boundaries check.
1858
+ * \param[out] out OrtValue if the input is present otherwise is set nullptr
1859
+ *
1860
+ * \snippet{doc} snippets.dox OrtStatus Return Value
1838
1861
  */
1839
1862
  ORT_API2_STATUS(KernelContext_GetInput, _In_ const OrtKernelContext* context, _In_ size_t index,
1840
1863
  _Out_ const OrtValue** out);
1841
1864
 
1842
1865
  /** \brief Used for custom operators, get an output of a kernel
1843
1866
  *
1844
- * \see ::OrtCustomOp
1867
+ * The function attempts fetches the output of the kernel. If the output is optional
1868
+ * and not present, the function returns success and out is set to nullptr.
1869
+ *
1870
+ * \param[in] context ::OrtKernelContext instance
1871
+ * \param[in] index See KernelContext_GetOutputCount for boundaries check.
1872
+ * \param[in] dim_values output dimensions
1873
+ * \param[in] dim_count number of dimensions
1874
+ * \param[out] out a ptr to OrtValue to output otherwise set to nullptr
1875
+ *
1876
+ * \snippet{doc} snippets.dox OrtStatus Return Value
1845
1877
  */
1846
1878
  ORT_API2_STATUS(KernelContext_GetOutput, _Inout_ OrtKernelContext* context, _In_ size_t index,
1847
1879
  _In_ const int64_t* dim_values, size_t dim_count, _Outptr_ OrtValue** out);
@@ -2766,7 +2798,7 @@ struct OrtApi {
2766
2798
  * "initial_growth_chunk_size_bytes": (Possible) Size of the second allocation in the arena.
2767
2799
  * Only relevant if arena strategy is `kNextPowerOfTwo`. Use -1 to allow ORT to choose the default.
2768
2800
  * "max_power_of_two_extend_bytes": The maximum enxtend size if arena strategy is `kNextPowerOfTwo`.
2769
- * It is not an allocation limit, it is only a limit for extention when requested byte is less than the limit.
2801
+ * It is not an allocation limit, it is only a limit for extension when requested byte is less than the limit.
2770
2802
  * When requested bytes is more than the limit, allocator will still return as requested.
2771
2803
  * Use -1 to allow ORT to choose the default 1GB for max_power_of_two_extend_bytes.
2772
2804
  * Ultimately, the allocation size is determined by the allocation memory request.
@@ -2914,7 +2946,7 @@ struct OrtApi {
2914
2946
  *
2915
2947
  * Please refer to https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#cc
2916
2948
  * to know the available keys and values. Key should be in null terminated string format of the member of ::OrtTensorRTProviderOptionsV2
2917
- * and value should be its related range.
2949
+ * and value should be its related range. Recreates the options and only sets the supplied values.
2918
2950
  *
2919
2951
  * For example, key="trt_max_workspace_size" and value="2147483648"
2920
2952
  *
@@ -3410,7 +3442,7 @@ struct OrtApi {
3410
3442
  *
3411
3443
  * Please refer to https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
3412
3444
  * to know the available keys and values. Key should be in null terminated string format of the member of ::OrtCUDAProviderOptionsV2
3413
- * and value should be its related range.
3445
+ * and value should be its related range. Recreates the options and only sets the supplied values.
3414
3446
  *
3415
3447
  * For example, key="device_id" and value="0"
3416
3448
  *
@@ -3483,15 +3515,15 @@ struct OrtApi {
3483
3515
  * \param[in] options
3484
3516
  * \param[in] initializer_names Array of null terminated UTF-8 encoded strings of the initializers names.
3485
3517
  * \param[in] initializers Array of ::OrtValue type
3486
- * \param[in] initializers_num Number of elements in the initializer_names and initializers
3518
+ * \param[in] num_initializers Number of elements in the initializer_names and initializers
3487
3519
  *
3488
3520
  * \snippet{doc} snippets.dox OrtStatus Return Value
3489
3521
  *
3490
3522
  * \since Version 1.12.
3491
3523
  */
3492
3524
  ORT_API2_STATUS(AddExternalInitializers, _In_ OrtSessionOptions* options,
3493
- _In_reads_(input_len) const char* const* initializer_names,
3494
- _In_reads_(input_len) const OrtValue* const* initializers, size_t initializers_num);
3525
+ _In_reads_(num_initializers) const char* const* initializer_names,
3526
+ _In_reads_(num_initializers) const OrtValue* const* initializers, size_t num_initializers);
3495
3527
 
3496
3528
  /** \brief: Create attribute of onnxruntime operator
3497
3529
  *
@@ -3595,6 +3627,7 @@ struct OrtApi {
3595
3627
  * QNN supported keys:
3596
3628
  * "backend_path": file path to QNN backend library.
3597
3629
  * "profiling_level": QNN profiling level, options: "off", "basic", "detailed". Default to off.
3630
+ * "profiling_file_path": QNN profiling file path if ETW not enabled.
3598
3631
  * "rpc_control_latency": QNN RPC control latency.
3599
3632
  * "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
3600
3633
  * "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
@@ -3616,6 +3649,10 @@ struct OrtApi {
3616
3649
  * - "73"
3617
3650
  * - "75"
3618
3651
  * "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
3652
+ "enable_htp_fp16_precision": Only used for float32 model.
3653
+ Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
3654
+ - "0": Default. With fp32 precision.
3655
+ - "1": With fp16 precision.
3619
3656
  *
3620
3657
  * SNPE supported keys:
3621
3658
  * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
@@ -4430,13 +4467,14 @@ struct OrtApi {
4430
4467
  * E.g. a cuda stream or a cublas handle
4431
4468
  *
4432
4469
  * \param context - Kernel context
4433
- * \param resouce_version - Version of the resource
4470
+ * \param resource_version - Version of the resource
4434
4471
  * \param resource_id - Type of resource
4435
4472
  * \param resource - A pointer to returned resource
4436
4473
  *
4437
4474
  * \since Version 1.16.
4438
4475
  */
4439
- ORT_API2_STATUS(KernelContext_GetResource, _In_ const OrtKernelContext* context, _In_ int resouce_version, _In_ int resource_id, _Outptr_ void** resource);
4476
+ ORT_API2_STATUS(KernelContext_GetResource, _In_ const OrtKernelContext* context, _In_ int resource_version,
4477
+ _In_ int resource_id, _Outptr_ void** resource);
4440
4478
 
4441
4479
  /** \brief Set user logging function
4442
4480
  *
@@ -4491,10 +4529,10 @@ struct OrtApi {
4491
4529
  ORT_API2_STATUS(ShapeInferContext_GetAttribute, _In_ const OrtShapeInferContext* context, _In_ const char* attr_name, _Outptr_ const OrtOpAttr** attr);
4492
4530
 
4493
4531
  /**
4494
- * Set type and shape info of an ouput
4532
+ * Set type and shape info of an output
4495
4533
  *
4496
4534
  * \param[in] context
4497
- * \param[in] index The index of the ouput
4535
+ * \param[in] index The index of the output
4498
4536
  * \param[out] info Type shape info of the output
4499
4537
  *
4500
4538
  * \since Version 1.17.
@@ -4566,6 +4604,68 @@ struct OrtApi {
4566
4604
  _In_reads_(num_keys) const char* const* provider_options_keys,
4567
4605
  _In_reads_(num_keys) const char* const* provider_options_values,
4568
4606
  _In_ size_t num_keys);
4607
+
4608
+ /** \brief Append VitisAI provider to session options
4609
+ *
4610
+ * If VitisAI is not available (due to a non VitisAI enabled build, or if VitisAI is not installed on the system), this function will return failure.
4611
+ *
4612
+ * \param[in] options
4613
+ * \param[in] provider_options_keys
4614
+ * \param[in] provider_options_values
4615
+ * \param[in] num_keys
4616
+ *
4617
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4618
+ */
4619
+ ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_VitisAI,
4620
+ _In_ OrtSessionOptions* options,
4621
+ _In_reads_(num_keys) const char* const* provider_options_keys,
4622
+ _In_reads_(num_keys) const char* const* provider_options_values,
4623
+ _In_ size_t num_keys);
4624
+
4625
+ /** \brief Get scratch buffer from the corresponding allocator under the sepcific OrtMemoryInfo object.
4626
+ * NOTE: callers are responsible to release this scratch buffer from the corresponding allocator
4627
+ * \param[in] context OrtKernelContext instance
4628
+ * \param[in] mem_info OrtMemoryInfo instance
4629
+ * \param[in] count_or_bytes How many bytes is this scratch buffer
4630
+ * \param[out] out A pointer to the scrach buffer
4631
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4632
+ */
4633
+ ORT_API2_STATUS(KernelContext_GetScratchBuffer, _In_ const OrtKernelContext* context, _In_ const OrtMemoryInfo* mem_info, _In_ size_t count_or_bytes, _Outptr_ void** out);
4634
+
4635
+ /** \brief Get allocator from KernelInfo for a specific memory type. Please use C API ReleaseAllocator to release out object
4636
+ *
4637
+ * \param[in] info OrtKernelInfo instance
4638
+ * \param[in] mem_type OrtMemType object
4639
+ * \param[out] out A pointer to OrtAllocator
4640
+ *
4641
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4642
+ */
4643
+ ORT_API2_STATUS(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out);
4644
+
4645
+ /** \brief Replace initialized Tensors with external data with the provided files in memory
4646
+ *
4647
+ * The function will find the initialized TensorProtos with external data in the graph with the provided
4648
+ * external file names and the file content in memory. The API gets the external file name, offset, data length
4649
+ * from TensorProto, and locate the tensor data from the file in memory buffer.
4650
+ * It creates a Tensor to replace the existing Tensor in graph. The replacement
4651
+ * will occur before any of the optimizations take place. The data will be copied into the graph
4652
+ * since TensorProto can't refer to the user provided buffers.
4653
+ *
4654
+ * \param[in] options
4655
+ * \param[in] external_initializer_file_names Array of null terminated UTF-8 encoded strings of the file names
4656
+ * which holds the external initializers.
4657
+ * \param[in] external_initializer_file_buffer_array Array of pointers to the buffer of the file content.
4658
+ * The buffer can be freed after session creation.
4659
+ * \param[in] external_initializer_file_lengths Array of size_t to indicate the length of file content
4660
+ * \param[in] num_external_initializer_files Number of external files
4661
+ *
4662
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4663
+ */
4664
+ ORT_API2_STATUS(AddExternalInitializersFromFilesInMemory, _In_ OrtSessionOptions* options,
4665
+ _In_reads_(num_external_initializer_files) const ORTCHAR_T* const* external_initializer_file_names,
4666
+ _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array,
4667
+ _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths,
4668
+ size_t num_external_initializer_files);
4569
4669
  };
4570
4670
 
4571
4671
  /*
@@ -4663,6 +4763,21 @@ struct OrtCustomOp {
4663
4763
  // Get start range
4664
4764
  int(ORT_API_CALL* GetStartVersion)(_In_ const struct OrtCustomOp* op);
4665
4765
  int(ORT_API_CALL* GetEndVersion)(_In_ const struct OrtCustomOp* op);
4766
+
4767
+ // Get the inplace_map that defines which output can reuse which input
4768
+ // Callers will provide 2 raw int* and pass in their address, this function will fill these 2 arrays
4769
+ // when return, output (*output_index)[i] may reuse the input (*input_index[i]).
4770
+ // The return value is the size of these 2 arrays.
4771
+ // Callers are responsible to delete these 2 arrays after use by calling OrtCustomOp::ReleaseMayInplace().
4772
+ size_t(ORT_API_CALL* GetMayInplace)(_Out_ int** input_index, _Out_ int** output_index);
4773
+
4774
+ // Release the pointer input_index and output_index allocated from GetMayInplace() function.
4775
+ // If GetMayInplace() is defined, this function MUST be defined as well.
4776
+ void(ORT_API_CALL* ReleaseMayInplace)(_Frees_ptr_opt_ int* input_index, _Frees_ptr_opt_ int* output_index);
4777
+
4778
+ // Same as GetMayInplace() and ReleaseMayInplace()
4779
+ size_t(ORT_API_CALL* GetAliasMap)(_Out_ int** input_index, _Out_ int** output_index);
4780
+ void(ORT_API_CALL* ReleaseAliasMap)(_Frees_ptr_opt_ int* input_index, _Frees_ptr_opt_ int* output_index);
4666
4781
  };
4667
4782
 
4668
4783
  /*
@@ -873,6 +873,9 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {
873
873
 
874
874
  SessionOptionsImpl& AddInitializer(const char* name, const OrtValue* ort_val); ///< Wraps OrtApi::AddInitializer
875
875
  SessionOptionsImpl& AddExternalInitializers(const std::vector<std::string>& names, const std::vector<Value>& ort_values); ///< Wraps OrtApi::AddExternalInitializers
876
+ SessionOptionsImpl& AddExternalInitializersFromFilesInMemory(const std::vector<std::basic_string<ORTCHAR_T>>& external_initializer_file_names,
877
+ const std::vector<char*>& external_initializer_file_buffer_array,
878
+ const std::vector<size_t>& external_initializer_file_lengths); ///< Wraps OrtApi::AddExternalInitializersFromFilesInMemory
876
879
 
877
880
  SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA
878
881
  SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2
@@ -901,6 +904,9 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {
901
904
  SessionOptionsImpl& RegisterCustomOpsLibrary(const ORTCHAR_T* library_name, const CustomOpConfigs& custom_op_configs = {});
902
905
 
903
906
  SessionOptionsImpl& RegisterCustomOpsUsingFunction(const char* function_name); ///< Wraps OrtApi::RegisterCustomOpsUsingFunction
907
+
908
+ ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_VitisAI
909
+ SessionOptionsImpl& AppendExecutionProvider_VitisAI(const std::unordered_map<std::string, std::string>& provider_options = {});
904
910
  };
905
911
  } // namespace detail
906
912
 
@@ -2052,7 +2058,11 @@ struct KernelContext {
2052
2058
  explicit KernelContext(OrtKernelContext* context);
2053
2059
  size_t GetInputCount() const;
2054
2060
  size_t GetOutputCount() const;
2061
+ // If input is optional and is not present, the method returns an empty ConstValue
2062
+ // which can be compared to nullptr.
2055
2063
  ConstValue GetInput(size_t index) const;
2064
+ // If output is optional and is not present, the method returns an empty UnownedValue
2065
+ // which can be compared to nullptr.
2056
2066
  UnownedValue GetOutput(size_t index, const int64_t* dim_values, size_t dim_count) const;
2057
2067
  UnownedValue GetOutput(size_t index, const std::vector<int64_t>& dims) const;
2058
2068
  void* GetGPUComputeStream() const;
@@ -2165,8 +2175,8 @@ struct Op : detail::Base<OrtOp> {
2165
2175
  /// </summary>
2166
2176
  struct ShapeInferContext {
2167
2177
  struct SymbolicInteger {
2168
- SymbolicInteger(int64_t i) : i_(i), is_int_(true){};
2169
- SymbolicInteger(const char* s) : s_(s), is_int_(false){};
2178
+ SymbolicInteger(int64_t i) : i_(i), is_int_(true) {};
2179
+ SymbolicInteger(const char* s) : s_(s), is_int_(false) {};
2170
2180
  SymbolicInteger(const SymbolicInteger&) = default;
2171
2181
  SymbolicInteger(SymbolicInteger&&) = default;
2172
2182
 
@@ -2206,7 +2216,7 @@ struct ShapeInferContext {
2206
2216
 
2207
2217
  size_t GetInputCount() const { return input_shapes_.size(); }
2208
2218
 
2209
- Status SetOutputShape(size_t indice, const Shape& shape);
2219
+ Status SetOutputShape(size_t indice, const Shape& shape, ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
2210
2220
 
2211
2221
  int64_t GetAttrInt(const char* attr_name);
2212
2222
 
@@ -2294,6 +2304,11 @@ struct CustomOpBase : OrtCustomOp {
2294
2304
  OrtCustomOp::GetEndVersion = [](const OrtCustomOp* this_) {
2295
2305
  return static_cast<const TOp*>(this_)->end_ver_;
2296
2306
  };
2307
+
2308
+ OrtCustomOp::GetMayInplace = nullptr;
2309
+ OrtCustomOp::ReleaseMayInplace = nullptr;
2310
+ OrtCustomOp::GetAliasMap = nullptr;
2311
+ OrtCustomOp::ReleaseAliasMap = nullptr;
2297
2312
  }
2298
2313
 
2299
2314
  // Default implementation of GetExecutionProviderType that returns nullptr to default to the CPU provider
@@ -7,17 +7,27 @@
7
7
  // These are the inline implementations of the C++ header APIs. They're in this separate file as to not clutter
8
8
  // the main C++ file with implementation details.
9
9
 
10
- #include <cstring>
10
+ #include <algorithm>
11
11
  #include <functional>
12
-
13
- #define RETURN_ON_API_FAIL(expression) \
14
- { \
15
- auto err = (expression); \
16
- if (err) { \
17
- return Status(err); \
18
- } \
12
+ #include <iterator>
13
+ #include <type_traits>
14
+
15
+ // Convert OrtStatus to Ort::Status and return
16
+ // instead of throwing
17
+ #define ORT_CXX_RETURN_ON_API_FAIL(expression) \
18
+ { \
19
+ auto ort_status = (expression); \
20
+ if (ort_status) { \
21
+ return Ort::Status(ort_status); \
22
+ } \
19
23
  }
20
24
 
25
+ #ifdef __cpp_if_constexpr
26
+ #define ORT_CXX_IF_CONSTEXPR if constexpr
27
+ #else
28
+ #define ORT_CXX_IF_CONSTEXPR if
29
+ #endif
30
+
21
31
  namespace Ort {
22
32
 
23
33
  namespace detail {
@@ -771,6 +781,27 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AddExternalInitializers(con
771
781
  return *this;
772
782
  }
773
783
 
784
+ template <typename T>
785
+ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AddExternalInitializersFromFilesInMemory(const std::vector<std::basic_string<ORTCHAR_T>>& file_names,
786
+ const std::vector<char*>& buffer_array,
787
+ const std::vector<size_t>& file_lengths) {
788
+ const size_t inputs_num = file_names.size();
789
+ if (inputs_num != buffer_array.size()) {
790
+ ORT_CXX_API_THROW("Expecting names and buffer_array to have the same length", ORT_INVALID_ARGUMENT);
791
+ }
792
+ if (inputs_num != file_lengths.size()) {
793
+ ORT_CXX_API_THROW("Expecting names and file_lengths to have the same length", ORT_INVALID_ARGUMENT);
794
+ }
795
+ std::vector<const ORTCHAR_T*> names_ptr;
796
+ names_ptr.reserve(inputs_num);
797
+ for (size_t i = 0; i < inputs_num; ++i) {
798
+ names_ptr.push_back(file_names[i].c_str());
799
+ }
800
+ ThrowOnError(GetApi().AddExternalInitializersFromFilesInMemory(this->p_, names_ptr.data(), buffer_array.data(),
801
+ file_lengths.data(), inputs_num));
802
+ return *this;
803
+ }
804
+
774
805
  template <typename T>
775
806
  inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options) {
776
807
  ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_CUDA(this->p_, &provider_options));
@@ -885,6 +916,25 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_Ope
885
916
  return *this;
886
917
  }
887
918
 
919
+ template <typename T>
920
+ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_VitisAI(const std::unordered_map<std::string, std::string>& provider_options) {
921
+ auto num_entries = provider_options.size();
922
+ std::vector<const char*> keys, values;
923
+ if (num_entries > 0) {
924
+ keys.reserve(num_entries);
925
+ values.reserve(num_entries);
926
+
927
+ for (const auto& entry : provider_options) {
928
+ keys.push_back(entry.first.c_str());
929
+ values.push_back(entry.second.c_str());
930
+ }
931
+ }
932
+
933
+ ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_VitisAI(this->p_, keys.data(), values.data(), num_entries));
934
+
935
+ return *this;
936
+ }
937
+
888
938
  template <typename T>
889
939
  inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::RegisterCustomOpsLibrary(const ORTCHAR_T* library_name,
890
940
  const CustomOpConfigs& custom_op_configs) {
@@ -1932,7 +1982,9 @@ inline ShapeInferContext::ShapeInferContext(const OrtApi* ort_api,
1932
1982
  TensorTypeAndShapeInfo type_shape_info(info);
1933
1983
  auto integer_shape = type_shape_info.GetShape();
1934
1984
  std::vector<const char*> symbolic_shape(integer_shape.size(), {});
1935
- type_shape_info.GetSymbolicDimensions(&symbolic_shape[0], integer_shape.size());
1985
+ if (!integer_shape.empty()) {
1986
+ type_shape_info.GetSymbolicDimensions(&symbolic_shape[0], integer_shape.size());
1987
+ }
1936
1988
  Shape shape;
1937
1989
  for (size_t ith = 0; ith < integer_shape.size(); ++ith) {
1938
1990
  if (symbolic_shape[ith] && std::string{symbolic_shape[ith]}.size() > 0) {
@@ -1946,9 +1998,10 @@ inline ShapeInferContext::ShapeInferContext(const OrtApi* ort_api,
1946
1998
  }
1947
1999
  }
1948
2000
 
1949
- inline Status ShapeInferContext::SetOutputShape(size_t indice, const Shape& shape) {
2001
+ inline Status ShapeInferContext::SetOutputShape(size_t indice, const Shape& shape, ONNXTensorElementDataType type) {
1950
2002
  OrtTensorTypeAndShapeInfo* info = {};
1951
- RETURN_ON_API_FAIL(ort_api_->CreateTensorTypeAndShapeInfo(&info));
2003
+ ORT_CXX_RETURN_ON_API_FAIL(ort_api_->CreateTensorTypeAndShapeInfo(&info));
2004
+ ORT_CXX_RETURN_ON_API_FAIL(ort_api_->SetTensorElementType(info, type));
1952
2005
 
1953
2006
  using InfoPtr = std::unique_ptr<OrtTensorTypeAndShapeInfo, std::function<void(OrtTensorTypeAndShapeInfo*)>>;
1954
2007
 
@@ -1961,7 +2014,7 @@ inline Status ShapeInferContext::SetOutputShape(size_t indice, const Shape& shap
1961
2014
 
1962
2015
  for (const auto dim : shape) {
1963
2016
  if (dim.IsInt()) {
1964
- integer_dims.push_back(dim.IsInt());
2017
+ integer_dims.push_back(dim.AsInt());
1965
2018
  symbolic_dims.push_back("");
1966
2019
  } else {
1967
2020
  if (!dim.AsSym() || std::string{dim.AsSym()}.empty()) {
@@ -1972,9 +2025,9 @@ inline Status ShapeInferContext::SetOutputShape(size_t indice, const Shape& shap
1972
2025
  }
1973
2026
  }
1974
2027
 
1975
- RETURN_ON_API_FAIL(ort_api_->SetDimensions(info, integer_dims.data(), integer_dims.size()));
1976
- RETURN_ON_API_FAIL(ort_api_->SetSymbolicDimensions(info, symbolic_dims.data(), symbolic_dims.size()));
1977
- RETURN_ON_API_FAIL(ort_api_->ShapeInferContext_SetOutputTypeShape(ctx_, indice, info));
2028
+ ORT_CXX_RETURN_ON_API_FAIL(ort_api_->SetDimensions(info, integer_dims.data(), integer_dims.size()));
2029
+ ORT_CXX_RETURN_ON_API_FAIL(ort_api_->SetSymbolicDimensions(info, symbolic_dims.data(), symbolic_dims.size()));
2030
+ ORT_CXX_RETURN_ON_API_FAIL(ort_api_->ShapeInferContext_SetOutputTypeShape(ctx_, indice, info));
1978
2031
  return Status{nullptr};
1979
2032
  }
1980
2033