com.github.asus4.onnxruntime 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/Plugins/Android/onnxruntime-android.aar +0 -0
  2. package/Plugins/Linux/x64/libonnxruntime.so +0 -0
  3. package/Plugins/Windows/x64/onnxruntime.dll +0 -0
  4. package/Plugins/iOS~/onnxruntime.xcframework/Info.plist +8 -8
  5. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/coreml_provider_factory.h +6 -1
  6. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_c_api.h +92 -7
  7. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_cxx_api.h +48 -0
  8. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_cxx_inline.h +42 -0
  9. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_float16.h +0 -5
  10. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_session_options_config_keys.h +10 -0
  11. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Info.plist +2 -2
  12. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/onnxruntime +0 -0
  13. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/coreml_provider_factory.h +6 -1
  14. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_c_api.h +92 -7
  15. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_cxx_api.h +48 -0
  16. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_cxx_inline.h +42 -0
  17. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_float16.h +0 -5
  18. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_session_options_config_keys.h +10 -0
  19. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Info.plist +2 -2
  20. package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/onnxruntime +0 -0
  21. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/coreml_provider_factory.h +6 -1
  22. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_c_api.h +92 -7
  23. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_cxx_api.h +48 -0
  24. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_cxx_inline.h +42 -0
  25. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_float16.h +0 -5
  26. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_session_options_config_keys.h +10 -0
  27. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Resources/Info.plist +2 -2
  28. package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/onnxruntime +0 -0
  29. package/Plugins/macOS/libonnxruntime.dylib +0 -0
  30. package/README.md +5 -5
  31. package/Runtime/AssemblyInfo.shared.cs +1 -2
  32. package/Runtime/DisposableNamedOnnxValue.shared.cs +4 -4
  33. package/Runtime/FixedBufferOnnxValue.shared.cs +2 -2
  34. package/Runtime/InferenceSession.shared.cs +2 -3
  35. package/Runtime/ManagedProjections.shared.cs +1 -1
  36. package/Runtime/NamedOnnxValue.shared.cs +6 -17
  37. package/Runtime/NativeMethods.shared.cs +87 -10
  38. package/Runtime/NativeOnnxValueHelper.shared.cs +1 -1
  39. package/Runtime/OrtEnv.shared.cs +1 -1
  40. package/Runtime/OrtFloat16.shared.cs +42 -39
  41. package/Runtime/OrtIoBinding.shared.cs +1 -1
  42. package/Runtime/OrtLoraAdapter.shared.cs +81 -0
  43. package/Runtime/OrtLoraAdapter.shared.cs.meta +11 -0
  44. package/Runtime/OrtValue.shared.cs +3 -3
  45. package/Runtime/OrtValueTensor.shared.cs +1 -1
  46. package/Runtime/ProviderOptions.shared.cs +8 -5
  47. package/Runtime/RunOptions.shared.cs +12 -0
  48. package/Runtime/SessionOptions.shared.cs +5 -5
  49. package/Runtime/Tensors/ArrayTensorExtensions.shared.cs +1 -1
  50. package/Runtime/Training/NativeTrainingMethods.shared.cs +1 -1
  51. package/Runtime/Training/TrainingSession.shared.cs +2 -2
  52. package/package.json +1 -1
Binary file
Binary file
@@ -6,7 +6,7 @@
6
6
  <array>
7
7
  <dict>
8
8
  <key>LibraryIdentifier</key>
9
- <string>macos-arm64_x86_64</string>
9
+ <string>ios-arm64_x86_64-simulator</string>
10
10
  <key>LibraryPath</key>
11
11
  <string>onnxruntime.framework</string>
12
12
  <key>SupportedArchitectures</key>
@@ -15,34 +15,34 @@
15
15
  <string>x86_64</string>
16
16
  </array>
17
17
  <key>SupportedPlatform</key>
18
- <string>macos</string>
18
+ <string>ios</string>
19
+ <key>SupportedPlatformVariant</key>
20
+ <string>simulator</string>
19
21
  </dict>
20
22
  <dict>
21
23
  <key>LibraryIdentifier</key>
22
- <string>ios-arm64_x86_64-simulator</string>
24
+ <string>ios-arm64</string>
23
25
  <key>LibraryPath</key>
24
26
  <string>onnxruntime.framework</string>
25
27
  <key>SupportedArchitectures</key>
26
28
  <array>
27
29
  <string>arm64</string>
28
- <string>x86_64</string>
29
30
  </array>
30
31
  <key>SupportedPlatform</key>
31
32
  <string>ios</string>
32
- <key>SupportedPlatformVariant</key>
33
- <string>simulator</string>
34
33
  </dict>
35
34
  <dict>
36
35
  <key>LibraryIdentifier</key>
37
- <string>ios-arm64</string>
36
+ <string>macos-arm64_x86_64</string>
38
37
  <key>LibraryPath</key>
39
38
  <string>onnxruntime.framework</string>
40
39
  <key>SupportedArchitectures</key>
41
40
  <array>
42
41
  <string>arm64</string>
42
+ <string>x86_64</string>
43
43
  </array>
44
44
  <key>SupportedPlatform</key>
45
- <string>ios</string>
45
+ <string>macos</string>
46
46
  </dict>
47
47
  </array>
48
48
  <key>CFBundlePackageType</key>
@@ -31,9 +31,14 @@ enum COREMLFlags {
31
31
  // Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later.
32
32
  COREML_FLAG_CREATE_MLPROGRAM = 0x010,
33
33
 
34
+ // Exclude ANE as sometimes this decreases performance
35
+ // https://developer.apple.com/documentation/coreml/mlcomputeunits?language=objc
36
+ // there are four compute units:
37
+ // MLComputeUnitsCPUAndNeuralEngine|MLComputeUnitsCPUAndGPU|MLComputeUnitsCPUOnly|MLComputeUnitsAll
38
+ COREML_FLAG_USE_CPU_AND_GPU = 0x020,
34
39
  // Keep COREML_FLAG_LAST at the end of the enum definition
35
40
  // And assign the last COREMLFlag to it
36
- COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM,
41
+ COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU,
37
42
  };
38
43
 
39
44
  #ifdef __cplusplus
@@ -38,7 +38,7 @@
38
38
  *
39
39
  * This value is used by some API functions to behave as this version of the header expects.
40
40
  */
41
- #define ORT_API_VERSION 19
41
+ #define ORT_API_VERSION 20
42
42
 
43
43
  #ifdef __cplusplus
44
44
  extern "C" {
@@ -304,6 +304,7 @@ ORT_RUNTIME_CLASS(Op);
304
304
  ORT_RUNTIME_CLASS(OpAttr);
305
305
  ORT_RUNTIME_CLASS(Logger);
306
306
  ORT_RUNTIME_CLASS(ShapeInferContext);
307
+ ORT_RUNTIME_CLASS(LoraAdapter);
307
308
 
308
309
  #ifdef _WIN32
309
310
  typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
@@ -621,6 +622,7 @@ typedef struct OrtMIGraphXProviderOptions {
621
622
  const char* migraphx_save_model_path; // migraphx model path name
622
623
  int migraphx_load_compiled_model; // migraphx int8 cal table. Default 0 = false, nonzero = true
623
624
  const char* migraphx_load_model_path; // migraphx model path name
625
+ bool migraphx_exhaustive_tune; // migraphx tuned compile Default = false
624
626
  } OrtMIGraphXProviderOptions;
625
627
 
626
628
  /** \brief OpenVINO Provider Options
@@ -643,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions {
643
645
  * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
644
646
  */
645
647
  const char* device_type;
646
- unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
648
+ unsigned char enable_npu_fast_compile;
647
649
  const char* device_id;
648
650
  size_t num_of_threads; ///< 0 = Use default number of threads
649
651
  const char* cache_dir; // path is set to empty by default
@@ -3649,10 +3651,17 @@ struct OrtApi {
3649
3651
  * - "73"
3650
3652
  * - "75"
3651
3653
  * "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
3652
- "enable_htp_fp16_precision": Only used for float32 model.
3653
- Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
3654
- - "0": Default. With fp32 precision.
3655
- - "1": With fp16 precision.
3654
+ * "enable_htp_fp16_precision": Used for float32 model for HTP backend.
3655
+ * Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
3656
+ * - "0": With fp32 precision.
3657
+ * - "1": Default. With fp16 precision.
3658
+ * "enable_htp_weight_sharing": Enable QNN weight sharing feature while compiling multiple graphs into one QNN context.
3659
+ * - "0": Default. Disabled.
3660
+ * - "1": Enabled.
3661
+ * "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
3662
+ * execution provider (typically CPU EP).
3663
+ * - "0": Default. Disabled. QNN EP will handle quantization and dequantization of graph I/O.
3664
+ * - "1": Enabled.
3656
3665
  *
3657
3666
  * SNPE supported keys:
3658
3667
  * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
@@ -3778,7 +3787,7 @@ struct OrtApi {
3778
3787
 
3779
3788
  /** \brief Release an OrtCANNProviderOptions
3780
3789
  *
3781
- * \param[in] the pointer of OrtCANNProviderOptions which will been deleted
3790
+ * \param[in] input The pointer of OrtCANNProviderOptions which will be deleted
3782
3791
  *
3783
3792
  * \since Version 1.13.
3784
3793
  */
@@ -4666,6 +4675,82 @@ struct OrtApi {
4666
4675
  _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array,
4667
4676
  _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths,
4668
4677
  size_t num_external_initializer_files);
4678
+
4679
+ /** \brief Create an OrtLoraAdapter
4680
+ *
4681
+ * The function attempts to locate the file specified by adapter_file_path, read it and create an OrtLoraAdapter
4682
+ * instance. The adapter_file_path should be a valid path to a file that contains a valid Lora Adapter
4683
+ * format. The function attempts to validate the format at load time. The file will always be memory mapped, unless
4684
+ * the platform does not support memory mapping, in which case the file will be read into memory.
4685
+ *
4686
+ * \param[in] adapter_file_path adapter file path.
4687
+ * \param[in] allocator optional pointer to a device allocator. If specified
4688
+ * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU.
4689
+ * The data would still be copied to device if required by the model at inference time.
4690
+ * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with
4691
+ * OrtApi::ReleaseLoraAdapter.
4692
+ *
4693
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4694
+ */
4695
+ ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator,
4696
+ _Outptr_ OrtLoraAdapter** out);
4697
+
4698
+ /** \brief Create an OrtLoraAdapter
4699
+ *
4700
+ * The function copies the bytes from the array and creates an OrtLoraAdapter instance.
4701
+ *
4702
+ *
4703
+ * \param[in] bytes pointer to a valid Lora Adapter format buffer.
4704
+ * \param[in] num_bytes length of bytes buffer.
4705
+ * \param[in] allocator optional pointer to a device allocator. If specified
4706
+ * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU.
4707
+ * The data would still be copied to device if required by the model at inference time.
4708
+ * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with
4709
+ * OrtApi::ReleaseLoraAdapter.
4710
+ *
4711
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4712
+ */
4713
+ ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator,
4714
+ _Outptr_ OrtLoraAdapter** out);
4715
+
4716
+ /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter
4717
+ */
4718
+ ORT_CLASS_RELEASE(LoraAdapter);
4719
+
4720
+ /** \brief Add the Lora Adapter to the list of active adapters.
4721
+ *
4722
+ * The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with
4723
+ * OrtApi::CreateLoraAdapter or FromArray. The Lora Adapter will be used by the session to run the model.
4724
+ * The instance of the OrtRunOptions can then be used to customize the Run() calls.
4725
+ * More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different
4726
+ * Lora adapters that will be active at the same time must not overlap.
4727
+ * This setting does not affect RunWithBinding.
4728
+ *
4729
+ * \param[in] options OrtRunOptions instance
4730
+ * \param[in] adapter OrtLoraAdapter instance
4731
+ *
4732
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4733
+ */
4734
+ ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);
4735
+
4736
+ /// @}
4737
+ /// \name OrtEpDynamicOptions
4738
+ /// @{
4739
+
4740
+ /** \brief Set DynamicOptions for EPs (Execution Providers)
4741
+ *
4742
+ * Valid options can be found in `include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h`
4743
+ * Look for `kOrtEpDynamicOptions`
4744
+ *
4745
+ * \param[in] sess OrtSession
4746
+ * \param[in] keys Array of null terminated UTF8 encoded strings of EP dynamic option keys
4747
+ * \param[in] values Array of null terminated UTF8 encoded strings of EP dynamic option values
4748
+ * \param[in] kv_len Number of elements in the keys and values arrays
4749
+ *
4750
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4751
+ */
4752
+ ORT_API2_STATUS(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
4753
+ _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);
4669
4754
  };
4670
4755
 
4671
4756
  /*
@@ -508,6 +508,7 @@ ORT_DEFINE_RELEASE(CustomOpDomain);
508
508
  ORT_DEFINE_RELEASE(ThreadingOptions);
509
509
  ORT_DEFINE_RELEASE(Env);
510
510
  ORT_DEFINE_RELEASE(RunOptions);
511
+ ORT_DEFINE_RELEASE(LoraAdapter);
511
512
  ORT_DEFINE_RELEASE(Session);
512
513
  ORT_DEFINE_RELEASE(SessionOptions);
513
514
  ORT_DEFINE_RELEASE(TensorTypeAndShapeInfo);
@@ -736,6 +737,32 @@ struct CustomOpDomain : detail::Base<OrtCustomOpDomain> {
736
737
  void Add(const OrtCustomOp* op); ///< Wraps CustomOpDomain_Add
737
738
  };
738
739
 
740
+ /// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file
741
+ struct LoraAdapter : detail::Base<OrtLoraAdapter> {
742
+ using Base = detail::Base<OrtLoraAdapter>;
743
+ using Base::Base;
744
+
745
+ explicit LoraAdapter(std::nullptr_t) {} ///< Create an empty LoraAdapter object, must be assigned a valid one to be used
746
+ /// \brief Wraps OrtApi::CreateLoraAdapter
747
+ ///
748
+ /// The function attempts to load the adapter from the specified file
749
+ /// \param adapter_path The path to the Lora adapter
750
+ /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
751
+ /// be copied to device if required by the model at inference time.
752
+ static LoraAdapter CreateLoraAdapter(const std::basic_string<ORTCHAR_T>& adapter_path,
753
+ OrtAllocator* allocator);
754
+
755
+ /// \brief Wraps OrtApi::CreateLoraAdapterFromArray
756
+ ///
757
+ /// The function attempts to load the adapter from the specified byte array.
758
+ /// \param bytes The byte array containing data in the LoraAdapter file format
759
+ /// \param num_bytes The number of bytes in the byte array
760
+ /// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
761
+ /// be copied to device if required by the model at inference time.
762
+ static LoraAdapter CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes,
763
+ OrtAllocator* allocator);
764
+ };
765
+
739
766
  /** \brief RunOptions
740
767
  *
741
768
  */
@@ -766,6 +793,14 @@ struct RunOptions : detail::Base<OrtRunOptions> {
766
793
  * Wraps OrtApi::RunOptionsUnsetTerminate
767
794
  */
768
795
  RunOptions& UnsetTerminate();
796
+
797
+ /** \brief Add the LoraAdapter to the list of active adapters.
798
+ * The setting does not affect RunWithBinding() calls.
799
+ *
800
+ * Wraps OrtApi::RunOptionsAddActiveLoraAdapter
801
+ * \param adapter The LoraAdapter to be used as the active adapter
802
+ */
803
+ RunOptions& AddActiveLoraAdapter(const LoraAdapter& adapter);
769
804
  };
770
805
 
771
806
  namespace detail {
@@ -1105,6 +1140,19 @@ struct SessionImpl : ConstSessionImpl<T> {
1105
1140
  * The OrtAllocator instances must be valid at the point of memory release.
1106
1141
  */
1107
1142
  AllocatedStringPtr EndProfilingAllocated(OrtAllocator* allocator); ///< Wraps OrtApi::SessionEndProfiling
1143
+
1144
+ /** \brief Set DynamicOptions for EPs (Execution Providers)
1145
+ *
1146
+ * Wraps OrtApi::SetEpDynamicOptions
1147
+ *
1148
+ * Valid options can be found in `include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h`
1149
+ * Look for `kOrtEpDynamicOptions`
1150
+ *
1151
+ * \param[in] keys Array of null terminated UTF8 encoded strings of EP dynamic option keys
1152
+ * \param[in] values Array of null terminated UTF8 encoded strings of EP dynamic option values
1153
+ * \param[in] kv_len Number of elements in the keys and values arrays
1154
+ */
1155
+ void SetEpDynamicOptions(const char* const* keys, const char* const* values, size_t kv_len);
1108
1156
  };
1109
1157
 
1110
1158
  } // namespace detail
@@ -557,6 +557,20 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) {
557
557
  ThrowOnError(GetApi().CustomOpDomain_Add(p_, op));
558
558
  }
559
559
 
560
+ inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string<ORTCHAR_T>& adapter_path,
561
+ OrtAllocator* allocator) {
562
+ OrtLoraAdapter* p;
563
+ ThrowOnError(GetApi().CreateLoraAdapter(adapter_path.c_str(), allocator, &p));
564
+ return LoraAdapter{p};
565
+ }
566
+
567
+ inline LoraAdapter LoraAdapter::CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes,
568
+ OrtAllocator* allocator) {
569
+ OrtLoraAdapter* p;
570
+ ThrowOnError(GetApi().CreateLoraAdapterFromArray(bytes, num_bytes, allocator, &p));
571
+ return LoraAdapter{p};
572
+ }
573
+
560
574
  inline RunOptions::RunOptions() {
561
575
  ThrowOnError(GetApi().CreateRunOptions(&p_));
562
576
  }
@@ -609,6 +623,11 @@ inline RunOptions& RunOptions::UnsetTerminate() {
609
623
  return *this;
610
624
  }
611
625
 
626
+ inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter) {
627
+ ThrowOnError(GetApi().RunOptionsAddActiveLoraAdapter(p_, adapter));
628
+ return *this;
629
+ }
630
+
612
631
  namespace detail {
613
632
 
614
633
  template <typename T>
@@ -1074,6 +1093,11 @@ inline AllocatedStringPtr SessionImpl<T>::EndProfilingAllocated(OrtAllocator* al
1074
1093
  return AllocatedStringPtr(out, detail::AllocatedFree(allocator));
1075
1094
  }
1076
1095
 
1096
+ template <typename T>
1097
+ inline void SessionImpl<T>::SetEpDynamicOptions(const char* const* keys, const char* const* values, size_t kv_len) {
1098
+ ThrowOnError(GetApi().SetEpDynamicOptions(this->p_, keys, values, kv_len));
1099
+ }
1100
+
1077
1101
  } // namespace detail
1078
1102
 
1079
1103
  inline SessionOptions::SessionOptions() {
@@ -2044,6 +2068,9 @@ inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_n
2044
2068
  int64_t i = {};
2045
2069
  size_t out = {};
2046
2070
  // first call to get the bytes needed
2071
+ // 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
2072
+ // 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the ints (returns status == null on success).
2073
+ // 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&i) of size 1 just in case there is only 1 int. In this case, status == nullptr and we need to return {i}.
2047
2074
  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, &i, sizeof(i), &out);
2048
2075
  if (status) {
2049
2076
  size_t num_i = out / sizeof(int64_t);
@@ -2051,6 +2078,9 @@ inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_n
2051
2078
  Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, ints.data(), out, &out));
2052
2079
  return ints;
2053
2080
  } else {
2081
+ if (out == 0u) {
2082
+ return {};
2083
+ }
2054
2084
  return {i};
2055
2085
  }
2056
2086
  }
@@ -2068,6 +2098,9 @@ inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* at
2068
2098
  float f = {};
2069
2099
  size_t out = {};
2070
2100
  // first call to get the bytes needed
2101
+ // 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
2102
+ // 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the floats (returns status == null on success).
2103
+ // 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&f) of size 1 just in case there is only 1 float. In this case, status == nullptr and we need to return {f}.
2071
2104
  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, &f, sizeof(f), &out);
2072
2105
  if (status) {
2073
2106
  size_t num_f = out / sizeof(float);
@@ -2075,6 +2108,9 @@ inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* at
2075
2108
  Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, floats.data(), out, &out));
2076
2109
  return floats;
2077
2110
  } else {
2111
+ if (out == 0u) {
2112
+ return {};
2113
+ }
2078
2114
  return {f};
2079
2115
  }
2080
2116
  }
@@ -2099,6 +2135,9 @@ inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char*
2099
2135
  char c = {};
2100
2136
  size_t out = {};
2101
2137
  // first call to get the bytes needed
2138
+ // 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
2139
+ // 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the strings (returns status == null on success).
2140
+ // 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&c) of size 1 just in case the data fits in a single char. In this case, status == nullptr and we need to return {std::string{c}}.
2102
2141
  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRINGS, &c, sizeof(char), &out);
2103
2142
  if (status) {
2104
2143
  std::vector<char> chars(out, '\0');
@@ -2115,6 +2154,9 @@ inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char*
2115
2154
  }
2116
2155
  return strings;
2117
2156
  } else {
2157
+ if (out == 0u) {
2158
+ return {};
2159
+ }
2118
2160
  return {std::string{c}};
2119
2161
  }
2120
2162
  }
@@ -75,8 +75,6 @@ struct Float16Impl {
75
75
  static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
76
76
  static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
77
77
  static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
78
- static constexpr uint16_t kEpsilonBits = 0x4170U;
79
- static constexpr uint16_t kMinValueBits = 0xFBFFU; // Minimum normal number
80
78
  static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number
81
79
  static constexpr uint16_t kOneBits = 0x3C00U;
82
80
  static constexpr uint16_t kMinusOneBits = 0xBC00U;
@@ -364,9 +362,6 @@ struct BFloat16Impl {
364
362
  static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
365
363
  static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
366
364
  static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
367
- static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
368
- static constexpr uint16_t kEpsilonBits = 0x0080U;
369
- static constexpr uint16_t kMinValueBits = 0xFF7FU;
370
365
  static constexpr uint16_t kMaxValueBits = 0x7F7FU;
371
366
  static constexpr uint16_t kRoundToNearest = 0x7FFFU;
372
367
  static constexpr uint16_t kOneBits = 0x3F80U;
@@ -269,6 +269,9 @@ static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed
269
269
  // in case user need to merge/connect multiple EPContext nodes in one model
270
270
  static const char* const kOrtSessionOptionEpContextNodeNamePrefix = "ep.context_node_name_prefix";
271
271
 
272
+ // Share EP related resources across EPs
273
+ static const char* const kOrtSessionOptionShareEpContexts = "ep.share_ep_contexts";
274
+
272
275
  // Gemm fastmath mode provides fp32 gemm acceleration with bfloat16 based matmul.
273
276
  // Option values:
274
277
  // - "0": Gemm FastMath mode is not enabled. [DEFAULT]
@@ -279,3 +282,10 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas
279
282
  // Refer to MatMulNBits op schema for more details.
280
283
  // If not provided, default is 4.
281
284
  static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
285
+
286
+ // THIS OPTION IS NOT A REGULAR SESSION OPTION SINCE IT CAN BE MODIFIED AT ANY TIME
287
+ // Meant to be used with SetEpDynamicOptions
288
+ // Specify the type of workload for this session.
289
+ // “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
290
+ // “Efficient”: OS treats this workload as efficiency oriented with low scheduling priority and efficient processor performance.
291
+ static const char* const kOrtEpDynamicOptionsWorkloadType = "ep.dynamic.workload_type";
@@ -9,9 +9,9 @@
9
9
  <key>CFBundleIdentifier</key>
10
10
  <string>com.microsoft.onnxruntime</string>
11
11
  <key>CFBundleVersion</key>
12
- <string>1.19.2</string>
12
+ <string>1.20.0</string>
13
13
  <key>CFBundleShortVersionString</key>
14
- <string>1.19.2</string>
14
+ <string>1.20.0</string>
15
15
  <key>CFBundleSignature</key>
16
16
  <string>????</string>
17
17
  <key>CFBundlePackageType</key>
@@ -31,9 +31,14 @@ enum COREMLFlags {
31
31
  // Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later.
32
32
  COREML_FLAG_CREATE_MLPROGRAM = 0x010,
33
33
 
34
+ // Exclude ANE as sometimes this decreases performance
35
+ // https://developer.apple.com/documentation/coreml/mlcomputeunits?language=objc
36
+ // there are four compute units:
37
+ // MLComputeUnitsCPUAndNeuralEngine|MLComputeUnitsCPUAndGPU|MLComputeUnitsCPUOnly|MLComputeUnitsAll
38
+ COREML_FLAG_USE_CPU_AND_GPU = 0x020,
34
39
  // Keep COREML_FLAG_LAST at the end of the enum definition
35
40
  // And assign the last COREMLFlag to it
36
- COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM,
41
+ COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU,
37
42
  };
38
43
 
39
44
  #ifdef __cplusplus
@@ -38,7 +38,7 @@
38
38
  *
39
39
  * This value is used by some API functions to behave as this version of the header expects.
40
40
  */
41
- #define ORT_API_VERSION 19
41
+ #define ORT_API_VERSION 20
42
42
 
43
43
  #ifdef __cplusplus
44
44
  extern "C" {
@@ -304,6 +304,7 @@ ORT_RUNTIME_CLASS(Op);
304
304
  ORT_RUNTIME_CLASS(OpAttr);
305
305
  ORT_RUNTIME_CLASS(Logger);
306
306
  ORT_RUNTIME_CLASS(ShapeInferContext);
307
+ ORT_RUNTIME_CLASS(LoraAdapter);
307
308
 
308
309
  #ifdef _WIN32
309
310
  typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
@@ -621,6 +622,7 @@ typedef struct OrtMIGraphXProviderOptions {
621
622
  const char* migraphx_save_model_path; // migraphx model path name
622
623
  int migraphx_load_compiled_model; // migraphx int8 cal table. Default 0 = false, nonzero = true
623
624
  const char* migraphx_load_model_path; // migraphx model path name
625
+ bool migraphx_exhaustive_tune; // migraphx tuned compile Default = false
624
626
  } OrtMIGraphXProviderOptions;
625
627
 
626
628
  /** \brief OpenVINO Provider Options
@@ -643,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions {
643
645
  * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
644
646
  */
645
647
  const char* device_type;
646
- unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
648
+ unsigned char enable_npu_fast_compile;
647
649
  const char* device_id;
648
650
  size_t num_of_threads; ///< 0 = Use default number of threads
649
651
  const char* cache_dir; // path is set to empty by default
@@ -3649,10 +3651,17 @@ struct OrtApi {
3649
3651
  * - "73"
3650
3652
  * - "75"
3651
3653
  * "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
3652
- "enable_htp_fp16_precision": Only used for float32 model.
3653
- Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
3654
- - "0": Default. With fp32 precision.
3655
- - "1": With fp16 precision.
3654
+ * "enable_htp_fp16_precision": Used for float32 model for HTP backend.
3655
+ * Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
3656
+ * - "0": With fp32 precision.
3657
+ * - "1": Default. With fp16 precision.
3658
+ * "enable_htp_weight_sharing": Enable QNN weight sharing feature while compiling multiple graphs into one QNN context.
3659
+ * - "0": Default. Disabled.
3660
+ * - "1": Enabled.
3661
+ * "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
3662
+ * execution provider (typically CPU EP).
3663
+ * - "0": Default. Disabled. QNN EP will handle quantization and dequantization of graph I/O.
3664
+ * - "1": Enabled.
3656
3665
  *
3657
3666
  * SNPE supported keys:
3658
3667
  * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
@@ -3778,7 +3787,7 @@ struct OrtApi {
3778
3787
 
3779
3788
  /** \brief Release an OrtCANNProviderOptions
3780
3789
  *
3781
- * \param[in] the pointer of OrtCANNProviderOptions which will been deleted
3790
+ * \param[in] input The pointer of OrtCANNProviderOptions which will be deleted
3782
3791
  *
3783
3792
  * \since Version 1.13.
3784
3793
  */
@@ -4666,6 +4675,82 @@ struct OrtApi {
4666
4675
  _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array,
4667
4676
  _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths,
4668
4677
  size_t num_external_initializer_files);
4678
+
4679
+ /** \brief Create an OrtLoraAdapter
4680
+ *
4681
+ * The function attempts to locate the file specified by adapter_file_path, read it, and create an OrtLoraAdapter
4682
+ * instance. The adapter_file_path should be a valid path to a file that contains a valid Lora Adapter
4683
+ * format. The function attempts to validate the format at load time. The file will always be memory mapped, unless
4684
+ * the platform does not support memory mapping, in which case the file will be read into memory.
4685
+ *
4686
+ * \param[in] adapter_file_path adapter file path.
4687
+ * \param[in] allocator optional pointer to a device allocator. If specified,
4688
+ * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU.
4689
+ * The data would still be copied to device if required by the model at inference time.
4690
+ * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with
4691
+ * OrtApi::ReleaseLoraAdapter.
4692
+ *
4693
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4694
+ */
4695
+ ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator,
4696
+ _Outptr_ OrtLoraAdapter** out);
4697
+
4698
+ /** \brief Create an OrtLoraAdapter
4699
+ *
4700
+ * The function copies the bytes from the array and creates an OrtLoraAdapter instance.
4701
+ *
4702
+ *
4703
+ * \param[in] bytes pointer to a valid Lora Adapter format buffer.
4704
+ * \param[in] num_bytes length of bytes buffer.
4705
+ * \param[in] allocator optional pointer to a device allocator. If specified,
4706
+ * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU.
4707
+ * The data would still be copied to device if required by the model at inference time.
4708
+ * \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with
4709
+ * OrtApi::ReleaseLoraAdapter.
4710
+ *
4711
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4712
+ */
4713
+ ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator,
4714
+ _Outptr_ OrtLoraAdapter** out);
4715
+
4716
+ /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter or OrtApi::CreateLoraAdapterFromArray
4717
+ */
4718
+ ORT_CLASS_RELEASE(LoraAdapter);
4719
+
4720
+ /** \brief Add the Lora Adapter to the list of active adapters.
4721
+ *
4722
+ * The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with
4723
+ * OrtApi::CreateLoraAdapter or OrtApi::CreateLoraAdapterFromArray. The Lora Adapter will be used by the session to run the model.
4724
+ * The instance of the OrtRunOptions can then be used to customize the Run() calls.
4725
+ * More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different
4726
+ * Lora adapters that will be active at the same time must not overlap.
4727
+ * This setting does not affect RunWithBinding.
4728
+ *
4729
+ * \param[in] options OrtRunOptions instance
4730
+ * \param[in] adapter OrtLoraAdapter instance
4731
+ *
4732
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4733
+ */
4734
+ ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);
4735
+
4736
+ /// @}
4737
+ /// \name OrtEpDynamicOptions
4738
+ /// @{
4739
+
4740
+ /** \brief Set DynamicOptions for EPs (Execution Providers)
4741
+ *
4742
+ * Valid options can be found in `include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h`
4743
+ * Look for `kOrtEpDynamicOptions`
4744
+ *
4745
+ * \param[in] sess OrtSession
4746
+ * \param[in] keys Array of null terminated UTF8 encoded strings of EP dynamic option keys
4747
+ * \param[in] values Array of null-terminated UTF8 encoded strings of EP dynamic option values
4748
+ * \param[in] kv_len Number of elements in the keys and values arrays
4749
+ *
4750
+ * \snippet{doc} snippets.dox OrtStatus Return Value
4751
+ */
4752
+ ORT_API2_STATUS(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
4753
+ _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);
4669
4754
  };
4670
4755
 
4671
4756
  /*