com.github.asus4.onnxruntime 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Plugins/Android/onnxruntime-android.aar +0 -0
- package/Plugins/Linux/x64/libonnxruntime.so +0 -0
- package/Plugins/Windows/x64/onnxruntime.dll +0 -0
- package/Plugins/iOS~/onnxruntime.xcframework/Info.plist +8 -8
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/coreml_provider_factory.h +6 -1
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_c_api.h +92 -7
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_cxx_api.h +48 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_cxx_inline.h +42 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_float16.h +0 -5
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Headers/onnxruntime_session_options_config_keys.h +10 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/Info.plist +2 -2
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64/onnxruntime.framework/onnxruntime +0 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/coreml_provider_factory.h +6 -1
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_c_api.h +92 -7
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_cxx_api.h +48 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_cxx_inline.h +42 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_float16.h +0 -5
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Headers/onnxruntime_session_options_config_keys.h +10 -0
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/Info.plist +2 -2
- package/Plugins/iOS~/onnxruntime.xcframework/ios-arm64_x86_64-simulator/onnxruntime.framework/onnxruntime +0 -0
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/coreml_provider_factory.h +6 -1
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_c_api.h +92 -7
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_cxx_api.h +48 -0
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_cxx_inline.h +42 -0
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_float16.h +0 -5
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Headers/onnxruntime_session_options_config_keys.h +10 -0
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/Resources/Info.plist +2 -2
- package/Plugins/iOS~/onnxruntime.xcframework/macos-arm64_x86_64/onnxruntime.framework/Versions/A/onnxruntime +0 -0
- package/Plugins/macOS/libonnxruntime.dylib +0 -0
- package/README.md +5 -5
- package/Runtime/AssemblyInfo.shared.cs +1 -2
- package/Runtime/DisposableNamedOnnxValue.shared.cs +4 -4
- package/Runtime/FixedBufferOnnxValue.shared.cs +2 -2
- package/Runtime/InferenceSession.shared.cs +2 -3
- package/Runtime/ManagedProjections.shared.cs +1 -1
- package/Runtime/NamedOnnxValue.shared.cs +6 -17
- package/Runtime/NativeMethods.shared.cs +87 -10
- package/Runtime/NativeOnnxValueHelper.shared.cs +1 -1
- package/Runtime/OrtEnv.shared.cs +1 -1
- package/Runtime/OrtFloat16.shared.cs +42 -39
- package/Runtime/OrtIoBinding.shared.cs +1 -1
- package/Runtime/OrtLoraAdapter.shared.cs +81 -0
- package/Runtime/OrtLoraAdapter.shared.cs.meta +11 -0
- package/Runtime/OrtValue.shared.cs +3 -3
- package/Runtime/OrtValueTensor.shared.cs +1 -1
- package/Runtime/ProviderOptions.shared.cs +8 -5
- package/Runtime/RunOptions.shared.cs +12 -0
- package/Runtime/SessionOptions.shared.cs +5 -5
- package/Runtime/Tensors/ArrayTensorExtensions.shared.cs +1 -1
- package/Runtime/Training/NativeTrainingMethods.shared.cs +1 -1
- package/Runtime/Training/TrainingSession.shared.cs +2 -2
- package/package.json +1 -1
|
@@ -508,6 +508,7 @@ ORT_DEFINE_RELEASE(CustomOpDomain);
|
|
|
508
508
|
ORT_DEFINE_RELEASE(ThreadingOptions);
|
|
509
509
|
ORT_DEFINE_RELEASE(Env);
|
|
510
510
|
ORT_DEFINE_RELEASE(RunOptions);
|
|
511
|
+
ORT_DEFINE_RELEASE(LoraAdapter);
|
|
511
512
|
ORT_DEFINE_RELEASE(Session);
|
|
512
513
|
ORT_DEFINE_RELEASE(SessionOptions);
|
|
513
514
|
ORT_DEFINE_RELEASE(TensorTypeAndShapeInfo);
|
|
@@ -736,6 +737,32 @@ struct CustomOpDomain : detail::Base<OrtCustomOpDomain> {
|
|
|
736
737
|
void Add(const OrtCustomOp* op); ///< Wraps CustomOpDomain_Add
|
|
737
738
|
};
|
|
738
739
|
|
|
740
|
+
/// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file
|
|
741
|
+
struct LoraAdapter : detail::Base<OrtLoraAdapter> {
|
|
742
|
+
using Base = detail::Base<OrtLoraAdapter>;
|
|
743
|
+
using Base::Base;
|
|
744
|
+
|
|
745
|
+
explicit LoraAdapter(std::nullptr_t) {} ///< Create an empty LoraAdapter object, must be assigned a valid one to be used
|
|
746
|
+
/// \brief Wraps OrtApi::CreateLoraAdapter
|
|
747
|
+
///
|
|
748
|
+
/// The function attempts to load the adapter from the specified file
|
|
749
|
+
/// \param adapter_path The path to the Lora adapter
|
|
750
|
+
/// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
|
|
751
|
+
/// be copied to device if required by the model at inference time.
|
|
752
|
+
static LoraAdapter CreateLoraAdapter(const std::basic_string<ORTCHAR_T>& adapter_path,
|
|
753
|
+
OrtAllocator* allocator);
|
|
754
|
+
|
|
755
|
+
/// \brief Wraps OrtApi::CreateLoraAdapterFromArray
|
|
756
|
+
///
|
|
757
|
+
/// The function attempts to load the adapter from the specified byte array.
|
|
758
|
+
/// \param bytes The byte array containing the adapter data in the LoraAdapter file format
|
|
759
|
+
/// \param num_bytes The number of bytes in the byte array
|
|
760
|
+
/// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
|
|
761
|
+
/// be copied to device if required by the model at inference time.
|
|
762
|
+
static LoraAdapter CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes,
|
|
763
|
+
OrtAllocator* allocator);
|
|
764
|
+
};
|
|
765
|
+
|
|
739
766
|
/** \brief RunOptions
|
|
740
767
|
*
|
|
741
768
|
*/
|
|
@@ -766,6 +793,14 @@ struct RunOptions : detail::Base<OrtRunOptions> {
|
|
|
766
793
|
* Wraps OrtApi::RunOptionsUnsetTerminate
|
|
767
794
|
*/
|
|
768
795
|
RunOptions& UnsetTerminate();
|
|
796
|
+
|
|
797
|
+
/** \brief Add the LoraAdapter to the list of active adapters.
|
|
798
|
+
* The setting does not affect RunWithBinding() calls.
|
|
799
|
+
*
|
|
800
|
+
* Wraps OrtApi::RunOptionsAddActiveLoraAdapter
|
|
801
|
+
* \param adapter The LoraAdapter to be used as the active adapter
|
|
802
|
+
*/
|
|
803
|
+
RunOptions& AddActiveLoraAdapter(const LoraAdapter& adapter);
|
|
769
804
|
};
|
|
770
805
|
|
|
771
806
|
namespace detail {
|
|
@@ -1105,6 +1140,19 @@ struct SessionImpl : ConstSessionImpl<T> {
|
|
|
1105
1140
|
* The OrtAllocator instances must be valid at the point of memory release.
|
|
1106
1141
|
*/
|
|
1107
1142
|
AllocatedStringPtr EndProfilingAllocated(OrtAllocator* allocator); ///< Wraps OrtApi::SessionEndProfiling
|
|
1143
|
+
|
|
1144
|
+
/** \brief Set DynamicOptions for EPs (Execution Providers)
|
|
1145
|
+
*
|
|
1146
|
+
* Wraps OrtApi::SetEpDynamicOptions
|
|
1147
|
+
*
|
|
1148
|
+
* Valid options can be found in `include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h`
|
|
1149
|
+
* Look for `kOrtEpDynamicOptions`
|
|
1150
|
+
*
|
|
1151
|
+
* \param[in] keys Array of null terminated UTF8 encoded strings of EP dynamic option keys
|
|
1152
|
+
* \param[in] values Array of null terminated UTF8 encoded strings of EP dynamic option values
|
|
1153
|
+
* \param[in] kv_len Number of elements in the keys and values arrays
|
|
1154
|
+
*/
|
|
1155
|
+
void SetEpDynamicOptions(const char* const* keys, const char* const* values, size_t kv_len);
|
|
1108
1156
|
};
|
|
1109
1157
|
|
|
1110
1158
|
} // namespace detail
|
|
@@ -557,6 +557,20 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) {
|
|
|
557
557
|
ThrowOnError(GetApi().CustomOpDomain_Add(p_, op));
|
|
558
558
|
}
|
|
559
559
|
|
|
560
|
+
inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string<ORTCHAR_T>& adapter_path,
|
|
561
|
+
OrtAllocator* allocator) {
|
|
562
|
+
OrtLoraAdapter* p;
|
|
563
|
+
ThrowOnError(GetApi().CreateLoraAdapter(adapter_path.c_str(), allocator, &p));
|
|
564
|
+
return LoraAdapter{p};
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
inline LoraAdapter LoraAdapter::CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes,
|
|
568
|
+
OrtAllocator* allocator) {
|
|
569
|
+
OrtLoraAdapter* p;
|
|
570
|
+
ThrowOnError(GetApi().CreateLoraAdapterFromArray(bytes, num_bytes, allocator, &p));
|
|
571
|
+
return LoraAdapter{p};
|
|
572
|
+
}
|
|
573
|
+
|
|
560
574
|
inline RunOptions::RunOptions() {
|
|
561
575
|
ThrowOnError(GetApi().CreateRunOptions(&p_));
|
|
562
576
|
}
|
|
@@ -609,6 +623,11 @@ inline RunOptions& RunOptions::UnsetTerminate() {
|
|
|
609
623
|
return *this;
|
|
610
624
|
}
|
|
611
625
|
|
|
626
|
+
inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter) {
|
|
627
|
+
ThrowOnError(GetApi().RunOptionsAddActiveLoraAdapter(p_, adapter));
|
|
628
|
+
return *this;
|
|
629
|
+
}
|
|
630
|
+
|
|
612
631
|
namespace detail {
|
|
613
632
|
|
|
614
633
|
template <typename T>
|
|
@@ -1074,6 +1093,11 @@ inline AllocatedStringPtr SessionImpl<T>::EndProfilingAllocated(OrtAllocator* al
|
|
|
1074
1093
|
return AllocatedStringPtr(out, detail::AllocatedFree(allocator));
|
|
1075
1094
|
}
|
|
1076
1095
|
|
|
1096
|
+
template <typename T>
|
|
1097
|
+
inline void SessionImpl<T>::SetEpDynamicOptions(const char* const* keys, const char* const* values, size_t kv_len) {
|
|
1098
|
+
ThrowOnError(GetApi().SetEpDynamicOptions(this->p_, keys, values, kv_len));
|
|
1099
|
+
}
|
|
1100
|
+
|
|
1077
1101
|
} // namespace detail
|
|
1078
1102
|
|
|
1079
1103
|
inline SessionOptions::SessionOptions() {
|
|
@@ -2044,6 +2068,9 @@ inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_n
|
|
|
2044
2068
|
int64_t i = {};
|
|
2045
2069
|
size_t out = {};
|
|
2046
2070
|
// first call to get the bytes needed
|
|
2071
|
+
// 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
|
|
2072
|
+
// 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the ints (returns status == null on success).
|
|
2073
|
+
// 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&i) of size 1 just in case there is only 1 int. In this case, status == nullptr and we need to return {i}.
|
|
2047
2074
|
auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, &i, sizeof(i), &out);
|
|
2048
2075
|
if (status) {
|
|
2049
2076
|
size_t num_i = out / sizeof(int64_t);
|
|
@@ -2051,6 +2078,9 @@ inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_n
|
|
|
2051
2078
|
Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, ints.data(), out, &out));
|
|
2052
2079
|
return ints;
|
|
2053
2080
|
} else {
|
|
2081
|
+
if (out == 0u) {
|
|
2082
|
+
return {};
|
|
2083
|
+
}
|
|
2054
2084
|
return {i};
|
|
2055
2085
|
}
|
|
2056
2086
|
}
|
|
@@ -2068,6 +2098,9 @@ inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* at
|
|
|
2068
2098
|
float f = {};
|
|
2069
2099
|
size_t out = {};
|
|
2070
2100
|
// first call to get the bytes needed
|
|
2101
|
+
// 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
|
|
2102
|
+
// 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the floats (returns status == nullptr on success).
|
|
2103
|
+
// 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&f) of size 1 just in case there is only 1 float. In this case, status == nullptr and we need to return {f}.
|
|
2071
2104
|
auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, &f, sizeof(f), &out);
|
|
2072
2105
|
if (status) {
|
|
2073
2106
|
size_t num_f = out / sizeof(float);
|
|
@@ -2075,6 +2108,9 @@ inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* at
|
|
|
2075
2108
|
Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, floats.data(), out, &out));
|
|
2076
2109
|
return floats;
|
|
2077
2110
|
} else {
|
|
2111
|
+
if (out == 0u) {
|
|
2112
|
+
return {};
|
|
2113
|
+
}
|
|
2078
2114
|
return {f};
|
|
2079
2115
|
}
|
|
2080
2116
|
}
|
|
@@ -2099,6 +2135,9 @@ inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char*
|
|
|
2099
2135
|
char c = {};
|
|
2100
2136
|
size_t out = {};
|
|
2101
2137
|
// first call to get the bytes needed
|
|
2138
|
+
// 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
|
|
2139
|
+
// 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the strings (returns status == nullptr on success).
|
|
2140
|
+
// 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&c) of size 1 just in case the attribute data fits in a single char. In this case, status == nullptr and we need to return {std::string{c}}.
|
|
2102
2141
|
auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRINGS, &c, sizeof(char), &out);
|
|
2103
2142
|
if (status) {
|
|
2104
2143
|
std::vector<char> chars(out, '\0');
|
|
@@ -2115,6 +2154,9 @@ inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char*
|
|
|
2115
2154
|
}
|
|
2116
2155
|
return strings;
|
|
2117
2156
|
} else {
|
|
2157
|
+
if (out == 0u) {
|
|
2158
|
+
return {};
|
|
2159
|
+
}
|
|
2118
2160
|
return {std::string{c}};
|
|
2119
2161
|
}
|
|
2120
2162
|
}
|
|
@@ -75,8 +75,6 @@ struct Float16Impl {
|
|
|
75
75
|
static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
|
|
76
76
|
static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
|
|
77
77
|
static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
|
|
78
|
-
static constexpr uint16_t kEpsilonBits = 0x4170U;
|
|
79
|
-
static constexpr uint16_t kMinValueBits = 0xFBFFU; // Minimum normal number
|
|
80
78
|
static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number
|
|
81
79
|
static constexpr uint16_t kOneBits = 0x3C00U;
|
|
82
80
|
static constexpr uint16_t kMinusOneBits = 0xBC00U;
|
|
@@ -364,9 +362,6 @@ struct BFloat16Impl {
|
|
|
364
362
|
static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
|
|
365
363
|
static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
|
|
366
364
|
static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
|
|
367
|
-
static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
|
|
368
|
-
static constexpr uint16_t kEpsilonBits = 0x0080U;
|
|
369
|
-
static constexpr uint16_t kMinValueBits = 0xFF7FU;
|
|
370
365
|
static constexpr uint16_t kMaxValueBits = 0x7F7FU;
|
|
371
366
|
static constexpr uint16_t kRoundToNearest = 0x7FFFU;
|
|
372
367
|
static constexpr uint16_t kOneBits = 0x3F80U;
|
|
@@ -269,6 +269,9 @@ static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed
|
|
|
269
269
|
// in case user need to merge/connect multiple EPContext nodes in one model
|
|
270
270
|
static const char* const kOrtSessionOptionEpContextNodeNamePrefix = "ep.context_node_name_prefix";
|
|
271
271
|
|
|
272
|
+
// Share EP related resources across EPs
|
|
273
|
+
static const char* const kOrtSessionOptionShareEpContexts = "ep.share_ep_contexts";
|
|
274
|
+
|
|
272
275
|
// Gemm fastmath mode provides fp32 gemm acceleration with bfloat16 based matmul.
|
|
273
276
|
// Option values:
|
|
274
277
|
// - "0": Gemm FastMath mode is not enabled. [DEFAULT]
|
|
@@ -279,3 +282,10 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas
|
|
|
279
282
|
// Refer to MatMulNBits op schema for more details.
|
|
280
283
|
// If not provided, default is 4.
|
|
281
284
|
static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
|
|
285
|
+
|
|
286
|
+
// THIS OPTION IS NOT A REGULAR SESSION OPTION SINCE IT CAN BE MODIFIED AT ANY TIME
|
|
287
|
+
// Meant to be used with SetEpDynamicOptions
|
|
288
|
+
// Specify the type of workload for this session.
|
|
289
|
+
// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
|
|
290
|
+
// “Efficient”: OS treats this workload as efficiency oriented, with low scheduling priority and efficient processor performance.
|
|
291
|
+
static const char* const kOrtEpDynamicOptionsWorkloadType = "ep.dynamic.workload_type";
|
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
<key>CFBundleIdentifier</key>
|
|
10
10
|
<string>com.microsoft.onnxruntime</string>
|
|
11
11
|
<key>CFBundleVersion</key>
|
|
12
|
-
<string>1.
|
|
12
|
+
<string>1.20.0</string>
|
|
13
13
|
<key>CFBundleShortVersionString</key>
|
|
14
|
-
<string>1.
|
|
14
|
+
<string>1.20.0</string>
|
|
15
15
|
<key>CFBundleSignature</key>
|
|
16
16
|
<string>????</string>
|
|
17
17
|
<key>CFBundlePackageType</key>
|
|
Binary file
|
|
@@ -31,9 +31,14 @@ enum COREMLFlags {
|
|
|
31
31
|
// Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later.
|
|
32
32
|
COREML_FLAG_CREATE_MLPROGRAM = 0x010,
|
|
33
33
|
|
|
34
|
+
// Exclude ANE as sometimes this decreases performance
|
|
35
|
+
// https://developer.apple.com/documentation/coreml/mlcomputeunits?language=objc
|
|
36
|
+
// there are four compute units:
|
|
37
|
+
// MLComputeUnitsCPUAndNeuralEngine|MLComputeUnitsCPUAndGPU|MLComputeUnitsCPUOnly|MLComputeUnitsAll
|
|
38
|
+
COREML_FLAG_USE_CPU_AND_GPU = 0x020,
|
|
34
39
|
// Keep COREML_FLAG_LAST at the end of the enum definition
|
|
35
40
|
// And assign the last COREMLFlag to it
|
|
36
|
-
COREML_FLAG_LAST =
|
|
41
|
+
COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU,
|
|
37
42
|
};
|
|
38
43
|
|
|
39
44
|
#ifdef __cplusplus
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
*
|
|
39
39
|
* This value is used by some API functions to behave as this version of the header expects.
|
|
40
40
|
*/
|
|
41
|
-
#define ORT_API_VERSION
|
|
41
|
+
#define ORT_API_VERSION 20
|
|
42
42
|
|
|
43
43
|
#ifdef __cplusplus
|
|
44
44
|
extern "C" {
|
|
@@ -304,6 +304,7 @@ ORT_RUNTIME_CLASS(Op);
|
|
|
304
304
|
ORT_RUNTIME_CLASS(OpAttr);
|
|
305
305
|
ORT_RUNTIME_CLASS(Logger);
|
|
306
306
|
ORT_RUNTIME_CLASS(ShapeInferContext);
|
|
307
|
+
ORT_RUNTIME_CLASS(LoraAdapter);
|
|
307
308
|
|
|
308
309
|
#ifdef _WIN32
|
|
309
310
|
typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
|
|
@@ -621,6 +622,7 @@ typedef struct OrtMIGraphXProviderOptions {
|
|
|
621
622
|
const char* migraphx_save_model_path; // migraphx model path name
|
|
622
623
|
int migraphx_load_compiled_model; // migraphx int8 cal table. Default 0 = false, noznero = true
|
|
623
624
|
const char* migraphx_load_model_path; // migraphx model path name
|
|
625
|
+
bool migraphx_exhaustive_tune; // migraphx tuned compile Default = false
|
|
624
626
|
} OrtMIGraphXProviderOptions;
|
|
625
627
|
|
|
626
628
|
/** \brief OpenVINO Provider Options
|
|
@@ -643,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions {
|
|
|
643
645
|
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
|
|
644
646
|
*/
|
|
645
647
|
const char* device_type;
|
|
646
|
-
unsigned char enable_npu_fast_compile;
|
|
648
|
+
unsigned char enable_npu_fast_compile;
|
|
647
649
|
const char* device_id;
|
|
648
650
|
size_t num_of_threads; ///< 0 = Use default number of threads
|
|
649
651
|
const char* cache_dir; // path is set to empty by default
|
|
@@ -3649,10 +3651,17 @@ struct OrtApi {
|
|
|
3649
3651
|
* - "73"
|
|
3650
3652
|
* - "75"
|
|
3651
3653
|
* "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
|
|
3652
|
-
|
|
3653
|
-
|
|
3654
|
-
|
|
3655
|
-
|
|
3654
|
+
* "enable_htp_fp16_precision": Used for float32 model for HTP backend.
|
|
3655
|
+
* Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
|
|
3656
|
+
* - "0": With fp32 precision.
|
|
3657
|
+
* - "1": Default. With fp16 precision.
|
|
3658
|
+
* "enable_htp_weight_sharing": Enable QNN weight sharing feature while compiling multiple graphs into one QNN context.
|
|
3659
|
+
* - "0": Default. Disabled.
|
|
3660
|
+
* - "1": Enabled.
|
|
3661
|
+
* "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
|
|
3662
|
+
* execution provider (typically CPU EP).
|
|
3663
|
+
* - "0": Default. Disabled. QNN EP will handle quantization and dequantization of graph I/O.
|
|
3664
|
+
* - "1": Enabled.
|
|
3656
3665
|
*
|
|
3657
3666
|
* SNPE supported keys:
|
|
3658
3667
|
* "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
|
|
@@ -3778,7 +3787,7 @@ struct OrtApi {
|
|
|
3778
3787
|
|
|
3779
3788
|
/** \brief Release an OrtCANNProviderOptions
|
|
3780
3789
|
*
|
|
3781
|
-
* \param[in]
|
|
3790
|
+
* \param[in] input The pointer to the OrtCANNProviderOptions which will be deleted
|
|
3782
3791
|
*
|
|
3783
3792
|
* \since Version 1.13.
|
|
3784
3793
|
*/
|
|
@@ -4666,6 +4675,82 @@ struct OrtApi {
|
|
|
4666
4675
|
_In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array,
|
|
4667
4676
|
_In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths,
|
|
4668
4677
|
size_t num_external_initializer_files);
|
|
4678
|
+
|
|
4679
|
+
/** \brief Create an OrtLoraAdapter
|
|
4680
|
+
*
|
|
4681
|
+
* The function attempts to locate file specified by adapter_file_path, read it and create an OrtLoraAdapter
|
|
4682
|
+
* instance. The adapter_file_path should be a valid path to a file that contains a valid Lora Adapter
|
|
4683
|
+
* format. The function attempts to validate the format at load time. The file will always be memory mapped, unless
|
|
4684
|
+
* the platform does not support memory mapping, in which case the file will be read into memory.
|
|
4685
|
+
*
|
|
4686
|
+
* \param[in] adapter_file_path adapter file path.
|
|
4687
|
+
* \param[in] allocator optional pointer to a device allocator. If specified
|
|
4688
|
+
* data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU.
|
|
4689
|
+
* The data would still be copied to device if required by the model at inference time.
|
|
4690
|
+
* \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with
|
|
4691
|
+
* OrtApi::ReleaseLoraAdapter.
|
|
4692
|
+
*
|
|
4693
|
+
* \snippet{doc} snippets.dox OrtStatus Return Value
|
|
4694
|
+
*/
|
|
4695
|
+
ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator,
|
|
4696
|
+
_Outptr_ OrtLoraAdapter** out);
|
|
4697
|
+
|
|
4698
|
+
/** \brief Create an OrtLoraAdapter
|
|
4699
|
+
*
|
|
4700
|
+
* The function copies the bytes from the array and creates an OrtLoraAdapter instance.
|
|
4701
|
+
*
|
|
4702
|
+
*
|
|
4703
|
+
* \param[in] bytes pointer to a valid Lora Adapter format buffer.
|
|
4704
|
+
* \param[in] num_bytes length of bytes buffer.
|
|
4705
|
+
* \param[in] allocator optional pointer to a device allocator. If specified
|
|
4706
|
+
* data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU.
|
|
4707
|
+
* The data would still be copied to device if required by the model at inference time.
|
|
4708
|
+
* \param[out] out A pointer to a newly created OrtLoraAdapter instance. Must be released with
|
|
4709
|
+
* OrtApi::ReleaseLoraAdapter.
|
|
4710
|
+
*
|
|
4711
|
+
* \snippet{doc} snippets.dox OrtStatus Return Value
|
|
4712
|
+
*/
|
|
4713
|
+
ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator,
|
|
4714
|
+
_Outptr_ OrtLoraAdapter** out);
|
|
4715
|
+
|
|
4716
|
+
/** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter
|
|
4717
|
+
*/
|
|
4718
|
+
ORT_CLASS_RELEASE(LoraAdapter);
|
|
4719
|
+
|
|
4720
|
+
/** \brief Add the Lora Adapter to the list of active adapters.
|
|
4721
|
+
*
|
|
4722
|
+
* The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with
|
|
4723
|
+
* OrtApi::CreateLoraAdapter or FromArray. The Lora Adapter will be used by the session to run the model.
|
|
4724
|
+
* The instance of the OrtRunOptions can then be used to customize the Run() calls.
|
|
4725
|
+
* More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different
|
|
4726
|
+
* Lora adapters that will be active at the same time must not overlap.
|
|
4727
|
+
* This setting does not affect RunWithBinding.
|
|
4728
|
+
*
|
|
4729
|
+
* \param[in] options OrtRunOptions instance
|
|
4730
|
+
* \param[in] adapter OrtLoraAdapter instance
|
|
4731
|
+
*
|
|
4732
|
+
* \snippet{doc} snippets.dox OrtStatus Return Value
|
|
4733
|
+
*/
|
|
4734
|
+
ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);
|
|
4735
|
+
|
|
4736
|
+
/// @}
|
|
4737
|
+
/// \name OrtEpDynamicOptions
|
|
4738
|
+
/// @{
|
|
4739
|
+
|
|
4740
|
+
/** \brief Set DynamicOptions for EPs (Execution Providers)
|
|
4741
|
+
*
|
|
4742
|
+
* Valid options can be found in `include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h`
|
|
4743
|
+
* Look for `kOrtEpDynamicOptions`
|
|
4744
|
+
*
|
|
4745
|
+
* \param[in] sess OrtSession
|
|
4746
|
+
* \param[in] keys Array of null terminated UTF8 encoded strings of EP dynamic option keys
|
|
4747
|
+
* \param[in] values Array of null terminated UTF8 encoded strings of EP dynamic option values
|
|
4748
|
+
* \param[in] kv_len Number of elements in the keys and values arrays
|
|
4749
|
+
*
|
|
4750
|
+
* \snippet{doc} snippets.dox OrtStatus Return Value
|
|
4751
|
+
*/
|
|
4752
|
+
ORT_API2_STATUS(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
|
|
4753
|
+
_In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);
|
|
4669
4754
|
};
|
|
4670
4755
|
|
|
4671
4756
|
/*
|
|
@@ -508,6 +508,7 @@ ORT_DEFINE_RELEASE(CustomOpDomain);
|
|
|
508
508
|
ORT_DEFINE_RELEASE(ThreadingOptions);
|
|
509
509
|
ORT_DEFINE_RELEASE(Env);
|
|
510
510
|
ORT_DEFINE_RELEASE(RunOptions);
|
|
511
|
+
ORT_DEFINE_RELEASE(LoraAdapter);
|
|
511
512
|
ORT_DEFINE_RELEASE(Session);
|
|
512
513
|
ORT_DEFINE_RELEASE(SessionOptions);
|
|
513
514
|
ORT_DEFINE_RELEASE(TensorTypeAndShapeInfo);
|
|
@@ -736,6 +737,32 @@ struct CustomOpDomain : detail::Base<OrtCustomOpDomain> {
|
|
|
736
737
|
void Add(const OrtCustomOp* op); ///< Wraps CustomOpDomain_Add
|
|
737
738
|
};
|
|
738
739
|
|
|
740
|
+
/// \brief LoraAdapter holds a set of Lora Parameters loaded from a single file
|
|
741
|
+
struct LoraAdapter : detail::Base<OrtLoraAdapter> {
|
|
742
|
+
using Base = detail::Base<OrtLoraAdapter>;
|
|
743
|
+
using Base::Base;
|
|
744
|
+
|
|
745
|
+
explicit LoraAdapter(std::nullptr_t) {} ///< Create an empty LoraAdapter object, must be assigned a valid one to be used
|
|
746
|
+
/// \brief Wraps OrtApi::CreateLoraAdapter
|
|
747
|
+
///
|
|
748
|
+
/// The function attempts to load the adapter from the specified file
|
|
749
|
+
/// \param adapter_path The path to the Lora adapter
|
|
750
|
+
/// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
|
|
751
|
+
/// be copied to device if required by the model at inference time.
|
|
752
|
+
static LoraAdapter CreateLoraAdapter(const std::basic_string<ORTCHAR_T>& adapter_path,
|
|
753
|
+
OrtAllocator* allocator);
|
|
754
|
+
|
|
755
|
+
/// \brief Wraps OrtApi::CreateLoraAdapterFromArray
|
|
756
|
+
///
|
|
757
|
+
/// The function attempts to load the adapter from the specified byte array.
|
|
758
|
+
/// \param bytes The byte array containing the adapter data in the LoraAdapter file format
|
|
759
|
+
/// \param num_bytes The number of bytes in the byte array
|
|
760
|
+
/// \param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
|
|
761
|
+
/// be copied to device if required by the model at inference time.
|
|
762
|
+
static LoraAdapter CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes,
|
|
763
|
+
OrtAllocator* allocator);
|
|
764
|
+
};
|
|
765
|
+
|
|
739
766
|
/** \brief RunOptions
|
|
740
767
|
*
|
|
741
768
|
*/
|
|
@@ -766,6 +793,14 @@ struct RunOptions : detail::Base<OrtRunOptions> {
|
|
|
766
793
|
* Wraps OrtApi::RunOptionsUnsetTerminate
|
|
767
794
|
*/
|
|
768
795
|
RunOptions& UnsetTerminate();
|
|
796
|
+
|
|
797
|
+
/** \brief Add the LoraAdapter to the list of active adapters.
|
|
798
|
+
* The setting does not affect RunWithBinding() calls.
|
|
799
|
+
*
|
|
800
|
+
* Wraps OrtApi::RunOptionsAddActiveLoraAdapter
|
|
801
|
+
* \param adapter The LoraAdapter to be used as the active adapter
|
|
802
|
+
*/
|
|
803
|
+
RunOptions& AddActiveLoraAdapter(const LoraAdapter& adapter);
|
|
769
804
|
};
|
|
770
805
|
|
|
771
806
|
namespace detail {
|
|
@@ -1105,6 +1140,19 @@ struct SessionImpl : ConstSessionImpl<T> {
|
|
|
1105
1140
|
* The OrtAllocator instances must be valid at the point of memory release.
|
|
1106
1141
|
*/
|
|
1107
1142
|
AllocatedStringPtr EndProfilingAllocated(OrtAllocator* allocator); ///< Wraps OrtApi::SessionEndProfiling
|
|
1143
|
+
|
|
1144
|
+
/** \brief Set DynamicOptions for EPs (Execution Providers)
|
|
1145
|
+
*
|
|
1146
|
+
* Wraps OrtApi::SetEpDynamicOptions
|
|
1147
|
+
*
|
|
1148
|
+
* Valid options can be found in `include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h`
|
|
1149
|
+
* Look for `kOrtEpDynamicOptions`
|
|
1150
|
+
*
|
|
1151
|
+
* \param[in] keys Array of null terminated UTF8 encoded strings of EP dynamic option keys
|
|
1152
|
+
* \param[in] values Array of null terminated UTF8 encoded strings of EP dynamic option values
|
|
1153
|
+
* \param[in] kv_len Number of elements in the keys and values arrays
|
|
1154
|
+
*/
|
|
1155
|
+
void SetEpDynamicOptions(const char* const* keys, const char* const* values, size_t kv_len);
|
|
1108
1156
|
};
|
|
1109
1157
|
|
|
1110
1158
|
} // namespace detail
|
|
@@ -557,6 +557,20 @@ inline void CustomOpDomain::Add(const OrtCustomOp* op) {
|
|
|
557
557
|
ThrowOnError(GetApi().CustomOpDomain_Add(p_, op));
|
|
558
558
|
}
|
|
559
559
|
|
|
560
|
+
inline LoraAdapter LoraAdapter::CreateLoraAdapter(const std::basic_string<ORTCHAR_T>& adapter_path,
|
|
561
|
+
OrtAllocator* allocator) {
|
|
562
|
+
OrtLoraAdapter* p;
|
|
563
|
+
ThrowOnError(GetApi().CreateLoraAdapter(adapter_path.c_str(), allocator, &p));
|
|
564
|
+
return LoraAdapter{p};
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
inline LoraAdapter LoraAdapter::CreateLoraAdapterFromArray(const void* bytes, size_t num_bytes,
|
|
568
|
+
OrtAllocator* allocator) {
|
|
569
|
+
OrtLoraAdapter* p;
|
|
570
|
+
ThrowOnError(GetApi().CreateLoraAdapterFromArray(bytes, num_bytes, allocator, &p));
|
|
571
|
+
return LoraAdapter{p};
|
|
572
|
+
}
|
|
573
|
+
|
|
560
574
|
inline RunOptions::RunOptions() {
|
|
561
575
|
ThrowOnError(GetApi().CreateRunOptions(&p_));
|
|
562
576
|
}
|
|
@@ -609,6 +623,11 @@ inline RunOptions& RunOptions::UnsetTerminate() {
|
|
|
609
623
|
return *this;
|
|
610
624
|
}
|
|
611
625
|
|
|
626
|
+
inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter) {
|
|
627
|
+
ThrowOnError(GetApi().RunOptionsAddActiveLoraAdapter(p_, adapter));
|
|
628
|
+
return *this;
|
|
629
|
+
}
|
|
630
|
+
|
|
612
631
|
namespace detail {
|
|
613
632
|
|
|
614
633
|
template <typename T>
|
|
@@ -1074,6 +1093,11 @@ inline AllocatedStringPtr SessionImpl<T>::EndProfilingAllocated(OrtAllocator* al
|
|
|
1074
1093
|
return AllocatedStringPtr(out, detail::AllocatedFree(allocator));
|
|
1075
1094
|
}
|
|
1076
1095
|
|
|
1096
|
+
template <typename T>
|
|
1097
|
+
inline void SessionImpl<T>::SetEpDynamicOptions(const char* const* keys, const char* const* values, size_t kv_len) {
|
|
1098
|
+
ThrowOnError(GetApi().SetEpDynamicOptions(this->p_, keys, values, kv_len));
|
|
1099
|
+
}
|
|
1100
|
+
|
|
1077
1101
|
} // namespace detail
|
|
1078
1102
|
|
|
1079
1103
|
inline SessionOptions::SessionOptions() {
|
|
@@ -2044,6 +2068,9 @@ inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_n
|
|
|
2044
2068
|
int64_t i = {};
|
|
2045
2069
|
size_t out = {};
|
|
2046
2070
|
// first call to get the bytes needed
|
|
2071
|
+
// 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
|
|
2072
|
+
// 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the ints (returns status == null on success).
|
|
2073
|
+
// 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&i) of size 1 just in case there is only 1 int. In this case, status == nullptr and we need to return {i}.
|
|
2047
2074
|
auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, &i, sizeof(i), &out);
|
|
2048
2075
|
if (status) {
|
|
2049
2076
|
size_t num_i = out / sizeof(int64_t);
|
|
@@ -2051,6 +2078,9 @@ inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_n
|
|
|
2051
2078
|
Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, ints.data(), out, &out));
|
|
2052
2079
|
return ints;
|
|
2053
2080
|
} else {
|
|
2081
|
+
if (out == 0u) {
|
|
2082
|
+
return {};
|
|
2083
|
+
}
|
|
2054
2084
|
return {i};
|
|
2055
2085
|
}
|
|
2056
2086
|
}
|
|
@@ -2068,6 +2098,9 @@ inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* at
|
|
|
2068
2098
|
float f = {};
|
|
2069
2099
|
size_t out = {};
|
|
2070
2100
|
// first call to get the bytes needed
|
|
2101
|
+
// 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
|
|
2102
|
+
// 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the floats (returns status == null on success).
|
|
2103
|
+
// 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a buffer (&f) of size 1 just in case there is only 1 float. In this case, status == nullptr and we need to return {f}.
|
|
2071
2104
|
auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, &f, sizeof(f), &out);
|
|
2072
2105
|
if (status) {
|
|
2073
2106
|
size_t num_f = out / sizeof(float);
|
|
@@ -2075,6 +2108,9 @@ inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* at
|
|
|
2075
2108
|
Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, floats.data(), out, &out));
|
|
2076
2109
|
return floats;
|
|
2077
2110
|
} else {
|
|
2111
|
+
if (out == 0u) {
|
|
2112
|
+
return {};
|
|
2113
|
+
}
|
|
2078
2114
|
return {f};
|
|
2079
2115
|
}
|
|
2080
2116
|
}
|
|
@@ -2099,6 +2135,9 @@ inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char*
|
|
|
2099
2135
|
char c = {};
|
|
2100
2136
|
size_t out = {};
|
|
2101
2137
|
// first call to get the bytes needed
|
|
2138
|
+
// 1. A status == nullptr means that ReadOpAttr was successful. A status != nullptr means failure.
|
|
2139
|
+
// 2. The ReadOpAttr function should normally be called twice: once to get the needed buffer size (returns a status != nullptr), and a second time to actually read the strings (returns status == null on success).
|
|
2140
|
+
// 3. This code tries a subtle optimization in the first call to ReadOpAttr. It passes in a 1-byte buffer (&c) just in case the attribute data fits in a single char. In this case, status == nullptr and we need to return {std::string{c}}.
|
|
2102
2141
|
auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRINGS, &c, sizeof(char), &out);
|
|
2103
2142
|
if (status) {
|
|
2104
2143
|
std::vector<char> chars(out, '\0');
|
|
@@ -2115,6 +2154,9 @@ inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char*
|
|
|
2115
2154
|
}
|
|
2116
2155
|
return strings;
|
|
2117
2156
|
} else {
|
|
2157
|
+
if (out == 0u) {
|
|
2158
|
+
return {};
|
|
2159
|
+
}
|
|
2118
2160
|
return {std::string{c}};
|
|
2119
2161
|
}
|
|
2120
2162
|
}
|
|
@@ -75,8 +75,6 @@ struct Float16Impl {
|
|
|
75
75
|
static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
|
|
76
76
|
static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
|
|
77
77
|
static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
|
|
78
|
-
static constexpr uint16_t kEpsilonBits = 0x4170U;
|
|
79
|
-
static constexpr uint16_t kMinValueBits = 0xFBFFU; // Minimum normal number
|
|
80
78
|
static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number
|
|
81
79
|
static constexpr uint16_t kOneBits = 0x3C00U;
|
|
82
80
|
static constexpr uint16_t kMinusOneBits = 0xBC00U;
|
|
@@ -364,9 +362,6 @@ struct BFloat16Impl {
|
|
|
364
362
|
static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
|
|
365
363
|
static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
|
|
366
364
|
static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
|
|
367
|
-
static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
|
|
368
|
-
static constexpr uint16_t kEpsilonBits = 0x0080U;
|
|
369
|
-
static constexpr uint16_t kMinValueBits = 0xFF7FU;
|
|
370
365
|
static constexpr uint16_t kMaxValueBits = 0x7F7FU;
|
|
371
366
|
static constexpr uint16_t kRoundToNearest = 0x7FFFU;
|
|
372
367
|
static constexpr uint16_t kOneBits = 0x3F80U;
|
|
@@ -269,6 +269,9 @@ static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed
|
|
|
269
269
|
// in case the user needs to merge/connect multiple EPContext nodes in one model
|
|
270
270
|
static const char* const kOrtSessionOptionEpContextNodeNamePrefix = "ep.context_node_name_prefix";
|
|
271
271
|
|
|
272
|
+
// Share EP related resources across EPs
|
|
273
|
+
static const char* const kOrtSessionOptionShareEpContexts = "ep.share_ep_contexts";
|
|
274
|
+
|
|
272
275
|
// Gemm fastmath mode provides fp32 gemm acceleration with bfloat16 based matmul.
|
|
273
276
|
// Option values:
|
|
274
277
|
// - "0": Gemm FastMath mode is not enabled. [DEFAULT]
|
|
@@ -279,3 +282,10 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas
|
|
|
279
282
|
// Refer to MatMulNBits op schema for more details.
|
|
280
283
|
// If not provided, default is 4.
|
|
281
284
|
static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
|
|
285
|
+
|
|
286
|
+
// THIS OPTION IS NOT A REGULAR SESSION OPTION SINCE IT CAN BE MODIFIED AT ANY TIME
|
|
287
|
+
// Meant to be used with SetEpDynamicOptions
|
|
288
|
+
// Specify the type of workload for this session.
|
|
289
|
+
// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
|
|
290
|
+
// “Efficient”: OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance.
|
|
291
|
+
static const char* const kOrtEpDynamicOptionsWorkloadType = "ep.dynamic.workload_type";
|
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
<key>CFBundleIdentifier</key>
|
|
10
10
|
<string>com.microsoft.onnxruntime</string>
|
|
11
11
|
<key>CFBundleVersion</key>
|
|
12
|
-
<string>1.
|
|
12
|
+
<string>1.20.0</string>
|
|
13
13
|
<key>CFBundleShortVersionString</key>
|
|
14
|
-
<string>1.
|
|
14
|
+
<string>1.20.0</string>
|
|
15
15
|
<key>CFBundleSignature</key>
|
|
16
16
|
<string>????</string>
|
|
17
17
|
<key>CFBundlePackageType</key>
|
|
Binary file
|
|
Binary file
|
package/README.md
CHANGED
|
@@ -66,11 +66,11 @@ Pre-built libraries are available on [NPM](https://www.npmjs.com/package/com.git
|
|
|
66
66
|
}
|
|
67
67
|
]
|
|
68
68
|
"dependencies": {
|
|
69
|
-
"com.github.asus4.onnxruntime": "0.2.
|
|
70
|
-
"com.github.asus4.onnxruntime.unity": "0.2.
|
|
71
|
-
"com.github.asus4.onnxruntime.win-x64-gpu": "0.2.
|
|
72
|
-
"com.github.asus4.onnxruntime.linux-x64-gpu": "0.2.
|
|
73
|
-
"com.github.asus4.onnxruntime-extensions": "0.2.
|
|
69
|
+
"com.github.asus4.onnxruntime": "0.2.3",
|
|
70
|
+
"com.github.asus4.onnxruntime.unity": "0.2.3",
|
|
71
|
+
"com.github.asus4.onnxruntime.win-x64-gpu": "0.2.3",
|
|
72
|
+
"com.github.asus4.onnxruntime.linux-x64-gpu": "0.2.3",
|
|
73
|
+
"com.github.asus4.onnxruntime-extensions": "0.2.3",
|
|
74
74
|
... other dependencies
|
|
75
75
|
}
|
|
76
76
|
```
|