cactus-react-native 1.2.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +765 -33
- package/android/CMakeLists.txt +4 -3
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusFileSystem.kt +20 -1
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
- package/cpp/HybridCactus.cpp +231 -19
- package/cpp/HybridCactus.hpp +25 -3
- package/cpp/HybridCactusIndex.cpp +325 -0
- package/cpp/HybridCactusIndex.hpp +43 -0
- package/cpp/HybridCactusUtil.cpp +3 -3
- package/cpp/HybridCactusUtil.hpp +2 -1
- package/cpp/cactus_ffi.h +107 -2
- package/cpp/cactus_util.h +1 -1
- package/ios/HybridCactusFileSystem.swift +23 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +2 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +107 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +656 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/{ffi_utils.h → cactus_utils.h} +145 -18
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +135 -7
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +193 -26
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +54 -195
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +2 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +107 -2
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +656 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/{ffi_utils.h → cactus_utils.h} +145 -18
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +135 -7
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +193 -26
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +54 -195
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus_util.xcframework/Info.plist +4 -4
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +1 -1
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +27 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +1 -1
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +27 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +3 -3
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
- package/lib/module/api/Database.js +12 -95
- package/lib/module/api/Database.js.map +1 -1
- package/lib/module/classes/CactusIndex.js +45 -0
- package/lib/module/classes/CactusIndex.js.map +1 -0
- package/lib/module/classes/CactusLM.js +65 -17
- package/lib/module/classes/CactusLM.js.map +1 -1
- package/lib/module/classes/CactusSTT.js +104 -17
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/config/CactusConfig.js +2 -0
- package/lib/module/config/CactusConfig.js.map +1 -1
- package/lib/module/constants/packageVersion.js +1 -1
- package/lib/module/hooks/useCactusIndex.js +175 -0
- package/lib/module/hooks/useCactusIndex.js.map +1 -0
- package/lib/module/hooks/useCactusLM.js +68 -7
- package/lib/module/hooks/useCactusLM.js.map +1 -1
- package/lib/module/hooks/useCactusSTT.js +102 -6
- package/lib/module/hooks/useCactusSTT.js.map +1 -1
- package/lib/module/index.js +2 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/models.js +336 -0
- package/lib/module/models.js.map +1 -0
- package/lib/module/native/Cactus.js +61 -13
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/module/native/CactusFileSystem.js +3 -0
- package/lib/module/native/CactusFileSystem.js.map +1 -1
- package/lib/module/native/CactusIndex.js +32 -0
- package/lib/module/native/CactusIndex.js.map +1 -0
- package/lib/module/native/CactusUtil.js +16 -3
- package/lib/module/native/CactusUtil.js.map +1 -1
- package/lib/module/native/index.js +1 -0
- package/lib/module/native/index.js.map +1 -1
- package/lib/module/specs/CactusIndex.nitro.js +4 -0
- package/lib/module/specs/CactusIndex.nitro.js.map +1 -0
- package/lib/module/telemetry/Telemetry.js +3 -1
- package/lib/module/telemetry/Telemetry.js.map +1 -1
- package/lib/module/types/CactusIndex.js +2 -0
- package/lib/module/types/{CactusModel.js.map → CactusIndex.js.map} +1 -1
- package/lib/module/types/CactusLM.js +2 -0
- package/lib/module/types/CactusSTT.js +2 -0
- package/lib/module/types/common.js +2 -0
- package/lib/module/types/{CactusSTTModel.js.map → common.js.map} +1 -1
- package/lib/typescript/src/api/Database.d.ts +4 -7
- package/lib/typescript/src/api/Database.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusIndex.d.ts +15 -0
- package/lib/typescript/src/classes/CactusIndex.d.ts.map +1 -0
- package/lib/typescript/src/classes/CactusLM.d.ts +12 -5
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +15 -5
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/config/CactusConfig.d.ts +1 -0
- package/lib/typescript/src/config/CactusConfig.d.ts.map +1 -1
- package/lib/typescript/src/constants/packageVersion.d.ts +1 -1
- package/lib/typescript/src/hooks/useCactusIndex.d.ts +14 -0
- package/lib/typescript/src/hooks/useCactusIndex.d.ts.map +1 -0
- package/lib/typescript/src/hooks/useCactusLM.d.ts +6 -4
- package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusSTT.d.ts +13 -5
- package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +6 -4
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/models.d.ts +6 -0
- package/lib/typescript/src/models.d.ts.map +1 -0
- package/lib/typescript/src/native/Cactus.d.ts +10 -3
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/native/CactusFileSystem.d.ts +1 -0
- package/lib/typescript/src/native/CactusFileSystem.d.ts.map +1 -1
- package/lib/typescript/src/native/CactusIndex.d.ts +12 -0
- package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -0
- package/lib/typescript/src/native/CactusUtil.d.ts.map +1 -1
- package/lib/typescript/src/native/index.d.ts +1 -0
- package/lib/typescript/src/native/index.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +9 -2
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/specs/CactusFileSystem.nitro.d.ts +1 -0
- package/lib/typescript/src/specs/CactusFileSystem.nitro.d.ts.map +1 -1
- package/lib/typescript/src/specs/CactusIndex.nitro.d.ts +24 -0
- package/lib/typescript/src/specs/CactusIndex.nitro.d.ts.map +1 -0
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +1 -1
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusIndex.d.ts +34 -0
- package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -0
- package/lib/typescript/src/types/CactusLM.d.ts +19 -0
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +21 -1
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/types/common.d.ts +28 -0
- package/lib/typescript/src/types/common.d.ts.map +1 -0
- package/nitro.json +3 -0
- package/nitrogen/generated/android/c++/JDeviceInfo.hpp +1 -1
- package/nitrogen/generated/android/c++/JFunc_void_double.hpp +1 -1
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +1 -1
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +1 -1
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +1 -1
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +1 -1
- package/nitrogen/generated/android/c++/JHybridCactusFileSystemSpec.cpp +17 -1
- package/nitrogen/generated/android/c++/JHybridCactusFileSystemSpec.hpp +2 -1
- package/nitrogen/generated/android/c++/JHybridCactusImageSpec.cpp +1 -1
- package/nitrogen/generated/android/c++/JHybridCactusImageSpec.hpp +1 -1
- package/nitrogen/generated/android/cactus+autolinking.cmake +2 -1
- package/nitrogen/generated/android/cactus+autolinking.gradle +1 -1
- package/nitrogen/generated/android/cactusOnLoad.cpp +11 -1
- package/nitrogen/generated/android/cactusOnLoad.hpp +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/Func_void_double.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusFileSystemSpec.kt +5 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusImageSpec.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/cactusOnLoad.kt +1 -1
- package/nitrogen/generated/ios/Cactus+autolinking.rb +1 -1
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +1 -1
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +1 -1
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +1 -1
- package/nitrogen/generated/ios/CactusAutolinking.mm +11 -1
- package/nitrogen/generated/ios/CactusAutolinking.swift +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusFileSystemSpecSwift.cpp +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusFileSystemSpecSwift.hpp +9 -1
- package/nitrogen/generated/ios/c++/HybridCactusImageSpecSwift.cpp +1 -1
- package/nitrogen/generated/ios/c++/HybridCactusImageSpecSwift.hpp +1 -1
- package/nitrogen/generated/ios/swift/DeviceInfo.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void_bool.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void_double.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +1 -1
- package/nitrogen/generated/ios/swift/Func_void_std__string.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridCactusFileSystemSpec.swift +2 -1
- package/nitrogen/generated/ios/swift/HybridCactusFileSystemSpec_cxx.swift +20 -1
- package/nitrogen/generated/ios/swift/HybridCactusImageSpec.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridCactusImageSpec_cxx.swift +1 -1
- package/nitrogen/generated/shared/c++/CactusIndexGetResult.hpp +84 -0
- package/nitrogen/generated/shared/c++/CactusIndexQueryResult.hpp +79 -0
- package/nitrogen/generated/shared/c++/DeviceInfo.hpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusFileSystemSpec.cpp +2 -1
- package/nitrogen/generated/shared/c++/HybridCactusFileSystemSpec.hpp +2 -1
- package/nitrogen/generated/shared/c++/HybridCactusImageSpec.cpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusImageSpec.hpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusIndexSpec.cpp +27 -0
- package/nitrogen/generated/shared/c++/HybridCactusIndexSpec.hpp +76 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +8 -1
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +11 -3
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +2 -2
- package/package.json +2 -2
- package/src/api/Database.ts +14 -135
- package/src/classes/CactusIndex.ts +58 -0
- package/src/classes/CactusLM.ts +87 -19
- package/src/classes/CactusSTT.ts +134 -20
- package/src/config/CactusConfig.ts +3 -0
- package/src/constants/packageVersion.ts +1 -1
- package/src/hooks/useCactusIndex.ts +195 -0
- package/src/hooks/useCactusLM.ts +88 -8
- package/src/hooks/useCactusSTT.ts +119 -7
- package/src/index.tsx +22 -2
- package/src/models.ts +344 -0
- package/src/native/Cactus.ts +95 -13
- package/src/native/CactusFileSystem.ts +4 -0
- package/src/native/CactusIndex.ts +54 -0
- package/src/native/CactusUtil.ts +19 -3
- package/src/native/index.ts +1 -0
- package/src/specs/Cactus.nitro.ts +18 -2
- package/src/specs/CactusFileSystem.nitro.ts +2 -0
- package/src/specs/CactusIndex.nitro.ts +31 -0
- package/src/specs/CactusUtil.nitro.ts +1 -1
- package/src/telemetry/Telemetry.ts +1 -1
- package/src/types/CactusIndex.ts +40 -0
- package/src/types/CactusLM.ts +24 -0
- package/src/types/CactusSTT.ts +27 -1
- package/src/types/common.ts +28 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_util.so +0 -0
- package/lib/module/types/CactusModel.js +0 -2
- package/lib/module/types/CactusSTTModel.js +0 -2
- package/lib/typescript/src/types/CactusModel.d.ts +0 -13
- package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
- package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
- package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
- package/src/types/CactusModel.ts +0 -15
- package/src/types/CactusSTTModel.ts +0 -10
package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/{ffi_utils.h → cactus_utils.h}
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
#ifndef
|
|
2
|
-
#define
|
|
1
|
+
#ifndef CACTUS_UTILS_H
|
|
2
|
+
#define CACTUS_UTILS_H
|
|
3
3
|
|
|
4
4
|
#include "../engine/engine.h"
|
|
5
5
|
#include <string>
|
|
@@ -12,16 +12,65 @@
|
|
|
12
12
|
#include <iostream>
|
|
13
13
|
#include <filesystem>
|
|
14
14
|
#include <cctype>
|
|
15
|
+
#include <memory>
|
|
16
|
+
#include <atomic>
|
|
17
|
+
#include <mutex>
|
|
18
|
+
#include <random>
|
|
19
|
+
|
|
20
|
+
#ifdef __APPLE__
|
|
21
|
+
#include <uuid/uuid.h>
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
struct CactusModelHandle {
|
|
25
|
+
std::unique_ptr<cactus::engine::Model> model;
|
|
26
|
+
std::atomic<bool> should_stop;
|
|
27
|
+
std::vector<uint32_t> processed_tokens;
|
|
28
|
+
std::mutex model_mutex;
|
|
29
|
+
std::string model_name;
|
|
30
|
+
|
|
31
|
+
CactusModelHandle() : should_stop(false) {}
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
extern std::string last_error_message;
|
|
35
|
+
|
|
36
|
+
bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
|
|
37
|
+
const std::vector<std::vector<uint32_t>>& stop_sequences);
|
|
15
38
|
|
|
16
39
|
namespace cactus {
|
|
17
40
|
namespace ffi {
|
|
18
41
|
|
|
42
|
+
#ifndef CACTUS_VERSION
|
|
43
|
+
#define CACTUS_VERSION "unknown"
|
|
44
|
+
#endif
|
|
45
|
+
|
|
46
|
+
inline const char* getVersion() {
|
|
47
|
+
return CACTUS_VERSION;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
inline std::string generateUUID() {
|
|
51
|
+
#ifdef __APPLE__
|
|
52
|
+
uuid_t uuid;
|
|
53
|
+
uuid_generate_random(uuid);
|
|
54
|
+
char uuid_str[37];
|
|
55
|
+
uuid_unparse_lower(uuid, uuid_str);
|
|
56
|
+
return std::string(uuid_str);
|
|
57
|
+
#endif
|
|
58
|
+
}
|
|
59
|
+
|
|
19
60
|
struct ToolFunction {
|
|
20
61
|
std::string name;
|
|
21
62
|
std::string description;
|
|
22
63
|
std::unordered_map<std::string, std::string> parameters;
|
|
23
64
|
};
|
|
24
65
|
|
|
66
|
+
} // namespace ffi
|
|
67
|
+
} // namespace cactus
|
|
68
|
+
|
|
69
|
+
#include "gemma_tools.h"
|
|
70
|
+
|
|
71
|
+
namespace cactus {
|
|
72
|
+
namespace ffi {
|
|
73
|
+
|
|
25
74
|
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
|
|
26
75
|
std::string sanitized_msg = error_message;
|
|
27
76
|
for (auto& c : sanitized_msg) {
|
|
@@ -175,49 +224,58 @@ inline std::vector<ToolFunction> parse_tools_json(const std::string& json) {
|
|
|
175
224
|
return tools;
|
|
176
225
|
}
|
|
177
226
|
|
|
178
|
-
inline void parse_options_json(const std::string& json,
|
|
179
|
-
float& temperature, float& top_p,
|
|
227
|
+
inline void parse_options_json(const std::string& json,
|
|
228
|
+
float& temperature, float& top_p,
|
|
180
229
|
size_t& top_k, size_t& max_tokens,
|
|
181
|
-
std::vector<std::string>& stop_sequences
|
|
230
|
+
std::vector<std::string>& stop_sequences,
|
|
231
|
+
bool& force_tools) {
|
|
182
232
|
temperature = 0.0f;
|
|
183
|
-
top_p = 0.0f;
|
|
184
|
-
top_k = 0;
|
|
185
|
-
max_tokens = 100;
|
|
233
|
+
top_p = 0.0f;
|
|
234
|
+
top_k = 0;
|
|
235
|
+
max_tokens = 100;
|
|
236
|
+
force_tools = false;
|
|
186
237
|
stop_sequences.clear();
|
|
187
|
-
|
|
238
|
+
|
|
188
239
|
if (json.empty()) return;
|
|
189
|
-
|
|
240
|
+
|
|
190
241
|
size_t pos = json.find("\"temperature\"");
|
|
191
242
|
if (pos != std::string::npos) {
|
|
192
243
|
pos = json.find(':', pos) + 1;
|
|
193
244
|
temperature = std::stof(json.substr(pos));
|
|
194
245
|
}
|
|
195
|
-
|
|
246
|
+
|
|
196
247
|
pos = json.find("\"top_p\"");
|
|
197
248
|
if (pos != std::string::npos) {
|
|
198
249
|
pos = json.find(':', pos) + 1;
|
|
199
250
|
top_p = std::stof(json.substr(pos));
|
|
200
251
|
}
|
|
201
|
-
|
|
252
|
+
|
|
202
253
|
pos = json.find("\"top_k\"");
|
|
203
254
|
if (pos != std::string::npos) {
|
|
204
255
|
pos = json.find(':', pos) + 1;
|
|
205
256
|
top_k = std::stoul(json.substr(pos));
|
|
206
257
|
}
|
|
207
|
-
|
|
258
|
+
|
|
208
259
|
pos = json.find("\"max_tokens\"");
|
|
209
260
|
if (pos != std::string::npos) {
|
|
210
261
|
pos = json.find(':', pos) + 1;
|
|
211
262
|
max_tokens = std::stoul(json.substr(pos));
|
|
212
263
|
}
|
|
213
|
-
|
|
264
|
+
|
|
265
|
+
pos = json.find("\"force_tools\"");
|
|
266
|
+
if (pos != std::string::npos) {
|
|
267
|
+
pos = json.find(':', pos) + 1;
|
|
268
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
269
|
+
force_tools = (json.substr(pos, 4) == "true");
|
|
270
|
+
}
|
|
271
|
+
|
|
214
272
|
pos = json.find("\"stop_sequences\"");
|
|
215
273
|
if (pos != std::string::npos) {
|
|
216
274
|
pos = json.find('[', pos);
|
|
217
275
|
if (pos != std::string::npos) {
|
|
218
276
|
size_t end_pos = json.find(']', pos);
|
|
219
277
|
size_t seq_pos = json.find('"', pos);
|
|
220
|
-
|
|
278
|
+
|
|
221
279
|
while (seq_pos != std::string::npos && seq_pos < end_pos) {
|
|
222
280
|
size_t seq_start = seq_pos + 1;
|
|
223
281
|
size_t seq_end = json.find('"', seq_start);
|
|
@@ -234,7 +292,7 @@ inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tool
|
|
|
234
292
|
if (tools.empty()) return "";
|
|
235
293
|
std::string formatted_tools_json;
|
|
236
294
|
for (size_t i = 0; i < tools.size(); i++) {
|
|
237
|
-
if (i > 0) formatted_tools_json += "
|
|
295
|
+
if (i > 0) formatted_tools_json += "\n";
|
|
238
296
|
formatted_tools_json += "{\"type\":\"function\",\"function\":{\"name\":\""
|
|
239
297
|
+ tools[i].name
|
|
240
298
|
+ "\",\"description\":\""
|
|
@@ -253,11 +311,43 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
253
311
|
regular_response = response_text;
|
|
254
312
|
function_calls.clear();
|
|
255
313
|
|
|
314
|
+
gemma::parse_function_calls(regular_response, function_calls);
|
|
315
|
+
|
|
316
|
+
// Parse Qwen-style function calls: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
|
|
317
|
+
const std::string QWEN_TOOL_START = "<tool_call>";
|
|
318
|
+
const std::string QWEN_TOOL_END = "</tool_call>";
|
|
319
|
+
size_t qwen_start_pos = 0;
|
|
320
|
+
|
|
321
|
+
while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
|
|
322
|
+
size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
|
|
323
|
+
size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
|
|
324
|
+
|
|
325
|
+
if (qwen_end_pos != std::string::npos) {
|
|
326
|
+
std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
|
|
327
|
+
|
|
328
|
+
size_t first = json_content.find_first_not_of(" \t\n\r");
|
|
329
|
+
size_t last = json_content.find_last_not_of(" \t\n\r");
|
|
330
|
+
if (first != std::string::npos && last != std::string::npos) {
|
|
331
|
+
json_content = json_content.substr(first, last - first + 1);
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
if (json_content.size() > 2 && json_content[0] == '{' &&
|
|
335
|
+
json_content.find("\"name\"") != std::string::npos) {
|
|
336
|
+
function_calls.push_back(json_content);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
|
|
340
|
+
} else {
|
|
341
|
+
break;
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
|
|
256
346
|
const std::string TOOL_CALL_START = "<|tool_call_start|>";
|
|
257
347
|
const std::string TOOL_CALL_END = "<|tool_call_end|>";
|
|
258
348
|
size_t tool_start_pos = 0;
|
|
259
349
|
|
|
260
|
-
while ((tool_start_pos =
|
|
350
|
+
while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
|
|
261
351
|
size_t content_start = tool_start_pos + TOOL_CALL_START.length();
|
|
262
352
|
size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
|
|
263
353
|
|
|
@@ -398,4 +488,41 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
398
488
|
} // namespace ffi
|
|
399
489
|
} // namespace cactus
|
|
400
490
|
|
|
401
|
-
#
|
|
491
|
+
#ifdef __cplusplus
|
|
492
|
+
extern "C" {
|
|
493
|
+
#endif
|
|
494
|
+
|
|
495
|
+
const char* cactus_get_last_error();
|
|
496
|
+
|
|
497
|
+
__attribute__((weak))
|
|
498
|
+
const char* register_app(const char* encrypted_data);
|
|
499
|
+
|
|
500
|
+
__attribute__((weak))
|
|
501
|
+
const char* get_device_id(const char* current_token);
|
|
502
|
+
|
|
503
|
+
#ifdef __cplusplus
|
|
504
|
+
}
|
|
505
|
+
#endif
|
|
506
|
+
|
|
507
|
+
#ifdef __cplusplus
|
|
508
|
+
extern "C" {
|
|
509
|
+
|
|
510
|
+
__attribute__((weak))
|
|
511
|
+
inline const char* register_app(const char* encrypted_data) {
|
|
512
|
+
(void)encrypted_data;
|
|
513
|
+
static thread_local std::string uuid_storage;
|
|
514
|
+
uuid_storage = cactus::ffi::generateUUID();
|
|
515
|
+
return uuid_storage.c_str();
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
__attribute__((weak))
|
|
519
|
+
inline const char* get_device_id(const char* current_token) {
|
|
520
|
+
(void)current_token;
|
|
521
|
+
static thread_local std::string uuid_storage;
|
|
522
|
+
uuid_storage = cactus::ffi::generateUUID();
|
|
523
|
+
return uuid_storage.c_str();
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
#endif
|
|
527
|
+
|
|
528
|
+
#endif // CACTUS_UTILS_H
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#include <vector>
|
|
4
4
|
#include <string>
|
|
5
5
|
#include <unordered_map>
|
|
6
|
+
#include <unordered_set>
|
|
6
7
|
#include <memory>
|
|
7
8
|
#include <cstdint>
|
|
8
9
|
|
|
@@ -32,6 +33,9 @@ extern "C" {
|
|
|
32
33
|
class CactusGraph;
|
|
33
34
|
|
|
34
35
|
namespace cactus {
|
|
36
|
+
namespace npu {
|
|
37
|
+
class NPUPrefill;
|
|
38
|
+
}
|
|
35
39
|
namespace engine {
|
|
36
40
|
|
|
37
41
|
class Siglip2Preprocessor;
|
|
@@ -127,9 +131,12 @@ struct MergeRule {
|
|
|
127
131
|
struct ChatMessage {
|
|
128
132
|
std::string role;
|
|
129
133
|
std::string content;
|
|
134
|
+
std::string name;
|
|
130
135
|
std::vector<std::string> images;
|
|
131
136
|
};
|
|
132
137
|
|
|
138
|
+
|
|
139
|
+
|
|
133
140
|
class Tokenizer {
|
|
134
141
|
public:
|
|
135
142
|
virtual ~Tokenizer() = default;
|
|
@@ -325,6 +332,8 @@ struct KVCache {
|
|
|
325
332
|
struct LayerCache {
|
|
326
333
|
std::vector<uint8_t> keys;
|
|
327
334
|
std::vector<uint8_t> values;
|
|
335
|
+
std::vector<float> key_scales;
|
|
336
|
+
std::vector<float> value_scales;
|
|
328
337
|
};
|
|
329
338
|
|
|
330
339
|
std::vector<LayerCache> layer_caches;
|
|
@@ -349,7 +358,12 @@ struct KVCache {
|
|
|
349
358
|
void update_from_graph(CactusGraph* gb, const std::vector<size_t>& k_nodes,
|
|
350
359
|
const std::vector<size_t>& v_nodes, size_t seq_len,
|
|
351
360
|
size_t num_layers, size_t kv_heads, size_t head_dim);
|
|
361
|
+
|
|
362
|
+
void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
|
|
363
|
+
size_t num_tokens, size_t kv_heads, size_t head_dim);
|
|
364
|
+
|
|
352
365
|
bool is_empty() const { return current_seq_len == 0; }
|
|
366
|
+
bool is_int8() const { return precision == Precision::INT8; }
|
|
353
367
|
void* get_key_ptr(size_t layer);
|
|
354
368
|
void* get_value_ptr(size_t layer);
|
|
355
369
|
|
|
@@ -363,6 +377,100 @@ struct KVCache {
|
|
|
363
377
|
|
|
364
378
|
CircularView get_key_view(size_t layer);
|
|
365
379
|
CircularView get_value_view(size_t layer);
|
|
380
|
+
|
|
381
|
+
const int8_t* get_keys_int8(size_t layer) const;
|
|
382
|
+
const int8_t* get_values_int8(size_t layer) const;
|
|
383
|
+
const float* get_key_scales(size_t layer) const;
|
|
384
|
+
const float* get_value_scales(size_t layer) const;
|
|
385
|
+
};
|
|
386
|
+
|
|
387
|
+
class ToolCallConstrainer {
|
|
388
|
+
public:
|
|
389
|
+
enum class State {
|
|
390
|
+
DONE,
|
|
391
|
+
|
|
392
|
+
QWEN_START,
|
|
393
|
+
QWEN_EXPECT_OPEN_BRACE,
|
|
394
|
+
QWEN_EXPECT_NAME_KEY,
|
|
395
|
+
QWEN_EXPECT_NAME_COLON,
|
|
396
|
+
QWEN_EXPECT_NAME_VALUE,
|
|
397
|
+
QWEN_EXPECT_COMMA,
|
|
398
|
+
QWEN_EXPECT_ARGS_KEY,
|
|
399
|
+
QWEN_EXPECT_ARGS_COLON,
|
|
400
|
+
QWEN_IN_ARGUMENTS,
|
|
401
|
+
QWEN_EXPECT_CLOSE_BRACE,
|
|
402
|
+
QWEN_EXPECT_END,
|
|
403
|
+
|
|
404
|
+
LFM_START,
|
|
405
|
+
LFM_EXPECT_BRACKET,
|
|
406
|
+
LFM_IN_FUNC_NAME,
|
|
407
|
+
LFM_EXPECT_PAREN,
|
|
408
|
+
LFM_IN_ARGUMENTS,
|
|
409
|
+
LFM_EXPECT_BRACKET_CLOSE,
|
|
410
|
+
LFM_EXPECT_END,
|
|
411
|
+
|
|
412
|
+
GEMMA_START,
|
|
413
|
+
GEMMA_EXPECT_CALL,
|
|
414
|
+
GEMMA_IN_FUNC_NAME,
|
|
415
|
+
GEMMA_EXPECT_BRACE,
|
|
416
|
+
GEMMA_IN_ARGUMENTS,
|
|
417
|
+
GEMMA_EXPECT_END
|
|
418
|
+
};
|
|
419
|
+
|
|
420
|
+
void init(Config::ModelType model_type,
|
|
421
|
+
const std::vector<std::string>& function_names,
|
|
422
|
+
Tokenizer* tokenizer);
|
|
423
|
+
|
|
424
|
+
const std::unordered_map<uint32_t, float>& get_bias() const { return current_bias_; }
|
|
425
|
+
|
|
426
|
+
void update(uint32_t token_id, const std::string& decoded_text);
|
|
427
|
+
|
|
428
|
+
void reset();
|
|
429
|
+
|
|
430
|
+
bool is_active() const { return active_; }
|
|
431
|
+
|
|
432
|
+
private:
|
|
433
|
+
bool active_ = false;
|
|
434
|
+
State state_ = State::QWEN_START;
|
|
435
|
+
Config::ModelType model_type_ = Config::ModelType::QWEN;
|
|
436
|
+
Tokenizer* tokenizer_ = nullptr;
|
|
437
|
+
|
|
438
|
+
std::vector<std::string> function_names_;
|
|
439
|
+
std::string generated_text_;
|
|
440
|
+
int brace_depth_ = 0;
|
|
441
|
+
|
|
442
|
+
std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
|
|
443
|
+
std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
|
|
444
|
+
std::unordered_set<uint32_t> open_brace_tokens_;
|
|
445
|
+
std::unordered_set<uint32_t> close_brace_tokens_;
|
|
446
|
+
std::unordered_set<uint32_t> colon_tokens_;
|
|
447
|
+
std::unordered_set<uint32_t> comma_tokens_;
|
|
448
|
+
std::unordered_set<uint32_t> name_key_tokens_;
|
|
449
|
+
std::unordered_set<uint32_t> args_key_tokens_;
|
|
450
|
+
std::unordered_set<uint32_t> quote_tokens_;
|
|
451
|
+
std::unordered_set<uint32_t> backtick_tokens_;
|
|
452
|
+
std::unordered_set<uint32_t> all_func_name_tokens_;
|
|
453
|
+
std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
|
|
454
|
+
|
|
455
|
+
std::unordered_set<uint32_t> tool_start_tokens_;
|
|
456
|
+
std::unordered_set<uint32_t> tool_end_tokens_;
|
|
457
|
+
std::unordered_set<uint32_t> bracket_open_tokens_;
|
|
458
|
+
std::unordered_set<uint32_t> bracket_close_tokens_;
|
|
459
|
+
std::unordered_set<uint32_t> paren_open_tokens_;
|
|
460
|
+
std::unordered_set<uint32_t> paren_close_tokens_;
|
|
461
|
+
std::unordered_set<uint32_t> equals_tokens_;
|
|
462
|
+
|
|
463
|
+
std::unordered_set<uint32_t> gemma_call_start_tokens_;
|
|
464
|
+
std::unordered_set<uint32_t> gemma_call_end_tokens_;
|
|
465
|
+
std::unordered_set<uint32_t> gemma_response_start_tokens_;
|
|
466
|
+
std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
|
|
467
|
+
std::unordered_set<uint32_t> escape_tokens_;
|
|
468
|
+
|
|
469
|
+
std::unordered_map<uint32_t, float> current_bias_;
|
|
470
|
+
|
|
471
|
+
void compute_bias();
|
|
472
|
+
void tokenize_grammar_elements();
|
|
473
|
+
void add_tokens_for_string(const std::string& str, std::unordered_set<uint32_t>& token_set);
|
|
366
474
|
};
|
|
367
475
|
|
|
368
476
|
class Model {
|
|
@@ -386,26 +494,40 @@ public:
|
|
|
386
494
|
virtual bool init(CactusGraph* external_graph, const std::string& model_folder, size_t context_size,
|
|
387
495
|
const std::string& system_prompt = "", bool do_warmup = true);
|
|
388
496
|
|
|
389
|
-
virtual uint32_t
|
|
390
|
-
size_t top_k = 0, const std::string& profile_file = ""
|
|
497
|
+
virtual uint32_t decode(const std::vector<uint32_t>& tokens, float temperature = -1.0f, float top_p = -1.0f,
|
|
498
|
+
size_t top_k = 0, const std::string& profile_file = "");
|
|
499
|
+
|
|
500
|
+
virtual void prefill(const std::vector<uint32_t>& tokens, size_t chunk_size = 256, const std::string& profile_file = "");
|
|
391
501
|
|
|
392
|
-
virtual uint32_t
|
|
502
|
+
virtual uint32_t decode_with_images(const std::vector<uint32_t>& tokens, const std::vector<std::string>& image_paths,
|
|
393
503
|
float temperature = -1.0f, float top_p = -1.0f,
|
|
394
504
|
size_t top_k = 0, const std::string& profile_file = "");
|
|
395
|
-
|
|
396
|
-
virtual uint32_t
|
|
505
|
+
|
|
506
|
+
virtual uint32_t decode_with_audio(const std::vector<uint32_t>& tokens, const std::vector<float>& mel_bins, float temperature = 0.0f, float top_p = 0.0f,
|
|
397
507
|
size_t top_k = 0, const std::string& profile_file = "");
|
|
398
508
|
|
|
399
|
-
std::vector<float> get_embeddings(const std::vector<uint32_t>& tokens, bool pooled = true, const std::string& profile_file = "");
|
|
509
|
+
std::vector<float> get_embeddings(const std::vector<uint32_t>& tokens, bool pooled = true, bool normalize = false, const std::string& profile_file = "");
|
|
400
510
|
|
|
401
511
|
virtual std::vector<float> get_image_embeddings(const std::string& image_path);
|
|
402
512
|
|
|
403
513
|
virtual std::vector<float> get_audio_embeddings(const std::vector<float>& mel_bins);
|
|
404
514
|
|
|
405
515
|
virtual void reset_cache() { kv_cache_.reset(); }
|
|
406
|
-
|
|
516
|
+
|
|
517
|
+
double score_tokens_window_logprob(const std::vector<uint32_t>& tokens, size_t start, size_t end, size_t context, size_t* tokens_scored);
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
|
|
407
521
|
void set_cache_window(size_t window_size, size_t sink_size = 4) { kv_cache_.set_window_size(window_size, sink_size); }
|
|
408
522
|
|
|
523
|
+
bool load_npu_prefill(const std::string& model_path);
|
|
524
|
+
bool has_npu_prefill() const;
|
|
525
|
+
size_t get_prefill_chunk_size() const;
|
|
526
|
+
|
|
527
|
+
void set_tool_constraints(const std::vector<std::string>& function_names);
|
|
528
|
+
void clear_tool_constraints();
|
|
529
|
+
void update_tool_constraints(uint32_t token_id);
|
|
530
|
+
|
|
409
531
|
void* graph_handle_;
|
|
410
532
|
|
|
411
533
|
protected:
|
|
@@ -449,6 +571,12 @@ protected:
|
|
|
449
571
|
bool init_internal(CactusGraph* gb, const std::string& model_folder, size_t context_size,
|
|
450
572
|
const std::string& system_prompt, bool do_warmup);
|
|
451
573
|
bool owns_graph_;
|
|
574
|
+
|
|
575
|
+
std::unique_ptr<npu::NPUPrefill> npu_prefill_;
|
|
576
|
+
void prefill_npu(const std::vector<uint32_t>& tokens);
|
|
577
|
+
virtual std::vector<__fp16> get_token_embeddings(const std::vector<uint32_t>& tokens);
|
|
578
|
+
|
|
579
|
+
ToolCallConstrainer tool_constrainer_;
|
|
452
580
|
};
|
|
453
581
|
|
|
454
582
|
std::unique_ptr<Model> create_model(const std::string& model_folder);
|