cactus-react-native 0.2.11 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cactus.podspec +34 -0
- package/LICENSE +38 -0
- package/README.md +354 -69
- package/android/CMakeLists.txt +40 -0
- package/android/build.gradle +68 -44
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +1 -3
- package/android/src/main/cpp/cpp-adapter.cpp +6 -0
- package/android/src/main/java/com/margelo/nitro/cactus/CactusPackage.kt +22 -0
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +38 -0
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +24 -0
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusFileSystem.kt +227 -0
- package/android/src/main/jniLibs/arm64-v8a/{libcactus_v8_2_dotprod.so → libcactus.a} +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_util.so +0 -0
- package/cpp/HybridCactus.cpp +123 -0
- package/cpp/HybridCactus.hpp +35 -0
- package/cpp/HybridCactusUtil.cpp +45 -0
- package/cpp/HybridCactusUtil.hpp +26 -0
- package/cpp/cactus_ffi.h +55 -0
- package/cpp/cactus_util.h +25 -0
- package/ios/HybridCactusCrypto.swift +37 -0
- package/ios/HybridCactusDeviceInfo.swift +32 -0
- package/ios/HybridCactusFileSystem.swift +234 -0
- package/ios/cactus.xcframework/{info.plist → Info.plist} +2 -37
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +8 -229
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +28 -229
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +347 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ffi_utils.h +286 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +319 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +254 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +343 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +11 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +55 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +347 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/ffi_utils.h +286 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +319 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +254 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +343 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/{tvos-arm64_x86_64-simulator → ios-arm64-simulator}/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus_util.xcframework/Info.plist +39 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +25 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +10 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +25 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +25 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +10 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +25 -0
- package/ios/{cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework → cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework}/Info.plist +0 -0
- package/ios/{cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework → cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework}/_CodeSignature/CodeResources +36 -2
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
- package/lib/module/api/Database.js +60 -0
- package/lib/module/api/Database.js.map +1 -0
- package/lib/module/classes/CactusLM.js +149 -0
- package/lib/module/classes/CactusLM.js.map +1 -0
- package/lib/module/config/CactusConfig.js +6 -0
- package/lib/module/config/CactusConfig.js.map +1 -0
- package/lib/module/constants/packageVersion.js +4 -0
- package/lib/module/constants/packageVersion.js.map +1 -0
- package/lib/module/hooks/useCactusLM.js +233 -0
- package/lib/module/hooks/useCactusLM.js.map +1 -0
- package/lib/module/index.js +9 -409
- package/lib/module/index.js.map +1 -1
- package/lib/module/native/Cactus.js +50 -0
- package/lib/module/native/Cactus.js.map +1 -0
- package/lib/module/native/CactusCrypto.js +10 -0
- package/lib/module/native/CactusCrypto.js.map +1 -0
- package/lib/module/native/CactusDeviceInfo.js +13 -0
- package/lib/module/native/CactusDeviceInfo.js.map +1 -0
- package/lib/module/native/CactusFileSystem.js +35 -0
- package/lib/module/native/CactusFileSystem.js.map +1 -0
- package/lib/module/native/CactusUtil.js +23 -0
- package/lib/module/native/CactusUtil.js.map +1 -0
- package/lib/module/native/index.js +8 -0
- package/lib/module/native/index.js.map +1 -0
- package/lib/module/specs/Cactus.nitro.js +4 -0
- package/lib/module/specs/Cactus.nitro.js.map +1 -0
- package/lib/module/specs/CactusCrypto.nitro.js +4 -0
- package/lib/module/specs/CactusCrypto.nitro.js.map +1 -0
- package/lib/module/specs/CactusDeviceInfo.nitro.js +4 -0
- package/lib/module/specs/CactusDeviceInfo.nitro.js.map +1 -0
- package/lib/module/specs/CactusFileSystem.nitro.js +4 -0
- package/lib/module/specs/CactusFileSystem.nitro.js.map +1 -0
- package/lib/module/specs/CactusUtil.nitro.js +4 -0
- package/lib/module/specs/CactusUtil.nitro.js.map +1 -0
- package/lib/module/telemetry/Telemetry.js +100 -0
- package/lib/module/telemetry/Telemetry.js.map +1 -0
- package/lib/module/types/CactusLM.js +2 -0
- package/lib/module/types/CactusLM.js.map +1 -0
- package/lib/module/types/CactusModel.js +2 -0
- package/lib/module/types/CactusModel.js.map +1 -0
- package/lib/module/utils/error.js +4 -0
- package/lib/module/utils/error.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/api/Database.d.ts +11 -0
- package/lib/typescript/src/api/Database.d.ts.map +1 -0
- package/lib/typescript/src/classes/CactusLM.d.ts +25 -0
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -0
- package/lib/typescript/src/config/CactusConfig.d.ts +5 -0
- package/lib/typescript/src/config/CactusConfig.d.ts.map +1 -0
- package/lib/typescript/src/constants/packageVersion.d.ts +2 -0
- package/lib/typescript/src/constants/packageVersion.d.ts.map +1 -0
- package/lib/typescript/src/hooks/useCactusLM.d.ts +20 -0
- package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +6 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/native/Cactus.d.ts +11 -0
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -0
- package/lib/typescript/src/native/CactusCrypto.d.ts +5 -0
- package/lib/typescript/src/native/CactusCrypto.d.ts.map +1 -0
- package/lib/typescript/src/native/CactusDeviceInfo.d.ts +7 -0
- package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +1 -0
- package/lib/typescript/src/native/CactusFileSystem.d.ts +13 -0
- package/lib/typescript/src/native/CactusFileSystem.d.ts.map +1 -0
- package/lib/typescript/src/native/CactusUtil.d.ts +6 -0
- package/lib/typescript/src/native/CactusUtil.d.ts.map +1 -0
- package/lib/typescript/src/native/index.d.ts +6 -0
- package/lib/typescript/src/native/index.d.ts.map +1 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +13 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -0
- package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +8 -0
- package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +1 -0
- package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +16 -0
- package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +1 -0
- package/lib/typescript/src/specs/CactusFileSystem.nitro.d.ts +16 -0
- package/lib/typescript/src/specs/CactusFileSystem.nitro.d.ts.map +1 -0
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +10 -0
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +1 -0
- package/lib/typescript/src/telemetry/Telemetry.d.ts +30 -0
- package/lib/typescript/src/telemetry/Telemetry.d.ts.map +1 -0
- package/lib/typescript/src/types/CactusLM.d.ts +65 -0
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -0
- package/lib/typescript/src/types/CactusModel.d.ts +12 -0
- package/lib/typescript/src/types/CactusModel.d.ts.map +1 -0
- package/lib/typescript/src/utils/error.d.ts +2 -0
- package/lib/typescript/src/utils/error.d.ts.map +1 -0
- package/nitro.json +31 -0
- package/nitrogen/generated/android/c++/JDeviceInfo.hpp +74 -0
- package/nitrogen/generated/android/c++/JFunc_void_double.hpp +74 -0
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +65 -0
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +65 -0
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +85 -0
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +66 -0
- package/nitrogen/generated/android/c++/JHybridCactusFileSystemSpec.cpp +192 -0
- package/nitrogen/generated/android/c++/JHybridCactusFileSystemSpec.hpp +73 -0
- package/nitrogen/generated/android/cactus+autolinking.cmake +87 -0
- package/nitrogen/generated/android/cactus+autolinking.gradle +27 -0
- package/nitrogen/generated/android/cactusOnLoad.cpp +86 -0
- package/nitrogen/generated/android/cactusOnLoad.hpp +25 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +50 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/Func_void_double.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +58 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +62 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusFileSystemSpec.kt +95 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/cactusOnLoad.kt +35 -0
- package/nitrogen/generated/ios/Cactus+autolinking.rb +60 -0
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +122 -0
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +373 -0
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +60 -0
- package/nitrogen/generated/ios/CactusAutolinking.mm +69 -0
- package/nitrogen/generated/ios/CactusAutolinking.swift +55 -0
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +11 -0
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +77 -0
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +11 -0
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +88 -0
- package/nitrogen/generated/ios/c++/HybridCactusFileSystemSpecSwift.cpp +11 -0
- package/nitrogen/generated/ios/c++/HybridCactusFileSystemSpecSwift.hpp +143 -0
- package/nitrogen/generated/ios/swift/DeviceInfo.swift +98 -0
- package/nitrogen/generated/ios/swift/Func_void.swift +47 -0
- package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +47 -0
- package/nitrogen/generated/ios/swift/Func_void_bool.swift +47 -0
- package/nitrogen/generated/ios/swift/Func_void_double.swift +47 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +47 -0
- package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +54 -0
- package/nitrogen/generated/ios/swift/Func_void_std__string.swift +47 -0
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +57 -0
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +139 -0
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +58 -0
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +164 -0
- package/nitrogen/generated/ios/swift/HybridCactusFileSystemSpec.swift +65 -0
- package/nitrogen/generated/ios/swift/HybridCactusFileSystemSpec_cxx.swift +303 -0
- package/nitrogen/generated/shared/c++/DeviceInfo.hpp +92 -0
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +21 -0
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +63 -0
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +22 -0
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +67 -0
- package/nitrogen/generated/shared/c++/HybridCactusFileSystemSpec.cpp +29 -0
- package/nitrogen/generated/shared/c++/HybridCactusFileSystemSpec.hpp +73 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +26 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +71 -0
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +23 -0
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +66 -0
- package/package.json +84 -143
- package/src/api/Database.ts +83 -0
- package/src/classes/CactusLM.ts +203 -0
- package/src/config/CactusConfig.ts +4 -0
- package/src/constants/packageVersion.ts +1 -0
- package/src/hooks/useCactusLM.ts +282 -0
- package/src/index.tsx +23 -0
- package/src/native/Cactus.ts +79 -0
- package/src/native/CactusCrypto.ts +11 -0
- package/src/native/CactusDeviceInfo.ts +18 -0
- package/src/native/CactusFileSystem.ts +47 -0
- package/src/native/CactusUtil.ts +27 -0
- package/src/native/index.ts +5 -0
- package/src/specs/Cactus.nitro.ts +16 -0
- package/src/specs/CactusCrypto.nitro.ts +6 -0
- package/src/specs/CactusDeviceInfo.nitro.ts +15 -0
- package/src/specs/CactusFileSystem.nitro.ts +21 -0
- package/src/specs/CactusUtil.nitro.ts +8 -0
- package/src/telemetry/Telemetry.ts +159 -0
- package/src/types/CactusLM.ts +71 -0
- package/src/types/CactusModel.ts +14 -0
- package/src/utils/error.ts +2 -0
- package/LICENSE.txt +0 -20
- package/android/src/main/CMakeLists.txt +0 -140
- package/android/src/main/java/com/cactus/Cactus.java +0 -1190
- package/android/src/main/java/com/cactus/CactusPackage.java +0 -48
- package/android/src/main/java/com/cactus/LlamaContext.java +0 -748
- package/android/src/main/jni-utils.h +0 -100
- package/android/src/main/jni.cpp +0 -1605
- package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
- package/android/src/newarch/java/com/cactus/CactusModule.java +0 -204
- package/android/src/oldarch/java/com/cactus/CactusModule.java +0 -205
- package/cactus-react-native.podspec +0 -42
- package/ios/CMakeLists.txt +0 -131
- package/ios/Cactus.h +0 -6
- package/ios/Cactus.mm +0 -681
- package/ios/CactusContext.h +0 -81
- package/ios/CactusContext.mm +0 -1032
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/chat.h +0 -145
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/common.h +0 -674
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-alloc.h +0 -76
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-backend-impl.h +0 -255
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-backend.h +0 -354
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-common.h +0 -1857
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpp.h +0 -39
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu.h +0 -143
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-impl.h +0 -601
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-metal-impl.h +0 -622
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-metal.h +0 -66
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-opt.h +0 -237
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-quants.h +0 -100
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-threading.h +0 -14
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml.h +0 -2202
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gguf.h +0 -202
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/json-schema-to-grammar.h +0 -21
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/json.hpp +0 -24766
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-adapter.h +0 -76
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-arch.h +0 -437
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-batch.h +0 -89
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-chat.h +0 -58
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-context.h +0 -276
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-cparams.h +0 -39
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-cpp.h +0 -30
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-grammar.h +0 -173
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-graph.h +0 -640
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-hparams.h +0 -190
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-impl.h +0 -61
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-io.h +0 -35
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-memory.h +0 -32
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-mmap.h +0 -68
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-loader.h +0 -169
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-saver.h +0 -37
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model.h +0 -425
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-sampling.h +0 -32
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-vocab.h +0 -131
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama.h +0 -1376
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/log.h +0 -103
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/chat-template.hpp +0 -542
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/minja.hpp +0 -2974
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/sampling.h +0 -107
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/unicode-data.h +0 -20
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/unicode.h +0 -66
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/ggml-llama.metallib +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +0 -232
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -256
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +0 -145
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/common.h +0 -674
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-alloc.h +0 -76
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend-impl.h +0 -255
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +0 -354
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +0 -1857
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +0 -39
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +0 -143
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +0 -601
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +0 -622
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal.h +0 -66
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +0 -237
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-quants.h +0 -100
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-threading.h +0 -14
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +0 -2202
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/gguf.h +0 -202
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/json-schema-to-grammar.h +0 -21
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/json.hpp +0 -24766
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-adapter.h +0 -76
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +0 -437
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +0 -89
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +0 -58
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +0 -276
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +0 -39
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-cpp.h +0 -30
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-grammar.h +0 -173
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +0 -640
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +0 -190
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-impl.h +0 -61
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-io.h +0 -35
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +0 -32
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-mmap.h +0 -68
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +0 -169
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-saver.h +0 -37
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +0 -425
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-sampling.h +0 -32
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +0 -131
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +0 -1376
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/log.h +0 -103
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +0 -542
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +0 -2974
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/sampling.h +0 -107
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/unicode-data.h +0 -20
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/unicode.h +0 -66
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +0 -232
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +0 -256
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/chat.h +0 -145
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/common.h +0 -674
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-alloc.h +0 -76
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-backend-impl.h +0 -255
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-backend.h +0 -354
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-common.h +0 -1857
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpp.h +0 -39
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu.h +0 -143
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-impl.h +0 -601
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-metal-impl.h +0 -622
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-metal.h +0 -66
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-opt.h +0 -237
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-quants.h +0 -100
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-threading.h +0 -14
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml.h +0 -2202
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/gguf.h +0 -202
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/json-schema-to-grammar.h +0 -21
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/json.hpp +0 -24766
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-adapter.h +0 -76
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-arch.h +0 -437
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-batch.h +0 -89
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-chat.h +0 -58
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-context.h +0 -276
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-cparams.h +0 -39
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-cpp.h +0 -30
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-grammar.h +0 -173
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-graph.h +0 -640
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-hparams.h +0 -190
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-impl.h +0 -61
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-io.h +0 -35
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-memory.h +0 -32
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-mmap.h +0 -68
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-loader.h +0 -169
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-saver.h +0 -37
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model.h +0 -425
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-sampling.h +0 -32
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-vocab.h +0 -131
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama.h +0 -1376
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/log.h +0 -103
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/chat-template.hpp +0 -542
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/minja.hpp +0 -2974
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/sampling.h +0 -107
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/unicode-data.h +0 -20
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/unicode.h +0 -66
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/ggml-llama.metallib +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +0 -232
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -256
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +0 -145
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/common.h +0 -674
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-alloc.h +0 -76
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend-impl.h +0 -255
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +0 -354
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +0 -1857
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +0 -39
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +0 -143
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +0 -601
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +0 -622
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal.h +0 -66
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +0 -237
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-quants.h +0 -100
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-threading.h +0 -14
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +0 -2202
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/gguf.h +0 -202
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/json-schema-to-grammar.h +0 -21
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/json.hpp +0 -24766
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-adapter.h +0 -76
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +0 -437
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +0 -89
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +0 -58
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +0 -276
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +0 -39
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-cpp.h +0 -30
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-grammar.h +0 -173
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +0 -640
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +0 -190
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-impl.h +0 -61
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-io.h +0 -35
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +0 -32
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-mmap.h +0 -68
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +0 -169
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-saver.h +0 -37
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +0 -425
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-sampling.h +0 -32
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +0 -131
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +0 -1376
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/log.h +0 -103
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +0 -542
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +0 -2974
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/sampling.h +0 -107
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/unicode-data.h +0 -20
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/unicode.h +0 -66
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
- package/lib/commonjs/NativeCactus.js +0 -9
- package/lib/commonjs/NativeCactus.js.map +0 -1
- package/lib/commonjs/agent.js +0 -73
- package/lib/commonjs/agent.js.map +0 -1
- package/lib/commonjs/chat.js +0 -73
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/commonjs/index.js +0 -523
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/lm.js +0 -272
- package/lib/commonjs/lm.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/projectId.js +0 -9
- package/lib/commonjs/projectId.js.map +0 -1
- package/lib/commonjs/remote.js +0 -100
- package/lib/commonjs/remote.js.map +0 -1
- package/lib/commonjs/telemetry.js +0 -102
- package/lib/commonjs/telemetry.js.map +0 -1
- package/lib/commonjs/tools.js +0 -72
- package/lib/commonjs/tools.js.map +0 -1
- package/lib/commonjs/tts.js +0 -141
- package/lib/commonjs/tts.js.map +0 -1
- package/lib/commonjs/vlm.js +0 -221
- package/lib/commonjs/vlm.js.map +0 -1
- package/lib/module/NativeCactus.js +0 -5
- package/lib/module/NativeCactus.js.map +0 -1
- package/lib/module/agent.js +0 -68
- package/lib/module/agent.js.map +0 -1
- package/lib/module/chat.js +0 -67
- package/lib/module/chat.js.map +0 -1
- package/lib/module/lm.js +0 -267
- package/lib/module/lm.js.map +0 -1
- package/lib/module/projectId.js +0 -5
- package/lib/module/projectId.js.map +0 -1
- package/lib/module/remote.js +0 -91
- package/lib/module/remote.js.map +0 -1
- package/lib/module/telemetry.js +0 -97
- package/lib/module/telemetry.js.map +0 -1
- package/lib/module/tools.js +0 -66
- package/lib/module/tools.js.map +0 -1
- package/lib/module/tts.js +0 -135
- package/lib/module/tts.js.map +0 -1
- package/lib/module/vlm.js +0 -216
- package/lib/module/vlm.js.map +0 -1
- package/lib/typescript/NativeCactus.d.ts +0 -252
- package/lib/typescript/NativeCactus.d.ts.map +0 -1
- package/lib/typescript/agent.d.ts +0 -31
- package/lib/typescript/agent.d.ts.map +0 -1
- package/lib/typescript/chat.d.ts +0 -23
- package/lib/typescript/chat.d.ts.map +0 -1
- package/lib/typescript/index.d.ts +0 -114
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/lm.d.ts +0 -36
- package/lib/typescript/lm.d.ts.map +0 -1
- package/lib/typescript/projectId.d.ts +0 -2
- package/lib/typescript/projectId.d.ts.map +0 -1
- package/lib/typescript/remote.d.ts +0 -8
- package/lib/typescript/remote.d.ts.map +0 -1
- package/lib/typescript/telemetry.d.ts +0 -25
- package/lib/typescript/telemetry.d.ts.map +0 -1
- package/lib/typescript/tools.d.ts +0 -36
- package/lib/typescript/tools.d.ts.map +0 -1
- package/lib/typescript/tts.d.ts +0 -54
- package/lib/typescript/tts.d.ts.map +0 -1
- package/lib/typescript/vlm.d.ts +0 -33
- package/lib/typescript/vlm.d.ts.map +0 -1
- package/scripts/postInstall.js +0 -33
- package/src/NativeCactus.ts +0 -317
- package/src/agent.ts +0 -112
- package/src/chat.ts +0 -91
- package/src/index.ts +0 -663
- package/src/lm.ts +0 -324
- package/src/projectId.ts +0 -1
- package/src/remote.ts +0 -113
- package/src/telemetry.ts +0 -137
- package/src/tools.ts +0 -94
- package/src/tts.ts +0 -236
- package/src/vlm.ts +0 -276
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <unordered_map>
|
|
6
|
+
#include <memory>
|
|
7
|
+
#include <cstdint>
|
|
8
|
+
|
|
9
|
+
#include "../graph/graph.h"
|
|
10
|
+
|
|
11
|
+
class CactusGraph;
|
|
12
|
+
|
|
13
|
+
namespace cactus {
|
|
14
|
+
namespace engine {
|
|
15
|
+
|
|
16
|
+
/// Model description plus default sampling settings.
///
/// Every field carries a built-in default; from_json() and to_json() are only
/// declared here (their definitions live outside this header).
struct Config {
    // Vocabulary size and special-token ids.
    uint32_t vocab_size{151936};
    uint32_t bos_token_id{151643};
    uint32_t eos_token_id{151645};

    // Network dimensions.
    uint32_t num_layers{28};
    uint32_t hidden_dim{1024};
    uint32_t ffn_intermediate_dim{3072};
    uint32_t attention_heads{16};
    uint32_t attention_kv_heads{8};
    uint32_t attention_head_dim{128};
    float layer_norm_eps{1e-6f};
    float rope_theta{1000000.0f};

    // Expert-related counts; all zero by default.
    // NOTE(review): presumably mixture-of-experts settings — confirm against from_json().
    uint32_t num_experts{0};
    uint32_t num_shared_experts{0};
    uint32_t num_top_experts{0};
    uint32_t moe_every_n_layers{0};
    bool tie_word_embeddings{true};

    enum class ModelType {
        QWEN = 0,
        GEMMA = 1,
        SMOL = 2,
        NOMIC = 3,
        LFM2 = 4
    };
    ModelType model_type{ModelType::QWEN};

    enum class Activation {
        GELU = 0,
        SILU = 1
    };
    Activation activation{Activation::SILU};

    enum class Backend {
        CPU = 0,
        NPU = 1
    };
    Backend default_backend{Backend::CPU};

    enum class Precision {
        INT8 = 0,
        FP16 = 1,
        FP32 = 2
    };
    Precision precision{Precision::FP32};

    // Sampling defaults.
    float default_temperature{0.6f};
    float default_top_p{0.95f};
    size_t default_top_k{20};

    std::vector<std::string> layer_types;
    size_t conv_L_cache{0};

    /// Declared to populate this object from the file at `json_path`;
    /// the bool result presumably signals success (definition not visible here).
    bool from_json(const std::string& json_path);

    /// Declared to produce a JSON string for this configuration.
    std::string to_json() const;
};
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
/// One merge entry: two input pieces, the piece they combine into, and a
/// numeric priority. The constructor copies all four values into the fields.
struct MergeRule {
    std::string first;
    std::string second;
    std::string merged;
    uint32_t priority;

    MergeRule(const std::string& left_piece,
              const std::string& right_piece,
              const std::string& combined_piece,
              uint32_t rule_priority)
        : first(left_piece),
          second(right_piece),
          merged(combined_piece),
          priority(rule_priority) {
    }
};
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
/// A single chat turn: the speaker's role and the text of the turn.
struct ChatMessage
{
    std::string role;     // speaker label, stored as given
    std::string content;  // message text, stored as given
};
|
|
74
|
+
|
|
75
|
+
class Tokenizer {
|
|
76
|
+
public:
|
|
77
|
+
virtual ~Tokenizer() = default;
|
|
78
|
+
|
|
79
|
+
virtual std::vector<uint32_t> encode(const std::string& text) const = 0;
|
|
80
|
+
virtual std::string decode(const std::vector<uint32_t>& tokens) const = 0;
|
|
81
|
+
|
|
82
|
+
virtual std::vector<uint32_t> apply_chat_template(const std::vector<ChatMessage>& messages, bool add_generation_prompt = true) const;
|
|
83
|
+
virtual std::string format_chat_prompt(const std::vector<ChatMessage>& messages, bool add_generation_prompt = true, const std::string& tools_json = "") const;
|
|
84
|
+
|
|
85
|
+
virtual uint32_t get_vocab_size() const = 0;
|
|
86
|
+
virtual uint32_t get_unk_token() const = 0;
|
|
87
|
+
virtual uint32_t get_bos_token() const = 0;
|
|
88
|
+
virtual uint32_t get_eos_token() const = 0;
|
|
89
|
+
virtual bool has_chat_template() const { return has_chat_template_; }
|
|
90
|
+
|
|
91
|
+
virtual bool load_vocabulary_with_config(const std::string& vocab_file, const std::string& merges_file, const std::string& config_file) = 0;
|
|
92
|
+
|
|
93
|
+
protected:
|
|
94
|
+
|
|
95
|
+
enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2 , SMOL, BERT };
|
|
96
|
+
ModelType model_type_ = ModelType::UNKNOWN;
|
|
97
|
+
bool has_chat_template_ = false;
|
|
98
|
+
std::string chat_template_;
|
|
99
|
+
|
|
100
|
+
void detect_model_type(const std::string& config_path);
|
|
101
|
+
std::string format_qwen_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
|
|
102
|
+
std::string format_gemma_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
|
|
103
|
+
std::string format_lfm2_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
|
|
104
|
+
std::string format_smol_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
class BPETokenizer : public Tokenizer {
|
|
108
|
+
public:
|
|
109
|
+
BPETokenizer();
|
|
110
|
+
~BPETokenizer();
|
|
111
|
+
|
|
112
|
+
bool load_vocabulary_mmap(const std::string& vocab_file, const std::string& merges_file);
|
|
113
|
+
bool load_vocabulary_with_config(const std::string& vocab_file, const std::string& merges_file, const std::string& config_file) override;
|
|
114
|
+
|
|
115
|
+
std::vector<uint32_t> encode(const std::string& text) const override;
|
|
116
|
+
std::string decode(const std::vector<uint32_t>& tokens) const override;
|
|
117
|
+
|
|
118
|
+
uint32_t get_vocab_size() const override { return vocab_size_; }
|
|
119
|
+
uint32_t get_unk_token() const override { return unk_token_id_; }
|
|
120
|
+
uint32_t get_bos_token() const override { return bos_token_id_; }
|
|
121
|
+
uint32_t get_eos_token() const override { return eos_token_id_; }
|
|
122
|
+
|
|
123
|
+
private:
|
|
124
|
+
std::unordered_map<std::string, uint32_t> token_to_id_;
|
|
125
|
+
std::vector<std::string> id_to_token_;
|
|
126
|
+
std::vector<MergeRule> merge_rules_;
|
|
127
|
+
std::unordered_map<std::string, uint32_t> merge_map_;
|
|
128
|
+
|
|
129
|
+
uint32_t vocab_size_;
|
|
130
|
+
uint32_t unk_token_id_;
|
|
131
|
+
uint32_t bos_token_id_;
|
|
132
|
+
uint32_t eos_token_id_;
|
|
133
|
+
|
|
134
|
+
void* vocab_mmap_ptr_;
|
|
135
|
+
size_t vocab_mmap_size_;
|
|
136
|
+
|
|
137
|
+
void* merges_mmap_ptr_;
|
|
138
|
+
size_t merges_mmap_size_;
|
|
139
|
+
|
|
140
|
+
std::vector<std::string> apply_bpe(const std::vector<std::string>& tokens) const;
|
|
141
|
+
std::pair<int, uint32_t> find_best_merge_fast(const std::vector<std::string>& tokens) const;
|
|
142
|
+
|
|
143
|
+
std::string bytes_to_unicode(const std::string& text) const;
|
|
144
|
+
std::string unicode_to_bytes(const std::string& text) const;
|
|
145
|
+
std::vector<std::string> byte_level_split(const std::string& text) const;
|
|
146
|
+
|
|
147
|
+
void cleanup_mmap();
|
|
148
|
+
|
|
149
|
+
private:
|
|
150
|
+
mutable std::unordered_map<uint8_t, std::string> byte_to_unicode_;
|
|
151
|
+
mutable std::unordered_map<std::string, uint8_t> unicode_to_byte_;
|
|
152
|
+
void init_byte_mappings() const;
|
|
153
|
+
|
|
154
|
+
std::unordered_map<std::string, uint32_t> special_tokens_;
|
|
155
|
+
std::vector<std::string> split_with_special_tokens(const std::string& text) const;
|
|
156
|
+
void load_special_tokens(const std::string& config_file);
|
|
157
|
+
|
|
158
|
+
void load_chat_template(const std::string& template_file);
|
|
159
|
+
|
|
160
|
+
std::unordered_map<std::string, uint32_t> tool_tokens_;
|
|
161
|
+
bool has_tool_support_;
|
|
162
|
+
void load_tokenizer_config(const std::string& config_file);
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
class SPTokenizer : public Tokenizer {
|
|
166
|
+
public:
|
|
167
|
+
SPTokenizer();
|
|
168
|
+
~SPTokenizer();
|
|
169
|
+
|
|
170
|
+
bool load_vocabulary_with_config(const std::string& vocab_file, const std::string& merges_file, const std::string& config_file) override;
|
|
171
|
+
|
|
172
|
+
std::vector<uint32_t> encode(const std::string& text) const override;
|
|
173
|
+
std::string decode(const std::vector<uint32_t>& tokens) const override;
|
|
174
|
+
|
|
175
|
+
uint32_t get_vocab_size() const override { return vocab_size_; }
|
|
176
|
+
uint32_t get_unk_token() const override { return unk_token_id_; }
|
|
177
|
+
uint32_t get_bos_token() const override { return bos_token_id_; }
|
|
178
|
+
uint32_t get_eos_token() const override { return eos_token_id_; }
|
|
179
|
+
|
|
180
|
+
private:
|
|
181
|
+
struct TrieNode {
|
|
182
|
+
std::unordered_map<char32_t, std::unique_ptr<TrieNode>> children;
|
|
183
|
+
int32_t token_id = -1;
|
|
184
|
+
float score = 0.0f;
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
std::unique_ptr<TrieNode> trie_root_;
|
|
188
|
+
std::unordered_map<std::string, uint32_t> token_to_id_;
|
|
189
|
+
std::vector<std::string> id_to_token_;
|
|
190
|
+
std::vector<float> token_scores_;
|
|
191
|
+
|
|
192
|
+
uint32_t vocab_size_;
|
|
193
|
+
uint32_t unk_token_id_;
|
|
194
|
+
uint32_t bos_token_id_;
|
|
195
|
+
uint32_t eos_token_id_;
|
|
196
|
+
uint32_t pad_token_id_;
|
|
197
|
+
|
|
198
|
+
void* vocab_mmap_ptr_;
|
|
199
|
+
size_t vocab_mmap_size_;
|
|
200
|
+
|
|
201
|
+
void build_trie();
|
|
202
|
+
std::vector<std::pair<std::string, uint32_t>> tokenize_with_trie(const std::string& text) const;
|
|
203
|
+
std::string preprocess_text(const std::string& text) const;
|
|
204
|
+
std::string postprocess_text(const std::string& text) const;
|
|
205
|
+
std::vector<std::string> split_by_unicode_spaces(const std::string& text) const;
|
|
206
|
+
|
|
207
|
+
void cleanup_mmap();
|
|
208
|
+
|
|
209
|
+
std::unordered_map<std::string, uint32_t> special_tokens_;
|
|
210
|
+
std::vector<std::string> split_with_special_tokens(const std::string& text) const;
|
|
211
|
+
void load_special_tokens(const std::string& config_file);
|
|
212
|
+
|
|
213
|
+
void load_chat_template(const std::string& template_file);
|
|
214
|
+
};
|
|
215
|
+
|
|
216
|
+
class ConvCache {
|
|
217
|
+
public:
|
|
218
|
+
struct CircularView {
|
|
219
|
+
const void* ptr1;
|
|
220
|
+
size_t len1;
|
|
221
|
+
const void* ptr2;
|
|
222
|
+
size_t len2;
|
|
223
|
+
size_t total_len;
|
|
224
|
+
};
|
|
225
|
+
|
|
226
|
+
void init(size_t layers, size_t hidden_dim, size_t window_len, Precision model_precision);
|
|
227
|
+
CircularView get_window(size_t layer) const;
|
|
228
|
+
void update(CactusGraph* gb, size_t layer, const size_t latest_token);
|
|
229
|
+
void reset();
|
|
230
|
+
|
|
231
|
+
bool is_empty() const { return num_layers == 0; }
|
|
232
|
+
|
|
233
|
+
size_t num_layers = 0;
|
|
234
|
+
size_t hidden_size = 0;
|
|
235
|
+
size_t window_size = 0;
|
|
236
|
+
Precision precision = Precision::FP32;
|
|
237
|
+
size_t element_size = 4;
|
|
238
|
+
|
|
239
|
+
private:
|
|
240
|
+
struct LayerState {
|
|
241
|
+
std::vector<uint8_t> data;
|
|
242
|
+
size_t head = 0;
|
|
243
|
+
size_t count = 0;
|
|
244
|
+
};
|
|
245
|
+
|
|
246
|
+
std::vector<LayerState> layer_states;
|
|
247
|
+
};
|
|
248
|
+
|
|
249
|
+
struct KVCache {
|
|
250
|
+
static constexpr size_t DEFAULT_WINDOW_SIZE = 512;
|
|
251
|
+
static constexpr size_t DEFAULT_SINK_SIZE = 4;
|
|
252
|
+
|
|
253
|
+
struct LayerCache {
|
|
254
|
+
std::vector<uint8_t> keys;
|
|
255
|
+
std::vector<uint8_t> values;
|
|
256
|
+
};
|
|
257
|
+
|
|
258
|
+
std::vector<LayerCache> layer_caches;
|
|
259
|
+
|
|
260
|
+
size_t window_size = DEFAULT_WINDOW_SIZE;
|
|
261
|
+
size_t sink_size = DEFAULT_SINK_SIZE;
|
|
262
|
+
size_t current_seq_len = 0;
|
|
263
|
+
size_t total_seq_len = 0;
|
|
264
|
+
size_t max_seq_len = 2048;
|
|
265
|
+
size_t num_kv_heads = 0;
|
|
266
|
+
size_t head_dim = 0;
|
|
267
|
+
size_t num_layers = 0;
|
|
268
|
+
Precision precision;
|
|
269
|
+
size_t element_size = 4;
|
|
270
|
+
|
|
271
|
+
void set_window_size(size_t window, size_t sink = DEFAULT_SINK_SIZE);
|
|
272
|
+
size_t get_effective_seq_len() const { return current_seq_len; }
|
|
273
|
+
size_t get_total_seq_len() const { return total_seq_len; }
|
|
274
|
+
|
|
275
|
+
void init(size_t num_layers, size_t max_seq, size_t num_kv_heads, size_t head_dim, Precision model_precision);
|
|
276
|
+
void reset();
|
|
277
|
+
void update_from_graph(CactusGraph* gb, const std::vector<size_t>& k_nodes,
|
|
278
|
+
const std::vector<size_t>& v_nodes, size_t seq_len,
|
|
279
|
+
size_t num_layers, size_t kv_heads, size_t head_dim);
|
|
280
|
+
bool is_empty() const { return current_seq_len == 0; }
|
|
281
|
+
void* get_key_ptr(size_t layer);
|
|
282
|
+
void* get_value_ptr(size_t layer);
|
|
283
|
+
|
|
284
|
+
struct CircularView {
|
|
285
|
+
const void* ptr1;
|
|
286
|
+
const void* ptr2;
|
|
287
|
+
size_t len1;
|
|
288
|
+
size_t len2;
|
|
289
|
+
size_t total_len;
|
|
290
|
+
};
|
|
291
|
+
|
|
292
|
+
CircularView get_key_view(size_t layer);
|
|
293
|
+
CircularView get_value_view(size_t layer);
|
|
294
|
+
};
|
|
295
|
+
|
|
296
|
+
class Model {
|
|
297
|
+
public:
|
|
298
|
+
Model();
|
|
299
|
+
explicit Model(const Config& config);
|
|
300
|
+
virtual ~Model();
|
|
301
|
+
|
|
302
|
+
const Config& get_config() const { return config_; }
|
|
303
|
+
Tokenizer* get_tokenizer() const { return tokenizer_.get(); }
|
|
304
|
+
|
|
305
|
+
bool init(const std::string& model_folder, size_t context_size, const std::string& system_prompt = "");
|
|
306
|
+
uint32_t generate(const std::vector<uint32_t>& tokens, float temperature = -1.0f, float top_p = -1.0f,
|
|
307
|
+
size_t top_k = 0, const std::string& profile_file = "");
|
|
308
|
+
|
|
309
|
+
std::vector<float> get_embeddings(const std::vector<uint32_t>& tokens, bool pooled = true, const std::string& profile_file = "");
|
|
310
|
+
|
|
311
|
+
virtual void reset_cache() { kv_cache_.reset(); }
|
|
312
|
+
void set_cache_window(size_t window_size, size_t sink_size = 4) { kv_cache_.set_window_size(window_size, sink_size); }
|
|
313
|
+
|
|
314
|
+
protected:
|
|
315
|
+
virtual size_t forward(const std::vector<uint32_t>& tokens, bool use_cache = false) = 0;
|
|
316
|
+
virtual void load_weights_to_graph(CactusGraph* gb) = 0;
|
|
317
|
+
virtual size_t build_attention(CactusGraph* gb, size_t normalized_input, uint32_t layer_idx,
|
|
318
|
+
ComputeBackend backend, bool use_cache = false, size_t position_offset = 0) = 0;
|
|
319
|
+
virtual size_t build_mlp(CactusGraph* gb, size_t normalized_h, uint32_t layer_idx,
|
|
320
|
+
ComputeBackend backend) const = 0;
|
|
321
|
+
virtual size_t build_transformer_block(CactusGraph* gb, size_t hidden, uint32_t layer_idx,
|
|
322
|
+
ComputeBackend backend, bool use_cache = false, size_t position_offset = 0) = 0;
|
|
323
|
+
void update_kv_cache(CactusGraph* gb, size_t seq_len);
|
|
324
|
+
virtual void post_init() {}
|
|
325
|
+
virtual void post_execute_updates(CactusGraph*, size_t) {}
|
|
326
|
+
Config config_;
|
|
327
|
+
std::unique_ptr<Tokenizer> tokenizer_;
|
|
328
|
+
|
|
329
|
+
void* graph_handle_;
|
|
330
|
+
bool initialized_;
|
|
331
|
+
float attention_scale_;
|
|
332
|
+
|
|
333
|
+
protected:
|
|
334
|
+
KVCache kv_cache_;
|
|
335
|
+
std::vector<size_t> cache_k_output_nodes_;
|
|
336
|
+
std::vector<size_t> cache_v_output_nodes_;
|
|
337
|
+
|
|
338
|
+
std::string embedding_file_path_;
|
|
339
|
+
size_t embedding_node_id_;
|
|
340
|
+
std::string model_folder_path_;
|
|
341
|
+
size_t output_weight_node_id_;
|
|
342
|
+
};
|
|
343
|
+
|
|
344
|
+
/// Factory declaration: returns an owning pointer to a Model for the given
/// model folder. The definition is not in this header.
/// NOTE(review): presumably picks the concrete subclass from the folder's
/// config and may return null on failure — confirm against the implementation.
std::unique_ptr<Model> create_model(const std::string& model_folder);
|
|
345
|
+
|
|
346
|
+
}
|
|
347
|
+
}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#ifndef CACTUS_FFI_UTILS_H
|
|
2
|
+
#define CACTUS_FFI_UTILS_H
|
|
3
|
+
|
|
4
|
+
#include "../engine/engine.h"
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include <unordered_map>
|
|
8
|
+
#include <stdexcept>
|
|
9
|
+
#include <sstream>
|
|
10
|
+
#include <iomanip>
|
|
11
|
+
|
|
12
|
+
namespace cactus {
|
|
13
|
+
namespace ffi {
|
|
14
|
+
|
|
15
|
+
/// Description of one callable tool as extracted from a tools JSON document.
struct ToolFunction
{
    std::string name;         // tool name
    std::string description;  // tool description text

    // Free-form string attributes; parse_tools_json() stores the raw
    // parameters object under the key "schema".
    std::unordered_map<std::string, std::string> parameters;
};
|
|
20
|
+
|
|
21
|
+
/// Writes the JSON error envelope `{"success":false,"error":"<message>"}`
/// into a caller-provided, NUL-terminated C buffer.
///
/// The message is sanitized so the envelope is always valid JSON:
///   - `"` becomes `'`
///   - `\` is escaped as `\\`
///   - every control character below 0x20 (including newline) becomes a space
///
/// @param error_message   Human-readable error text.
/// @param response_buffer Destination buffer; nothing is done when null.
/// @param buffer_size     Capacity of `response_buffer` in bytes, including
///                        room for the terminating NUL.
///
/// When the envelope (plus NUL) does not fit, the buffer is set to the empty
/// string so that stale contents are never mistaken for a response.
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
    if (response_buffer == nullptr || buffer_size == 0) {
        return;
    }

    std::string error_json = "{\"success\":false,\"error\":\"";
    error_json.reserve(error_json.size() + error_message.size() + 2);
    for (const char c : error_message) {
        if (c == '"') {
            error_json += '\'';
        } else if (c == '\\') {
            // A raw backslash would start an escape sequence in the output.
            error_json += "\\\\";
        } else if (static_cast<unsigned char>(c) < 0x20) {
            // Raw control characters are not allowed inside JSON strings.
            error_json += ' ';
        } else {
            error_json += c;
        }
    }
    error_json += "\"}";

    if (error_json.length() < buffer_size) {
        // std::string::copy does not append a terminator; add it explicitly.
        error_json.copy(response_buffer, error_json.length());
        response_buffer[error_json.length()] = '\0';
    } else {
        response_buffer[0] = '\0';
    }
}
|
|
32
|
+
|
|
33
|
+
inline std::vector<cactus::engine::ChatMessage> parse_messages_json(const std::string& json) {
|
|
34
|
+
std::vector<cactus::engine::ChatMessage> messages;
|
|
35
|
+
|
|
36
|
+
size_t pos = json.find('[');
|
|
37
|
+
if (pos == std::string::npos) {
|
|
38
|
+
throw std::runtime_error("Invalid JSON: expected array");
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
pos = json.find('{', pos);
|
|
42
|
+
while (pos != std::string::npos) {
|
|
43
|
+
cactus::engine::ChatMessage msg;
|
|
44
|
+
|
|
45
|
+
size_t role_pos = json.find("\"role\"", pos);
|
|
46
|
+
if (role_pos == std::string::npos) break;
|
|
47
|
+
|
|
48
|
+
size_t role_start = json.find('"', role_pos + 6) + 1;
|
|
49
|
+
size_t role_end = json.find('"', role_start);
|
|
50
|
+
msg.role = json.substr(role_start, role_end - role_start);
|
|
51
|
+
|
|
52
|
+
size_t content_pos = json.find("\"content\"", role_end);
|
|
53
|
+
if (content_pos == std::string::npos) break;
|
|
54
|
+
|
|
55
|
+
size_t content_start = json.find('"', content_pos + 9) + 1;
|
|
56
|
+
size_t content_end = content_start;
|
|
57
|
+
|
|
58
|
+
while (content_end < json.length()) {
|
|
59
|
+
content_end = json.find('"', content_end);
|
|
60
|
+
if (content_end == std::string::npos) break;
|
|
61
|
+
if (json[content_end - 1] != '\\') break;
|
|
62
|
+
content_end++;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
msg.content = json.substr(content_start, content_end - content_start);
|
|
66
|
+
|
|
67
|
+
size_t escape_pos = 0;
|
|
68
|
+
while ((escape_pos = msg.content.find("\\n", escape_pos)) != std::string::npos) {
|
|
69
|
+
msg.content.replace(escape_pos, 2, "\n");
|
|
70
|
+
escape_pos += 1;
|
|
71
|
+
}
|
|
72
|
+
escape_pos = 0;
|
|
73
|
+
while ((escape_pos = msg.content.find("\\\"", escape_pos)) != std::string::npos) {
|
|
74
|
+
msg.content.replace(escape_pos, 2, "\"");
|
|
75
|
+
escape_pos += 1;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
messages.push_back(msg);
|
|
79
|
+
|
|
80
|
+
pos = json.find('{', content_end);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
return messages;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
inline std::vector<ToolFunction> parse_tools_json(const std::string& json) {
|
|
87
|
+
std::vector<ToolFunction> tools;
|
|
88
|
+
|
|
89
|
+
if (json.empty()) return tools;
|
|
90
|
+
|
|
91
|
+
size_t pos = json.find('[');
|
|
92
|
+
if (pos == std::string::npos) return tools;
|
|
93
|
+
|
|
94
|
+
pos = json.find("\"function\"", pos);
|
|
95
|
+
while (pos != std::string::npos) {
|
|
96
|
+
ToolFunction tool;
|
|
97
|
+
|
|
98
|
+
size_t name_pos = json.find("\"name\"", pos);
|
|
99
|
+
if (name_pos != std::string::npos) {
|
|
100
|
+
size_t name_start = json.find('"', name_pos + 6) + 1;
|
|
101
|
+
size_t name_end = json.find('"', name_start);
|
|
102
|
+
tool.name = json.substr(name_start, name_end - name_start);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
size_t desc_pos = json.find("\"description\"", pos);
|
|
106
|
+
if (desc_pos != std::string::npos) {
|
|
107
|
+
size_t desc_start = json.find('"', desc_pos + 13) + 1;
|
|
108
|
+
size_t desc_end = json.find('"', desc_start);
|
|
109
|
+
tool.description = json.substr(desc_start, desc_end - desc_start);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
size_t params_pos = json.find("\"parameters\"", pos);
|
|
113
|
+
if (params_pos != std::string::npos) {
|
|
114
|
+
size_t params_start = json.find('{', params_pos);
|
|
115
|
+
if (params_start != std::string::npos) {
|
|
116
|
+
int brace_count = 1;
|
|
117
|
+
size_t params_end = params_start + 1;
|
|
118
|
+
while (params_end < json.length() && brace_count > 0) {
|
|
119
|
+
if (json[params_end] == '{') brace_count++;
|
|
120
|
+
else if (json[params_end] == '}') brace_count--;
|
|
121
|
+
params_end++;
|
|
122
|
+
}
|
|
123
|
+
tool.parameters["schema"] = json.substr(params_start, params_end - params_start);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
tools.push_back(tool);
|
|
128
|
+
|
|
129
|
+
pos = json.find("\"function\"", name_pos);
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return tools;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/// Reads sampling options from a flat JSON object.
///
/// Outputs are first reset to their defaults:
///   temperature = -1, top_p = -1, top_k = 0 (all "use model default"),
///   max_tokens = 100, stop_sequences = {}.
/// Recognized keys: "temperature", "top_p", "top_k", "max_tokens",
/// "stop_sequences".
///
/// - A missing key or a `null` value keeps the default.
/// - A negative "top_k" / "max_tokens" keeps the default instead of wrapping
///   to a huge unsigned value.
/// - Stop sequences are JSON-unescaped (\n, \t, \r, \b, \f, \", \\, \/), so
///   a sequence such as "\n\n" matches real newlines. `\uXXXX` escapes are
///   kept verbatim. Parsing is string-aware, so ']' inside a sequence is fine,
///   and it stops at the first unterminated string.
///
/// @param json Options JSON; an empty string yields the defaults.
/// @throws std::invalid_argument when a numeric option has a non-numeric,
///         non-null value or is out of range.
inline void parse_options_json(const std::string& json,
                               float& temperature, float& top_p,
                               size_t& top_k, size_t& max_tokens,
                               std::vector<std::string>& stop_sequences) {
    temperature = -1.0f; // Use model default
    top_p = -1.0f;       // Use model default
    top_k = 0;           // Use model default
    max_tokens = 100;    // FFI-level default
    stop_sequences.clear();

    if (json.empty()) return;

    const size_t npos = std::string::npos;

    // Index of the first non-space character of `key`'s value, or npos.
    const auto value_start = [&json, npos](const std::string& key) -> size_t {
        size_t p = json.find("\"" + key + "\"");
        if (p == npos) return npos;
        p = json.find(':', p + key.size() + 2);
        if (p == npos) return npos;
        ++p;
        while (p < json.size() &&
               (json[p] == ' ' || json[p] == '\t' || json[p] == '\n' || json[p] == '\r')) {
            ++p;
        }
        return p < json.size() ? p : npos;
    };

    const auto is_null_at = [&json](size_t p) {
        return json.compare(p, 4, "null") == 0;
    };

    const auto read_float = [&](const std::string& key, float& out) {
        const size_t p = value_start(key);
        if (p == npos || is_null_at(p)) return;
        try {
            out = std::stof(json.substr(p));
        } catch (const std::logic_error&) {
            throw std::invalid_argument("Invalid value for option \"" + key + "\"");
        }
    };

    const auto read_size = [&](const std::string& key, size_t& out) {
        const size_t p = value_start(key);
        if (p == npos || is_null_at(p)) return;
        if (json[p] == '-') return;  // std::stoul would silently wrap a negative number
        try {
            out = std::stoul(json.substr(p));
        } catch (const std::logic_error&) {
            throw std::invalid_argument("Invalid value for option \"" + key + "\"");
        }
    };

    read_float("temperature", temperature);
    read_float("top_p", top_p);
    read_size("top_k", top_k);
    read_size("max_tokens", max_tokens);

    size_t p = value_start("stop_sequences");
    if (p == npos || json[p] != '[') return;
    ++p;

    while (p < json.size() && json[p] != ']') {
        if (json[p] != '"') {
            ++p;
            continue;
        }

        std::string sequence;
        bool closed = false;
        for (++p; p < json.size(); ++p) {
            char c = json[p];
            if (c == '"') {
                closed = true;
                ++p;
                break;
            }
            if (c == '\\' && p + 1 < json.size()) {
                c = json[++p];
                switch (c) {
                    case 'n': c = '\n'; break;
                    case 't': c = '\t'; break;
                    case 'r': c = '\r'; break;
                    case 'b': c = '\b'; break;
                    case 'f': c = '\f'; break;
                    case 'u': sequence += '\\'; break;  // keep \uXXXX as written
                    default: break;                     // \" \\ \/ -> the char itself
                }
            }
            sequence += c;
        }
        if (!closed) break;  // unterminated string: stop instead of rescanning
        stop_sequences.push_back(sequence);
    }
}
|
|
189
|
+
|
|
190
|
+
inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tools) {
|
|
191
|
+
if (tools.empty()) return "";
|
|
192
|
+
std::string formatted_tools_json;
|
|
193
|
+
for (size_t i = 0; i < tools.size(); i++) {
|
|
194
|
+
if (i > 0) formatted_tools_json += ",\n";
|
|
195
|
+
formatted_tools_json += " {\n";
|
|
196
|
+
formatted_tools_json += " \"type\": \"function\",\n";
|
|
197
|
+
formatted_tools_json += " \"function\": {\n";
|
|
198
|
+
formatted_tools_json += " \"name\": \"" + tools[i].name + "\",\n";
|
|
199
|
+
formatted_tools_json += " \"description\": \"" + tools[i].description + "\"";
|
|
200
|
+
if (tools[i].parameters.find("schema") != tools[i].parameters.end()) {
|
|
201
|
+
formatted_tools_json += ",\n \"parameters\": " + tools[i].parameters.at("schema");
|
|
202
|
+
}
|
|
203
|
+
formatted_tools_json += "\n }\n }";
|
|
204
|
+
}
|
|
205
|
+
return formatted_tools_json;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/// Splits model output into plain text and embedded function-call objects.
///
/// Every occurrence of the marker `"function_call"` is followed to the next
/// '{', and the balanced `{...}` object starting there is appended to
/// `function_calls`.
///
/// - Brace matching ignores braces inside JSON string values. If that scan
///   cannot balance the object (for example the model emitted an odd number
///   of quotes), it falls back to plain brace counting.
/// - `regular_response` is the text before the FIRST call, cut at the last
///   '{' before its marker (the wrapper object's opening brace). Later calls
///   no longer overwrite it, so it never contains an earlier call's JSON.
///
/// @param response_text     Raw model output.
/// @param regular_response  Receives the plain-text part; equals
///                          `response_text` when no call is found.
/// @param function_calls    Receives the extracted objects in order of appearance.
inline void parse_function_calls_from_response(const std::string& response_text,
                                               std::string& regular_response,
                                               std::vector<std::string>& function_calls) {
    regular_response = response_text;
    function_calls.clear();

    const char* FUNCTION_CALL_MARKER = "\"function_call\"";
    const size_t text_len = response_text.length();

    // One past the '}' matching the '{' at `open`, or npos when unbalanced.
    const auto object_end = [&response_text, text_len](size_t open, bool string_aware) -> size_t {
        int depth = 0;
        bool in_string = false;
        for (size_t i = open; i < text_len; ++i) {
            const char c = response_text[i];
            if (in_string) {
                if (c == '\\') {
                    ++i;  // skip the escaped character
                } else if (c == '"') {
                    in_string = false;
                }
            } else if (c == '"' && string_aware) {
                in_string = true;
            } else if (c == '{') {
                ++depth;
            } else if (c == '}' && --depth == 0) {
                return i + 1;
            }
        }
        return std::string::npos;
    };

    bool prefix_captured = false;
    size_t search_pos = 0;
    while (search_pos < text_len) {
        const size_t marker_pos = response_text.find(FUNCTION_CALL_MARKER, search_pos);
        if (marker_pos == std::string::npos) break;

        const size_t json_start = response_text.find('{', marker_pos);
        if (json_start == std::string::npos) break;

        size_t json_end = object_end(json_start, true);
        if (json_end == std::string::npos) {
            json_end = object_end(json_start, false);
        }
        if (json_end == std::string::npos) break;  // unbalanced to the end of the text

        function_calls.push_back(response_text.substr(json_start, json_end - json_start));

        if (!prefix_captured) {
            regular_response = response_text.substr(0, marker_pos);
            const size_t last_bracket = regular_response.rfind('{');
            if (last_bracket != std::string::npos) {
                regular_response.erase(last_bracket);
            }
            prefix_captured = true;
        }

        search_pos = json_end;
    }
}
|
|
244
|
+
|
|
245
|
+
/// Builds the success response object:
///   {"success":true,"response":"...",["function_calls":[...],]
///    "time_to_first_token_ms":x,"total_time_ms":x,"tokens_per_second":x,
///    "prefill_tokens":n,"decode_tokens":n,"total_tokens":n}
///
/// `regular_response` is escaped as a JSON string: `"`, `\`, newline, carriage
/// return and tab use their short escapes, and every other control character
/// below 0x20 is written as `\u00XX` (raw control characters are not valid
/// inside a JSON string). Each entry of `function_calls` is inserted verbatim
/// and is expected to already be valid JSON; the "function_calls" member is
/// omitted when the list is empty. The three timing values are printed in
/// fixed notation with two decimals.
///
/// @param prompt_tokens      Reported as "prefill_tokens".
/// @param completion_tokens  Reported as "decode_tokens"; "total_tokens" is
///                           the sum of both.
inline std::string construct_response_json(const std::string& regular_response,
                                           const std::vector<std::string>& function_calls,
                                           double time_to_first_token,
                                           double total_time_ms,
                                           double tokens_per_second,
                                           size_t prompt_tokens,
                                           size_t completion_tokens) {
    const char* const hex_digits = "0123456789abcdef";

    std::ostringstream json_response;
    json_response << "{";
    json_response << "\"success\":true,";
    json_response << "\"response\":\"";
    for (const char c : regular_response) {
        switch (c) {
            case '"':  json_response << "\\\""; break;
            case '\\': json_response << "\\\\"; break;
            case '\n': json_response << "\\n"; break;
            case '\r': json_response << "\\r"; break;
            case '\t': json_response << "\\t"; break;
            default: {
                const unsigned char byte = static_cast<unsigned char>(c);
                if (byte < 0x20) {
                    json_response << "\\u00" << hex_digits[byte >> 4] << hex_digits[byte & 0x0F];
                } else {
                    json_response << c;
                }
                break;
            }
        }
    }
    json_response << "\",";
    if (!function_calls.empty()) {
        json_response << "\"function_calls\":[";
        for (size_t i = 0; i < function_calls.size(); ++i) {
            if (i > 0) json_response << ",";
            json_response << function_calls[i];
        }
        json_response << "],";
    }
    json_response << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
    json_response << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
    json_response << "\"tokens_per_second\":" << std::fixed << std::setprecision(2) << tokens_per_second << ",";
    json_response << "\"prefill_tokens\":" << prompt_tokens << ",";
    json_response << "\"decode_tokens\":" << completion_tokens << ",";
    json_response << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
    json_response << "}";
    return json_response.str();
}
|
|
282
|
+
|
|
283
|
+
} // namespace ffi
|
|
284
|
+
} // namespace cactus
|
|
285
|
+
|
|
286
|
+
#endif // CACTUS_FFI_UTILS_H
|