cactus-react-native 1.0.1 → 1.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (128)
  1. package/README.md +609 -56
  2. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +23 -15
  3. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +12 -9
  4. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusFileSystem.kt +42 -41
  5. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusImage.kt +81 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  7. package/cpp/HybridCactus.cpp +161 -44
  8. package/cpp/HybridCactus.hpp +34 -14
  9. package/cpp/HybridCactusUtil.cpp +13 -11
  10. package/cpp/HybridCactusUtil.hpp +9 -9
  11. package/cpp/cactus_ffi.h +28 -1
  12. package/ios/HybridCactusImage.swift +53 -0
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +28 -1
  14. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +237 -7
  15. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ffi_utils.h +158 -43
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +23 -2
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +52 -0
  18. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  19. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +28 -1
  20. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +237 -7
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/ffi_utils.h +158 -43
  22. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +23 -2
  23. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +52 -0
  24. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  25. package/lib/module/api/Database.js +23 -0
  26. package/lib/module/api/Database.js.map +1 -1
  27. package/lib/module/api/RemoteLM.js +201 -0
  28. package/lib/module/api/RemoteLM.js.map +1 -0
  29. package/lib/module/classes/CactusLM.js +56 -28
  30. package/lib/module/classes/CactusLM.js.map +1 -1
  31. package/lib/module/classes/CactusSTT.js +137 -0
  32. package/lib/module/classes/CactusSTT.js.map +1 -0
  33. package/lib/module/config/CactusConfig.js +4 -0
  34. package/lib/module/config/CactusConfig.js.map +1 -1
  35. package/lib/module/constants/packageVersion.js +1 -1
  36. package/lib/module/hooks/useCactusLM.js +44 -16
  37. package/lib/module/hooks/useCactusLM.js.map +1 -1
  38. package/lib/module/hooks/useCactusSTT.js +234 -0
  39. package/lib/module/hooks/useCactusSTT.js.map +1 -0
  40. package/lib/module/index.js +2 -0
  41. package/lib/module/index.js.map +1 -1
  42. package/lib/module/native/Cactus.js +52 -3
  43. package/lib/module/native/Cactus.js.map +1 -1
  44. package/lib/module/native/CactusFileSystem.js +2 -3
  45. package/lib/module/native/CactusFileSystem.js.map +1 -1
  46. package/lib/module/native/CactusImage.js +13 -0
  47. package/lib/module/native/CactusImage.js.map +1 -0
  48. package/lib/module/native/index.js +1 -0
  49. package/lib/module/native/index.js.map +1 -1
  50. package/lib/module/specs/CactusImage.nitro.js +4 -0
  51. package/lib/module/specs/CactusImage.nitro.js.map +1 -0
  52. package/lib/module/telemetry/Telemetry.js +53 -1
  53. package/lib/module/telemetry/Telemetry.js.map +1 -1
  54. package/lib/module/types/CactusSTT.js +2 -0
  55. package/lib/module/types/CactusSTT.js.map +1 -0
  56. package/lib/typescript/src/api/Database.d.ts +1 -0
  57. package/lib/typescript/src/api/Database.d.ts.map +1 -1
  58. package/lib/typescript/src/api/RemoteLM.d.ts +14 -0
  59. package/lib/typescript/src/api/RemoteLM.d.ts.map +1 -0
  60. package/lib/typescript/src/classes/CactusLM.d.ts +8 -5
  61. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  62. package/lib/typescript/src/classes/CactusSTT.d.ts +25 -0
  63. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -0
  64. package/lib/typescript/src/config/CactusConfig.d.ts +1 -0
  65. package/lib/typescript/src/config/CactusConfig.d.ts.map +1 -1
  66. package/lib/typescript/src/constants/packageVersion.d.ts +1 -1
  67. package/lib/typescript/src/hooks/useCactusLM.d.ts +5 -4
  68. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  69. package/lib/typescript/src/hooks/useCactusSTT.d.ts +20 -0
  70. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -0
  71. package/lib/typescript/src/index.d.ts +4 -1
  72. package/lib/typescript/src/index.d.ts.map +1 -1
  73. package/lib/typescript/src/native/Cactus.d.ts +10 -3
  74. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  75. package/lib/typescript/src/native/CactusFileSystem.d.ts +1 -1
  76. package/lib/typescript/src/native/CactusFileSystem.d.ts.map +1 -1
  77. package/lib/typescript/src/native/CactusImage.d.ts +6 -0
  78. package/lib/typescript/src/native/CactusImage.d.ts.map +1 -0
  79. package/lib/typescript/src/native/index.d.ts +1 -0
  80. package/lib/typescript/src/native/index.d.ts.map +1 -1
  81. package/lib/typescript/src/specs/Cactus.nitro.d.ts +4 -1
  82. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  83. package/lib/typescript/src/specs/CactusImage.nitro.d.ts +9 -0
  84. package/lib/typescript/src/specs/CactusImage.nitro.d.ts.map +1 -0
  85. package/lib/typescript/src/telemetry/Telemetry.d.ts +5 -1
  86. package/lib/typescript/src/telemetry/Telemetry.d.ts.map +1 -1
  87. package/lib/typescript/src/types/CactusLM.d.ts +11 -6
  88. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  89. package/lib/typescript/src/types/CactusSTT.d.ts +37 -0
  90. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -0
  91. package/nitro.json +4 -0
  92. package/nitrogen/generated/android/c++/JHybridCactusImageSpec.cpp +81 -0
  93. package/nitrogen/generated/android/c++/JHybridCactusImageSpec.hpp +66 -0
  94. package/nitrogen/generated/android/cactus+autolinking.cmake +2 -0
  95. package/nitrogen/generated/android/cactusOnLoad.cpp +10 -0
  96. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusImageSpec.kt +62 -0
  97. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +17 -0
  98. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +17 -0
  99. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +5 -0
  100. package/nitrogen/generated/ios/CactusAutolinking.mm +8 -0
  101. package/nitrogen/generated/ios/CactusAutolinking.swift +15 -0
  102. package/nitrogen/generated/ios/c++/HybridCactusImageSpecSwift.cpp +11 -0
  103. package/nitrogen/generated/ios/c++/HybridCactusImageSpecSwift.hpp +85 -0
  104. package/nitrogen/generated/ios/swift/HybridCactusImageSpec.swift +58 -0
  105. package/nitrogen/generated/ios/swift/HybridCactusImageSpec_cxx.swift +158 -0
  106. package/nitrogen/generated/shared/c++/HybridCactusImageSpec.cpp +22 -0
  107. package/nitrogen/generated/shared/c++/HybridCactusImageSpec.hpp +64 -0
  108. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +3 -0
  109. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +4 -1
  110. package/package.json +1 -1
  111. package/src/api/Database.ts +27 -0
  112. package/src/api/RemoteLM.ts +273 -0
  113. package/src/classes/CactusLM.ts +76 -40
  114. package/src/classes/CactusSTT.ts +182 -0
  115. package/src/config/CactusConfig.ts +4 -0
  116. package/src/constants/packageVersion.ts +1 -1
  117. package/src/hooks/useCactusLM.ts +53 -22
  118. package/src/hooks/useCactusSTT.ts +285 -0
  119. package/src/index.tsx +14 -2
  120. package/src/native/Cactus.ts +100 -6
  121. package/src/native/CactusFileSystem.ts +2 -2
  122. package/src/native/CactusImage.ts +20 -0
  123. package/src/native/index.ts +1 -0
  124. package/src/specs/Cactus.nitro.ts +14 -1
  125. package/src/specs/CactusImage.nitro.ts +12 -0
  126. package/src/telemetry/Telemetry.ts +78 -1
  127. package/src/types/CactusLM.ts +12 -6
  128. package/src/types/CactusSTT.ts +42 -0
@@ -1,12 +1,11 @@
1
1
  #include "HybridCactusUtil.hpp"
2
2
 
3
- namespace margelo::nitro::cactus
4
- {
3
+ namespace margelo::nitro::cactus {
5
4
 
6
5
  HybridCactusUtil::HybridCactusUtil() : HybridObject(TAG) {}
7
6
 
8
- std::shared_ptr<Promise<std::string>> HybridCactusUtil::registerApp(const std::string &encryptedData)
9
- {
7
+ std::shared_ptr<Promise<std::string>>
8
+ HybridCactusUtil::registerApp(const std::string &encryptedData) {
10
9
  return Promise<std::string>::async([this, encryptedData]() -> std::string {
11
10
  std::lock_guard<std::mutex> lock(this->_mutex);
12
11
 
@@ -23,16 +22,19 @@ std::shared_ptr<Promise<std::string>> HybridCactusUtil::registerApp(const std::s
23
22
  });
24
23
  }
25
24
 
26
- std::shared_ptr<Promise<std::optional<std::string>>> HybridCactusUtil::getDeviceId() {
27
- return Promise<std::optional<std::string>>::async([this]() -> std::optional<std::string> {
28
- std::lock_guard<std::mutex> lock(this->_mutex);
25
+ std::shared_ptr<Promise<std::optional<std::string>>>
26
+ HybridCactusUtil::getDeviceId() {
27
+ return Promise<std::optional<std::string>>::async(
28
+ [this]() -> std::optional<std::string> {
29
+ std::lock_guard<std::mutex> lock(this->_mutex);
29
30
 
30
- const char* deviceId = get_device_id();
31
- return deviceId ? std::optional<std::string>(deviceId) : std::nullopt;
32
- });
31
+ const char *deviceId = get_device_id();
32
+ return deviceId ? std::optional<std::string>(deviceId) : std::nullopt;
33
+ });
33
34
  }
34
35
 
35
- std::shared_ptr<Promise<void>> HybridCactusUtil::setAndroidDataDirectory(const std::string &dataDir) {
36
+ std::shared_ptr<Promise<void>>
37
+ HybridCactusUtil::setAndroidDataDirectory(const std::string &dataDir) {
36
38
  return Promise<void>::async([this, dataDir]() -> void {
37
39
  std::lock_guard<std::mutex> lock(this->_mutex);
38
40
 
@@ -5,19 +5,19 @@
5
5
 
6
6
  #include <mutex>
7
7
 
8
- namespace margelo::nitro::cactus
9
- {
8
+ namespace margelo::nitro::cactus {
10
9
 
11
- class HybridCactusUtil : public HybridCactusUtilSpec
12
- {
10
+ class HybridCactusUtil : public HybridCactusUtilSpec {
13
11
  public:
14
12
  HybridCactusUtil();
15
-
16
- std::shared_ptr<Promise<std::string>> registerApp(const std::string &encryptedData) override;
17
-
13
+
14
+ std::shared_ptr<Promise<std::string>>
15
+ registerApp(const std::string &encryptedData) override;
16
+
18
17
  std::shared_ptr<Promise<std::optional<std::string>>> getDeviceId() override;
19
-
20
- std::shared_ptr<Promise<void>> setAndroidDataDirectory(const std::string &dataDir) override;
18
+
19
+ std::shared_ptr<Promise<void>>
20
+ setAndroidDataDirectory(const std::string &dataDir) override;
21
21
 
22
22
  private:
23
23
  std::mutex _mutex;
package/cpp/cactus_ffi.h CHANGED
@@ -20,7 +20,7 @@ typedef void* cactus_model_t;
20
20
 
21
21
  typedef void (*cactus_token_callback)(const char* token, uint32_t token_id, void* user_data);
22
22
 
23
- CACTUS_FFI_EXPORT cactus_model_t cactus_init(const char* model_path, size_t context_size);
23
+ CACTUS_FFI_EXPORT cactus_model_t cactus_init(const char* model_path, size_t context_size, const char* corpus_dir);
24
24
 
25
25
  CACTUS_FFI_EXPORT int cactus_complete(
26
26
  cactus_model_t model,
@@ -33,6 +33,17 @@ CACTUS_FFI_EXPORT int cactus_complete(
33
33
  void* user_data
34
34
  );
35
35
 
36
+ CACTUS_FFI_EXPORT int cactus_transcribe(
37
+ cactus_model_t model,
38
+ const char* audio_file_path,
39
+ const char* prompt,
40
+ char* response_buffer,
41
+ size_t buffer_size,
42
+ const char* options_json,
43
+ cactus_token_callback callback,
44
+ void* user_data
45
+ );
46
+
36
47
 
37
48
  CACTUS_FFI_EXPORT int cactus_embed(
38
49
  cactus_model_t model,
@@ -42,6 +53,22 @@ CACTUS_FFI_EXPORT int cactus_embed(
42
53
  size_t* embedding_dim
43
54
  );
44
55
 
56
+ CACTUS_FFI_EXPORT int cactus_image_embed(
57
+ cactus_model_t model,
58
+ const char* image_path,
59
+ float* embeddings_buffer,
60
+ size_t buffer_size,
61
+ size_t* embedding_dim
62
+ );
63
+
64
+ CACTUS_FFI_EXPORT int cactus_audio_embed(
65
+ cactus_model_t model,
66
+ const char* audio_path,
67
+ float* embeddings_buffer,
68
+ size_t buffer_size,
69
+ size_t* embedding_dim
70
+ );
71
+
45
72
  CACTUS_FFI_EXPORT void cactus_reset(cactus_model_t model);
46
73
 
47
74
  CACTUS_FFI_EXPORT void cactus_stop(cactus_model_t model);
@@ -0,0 +1,53 @@
1
+ import Foundation
2
+ import NitroModules
3
+ import UIKit
4
+
5
+ class HybridCactusImage: HybridCactusImageSpec {
6
+ func base64(path: String) throws -> Promise<String> {
7
+ return Promise.async {
8
+ let fileURL = URL(fileURLWithPath: path)
9
+
10
+ if !FileManager.default.fileExists(atPath: fileURL.path) {
11
+ throw RuntimeError.error(withMessage: "No such file: \(path)")
12
+ }
13
+
14
+ let imageData = try Data(contentsOf: fileURL)
15
+ return imageData.base64EncodedString()
16
+ }
17
+ }
18
+
19
+ func resize(path: String, height: Double, width: Double, quality: Double) throws -> Promise<String> {
20
+ return Promise.async {
21
+ let fileURL = URL(fileURLWithPath: path)
22
+
23
+ if !FileManager.default.fileExists(atPath: fileURL.path) {
24
+ throw RuntimeError.error(withMessage: "No such file: \(path)")
25
+ }
26
+
27
+ guard let imageData = try? Data(contentsOf: fileURL),
28
+ let image = UIImage(data: imageData) else {
29
+ throw RuntimeError.error(withMessage: "Failed to load image from: \(path)")
30
+ }
31
+
32
+ let targetSize = CGSize(width: CGFloat(width), height: CGFloat(height))
33
+ let renderer = UIGraphicsImageRenderer(size: targetSize)
34
+ let resizedImage = renderer.image { context in
35
+ image.draw(in: CGRect(origin: .zero, size: targetSize))
36
+ }
37
+
38
+ guard let jpegData = resizedImage.jpegData(compressionQuality: CGFloat(quality)) else {
39
+ throw RuntimeError.error(withMessage: "Failed to compress resized image")
40
+ }
41
+
42
+ let cacheDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask)[0]
43
+ let fileName = "\(UUID().uuidString).jpg"
44
+ let outputURL = cacheDir.appendingPathComponent("cactus/images", isDirectory: true)
45
+ .appendingPathComponent(fileName)
46
+
47
+ try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true)
48
+ try jpegData.write(to: outputURL)
49
+
50
+ return outputURL.path
51
+ }
52
+ }
53
+ }
@@ -20,7 +20,7 @@ typedef void* cactus_model_t;
20
20
 
21
21
  typedef void (*cactus_token_callback)(const char* token, uint32_t token_id, void* user_data);
22
22
 
23
- CACTUS_FFI_EXPORT cactus_model_t cactus_init(const char* model_path, size_t context_size);
23
+ CACTUS_FFI_EXPORT cactus_model_t cactus_init(const char* model_path, size_t context_size, const char* corpus_dir);
24
24
 
25
25
  CACTUS_FFI_EXPORT int cactus_complete(
26
26
  cactus_model_t model,
@@ -33,6 +33,17 @@ CACTUS_FFI_EXPORT int cactus_complete(
33
33
  void* user_data
34
34
  );
35
35
 
36
+ CACTUS_FFI_EXPORT int cactus_transcribe(
37
+ cactus_model_t model,
38
+ const char* audio_file_path,
39
+ const char* prompt,
40
+ char* response_buffer,
41
+ size_t buffer_size,
42
+ const char* options_json,
43
+ cactus_token_callback callback,
44
+ void* user_data
45
+ );
46
+
36
47
 
37
48
  CACTUS_FFI_EXPORT int cactus_embed(
38
49
  cactus_model_t model,
@@ -42,6 +53,22 @@ CACTUS_FFI_EXPORT int cactus_embed(
42
53
  size_t* embedding_dim
43
54
  );
44
55
 
56
+ CACTUS_FFI_EXPORT int cactus_image_embed(
57
+ cactus_model_t model,
58
+ const char* image_path,
59
+ float* embeddings_buffer,
60
+ size_t buffer_size,
61
+ size_t* embedding_dim
62
+ );
63
+
64
+ CACTUS_FFI_EXPORT int cactus_audio_embed(
65
+ cactus_model_t model,
66
+ const char* audio_path,
67
+ float* embeddings_buffer,
68
+ size_t buffer_size,
69
+ size_t* embedding_dim
70
+ );
71
+
45
72
  CACTUS_FFI_EXPORT void cactus_reset(cactus_model_t model);
46
73
 
47
74
  CACTUS_FFI_EXPORT void cactus_stop(cactus_model_t model);
@@ -8,11 +8,34 @@
8
8
 
9
9
  #include "../graph/graph.h"
10
10
 
11
+ #ifdef __clang__
12
+ #pragma clang diagnostic push
13
+ #pragma clang diagnostic ignored "-Wc99-extensions"
14
+ #pragma clang diagnostic ignored "-Wunused-parameter"
15
+ #elif defined(__GNUC__)
16
+ #pragma GCC diagnostic push
17
+ #pragma GCC diagnostic ignored "-Wpedantic"
18
+ #pragma GCC diagnostic ignored "-Wunused-parameter"
19
+ #endif
20
+
21
+ extern "C" {
22
+ #include "../../libs/stb/stb_image.h"
23
+ #include "../../libs/stb/stb_image_resize2.h"
24
+ }
25
+
26
+ #ifdef __clang__
27
+ #pragma clang diagnostic pop
28
+ #elif defined(__GNUC__)
29
+ #pragma GCC diagnostic pop
30
+ #endif
31
+
11
32
  class CactusGraph;
12
33
 
13
34
  namespace cactus {
14
35
  namespace engine {
15
36
 
37
+ class Siglip2Preprocessor;
38
+
16
39
  struct Config {
17
40
  uint32_t vocab_size = 151936;
18
41
  uint32_t bos_token_id = 151643;
@@ -31,9 +54,43 @@ struct Config {
31
54
  uint32_t moe_every_n_layers = 0;
32
55
  bool tie_word_embeddings = true;
33
56
 
34
- enum class ModelType {QWEN = 0, GEMMA = 1, SMOL = 2, NOMIC = 3, LFM2 = 4};
57
+ uint32_t vision_hidden_dim = 0;
58
+ uint32_t vision_num_layers = 0;
59
+ uint32_t vision_attention_heads = 0;
60
+ uint32_t vision_image_size = 0;
61
+ uint32_t vision_patch_size = 0;
62
+ uint32_t vision_num_channels = 3;
63
+ uint32_t vision_embed_dim = 0;
64
+ uint32_t visual_tokens_per_img = 0;
65
+ bool use_pixel_shuffle = false;
66
+ uint32_t pixel_shuffle_factor = 1;
67
+ bool use_image_tokens = false;
68
+ bool use_layout_tags = false;
69
+ uint32_t image_seq_len = 64;
70
+
71
+ uint32_t global_image_size = 2048;
72
+ uint32_t max_tile_size = 512;
73
+ float rescale_factor = 0.00392156862745098f;
74
+ float image_mean = 0.5f;
75
+ float image_std = 0.5f;
76
+
77
+ uint32_t downsample_factor = 2;
78
+ uint32_t min_tiles = 2;
79
+ uint32_t max_tiles = 10;
80
+ bool use_thumbnail = true;
81
+ uint32_t min_image_tokens = 64;
82
+ uint32_t max_image_tokens = 256;
83
+ uint32_t max_num_patches = 1024;
84
+ uint32_t tile_size = 512;
85
+ float max_pixels_tolerance = 2.0f;
86
+ bool do_image_splitting = true;
87
+
88
+ enum class ModelType {QWEN = 0, GEMMA = 1, SMOL = 2, NOMIC = 3, LFM2 = 5, SIGLIP2 = 6, WHISPER = 7};
35
89
  ModelType model_type = ModelType::QWEN;
36
90
 
91
+ enum class ModelVariant {DEFAULT = 0, VLM = 1, EXTRACT = 2, RAG = 3};
92
+ ModelVariant model_variant = ModelVariant::DEFAULT;
93
+
37
94
  enum class Activation {GELU = 0, SILU = 1};
38
95
  Activation activation = Activation::SILU;
39
96
 
@@ -70,6 +127,7 @@ struct MergeRule {
70
127
  struct ChatMessage {
71
128
  std::string role;
72
129
  std::string content;
130
+ std::vector<std::string> images;
73
131
  };
74
132
 
75
133
  class Tokenizer {
@@ -89,18 +147,32 @@ public:
89
147
  virtual bool has_chat_template() const { return has_chat_template_; }
90
148
 
91
149
  virtual bool load_vocabulary_with_config(const std::string& vocab_file, const std::string& merges_file, const std::string& config_file) = 0;
150
+
151
+ uint32_t get_image_token_id() const { return image_token_id_; }
152
+ uint32_t get_fake_token_id() const { return fake_token_id_; }
153
+ uint32_t get_global_img_token_id() const { return global_img_token_id_; }
92
154
 
93
- protected:
94
155
 
95
- enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2 , SMOL, BERT };
156
+ void set_corpus_dir(const std::string& dir) { corpus_dir_ = dir; }
157
+
158
+ protected:
159
+ enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2, SMOL, BERT, WHISPER};
96
160
  ModelType model_type_ = ModelType::UNKNOWN;
161
+ enum class ModelVariant { DEFAULT, VLM, EXTRACT, RAG};
162
+ ModelVariant model_variant_ = ModelVariant::DEFAULT;
97
163
  bool has_chat_template_ = false;
98
164
  std::string chat_template_;
165
+
166
+ uint32_t image_token_id_ = 396;
167
+ uint32_t fake_token_id_ = 49189;
168
+ uint32_t global_img_token_id_ = 49152;
169
+ std::string corpus_dir_;
99
170
 
100
171
  void detect_model_type(const std::string& config_path);
101
172
  std::string format_qwen_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
102
173
  std::string format_gemma_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
103
174
  std::string format_lfm2_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
175
+ std::string format_lfm2_vl_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
104
176
  std::string format_smol_style(const std::vector<ChatMessage>& messages, bool add_generation_prompt, const std::string& tools_json) const;
105
177
  };
106
178
 
@@ -295,28 +367,58 @@ struct KVCache {
295
367
 
296
368
  class Model {
297
369
  public:
370
+ struct DebugNode {
371
+ uint32_t layer_idx;
372
+ std::string name;
373
+ size_t node_id;
374
+ };
375
+
298
376
  Model();
299
377
  explicit Model(const Config& config);
300
378
  virtual ~Model();
301
379
 
302
380
  const Config& get_config() const { return config_; }
303
381
  Tokenizer* get_tokenizer() const { return tokenizer_.get(); }
382
+ const std::vector<DebugNode>& get_debug_nodes() const;
383
+
384
+ virtual bool init(const std::string& model_folder, size_t context_size, const std::string& system_prompt = "", bool do_warmup = true);
385
+
386
+ virtual bool init(CactusGraph* external_graph, const std::string& model_folder, size_t context_size,
387
+ const std::string& system_prompt = "", bool do_warmup = true);
388
+
389
+ virtual uint32_t generate(const std::vector<uint32_t>& tokens, float temperature = -1.0f, float top_p = -1.0f,
390
+ size_t top_k = 0, const std::string& profile_file = "");
304
391
 
305
- bool init(const std::string& model_folder, size_t context_size, const std::string& system_prompt = "");
306
- uint32_t generate(const std::vector<uint32_t>& tokens, float temperature = -1.0f, float top_p = -1.0f,
392
+ virtual uint32_t generate_with_images(const std::vector<uint32_t>& tokens, const std::vector<std::string>& image_paths,
393
+ float temperature = -1.0f, float top_p = -1.0f,
394
+ size_t top_k = 0, const std::string& profile_file = "");
395
+
396
+ virtual uint32_t generate_with_audio(const std::vector<uint32_t>& tokens, const std::vector<float>& mel_bins, float temperature = 0.0f, float top_p = 0.0f,
307
397
  size_t top_k = 0, const std::string& profile_file = "");
308
398
 
309
399
  std::vector<float> get_embeddings(const std::vector<uint32_t>& tokens, bool pooled = true, const std::string& profile_file = "");
400
+
401
+ virtual std::vector<float> get_image_embeddings(const std::string& image_path);
402
+
403
+ virtual std::vector<float> get_audio_embeddings(const std::vector<float>& mel_bins);
310
404
 
311
405
  virtual void reset_cache() { kv_cache_.reset(); }
406
+
312
407
  void set_cache_window(size_t window_size, size_t sink_size = 4) { kv_cache_.set_window_size(window_size, sink_size); }
313
408
 
409
+ void* graph_handle_;
410
+
314
411
  protected:
315
412
  virtual size_t forward(const std::vector<uint32_t>& tokens, bool use_cache = false) = 0;
413
+
414
+ virtual size_t forward(const std::vector<float>& mel_bins, const std::vector<uint32_t>& tokens, bool use_cache = false);
415
+
316
416
  virtual void load_weights_to_graph(CactusGraph* gb) = 0;
417
+
317
418
  virtual size_t build_attention(CactusGraph* gb, size_t normalized_input, uint32_t layer_idx,
318
419
  ComputeBackend backend, bool use_cache = false, size_t position_offset = 0) = 0;
319
- virtual size_t build_mlp(CactusGraph* gb, size_t normalized_h, uint32_t layer_idx,
420
+
421
+ virtual size_t build_mlp(CactusGraph* gb, size_t normalized_h, uint32_t layer_idx,
320
422
  ComputeBackend backend) const = 0;
321
423
  virtual size_t build_transformer_block(CactusGraph* gb, size_t hidden, uint32_t layer_idx,
322
424
  ComputeBackend backend, bool use_cache = false, size_t position_offset = 0) = 0;
@@ -326,7 +428,6 @@ protected:
326
428
  Config config_;
327
429
  std::unique_ptr<Tokenizer> tokenizer_;
328
430
 
329
- void* graph_handle_;
330
431
  bool initialized_;
331
432
  float attention_scale_;
332
433
 
@@ -339,9 +440,138 @@ protected:
339
440
  size_t embedding_node_id_;
340
441
  std::string model_folder_path_;
341
442
  size_t output_weight_node_id_;
443
+
444
+ mutable std::vector<DebugNode> debug_nodes_;
445
+
446
+ void capture_debug_node(uint32_t layer_idx, const std::string& name, size_t node_id) const;
447
+ void clear_debug_nodes();
448
+
449
+ bool init_internal(CactusGraph* gb, const std::string& model_folder, size_t context_size,
450
+ const std::string& system_prompt, bool do_warmup);
451
+ bool owns_graph_;
342
452
  };
343
453
 
344
454
  std::unique_ptr<Model> create_model(const std::string& model_folder);
345
455
 
456
+ class Siglip2Preprocessor {
457
+ public:
458
+ struct Config {
459
+ int patch_size = 16;
460
+ int downsample_factor = 2;
461
+ int min_tiles = 2;
462
+ int max_tiles = 10;
463
+ bool use_thumbnail = true;
464
+ int min_image_tokens = 64;
465
+ int max_image_tokens = 256;
466
+ int max_num_patches = 1024;
467
+ int tile_size = 512;
468
+ float max_pixels_tolerance = 2.0f;
469
+ bool do_resize = true;
470
+ bool do_rescale = true;
471
+ bool do_normalize = true;
472
+ bool do_convert_rgb = true;
473
+ bool do_image_splitting = true;
474
+ float rescale_factor = 1.0f / 255.0f;
475
+ float image_mean[3] = {0.5f, 0.5f, 0.5f};
476
+ float image_std[3] = {0.5f, 0.5f, 0.5f};
477
+ };
478
+
479
+ struct PreprocessedImage {
480
+ std::vector<float> pixel_values;
481
+ std::vector<int> pixel_attention_mask;
482
+ std::vector<std::pair<int,int>> spatial_shapes;
483
+ std::vector<size_t> pixel_values_shape;
484
+ std::vector<size_t> pixel_attention_mask_shape;
485
+ std::vector<size_t> spatial_shapes_shape;
486
+ int num_patches_height;
487
+ int num_patches_width;
488
+ int actual_num_patches;
489
+ int num_tiles;
490
+ int patch_dim;
491
+ int max_patches_per_tile;
492
+
493
+ int image_rows;
494
+ int image_cols;
495
+ int image_height;
496
+ int image_width;
497
+ int tokens_per_tile;
498
+ int thumbnail_tokens;
499
+
500
+ ~PreprocessedImage();
501
+ };
502
+
503
+ struct SpatialShapeResult {
504
+ std::vector<std::pair<int, int>> shapes;
505
+ int grid_rows;
506
+ int grid_cols;
507
+ };
508
+
509
+ explicit Siglip2Preprocessor(const Config& config);
510
+ Siglip2Preprocessor();
511
+ ~Siglip2Preprocessor();
512
+
513
+ PreprocessedImage preprocess_from_file(const std::string& image_path);
514
+ PreprocessedImage preprocess_from_memory(const unsigned char* img_data, int width, int height, int channels);
515
+ SpatialShapeResult compute_spatial_shapes(int height, int width);
516
+
517
+ private:
518
+ Config config_;
519
+
520
+ std::vector<unsigned char> convert_to_rgb(const unsigned char* img_data, int width, int height, int channels);
521
+ std::pair<int, int> smart_resize(int height, int width);
522
+ bool is_image_too_large(int height, int width);
523
+ std::pair<int, int> get_grid_layout(int height, int width);
524
+ std::pair<int, int> find_closest_aspect_ratio(float aspect_ratio, int width, int height);
525
+ std::vector<float> resize_image(const unsigned char* img_data, int src_width, int src_height,
526
+ int dst_width, int dst_height, int channels);
527
+ std::vector<float> normalize_image(const float* img_data, int width, int height, int channels);
528
+ std::vector<std::vector<float>> convert_image_to_patches(
529
+ const std::vector<float>& image, int width, int height, int channels, int patch_size);
530
+ PreprocessedImage pad_patches(const std::vector<std::vector<float>>& tile_patches,
531
+ const std::vector<std::pair<int,int>>& spatial_shapes,
532
+ int patch_dim,
533
+ int max_patches_per_tile);
534
+ int round_by_factor(int number, int factor);
535
+ };
536
+
537
+ class AudioProcessor {
538
+ public:
539
+ struct SpectrogramConfig {
540
+ size_t n_fft = 400;
541
+ size_t hop_length = 160;
542
+ size_t frame_length = 400;
543
+ float power = 2.0f;
544
+ bool center = true;
545
+ const char* pad_mode = "reflect";
546
+ bool onesided = true;
547
+ float dither = 0.0f;
548
+ float mel_floor = 1e-10f;
549
+ const char* log_mel = nullptr;
550
+ float reference = 1.0f;
551
+ float min_value = 1e-10f;
552
+ bool remove_dc_offset = false;
553
+ };
554
+
555
+ AudioProcessor();
556
+ ~AudioProcessor();
557
+
558
+ void init_mel_filters(size_t num_frequency_bins, size_t num_mel_filters,
559
+ float min_freq, float max_freq, size_t sampling_rate);
560
+
561
+ std::vector<float> compute_spectrogram(
562
+ const std::vector<float>& waveform,
563
+ const SpectrogramConfig& config);
564
+
565
+ const std::vector<float>& get_mel_filters() const { return mel_filters_; }
566
+
567
+ size_t get_num_mel_filters() const { return num_mel_filters_; }
568
+ size_t get_num_frequency_bins() const { return num_frequency_bins_; }
569
+
570
+ private:
571
+ std::vector<float> mel_filters_;
572
+ size_t num_frequency_bins_;
573
+ size_t num_mel_filters_;
574
+ };
575
+
346
576
  }
347
577
  }