npm - cactus-react-native - Versions diffs - 1.7.0 → 1.10.0 - Mend

cactus-react-native 1.7.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h CHANGED

@@ -56,6 +56,12 @@ struct Config {
     uint32_t num_shared_experts = 0;
     uint32_t num_top_experts = 0;
     uint32_t moe_every_n_layers = 0;
+    uint32_t moe_intermediate_dim = 0;
+    uint32_t num_dense_layers = 0;
+    uint32_t num_experts_per_tok = 0;
+    bool norm_topk_prob = false;
+    bool use_expert_bias = false;
+    float routed_scaling_factor = 1.0f;
     bool tie_word_embeddings = true;
     uint32_t vision_hidden_dim = 0;
@@ -93,8 +99,22 @@ struct Config {
     uint32_t num_encoder_layers = 0;
     uint32_t num_decoder_layers = 0;
     float partial_rotary_factor = 0.0f;
-    enum class ModelType {QWEN = 0, GEMMA = 1, NOMIC = 3, LFM2 = 5, SIGLIP2 = 6, WHISPER = 7, MOONSHINE = 8, SILERO_VAD = 9};
+    uint32_t pad_token_id = 0;
+    uint32_t conv_kernel_size = 0;
+    uint32_t subsampling_conv_kernel_size = 0;
+    uint32_t subsampling_conv_stride = 0;
+    uint32_t subsampling_conv_channels = 0;
+    uint32_t subsampling_factor = 0;
+    uint32_t num_mel_bins = 80;
+    std::string encoder_hidden_act = "silu";
+    uint32_t predictor_hidden_dim = 0;
+    uint32_t predictor_num_layers = 0;
+    uint32_t tdt_joint_dim = 0;
+    uint32_t tdt_num_durations = 0;
+    uint32_t tdt_blank_id = 0;
+    std::vector<uint32_t> tdt_durations;
+    enum class ModelType {QWEN = 0, GEMMA = 1, NOMIC = 3, LFM2 = 5, SIGLIP2 = 6, WHISPER = 7, MOONSHINE = 8, SILERO_VAD = 9, PARAKEET = 10, PARAKEET_TDT = 11};
     ModelType model_type = ModelType::QWEN;
     enum class ModelVariant {DEFAULT = 0, VLM = 1, EXTRACT = 2, RAG = 3};
@@ -168,7 +188,7 @@ public:
     uint32_t get_global_img_token_id() const { return global_img_token_id_; }
 protected:
-    enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2, BERT, WHISPER};
+    enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2, BERT, WHISPER, PARAKEET};
     ModelType model_type_ = ModelType::UNKNOWN;
     enum class ModelVariant { DEFAULT, VLM, EXTRACT, RAG};
     ModelVariant model_variant_ = ModelVariant::DEFAULT;
@@ -366,7 +386,6 @@ struct KVCache {
                          size_t num_tokens, size_t kv_heads, size_t head_dim);
     bool is_empty() const { return current_seq_len == 0; }
-    bool is_int8() const { return precision == Precision::INT8; }
     void* get_key_ptr(size_t layer);
     void* get_value_ptr(size_t layer);
@@ -684,6 +703,8 @@ public:
         float reference = 1.0f;
         float min_value = 1e-10f;
         bool remove_dc_offset = false;
+        float preemphasis = 0.0f;
+        bool hann_periodic = true;
     };
     AudioProcessor();
@@ -696,6 +717,11 @@ public:
         const std::vector<float>& waveform,
         const SpectrogramConfig& config);
+    static std::vector<float> compute_irfft(
+        const std::vector<float>& complex_input,
+        size_t n,
+        const char* norm = "backward");
     const std::vector<float>& get_mel_filters() const { return mel_filters_; }
     size_t get_num_mel_filters() const { return num_mel_filters_; }
@@ -721,6 +747,8 @@ namespace index {
     struct QueryResult {
         int doc_id;
         float score;
+        QueryResult(int doc_id, float score) : doc_id(doc_id), score(score) {}
     };
     struct QueryOptions {

package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h CHANGED

@@ -6,6 +6,7 @@
 #include <unordered_map>
 #include <unordered_set>
 #include <functional>
+#include <cassert>
 #include <cstring>
 #include <stdexcept>
 #include <string>
@@ -109,23 +110,33 @@ enum class ComputeBackend {
     NPU
 };
+enum class Activation {
+    SILU,
+    GELU,
+    GELU_ERF,
+    RELU,
+    SIGMOID,
+    TANH
+};
 enum class OpType {
     INPUT, PRECISION_CAST,
     ADD, ADD_CLIPPED, SUBTRACT, MULTIPLY, DIVIDE,
     MATMUL, TRANSPOSE, RESHAPE, SLICE, GATHER, EMBEDDING,
     BILINEAR_INTERPOLATION,
     SUM, MEAN, VARIANCE, MIN, MAX,
-    RMS_NORM, ROPE, ROPE_GPTJ, SOFTMAX, ATTENTION, ATTENTION_INT8_HYBRID, CONV1D_CAUSAL, CONV1D_K3, CONV1D_K7S3, CONV1D,
-    SCALAR_ADD, SCALAR_SUBTRACT, SCALAR_MULTIPLY, SCALAR_DIVIDE, SCALAR_EXP, SCALAR_SQRT, SCALAR_COS, SCALAR_SIN,
+    RMS_NORM, ROPE, ROPE_GPTJ, SOFTMAX, ATTENTION, ATTENTION_INT8_HYBRID, REL_POS_BIAS, CONV1D_CAUSAL, CONV1D_K3, CONV1D_K7S3, CONV1D, CONV1D_SAME_DEPTHWISE_K9, CONV1D_POINTWISE, CONV2D_K3S2P1, CONV2D_DEPTHWISE_K3S2P1, CONV2D_POINTWISE_1X1, GLU, BATCHNORM,
+    SCALAR_ADD, SCALAR_SUBTRACT, SCALAR_MULTIPLY, SCALAR_DIVIDE, SCALAR_EXP, SCALAR_SQRT, SCALAR_COS, SCALAR_SIN, SCALAR_LOG,
     RELU, SILU, GELU, GELU_ERF, SIGMOID, TANH,
     SAMPLE, CONCAT,
     SCATTER_TOPK,
     TOPK, LAYERNORM, GROUPNORM,
+    MOE_LAYER,
     INDEX,
     PERSISTENT,
     QUANTIZE_ACTIVATIONS,
     LSTM_CELL,
-    STFT_MAGNITUDE
+    STFT
 };
 struct PrecisionTraits {
@@ -141,11 +152,20 @@ struct PrecisionTraits {
     static constexpr size_t packed_size_of(Precision prec, size_t count) {
         switch (prec) {
-            case Precision::INT4: return (count + 1) / 2;
+            case Precision::INT4: return (count + 1) / 2;
             default: return count * size_of(prec);
         }
     }
+    static size_t byte_offset_of(Precision prec, size_t element_offset) {
+        switch (prec) {
+            case Precision::INT4:
+                assert(element_offset % 32 == 0 && "INT4 byte offset must be group-aligned (multiple of 32)");
+                return element_offset / 2;
+            default: return element_offset * size_of(prec);
+        }
+    }
     static constexpr bool is_integer(Precision prec) {
         switch (prec) {
             case Precision::INT8: return true;
@@ -181,7 +201,6 @@ struct TensorConfig {
     Precision compute_precision = Precision::INT8;
     Precision output_precision = Precision::INT8;
     bool auto_mixed_precision = false;
-    bool enable_int4_packing = true;
     static TensorConfig& global();
 };
@@ -243,6 +262,10 @@ struct BufferDesc {
         return precision == Precision::INT8 && group_size > 0;
     }
+    bool is_grouped_int4() const {
+        return precision == Precision::INT4 && group_size > 0;
+    }
     void set_grouped_scales(size_t gs, size_t ng, void* scales_ptr) {
         group_size = gs;
         num_groups = ng;
@@ -291,6 +314,7 @@ struct OpParams {
     size_t slice_length = 0;
     size_t window_size = 0;
     bool is_causal = true;
+    bool attention_mask_is_additive = false;
     std::vector<size_t> new_shape;
     std::vector<size_t> permutation;
     Precision output_precision = Precision::INT8;
@@ -309,6 +333,11 @@ struct OpParams {
     size_t num_groups = 0;
     size_t dst_height = 0;
     size_t dst_width = 0;
+    bool normalize_routing = false;
+    size_t num_experts = 0;
+    size_t num_experts_per_tok = 0;
+    bool moe_gated = true;
+    Activation activation = Activation::SILU;
     std::vector<float> bias_values;
     std::vector<uint32_t> bias_indices;
@@ -356,7 +385,6 @@ void compute_index_node(GraphNode& node, const std::vector<std::unique_ptr<Graph
 void compute_lstm_cell_node(GraphNode& node, const std::vector<std::unique_ptr<GraphNode>>& nodes, const std::unordered_map<size_t, size_t>& node_index_map);
 void shrink_thread_local_buffers();
 class BufferPool {
 public:
     BufferPool() = default;
@@ -418,6 +446,7 @@ public:
     size_t scalar_sqrt(size_t input);
     size_t scalar_cos(size_t input);
     size_t scalar_sin(size_t input);
+    size_t scalar_log(size_t input);
     size_t relu(size_t input);
     size_t silu(size_t input);
@@ -425,6 +454,7 @@ public:
     size_t gelu_erf(size_t input);
     size_t sigmoid(size_t input);
     size_t tanh(size_t input);
+    size_t glu(size_t input, int axis = -1);
     size_t matmul(size_t input1, size_t input2, bool pretransposed_rhs = false, ComputeBackend backend = ComputeBackend::CPU);
     size_t transpose(size_t input, ComputeBackend backend = ComputeBackend::CPU);
@@ -455,7 +485,30 @@ public:
     size_t layernorm(size_t input, size_t weight, size_t bias, float epsilon = 1e-5f);
     size_t layernorm(size_t input, size_t weight, float epsilon = 1e-5f);  // No bias version
     size_t groupnorm(size_t input, size_t weight, size_t bias, size_t num_groups = 32, float epsilon = 1e-5f);
+    size_t batchnorm(size_t input, size_t weight, size_t bias, size_t running_mean, size_t running_var, int axis = 1, float epsilon = 1e-5f);
     size_t topk(size_t input, size_t k);
+    size_t moe_layer(size_t hidden,
+                     size_t routing_probs,
+                     size_t topk_indices,
+                     const std::vector<size_t>& w1_weights,
+                     const std::vector<size_t>& w3_weights,
+                     const std::vector<size_t>& w2_weights,
+                     size_t num_experts,
+                     size_t num_experts_per_tok,
+                     bool normalize_routing,
+                     float epsilon,
+                     float routed_scaling_factor);
+    size_t moe_layer(size_t hidden,
+                     size_t routing_probs,
+                     size_t topk_indices,
+                     const std::vector<size_t>& w1_weights,
+                     const std::vector<size_t>& w2_weights,
+                     size_t num_experts,
+                     size_t num_experts_per_tok,
+                     bool normalize_routing,
+                     float epsilon,
+                     float routed_scaling_factor,
+                     Activation activation);
     size_t rms_norm(size_t input, size_t weight, float epsilon = 1e-5f);
     size_t rope(size_t input, float theta, size_t position_offset = 0, ComputeBackend backend = ComputeBackend::CPU);
     size_t rope_gptj(size_t input, float theta, size_t position_offset = 0, size_t rot_dim = 0, ComputeBackend backend = ComputeBackend::CPU);
@@ -463,6 +516,10 @@ public:
     size_t attention(size_t query, size_t key, size_t value, float scale, bool is_causal = true, ComputeBackend backend = ComputeBackend::CPU);
     size_t attention(size_t query, size_t key, size_t value, float scale, size_t position_offset, ComputeBackend backend = ComputeBackend::CPU);
     size_t attention(size_t query, size_t key, size_t value, float scale, size_t position_offset, size_t window_size, ComputeBackend backend = ComputeBackend::CPU);
+    size_t attention_masked(size_t query, size_t key, size_t value, size_t mask, float scale,
+                            bool is_causal = true, ComputeBackend backend = ComputeBackend::CPU,
+                            bool additive_mask = false, size_t position_offset = 0, size_t window_size = 0);
+    size_t rel_pos_bias(size_t query, size_t relative_key, float scale);
     size_t attention_int8_hybrid(size_t query, size_t key_new, size_t value_new, float scale, size_t position_offset,
                                  const int8_t* cached_keys, const int8_t* cached_values,
@@ -474,9 +531,19 @@ public:
     size_t conv1d_k7s3(size_t input, size_t weight, size_t bias);
     size_t conv1d(size_t input, size_t weight, size_t stride);
     size_t conv1d(size_t input, size_t weight, size_t bias, size_t stride);
+    size_t conv1d_same_depthwise_k9(size_t input, size_t weight);
+    size_t conv1d_same_depthwise_k9(size_t input, size_t weight, size_t bias);
+    size_t conv1d_pointwise(size_t input, size_t weight);
+    size_t conv1d_pointwise(size_t input, size_t weight, size_t bias);
+    size_t conv2d_k3s2p1(size_t input, size_t weight);
+    size_t conv2d_k3s2p1(size_t input, size_t weight, size_t bias);
+    size_t conv2d_depthwise_k3s2p1(size_t input, size_t weight);
+    size_t conv2d_depthwise_k3s2p1(size_t input, size_t weight, size_t bias);
+    size_t conv2d_pointwise_1x1(size_t input, size_t weight);
+    size_t conv2d_pointwise_1x1(size_t input, size_t weight, size_t bias);
     size_t lstm_cell(size_t input, size_t h_prev, size_t c_prev, size_t weight_ih, size_t weight_hh, size_t bias_ih, size_t bias_hh);
-    size_t stft_magnitude(size_t input, size_t weight, size_t stride, size_t num_fft_bins);
+    size_t stft(size_t input, size_t weight, size_t stride, size_t num_fft_bins);
     size_t sample(size_t logits, float temperature = 0.6f, float top_p = 0.95f, size_t top_k = 20,
                   const std::unordered_map<uint32_t, float>& logit_bias = {});
@@ -581,12 +648,9 @@ namespace GraphFile {
         bool is_interleaved_ = false;
         size_t original_N_ = 0;
-        std::unique_ptr<int8_t[]> unpacked_data_;
         void parse_header();
         void apply_madvise_hints();
-        void unpack_int4_data();
     };
 }
-#endif
+#endif

package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h CHANGED

@@ -4,6 +4,8 @@
 #include <cstddef>
 #include <arm_neon.h>
+enum class Precision;
 enum class ScalarOpType {
     ADD,
     SUBTRACT,
@@ -12,7 +14,8 @@ enum class ScalarOpType {
     EXP,
     SQRT,
     COS,
-    SIN
+    SIN,
+    LOG
 };
 constexpr size_t KV_QUANT_GROUP_SIZE = 32;
@@ -21,6 +24,7 @@ void cactus_add_f16(const __fp16* a, const __fp16* b, __fp16* output, size_t num
 void cactus_add_f16_clipped(const __fp16* a, const __fp16* b, __fp16* output, size_t num_elements);
 void cactus_subtract_f16(const __fp16* a, const __fp16* b, __fp16* output, size_t num_elements);
 void cactus_multiply_f16(const __fp16* a, const __fp16* b, __fp16* output, size_t num_elements);
+void cactus_add_scaled_f16(const __fp16* base, const __fp16* src, __fp16* output, size_t num_elements, float scale);
 void cactus_divide_f16(const __fp16* a, const __fp16* b, __fp16* output, size_t num_elements);
 void cactus_add_broadcast_f16(const __fp16* a, const __fp16* b, __fp16* output,
@@ -50,6 +54,23 @@ void cactus_matmul_int8(const int8_t* A, const float* A_scales,
                         const int8_t* B, const __fp16* B_scales,
                         __fp16* C, size_t M, size_t K, size_t N, size_t group_size);
+void cactus_gemv_int4(const int8_t* A, float A_scale,
+                      const int8_t* B_packed, const __fp16* B_scales,
+                      __fp16* C, size_t K, size_t N, size_t group_size);
+void cactus_gemm_int4(const int8_t* A, const float* A_scales,
+                      const int8_t* B_packed, const __fp16* B_scales,
+                      __fp16* C, size_t M, size_t K, size_t N, size_t group_size);
+void cactus_matmul_int4(const int8_t* A, const float* A_scales,
+                        const int8_t* B_packed, const __fp16* B_scales,
+                        __fp16* C, size_t M, size_t K, size_t N, size_t group_size);
+void cactus_matmul_integer(Precision precision,
+                            const int8_t* A, const float* A_scales,
+                            const int8_t* B, const __fp16* B_scales,
+                            __fp16* C, size_t M, size_t K, size_t N, size_t group_size);
 void cactus_matmul_f16(const __fp16* a, const __fp16* b_transposed, __fp16* c,
                        size_t M, size_t K, size_t N);
@@ -97,10 +118,52 @@ void cactus_sigmoid_f16(const __fp16* input, __fp16* output, size_t num_elements
 void cactus_tanh_f16(const __fp16* input, __fp16* output, size_t num_elements);
+void cactus_glu_f16(
+    const __fp16* input,
+    __fp16* output,
+    size_t outer_size,
+    size_t split_size,
+    size_t inner_size
+);
+void cactus_glu_f32(
+    const float* input,
+    float* output,
+    size_t outer_size,
+    size_t split_size,
+    size_t inner_size
+);
+void cactus_batchnorm_f16(
+    const __fp16* input,
+    const float* weight,
+    const float* bias,
+    const float* running_mean,
+    const float* running_var,
+    __fp16* output,
+    size_t outer_size,
+    size_t channels,
+    size_t inner_size,
+    float epsilon
+);
+void cactus_batchnorm_f32(
+    const float* input,
+    const float* weight,
+    const float* bias,
+    const float* running_mean,
+    const float* running_var,
+    float* output,
+    size_t outer_size,
+    size_t channels,
+    size_t inner_size,
+    float epsilon
+);
 void cactus_attention_f16(const __fp16* queries, const __fp16* keys, const __fp16* values, __fp16* output,
                           size_t batch_size, size_t seq_len, size_t kv_seq_len, size_t num_q_heads, size_t num_kv_heads,
                           size_t head_dim, float scale, const __fp16* mask, size_t position_offset = 0, size_t window_size = 0,
-                          bool is_causal = true);
+                          bool is_causal = true, bool mask_is_additive = false, bool mask_per_head = false);
 void cactus_attention_hybrid_int8_fp16(
     const __fp16* queries,
@@ -150,7 +213,7 @@ void cactus_conv1d_f16(
     size_t stride
 );
-void cactus_stft_magnitude_f16(
+void cactus_stft_f16(
     const __fp16* input,
     const __fp16* weight,
     __fp16* output,
@@ -171,6 +234,62 @@ void cactus_conv1d_f16_k7s3_oc8(
     size_t C_out
 );
+void cactus_conv1d_same_depthwise_f16_k9(
+    const __fp16* input,
+    const __fp16* weight,
+    const __fp16* bias,
+    __fp16* output,
+    size_t N,
+    size_t L,
+    size_t C
+);
+void cactus_conv2d_f16_k3s2p1_nchw(
+    const __fp16* input,
+    const __fp16* weight,
+    const __fp16* bias,
+    __fp16* output,
+    size_t N,
+    size_t C_in,
+    size_t H,
+    size_t W,
+    size_t C_out
+);
+void cactus_conv2d_depthwise_f16_k3s2p1_nchw(
+    const __fp16* input,
+    const __fp16* weight,
+    const __fp16* bias,
+    __fp16* output,
+    size_t N,
+    size_t C,
+    size_t H,
+    size_t W
+);
+void cactus_conv2d_pointwise_f16_1x1_nchw_gemm(
+    const __fp16* input,
+    const __fp16* weight,
+    const __fp16* bias,
+    __fp16* output,
+    size_t N,
+    size_t C_in,
+    size_t H,
+    size_t W,
+    size_t C_out
+);
+void cactus_conv1d_pointwise_f16_gemm(
+    const __fp16* input,
+    const __fp16* weight,
+    const __fp16* bias,
+    __fp16* output,
+    size_t N,
+    size_t L,
+    size_t C_in,
+    size_t C_out
+);
 void cactus_bilinear_interpolation_f16(const __fp16* input, __fp16* output, size_t src_height, size_t src_width, size_t embed_dim,
                                        size_t dst_height, size_t dst_width);
@@ -224,4 +343,4 @@ void cactus_lstm_cell_f16(
     size_t hidden_size
 );
-#endif
+#endif

package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h CHANGED

@@ -44,6 +44,34 @@ inline void stream_store_f16x8(__fp16* dst, float16x8_t val) {
 #endif
 }
+inline bool cpu_has_sme2() {
+#if defined(__aarch64__)
+	static std::once_flag once;
+	static bool has = false;
+	std::call_once(once, []() {
+#if defined(__APPLE__)
+	int ret = 0;
+	size_t size = sizeof(ret);
+	if (sysctlbyname("hw.optional.arm.FEAT_SME2", &ret, &size, nullptr, 0) == 0) {
+		has = ret == 1;
+	}
+#elif defined(__ANDROID__)
+	unsigned long hwcap2 = getauxval(AT_HWCAP2);
+#ifdef HWCAP2_SME2
+	has = (hwcap2 & HWCAP2_SME2) != 0;
+#endif
+#endif
+	});
+	return has;
+#else
+	return false;
+#endif
+}
 inline float32x4_t fast_exp_f32x4(float32x4_t x) {
     const float32x4_t log2e = vdupq_n_f32(1.4426950408889634f);
@@ -102,6 +130,12 @@ inline float32x4_t fast_tanh_f32x4(float32x4_t x) {
     return result;
 }
+inline void unpack_int4_as_int8x16x2(const uint8_t* ptr, int8x16_t& high_decoded, int8x16_t& low_decoded) {
+    int8x16_t packed = vreinterpretq_s8_u8(vld1q_u8(ptr));
+    high_decoded = vshrq_n_s8(packed, 4);
+    low_decoded = vshrq_n_s8(vshlq_n_s8(packed, 4), 4);
+}
 namespace CactusThreading {
     class ThreadPool {
@@ -297,7 +331,7 @@ namespace CactusThreading {
         }
         static size_t get_gemv_threads(size_t N_blocks, size_t pool_size) {
             if (N_blocks < GEMV_MIN_N_BLOCKS) return 1;
-            return std::min(pool_size, static_cast<size_t>(2));
+            return std::min(pool_size, static_cast<size_t>(3));
         }
         #else
         static constexpr size_t GEMV_MIN_N_BLOCKS = 256;
@@ -308,7 +342,7 @@ namespace CactusThreading {
         static size_t get_gemv_threads(size_t N_blocks, size_t pool_size) {
             if (N_blocks < GEMV_MIN_N_BLOCKS) return 1;
             if (N_blocks < 512) return std::min(pool_size, static_cast<size_t>(2));
-            return std::min(pool_size, static_cast<size_t>(4));
+            return std::min(pool_size, static_cast<size_t>(5));
         }
         #endif
     };
@@ -465,4 +499,4 @@ namespace CactusThreading {
 }
-#endif // KERNEL_UTILS_H
+#endif // KERNEL_UTILS_H

package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist CHANGED

Binary file

package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus CHANGED

Binary file

package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_cloud.h ADDED

@@ -0,0 +1,48 @@
+#ifndef CACTUS_CLOUD_H
+#define CACTUS_CLOUD_H
+#include "cactus_utils.h"
+#include <string>
+#include <vector>
+namespace cactus {
+namespace ffi {
+struct CloudResponse {
+    std::string transcript;
+    std::string api_key_hash;
+    bool used_cloud = false;
+    std::string error;
+};
+struct CloudCompletionRequest {
+    std::vector<cactus::engine::ChatMessage> messages;
+    std::vector<ToolFunction> tools;
+    std::string local_output;
+    std::vector<std::string> local_function_calls;
+    bool has_images = false;
+    std::string cloud_key;
+};
+struct CloudCompletionResult {
+    bool ok = false;
+    bool used_cloud = false;
+    std::string response;
+    std::vector<std::string> function_calls;
+    std::string error;
+};
+std::string cloud_base64_encode(const uint8_t* data, size_t len);
+std::vector<uint8_t> cloud_build_wav(const uint8_t* pcm, size_t pcm_bytes);
+std::string resolve_cloud_api_key(const char* cloud_key_param);
+CloudResponse cloud_transcribe_request(const std::string& audio_b64,
+                                       const std::string& fallback_text,
+                                       long timeout_seconds = 15L,
+                                       const char* cloud_key = nullptr);
+CloudCompletionResult cloud_complete_request(const CloudCompletionRequest& request,
+                                             long timeout_ms);
+} // namespace ffi
+} // namespace cactus
+#endif // CACTUS_CLOUD_H

package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h CHANGED

@@ -76,6 +76,16 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
     size_t pcm_buffer_size
 );
+CACTUS_FFI_EXPORT int cactus_detect_language(
+    cactus_model_t model,
+    const char* audio_file_path,            // NULL if using pcm_buffer
+    char* response_buffer,
+    size_t buffer_size,
+    const char* options_json,               // optional
+    const uint8_t* pcm_buffer,              // NULL if using audio_file_path
+    size_t pcm_buffer_size
+);
 CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_start(
     cactus_model_t model,
     const char* options_json                // optional
@@ -189,7 +199,10 @@ CACTUS_FFI_EXPORT void cactus_index_destroy(cactus_index_t index);
 CACTUS_FFI_EXPORT const char* cactus_get_last_error(void);
-CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location);
+CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location, const char* version);
+CACTUS_FFI_EXPORT void cactus_set_app_id(const char* app_id);
+CACTUS_FFI_EXPORT void cactus_telemetry_flush(void);
+CACTUS_FFI_EXPORT void cactus_telemetry_shutdown(void);
 #ifdef __cplusplus
 }