npm - cui-llama.rn - Versions diffs - 1.6.0 → 1.6.1 - Mend

cui-llama.rn 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195)

package/cpp/ggml-cpu/amx/mmq.h ADDED Viewed

@@ -0,0 +1,10 @@
+#pragma once
+#include "common.h"
+size_t lm_ggml_backend_amx_desired_wsize(const struct lm_ggml_tensor * dst);
+size_t lm_ggml_backend_amx_get_alloc_size(const struct lm_ggml_tensor * tensor);
+void lm_ggml_backend_amx_convert_weight(struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+void lm_ggml_backend_amx_mul_mat(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);

package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h RENAMED Viewed

@@ -1,6 +1,6 @@
 #pragma once
-#include "cpu-common.h"
+#include "common.h"
 #ifdef __cplusplus
 extern "C" {

package/cpp/ggml-cpu/common.h ADDED Viewed

@@ -0,0 +1,72 @@
+#pragma once
+#include "ggml.h"
+#include "ggml-cpu-traits.h"
+#include "ggml-cpu-impl.h"
+#include "ggml-impl.h"
+#ifdef __cplusplus
+#include <utility>
+// convenience functions/macros for use in template calls
+// note: these won't be required after the 'traits' lookup table is used.
+static inline lm_ggml_fp16_t f32_to_f16(float x) {
+    return LM_GGML_FP32_TO_FP16(x);
+}
+static inline float f16_to_f32(lm_ggml_fp16_t x) {
+    return LM_GGML_FP16_TO_FP32(x);
+}
+static inline lm_ggml_bf16_t f32_to_bf16(float x) {
+    return LM_GGML_FP32_TO_BF16(x);
+}
+static inline float bf16_to_f32(lm_ggml_bf16_t x) {
+    return LM_GGML_BF16_TO_FP32(x);
+}
+static inline float f32_to_f32(float x) {
+    return x;
+}
+// TODO - merge this into the traits table, after using row-based conversions
+template <class T>
+struct type_conversion_table;
+template <>
+struct type_conversion_table<lm_ggml_fp16_t> {
+    static constexpr float (*to_f32)(lm_ggml_fp16_t) = f16_to_f32;
+    static constexpr lm_ggml_fp16_t (*from_f32)(float) = f32_to_f16;
+};
+template <>
+struct type_conversion_table<float> {
+    static constexpr float (*to_f32)(float) = f32_to_f32;
+    static constexpr float (*from_f32)(float) = f32_to_f32;
+};
+template <>
+struct type_conversion_table<lm_ggml_bf16_t> {
+    static constexpr float (*to_f32)(lm_ggml_bf16_t) = bf16_to_f32;
+    static constexpr lm_ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
+};
+static std::pair<int64_t, int64_t> get_thread_range(const struct lm_ggml_compute_params * params, const struct lm_ggml_tensor * src0) {
+    const int64_t ith = params->ith;
+    const int64_t nth = params->nth;
+    const int64_t nr  = lm_ggml_nrows(src0);
+    // rows per thread
+    const int64_t dr = (nr + nth - 1)/nth;
+    // row range for this thread
+    const int64_t ir0 = dr*ith;
+    const int64_t ir1 = MIN(ir0 + dr, nr);
+    return {ir0, ir1};
+}
+#endif