PyPI - llama-cpp-bin - Versions diffs - 9660.0.0__tar.gz → 9670.0.0__tar.gz - Mend

llama-cpp-bin 9660.0.0tar.gz → 9670.0.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1038) hide show

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-cpp-bin
-Version: 9660.0.0
+Version: 9670.0.0
 Summary: llama.cpp server binary built from source
 Author-email: Vladyslav Tkachenko <vladlearns@gmail.com>
 License: AGPL-3.0-or-later

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/llama.cpp/common/speculative.cpp RENAMED Viewed

@@ -140,6 +140,8 @@ struct common_speculative_impl {
     size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
     size_t n_acc_tokens = 0; // number of tokens accepted by the target model.
+    std::vector<size_t> n_acc_tokens_per_pos; // number of tokens accepted per draft position.
     // TODO: track performance of most recent calls
     const bool gen_perf = true; // whether to generate performance stats.
@@ -416,6 +418,9 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
     std::vector<common_sampler_ptr> smpls;
+    // backend sampler chain per seq, attached to ctx_dft
+    std::vector<llama_sampler *> backend_chains;
     int32_t n_embd_dec = 0;       // draft hidden size
     int32_t n_embd_enc = 0;       // target_layer_ids_n * target_hidden_size
     int32_t n_embd_tgt = 0;       // target model hidden size
@@ -441,7 +446,7 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
         , params(params.draft)
     {
         LOG_INF("%s: adding speculative implementation 'draft-eagle3'\n", __func__);
-        LOG_INF("%s: - n_max=%d, n_min=%d, p_min=%f\n", __func__, params.draft.n_max, params.draft.n_min, params.draft.p_min);
+        LOG_INF("%s: - n_max=%d, n_min=%d, p_min=%f, backend_sampling=%d\n", __func__, params.draft.n_max, params.draft.n_min, params.draft.p_min, (int) params.draft.backend_sampling);
         auto * ctx_tgt = this->params.ctx_tgt;
         auto * ctx_dft = this->params.ctx_dft;
@@ -476,6 +481,22 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
             s.reset(common_sampler_init(llama_get_model(ctx_dft), sparams));
         }
+        // offload draft sampling to the backend
+        backend_chains.assign(n_seq, nullptr);
+        if (this->params.backend_sampling) {
+            for (llama_seq_id seq_id = 0; seq_id < (llama_seq_id) n_seq; ++seq_id) {
+                llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
+                llama_sampler_chain_add(chain, llama_sampler_init_top_k(10));
+                if (!llama_set_sampler(ctx_dft, seq_id, chain)) {
+                    LOG_WRN("%s: backend offload failed for seq_id=%d; using CPU sampler\n", __func__, (int) seq_id);
+                    llama_sampler_free(chain);
+                    chain = nullptr;
+                }
+                backend_chains[seq_id] = chain;
+            }
+        }
         // turn on extraction of the target layers' input embeddings
         for (uint32_t k = 0; k < target_layer_ids_n; ++k) {
             llama_set_embeddings_layer_inp(ctx_tgt, (uint32_t) target_layer_ids[k], true);
@@ -494,6 +515,18 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
     }
     ~common_speculative_impl_draft_eagle3() override {
+        auto * ctx_dft = this->params.ctx_dft;
+        for (llama_seq_id seq_id = 0; seq_id < (llama_seq_id) backend_chains.size(); ++seq_id) {
+            if (backend_chains[seq_id] == nullptr) {
+                continue;
+            }
+            if (ctx_dft) {
+                llama_set_sampler(ctx_dft, seq_id, nullptr);
+            }
+            llama_sampler_free(backend_chains[seq_id]);
+        }
+        backend_chains.clear();
         if (batch.token != nullptr) {
             free(batch.token);
             batch.token = nullptr;
@@ -2059,6 +2092,15 @@ void common_speculative_accept(common_speculative * spec, llama_seq_id seq_id, u
     {
         common_time_meas tm(impl->t_accept_us, !impl->gen_perf);
+        if (impl->n_acc_tokens_per_pos.size() < n_accepted) {
+            impl->n_acc_tokens_per_pos.resize(n_accepted, 0);
+        }
+        for (size_t i = 0; i < n_accepted; ++i) {
+            impl->n_acc_tokens_per_pos[i]++;
+        }
         if (n_accepted > 0) {
             impl->n_acc_drafts++;
             impl->n_acc_tokens += n_accepted;
@@ -2093,13 +2135,31 @@ void common_speculative_print_stats(const common_speculative * spec) {
             str_perf = "";
         }
-        LOG_INF("statistics %16s: #calls(b,g,a) = %4zu %6zu %6zu, #gen drafts = %6zu, #acc drafts = %5zu, #gen tokens = %6zu, #acc tokens = %5zu%s\n",
+        std::string str_stats;
+        if (impl->n_call_accept > 0) {
+            const double mean =
+                1.0 + (double) impl->n_acc_tokens / (double) impl->n_call_accept;
+            std::ostringstream tmp;
+            tmp << std::fixed << std::setprecision(3);
+            for (size_t i = 0; i < impl->n_acc_tokens_per_pos.size(); ++i) {
+                if (i > 0) {
+                    tmp << ", ";
+                }
+                tmp << (double) impl->n_acc_tokens_per_pos[i] / (double) impl->n_call_accept;
+            }
+            std::ostringstream oss;
+            oss << std::fixed << std::setprecision(2) << mean;
+            str_stats = ", #mean acc len = " + oss.str() + ", #acc rate/pos = (" + tmp.str() + ")";
+        }
+        LOG_INF("statistics %16s: #calls(b,g,a) = %4zu %6zu %6zu, #gen drafts = %6zu, #acc drafts = %5zu, #gen tokens = %6zu, #acc tokens = %5zu%s%s\n",
                 common_speculative_type_to_str(impl->type).c_str(),
                 impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
                 impl->n_gen_drafts,
                 impl->n_acc_drafts,
                 impl->n_gen_tokens,
                 impl->n_acc_tokens,
+                str_stats.c_str(),
                 str_perf.c_str());
     }
 }

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp RENAMED Viewed

@@ -287,6 +287,13 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
              ne10, ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2,
              nb3, ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1),
              main_stream);
+#ifdef GGML_SYCL_HAS_BF16
+    } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) {
+        op()((const sycl::ext::oneapi::bfloat16 *) src0->data, (const sycl::ext::oneapi::bfloat16 *) src1->data,
+             (sycl::ext::oneapi::bfloat16 *) dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2,
+             ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3, ggml_is_contiguous(src0),
+             ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1), main_stream);
+#endif
     } else {
         fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type),
                 ggml_type_name(src0->type), ggml_type_name(src1->type));

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/llama.cpp/ggml/src/ggml-sycl/concat.cpp RENAMED Viewed

@@ -10,6 +10,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
+#include "ggml.h"
 #include "concat.hpp"
 static inline size_t elem_size(ggml_type t) {
@@ -192,11 +194,29 @@ void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
     case GGML_TYPE_F32:
         concat_impl_sycl<float>(ctx, dst);
         break;
+    case GGML_TYPE_F16:
+        concat_impl_sycl<sycl::half>(ctx, dst);
+        break;
+#ifdef GGML_SYCL_HAS_BF16
+    case GGML_TYPE_BF16:
+        concat_impl_sycl<sycl::ext::oneapi::bfloat16>(ctx, dst);
+        break;
+#endif
     case GGML_TYPE_I32:
         concat_impl_sycl<int32_t>(ctx, dst);
         break;
+    case GGML_TYPE_I16:
+        concat_impl_sycl<int16_t>(ctx, dst);
+        break;
+    case GGML_TYPE_I64:
+        concat_impl_sycl<int64_t>(ctx, dst);
+        break;
+    case GGML_TYPE_I8:
+        concat_impl_sycl<int8_t>(ctx, dst);
+        break;
     default:
-    GGML_ASSERT(false && "ggml_sycl_op_concat: unsupported type");
+        fprintf(stderr, "%s: unsupported types: dst: %s\n", __func__, ggml_type_name(dst->type));
+        GGML_ASSERT(false);
     break;
     }
 }

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp RENAMED Viewed

@@ -1022,6 +1022,120 @@ static void dequantize_mul_mat_vec_q5_k(const void *__restrict__ vx,
     }
 }
+static void dequantize_mul_mat_vec_q5_k_reorder(const void *__restrict__ vx,
+                                                const float *__restrict__ yy,
+                                                float *__restrict__ dst,
+                                                const int ncols, int nrows,
+                                                const sycl::nd_item<3> &item_ct1) {
+    const int row = item_ct1.get_group(2);
+    const int num_blocks_per_row = ncols / QK_K;
+    const int ib0 = row*num_blocks_per_row;
+    // SOA base pointers for the reordered layout:
+    //   [qs: nb * QK_K/2] [qh: nb * QK_K/8] [scales: nb * K_SCALE_SIZE] [dm: nb * sizeof(half2)]
+    const int nb = nrows * num_blocks_per_row;
+    const uint8_t     * qs_base     = (const uint8_t *)vx;
+    const uint8_t     * qh_base     = qs_base + (size_t)nb * (QK_K / 2);
+    const uint8_t     * scales_base = qh_base + (size_t)nb * (QK_K / 8);
+    const sycl::half2 * dm_base     = (const sycl::half2 *)(scales_base + (size_t)nb * K_SCALE_SIZE);
+    float tmp = 0; // partial sum for thread in warp
+#if QK_K == 256
+    const uint16_t kmask1 = 0x3f3f;
+    const uint16_t kmask2 = 0x0f0f;
+    const uint16_t kmask3 = 0xc0c0;
+    const int tid = item_ct1.get_local_id(2) / 2; // 0...15
+    const int ix = item_ct1.get_local_id(2) % 2;
+    const int il  = tid/4;     // 0...3
+    const int ir  = tid - 4*il;// 0...3
+    const int n   = 2;
+    const int im = il/2;  // 0 or 1. 0 computes 0,32 + 128,160, 1 computes 64,96 + 192,224
+    const int in = il%2;
+    const int l0 = n*(2*ir + in);
+    const int q_offset = 32*im + l0;
+    const int y_offset = 64*im + l0;
+    const uint8_t hm1  = 1 << (2*im);
+    const uint8_t hm2  = hm1 << 4;
+    uint16_t aux[4];
+    const uint8_t * sc = (const uint8_t *)aux;
+    uint16_t q16[8];
+    const uint8_t * q4 = (const uint8_t *)q16;
+    for (int i = ix; i < num_blocks_per_row; i += 2) {
+        const int bi = ib0 + i;
+        const uint8_t * ql1 = qs_base + bi * (QK_K / 2) + q_offset;
+        const uint8_t * qh  = qh_base + bi * (QK_K / 8) + l0;
+        const float   * y1  = yy + i*QK_K + y_offset;
+        const float   * y2  = y1 + 128;
+        const sycl::half2 dm_val = dm_base[bi];
+        const float dall = dm_val[0];
+        const float dmin = dm_val[1];
+        const uint16_t * a = (const uint16_t *)(scales_base + bi * K_SCALE_SIZE);
+        aux[0] = a[im+0] & kmask1;
+        aux[1] = a[im+2] & kmask1;
+        aux[2] = ((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2);
+        aux[3] = ((a[im+4] >> 4) & kmask2) | ((a[im+2] & kmask3) >> 2);
+        sycl::float4 sum = {0.f, 0.f, 0.f, 0.f};
+        float smin = 0;
+        const uint16_t * q1 = (const uint16_t *)ql1;
+        const uint16_t * q2 = q1 + 32;
+        q16[0] = q1[0] & 0x0f0f;
+        q16[1] = q1[8] & 0x0f0f;
+        q16[2] = (q1[0] >> 4) & 0x0f0f;
+        q16[3] = (q1[8] >> 4) & 0x0f0f;
+        q16[4] = q2[0] & 0x0f0f;
+        q16[5] = q2[8] & 0x0f0f;
+        q16[6] = (q2[0] >> 4) & 0x0f0f;
+        q16[7] = (q2[8] >> 4) & 0x0f0f;
+        for (int l = 0; l < n; ++l) {
+            sum.x() +=
+                y1[l + 0] * (q4[l + 0] + (qh[l + 0] & (hm1 << 0) ? 16 : 0)) +
+                y1[l + 16] * (q4[l + 2] + (qh[l + 16] & (hm1 << 0) ? 16 : 0));
+            sum.y() +=
+                y1[l + 32] * (q4[l + 4] + (qh[l + 0] & (hm1 << 1) ? 16 : 0)) +
+                y1[l + 48] * (q4[l + 6] + (qh[l + 16] & (hm1 << 1) ? 16 : 0));
+            sum.z() +=
+                y2[l + 0] * (q4[l + 8] + (qh[l + 0] & (hm2 << 0) ? 16 : 0)) +
+                y2[l + 16] * (q4[l + 10] + (qh[l + 16] & (hm2 << 0) ? 16 : 0));
+            sum.w() +=
+                y2[l + 32] * (q4[l + 12] + (qh[l + 0] & (hm2 << 1) ? 16 : 0)) +
+                y2[l + 48] * (q4[l + 14] + (qh[l + 16] & (hm2 << 1) ? 16 : 0));
+            smin += (y1[l] + y1[l+16]) * sc[2] + (y1[l+32] + y1[l+48]) * sc[3]
+                  + (y2[l] + y2[l+16]) * sc[6] + (y2[l+32] + y2[l+48]) * sc[7];
+        }
+        tmp += dall * (sum.x() * sc[0] + sum.y() * sc[1] + sum.z() * sc[4] +
+                       sum.w() * sc[5]) -
+               dmin * smin;
+    }
+#else
+    // The reordered Q5_K layout is only produced for QK_K == 256.
+#endif
+    // sum up partial sums and write back result
+#pragma unroll
+    for (int mask = QK_WARP_SIZE / 2; mask > 0; mask >>= 1) {
+        tmp +=
+            dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), tmp, mask);
+    }
+    if (item_ct1.get_local_id(2) == 0) {
+        dst[row] = tmp;
+    }
+}
 static void dequantize_mul_mat_vec_q6_k(const void * __restrict__ vx, const float * __restrict__ yy, float * __restrict__ dst, const int ncols, int nrows,
                                         const sycl::nd_item<3> &item_ct1) {
@@ -1599,6 +1713,19 @@ static void dequantize_mul_mat_vec_q4_K_sycl_reorder(const void *vx, const float
         });
 }
+static void dequantize_mul_mat_vec_q5_K_sycl_reorder(const void *vx, const float *y,
+                                                     float *dst, const int ncols,
+                                                     const int nrows,
+                                                     dpct::queue_ptr stream) {
+    GGML_ASSERT(ncols % QK_K == 0);
+    const sycl::range<3> block_dims(1, 1, QK_WARP_SIZE);
+    stream->parallel_for(
+        sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, block_dims),
+        [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] {
+            dequantize_mul_mat_vec_q5_k_reorder(vx, y, dst, ncols, nrows, item_ct1);
+        });
+}
 static void dequantize_mul_mat_vec_q6_K_sycl_reorder(const void *vx, const float *y,
                                                      float *dst, const int ncols,
                                                      const int nrows,
@@ -1695,7 +1822,12 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
             }
             break;
         case GGML_TYPE_Q5_K:
-            dequantize_mul_mat_vec_q5_K_sycl(src0_dd_i, src1_ddf_i, dst_dd_i, ne00, row_diff, stream);
+            if ((ggml_tensor_extra_gpu *) dst->src[0]->extra &&
+                ((ggml_tensor_extra_gpu *) dst->src[0]->extra)->optimized_feature.reorder) {
+                dequantize_mul_mat_vec_q5_K_sycl_reorder(src0_dd_i, src1_ddf_i, dst_dd_i, ne00, row_diff, stream);
+            } else {
+                dequantize_mul_mat_vec_q5_K_sycl(src0_dd_i, src1_ddf_i, dst_dd_i, ne00, row_diff, stream);
+            }
             break;
         case GGML_TYPE_Q6_K:
             if ((ggml_tensor_extra_gpu *) dst->src[0]->extra &&

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp RENAMED Viewed

@@ -124,6 +124,11 @@ static __dpct_inline__ T op_exp(T x) {
     return sycl::exp(x);
 }
+template<typename T>
+static __dpct_inline__ T op_expm1(T x) {
+    return sycl::expm1(x);
+}
 template<typename T>
 static __dpct_inline__ T op_log(T x) {
     if (x <= static_cast<T>(0)) {
@@ -266,13 +271,6 @@ static void unary_op_clamp_kernel(const T * x, T * dst, const int k, const sycl:
     }
 }
-template<typename T>
-static void unary_op_floor_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
-    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
-        dst[i] = op_floor(x[i]);
-    }
-}
 template<typename T>
 static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
     SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
@@ -280,20 +278,6 @@ static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::
     }
 }
-template<typename T>
-static void unary_op_round_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
-    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
-        dst[i] = op_round(x[i]);
-    }
-}
-template<typename T>
-static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
-    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
-        dst[i] = op_trunc(x[i]);
-    }
-}
 template<typename T>
 static void clamp(const T * x, T * dst, const float min, const float max, const int k,
                       const sycl::nd_item<1> &item_ct1) {
@@ -605,6 +589,12 @@ static inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor
     });
 }
+static inline void ggml_sycl_op_expm1(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
+        return op_expm1(x);
+    });
+}
 static inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
         [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
@@ -728,16 +718,9 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
 }
 static inline void ggml_sycl_op_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, 256);
-            stream->parallel_for(
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
-                                  sycl::range<1>(256)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_floor_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
+    ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
+        return op_floor(x);
+    });
 }
 static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
@@ -747,29 +730,15 @@ static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tenso
 }
 static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, 256);
-            stream->parallel_for(
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
-                                  sycl::range<1>(256)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_round_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
+    ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
+        return op_round(x);
+    });
 }
 static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, 256);
-            stream->parallel_for(
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
-                                  sycl::range<1>(256)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
+    ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
+        return op_trunc(x);
+    });
 }
 static inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
@@ -1018,6 +987,11 @@ void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     ggml_sycl_op_exp(ctx, dst);
 }
+void ggml_sycl_expm1(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_expm1(ctx, dst);
+}
 void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_log(ctx, dst);

{llama_cpp_bin-9660.0.0 → llama_cpp_bin-9670.0.0}/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp RENAMED Viewed

@@ -59,6 +59,8 @@ void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_expm1(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_softplus(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

llama-cpp-bin 9660.0.0__tar.gz → 9670.0.0__tar.gz

llama-cpp-bin 9660.0.0tar.gz → 9670.0.0tar.gz