llama_cpp 0.9.0 → 0.9.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 683f2d81aff9e82234925ba08cd5b46b56a2283ff8397a6c06ce50d34a95dbfc
-  data.tar.gz: d3005cab273b8d85f47f4cb4314fbab3a540d366a42829e5ec8d2c29576ae09e
+  metadata.gz: 66c53ea31dd93cc684d6bbc5331bb7e9f12abe2a23e6e16b8f8a3407e62961a0
+  data.tar.gz: 723d4f1d879c314d1733c84411e39d470f619a22be6a17d589406e831d8ea97b
 SHA512:
-  metadata.gz: 559f1ba1253a704c38480336decd315c65b4d80e6895ad1dc0faa3b5b81570a1faeaadcb6ec7ee3145f0fff758ab5e38e6cb8163382ce9b693d893deebe9a8f9
-  data.tar.gz: cb3d96b8c3f79cd20d4169a175270e8768c04bcaa24e51cb2c4d7872db88bc6e3349e6b1e93a130b89d21daab8be6e57b5305412059ea722084c7cb7d4a01e93
+  metadata.gz: bee0ffe56796ec8bf6240178246c7c95c38ec7cec2bd29f61c1cd85e1230291751c13da850c330fca644089ee2ff524a767b132b5bc6658e95205114e7399ba4
+  data.tar.gz: 382d05658c0a0d8df1c03dcaf93c8861bff3326e1d1e0c0cb3b0638f38cc3de5d36990b1f4df6d0bf3ce19337e9507cd5a2d196d893d8baf56d9b38a49738bc2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
+ ## [[0.9.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.1...v0.9.2)] - 2023-11-11
+
+ - Bump bundled llama.cpp from b1472 to b1500.
+
+ ## [[0.9.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.0...v0.9.1)] - 2023-11-03
+
+ - Bump bundled llama.cpp from b1429 to b1472.
+ - Rename `kv_cache_tokens_rm` method to `kv_cache_clear` in Context.
+ - Add `sample_min_p` method to Context.
+ - Add `rope_scaling_type`, `rope_freq_base`, `rope_freq_scale`, `yarn_ext_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`, and `yarn_orig_ctx` to ContextParams.
+ - Add `pure` to ModelQuantizeParams.
+ - Add constants for RoPE scaling type.
+
  ## [[0.9.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.8.0...v0.9.0)] - 2023-10-28
 
  - Fix missing object file for ggml-backend when building with metal and cublas options.
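The 0.9.1 entry above renames the KV cache reset method, and the matching binding change appears later in this diff: `kv_cache_tokens_rm` (two positional arguments) becomes `kv_cache_clear` (no arguments). A minimal migration sketch, assuming `context` is an already-initialized `LLaMACpp::Context`; the arguments passed to the removed method are illustrative only:

```ruby
# llama_cpp <= 0.9.0: removed KV cache cells by range (illustrative arguments)
context.kv_cache_tokens_rm(0, -1)

# llama_cpp >= 0.9.1: takes no arguments and clears the whole KV cache
context.kv_cache_clear
```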
data/ext/llama_cpp/extconf.rb CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
 
 abort 'libstdc++ is not found.' unless have_library('stdc++')
 
-$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c llama.cpp llama_cpp.cpp]
+$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
 $srcs << 'ggml-mpi.c' if with_config('mpi')
 $CFLAGS << ' -w -DNDEBUG'
@@ -18,12 +18,6 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
   $CXXFLAGS << ' -pthread'
 end
 
-unless with_config('no_k_quants')
-  $CFLAGS << ' -DGGML_USE_K_QUANTS'
-  $CXXFLAGS << ' -DGGML_USE_K_QUANTS'
-  $srcs << 'k_quants.c'
-end
-
 if with_config('qkk_64')
   $CFLAGS << ' -DGGML_QKK_64'
   $CXXFLAGS << ' -DGGML_QKK_64'
@@ -53,16 +47,14 @@ if with_config('metal')
   $CFLAGS << ' -DGGML_USE_METAL'
   $CXXFLAGS << ' -DGGML_USE_METAL'
   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
-  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
-  $objs << 'k_quants.o' unless with_config('no_k_quants')
+  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-metal.o llama.o llama_cpp.o]
 end
 
 if with_config('cublas')
   $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
-  $objs << 'k_quants.o' unless with_config('no_k_quants')
+  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-cuda.o llama.o llama_cpp.o]
 end
 
 if with_config('clblast')
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -796,10 +796,22 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "n_threads", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads), 0);
  rb_define_method(rb_cLLaMAContextParams, "n_threads_batch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_threads_batch), 1);
  rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
+ rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
+ rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_ext_factor), 1);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_ext_factor), 0);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_attn_factor), 1);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_attn_factor), 0);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_fast), 1);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_fast), 0);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_slow), 1);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
+ rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
  rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
  rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
  rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -883,6 +895,18 @@ private:
  return INT2NUM(ptr->params.n_threads_batch);
  }
 
+ // rope_scaling_type
+ static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.rope_scaling_type = NUM2INT(scaling_type);
+ return INT2NUM(ptr->params.rope_scaling_type);
+ }
+
+ static VALUE _llama_context_params_get_rope_scaling_type(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return INT2NUM(ptr->params.rope_scaling_type);
+ }
+
  // rope_freq_base
  static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -907,6 +931,66 @@ private:
  return DBL2NUM(ptr->params.rope_freq_scale);
  }
 
+ // yarn_ext_factor
+ static VALUE _llama_context_params_set_yarn_ext_factor(VALUE self, VALUE yarn_ext_factor) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.yarn_ext_factor = NUM2DBL(yarn_ext_factor);
+ return DBL2NUM(ptr->params.yarn_ext_factor);
+ }
+
+ static VALUE _llama_context_params_get_yarn_ext_factor(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.yarn_ext_factor);
+ }
+
+ // yarn_attn_factor
+ static VALUE _llama_context_params_set_yarn_attn_factor(VALUE self, VALUE yarn_attn_factor) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.yarn_attn_factor = NUM2DBL(yarn_attn_factor);
+ return DBL2NUM(ptr->params.yarn_attn_factor);
+ }
+
+ static VALUE _llama_context_params_get_yarn_attn_factor(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.yarn_attn_factor);
+ }
+
+ // yarn_beta_fast
+ static VALUE _llama_context_params_set_yarn_beta_fast(VALUE self, VALUE yarn_beta_fast) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.yarn_beta_fast = NUM2DBL(yarn_beta_fast);
+ return DBL2NUM(ptr->params.yarn_beta_fast);
+ }
+
+ static VALUE _llama_context_params_get_yarn_beta_fast(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.yarn_beta_fast);
+ }
+
+ // yarn_beta_slow
+ static VALUE _llama_context_params_set_yarn_beta_slow(VALUE self, VALUE yarn_beta_slow) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.yarn_beta_slow = NUM2DBL(yarn_beta_slow);
+ return DBL2NUM(ptr->params.yarn_beta_slow);
+ }
+
+ static VALUE _llama_context_params_get_yarn_beta_slow(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.yarn_beta_slow);
+ }
+
+ // yarn_orig_ctx
+ static VALUE _llama_context_params_set_yarn_orig_ctx(VALUE self, VALUE yarn_orig_ctx) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.yarn_orig_ctx = NUM2UINT(yarn_orig_ctx);
+ return UINT2NUM(ptr->params.yarn_orig_ctx);
+ }
+
+ static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return UINT2NUM(ptr->params.yarn_orig_ctx);
+ }
+
  // mul_mat_q
  static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
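The accessors above are thin pass-throughs (NUM2DBL/DBL2NUM, and NUM2UINT for `yarn_orig_ctx`) to the corresponding `llama_context_params` fields, so the YaRN settings can be adjusted from Ruby before a context is created. A minimal sketch of the new ContextParams surface; the numeric values are placeholders, not recommended settings:

```ruby
params = LLaMACpp::ContextParams.new

# YaRN RoPE-scaling parameters added in 0.9.1 (values shown are placeholders)
params.yarn_ext_factor  = 1.0
params.yarn_attn_factor = 1.0
params.yarn_beta_fast   = 32.0
params.yarn_beta_slow   = 1.0
params.yarn_orig_ctx    = 4096   # original training context length, as an unsigned integer
```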
@@ -1011,6 +1095,8 @@ public:
  rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
  rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
  rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
+ rb_define_method(rb_cLLaMAModelQuantizeParams, "pure=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_pure), 1);
+ rb_define_method(rb_cLLaMAModelQuantizeParams, "pure", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_pure), 0);
  }
 
  private:
@@ -1083,6 +1169,18 @@ private:
  LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
  return ptr->params.only_copy ? Qtrue : Qfalse;
  }
+
+ // pure
+ static VALUE _llama_model_quantize_params_set_pure(VALUE self, VALUE pure) {
+ LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+ ptr->params.pure = RTEST(pure) ? true : false;
+ return ptr->params.pure ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_quantize_params_get_pure(VALUE self) {
+ LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+ return ptr->params.pure ? Qtrue : Qfalse;
+ }
  };
 
  const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
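The new `pure` flag is a boolean pass-through to `llama_model_quantize_params.pure`; per upstream llama.cpp it disables k-quant mixtures so every tensor is quantized to the same type. A minimal sketch of the Ruby side only; how the params object is then handed to the gem's quantization entry point is assumed to follow your existing code:

```ruby
qparams = LLaMACpp::ModelQuantizeParams.new
qparams.pure = true    # quantize all tensors to the same type (no k-quant mixtures)
qparams.pure           # => true
```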
@@ -1741,7 +1839,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
  rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
- rb_define_method(rb_cLLaMAContext, "kv_cache_tokens_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_tokens_rm), 2);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_clear", RUBY_METHOD_FUNC(_llama_context_kv_cache_clear), 0);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
@@ -1754,6 +1852,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
+ rb_define_method(rb_cLLaMAContext, "sample_min_p", RUBY_METHOD_FUNC(_llama_context_sample_min_p), -1);
  rb_define_method(rb_cLLaMAContext, "sample_tail_free", RUBY_METHOD_FUNC(_llama_context_sample_tail_free), -1);
  rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
  rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
@@ -2032,13 +2131,13 @@ private:
  return INT2NUM(llama_get_kv_cache_token_count(ptr->ctx));
  }
 
- static VALUE _llama_context_kv_cache_tokens_rm(VALUE self, VALUE c0, VALUE c1) {
+ static VALUE _llama_context_kv_cache_clear(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
- llama_kv_cache_tokens_rm(ptr->ctx, NUM2INT(c0), NUM2INT(c1));
+ llama_kv_cache_clear(ptr->ctx);
  return Qnil;
  }
 
@@ -2386,6 +2485,45 @@ private:
  return Qnil;
  }
 
+ static VALUE _llama_context_sample_min_p(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("prob"), rb_intern("min_keep") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE candidates = Qnil;
+ rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 1, 1, kw_values);
+
+ if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
+ rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
+ return Qnil;
+ }
+ if (!RB_FLOAT_TYPE_P(kw_values[0])) {
+ rb_raise(rb_eArgError, "prob must be a float");
+ return Qnil;
+ }
+ if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
+ rb_raise(rb_eArgError, "min_keep must be an integer");
+ return Qnil;
+ }
+
+ LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+ if (ctx_ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
+ if (cnd_ptr->array.data == nullptr) {
+ rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
+ return Qnil;
+ }
+ const float prob = NUM2DBL(kw_values[0]);
+ const size_t min_keep = kw_values[1] != Qundef ? NUM2SIZET(kw_values[1]) : 1;
+
+ llama_sample_min_p(ctx_ptr->ctx, &(cnd_ptr->array), prob, min_keep);
+
+ return Qnil;
+ }
+
  static VALUE _llama_context_sample_tail_free(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[2] = { rb_intern("z"), rb_intern("min_keep") };
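From the binding above, `sample_min_p` takes a `TokenDataArray` as its positional argument plus a required `prob:` float keyword and an optional `min_keep:` integer defaulting to 1; the candidates array is filtered in place. A short sketch, assuming `context` is an initialized `LLaMACpp::Context`, `candidates` is a `LLaMACpp::TokenDataArray` built from the current logits, and a follow-up `sample_token` call as in the gem's other sampling examples:

```ruby
# min-p sampling: drop tokens whose probability falls below prob * p(best token)
context.sample_min_p(candidates, prob: 0.05, min_keep: 1)
token = context.sample_token(candidates)
```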
@@ -2881,6 +3019,12 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
+
  std::stringstream ss_magic;
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
data/ext/llama_cpp/src/ggml-alloc.c CHANGED
@@ -378,9 +378,13 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
  }
  }
 
- static void init_view(struct ggml_allocr * alloc, struct ggml_tensor * view) {
+ static void init_view(struct ggml_allocr * alloc, struct ggml_tensor * view, bool update_backend) {
  assert(view->view_src != NULL && view->view_src->data != NULL);
- view->backend = view->view_src->backend;
+
+ if (update_backend) {
+ view->backend = view->view_src->backend;
+ }
+
  view->buffer = view->view_src->buffer;
  view->data = (char *)view->view_src->data + view->view_offs;
 
@@ -394,7 +398,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
  struct hash_node * ht = alloc->hash_table;
  if (node->data == NULL) {
  if (ggml_is_view(node)) {
- init_view(alloc, node);
+ init_view(alloc, node, true);
  } else {
  // see if we can reuse a parent's buffer (inplace)
  if (ggml_op_can_inplace(node->op)) {
@@ -424,15 +428,14 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
  AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
  node->view_src = view_src;
  view_src_hn->n_views += 1;
- init_view(alloc, node);
+ init_view(alloc, node, false);
  return;
  }
- }
- else {
+ } else {
  AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
  node->view_src = parent;
  p_hn->n_views += 1;
- init_view(alloc, node);
+ init_view(alloc, node, false);
  return;
  }
  }
@@ -463,7 +466,7 @@ size_t ggml_allocr_alloc_graph_n(
  hash_get(ht, view_src)->n_views += 1;
  if (node->buffer == NULL && node->data != NULL) {
  // view of a pre-allocated tensor, didn't call init_view() yet
- init_view(alloc, node);
+ init_view(alloc, node, true);
  }
  }
 
@@ -474,7 +477,7 @@ size_t ggml_allocr_alloc_graph_n(
  }
  hash_get(ht, parent)->n_children += 1;
  if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) {
- init_view(alloc, parent);
+ init_view(alloc, parent, true);
  }
  }
  }