llama_cpp 0.16.1 → 0.16.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +10 -2
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +10 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +28 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +6 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +8 -3
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2124 -13202
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +27564 -23876
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +278 -366
- data/vendor/tmp/llama.cpp/ggml.c +67 -150
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +530 -237
- data/vendor/tmp/llama.cpp/llama.h +5 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml-rpc.cpp

@@ -73,9 +73,13 @@ struct rpc_tensor {
     uint64_t view_offs;
     uint64_t data;
     char name[GGML_MAX_NAME];
+
+    char padding[4];
 };
 #pragma pack(pop)
 
+static_assert(sizeof(rpc_tensor) % 8 == 0, "rpc_tensor size must be multiple of 8");
+
 // RPC commands
 enum rpc_cmd {
     ALLOC_BUFFER = 0,
@@ -599,9 +603,8 @@ static void serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output)
     int output_size = sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(rpc_tensor);
     output.resize(output_size, 0);
     memcpy(output.data(), &n_nodes, sizeof(n_nodes));
-    uint64_t * out_nodes = (uint64_t *)(output.data() + sizeof(n_nodes));
     for (uint32_t i = 0; i < n_nodes; i++) {
-
+        memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
     }
     uint32_t * out_ntensors = (uint32_t *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t));
     *out_ntensors = n_tensors;
@@ -1036,7 +1039,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<uint8_t> & output)
     }
     std::unordered_map<uint64_t, ggml_tensor*> tensor_map;
     for (uint32_t i = 0; i < n_nodes; i++) {
-
+        int64_t id;
+        memcpy(&id, &nodes[i], sizeof(id));
+        graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map);
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     // output serialization format: | status (1 byte) |