llama_cpp 0.16.1 → 0.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +10 -2
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +10 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +28 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +6 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +8 -3
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2124 -13202
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +27564 -23876
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +278 -366
- data/vendor/tmp/llama.cpp/ggml.c +67 -150
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +530 -237
- data/vendor/tmp/llama.cpp/llama.h +5 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml-rpc.cpp

@@ -73,9 +73,13 @@ struct rpc_tensor {
     uint64_t view_offs;
     uint64_t data;
     char name[GGML_MAX_NAME];
+
+    char padding[4];
 };
 #pragma pack(pop)

+static_assert(sizeof(rpc_tensor) % 8 == 0, "rpc_tensor size must be multiple of 8");
+
 // RPC commands
 enum rpc_cmd {
     ALLOC_BUFFER = 0,
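The added padding and static_assert pin down the wire format: with `#pragma pack` in effect the compiler inserts no padding of its own, so the explicit tail bytes keep sizeof(rpc_tensor) a multiple of 8, and records packed back-to-back after an 8-byte-aligned header keep their 64-bit fields naturally aligned. A minimal sketch of the same technique, using a hypothetical wire_msg struct that is not part of llama.cpp:

```cpp
#include <cstdint>

// Hypothetical packed wire struct illustrating the rpc_tensor trick:
// pack(1) removes compiler padding, so explicit tail bytes are added to
// round the size up to a multiple of 8, checked at compile time.
#pragma pack(push, 1)
struct wire_msg {
    uint64_t id;         // 8 bytes
    uint32_t kind;       // 4 bytes -> 12 so far, not a multiple of 8
    char     padding[4]; // explicit tail padding -> 16 bytes total
};
#pragma pack(pop)

static_assert(sizeof(wire_msg) % 8 == 0, "wire_msg size must be a multiple of 8");

int main() { return 0; }
```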
@@ -599,9 +603,8 @@ static void serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & o
     int output_size = sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(rpc_tensor);
     output.resize(output_size, 0);
     memcpy(output.data(), &n_nodes, sizeof(n_nodes));
-    uint64_t * out_nodes = (uint64_t *)(output.data() + sizeof(n_nodes));
     for (uint32_t i = 0; i < n_nodes; i++) {
-        out_nodes[i] = reinterpret_cast<uint64_t>(cgraph->nodes[i]);
+        memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
     }
     uint32_t * out_ntensors = (uint32_t *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t));
     *out_ntensors = n_tensors;
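This hunk removes an unaligned store: output.data() + sizeof(n_nodes) is offset by only 4 bytes, so writing through the old uint64_t * out_nodes pointer was undefined behavior in C++ and a bus error on strict-alignment hardware. Copying each element with memcpy is the portable idiom. A hedged sketch of the store-side pattern; the write_u64 helper is illustrative and not part of llama.cpp:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative helper (not llama.cpp API): store a 64-bit value at an
// arbitrary, possibly unaligned byte offset. std::memcpy is well-defined
// here; compilers lower it to a single store on targets that permit
// unaligned access.
static void write_u64(std::vector<uint8_t> & buf, std::size_t off, uint64_t v) {
    std::memcpy(buf.data() + off, &v, sizeof(v));
}

int main() {
    // Mirrors serialize_graph's layout: a 4-byte count followed by 8-byte
    // entries, which therefore start at offset 4 - misaligned for a uint64_t*.
    std::vector<uint8_t> buf(sizeof(uint32_t) + 2 * sizeof(uint64_t), 0);
    uint32_t count = 2;
    std::memcpy(buf.data(), &count, sizeof(count));
    write_u64(buf, sizeof(count), 0x1111111111111111ULL);
    write_u64(buf, sizeof(count) + sizeof(uint64_t), 0x2222222222222222ULL);
    return 0;
}
```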
@@ -1036,7 +1039,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<u
     }
     std::unordered_map<uint64_t, ggml_tensor*> tensor_map;
     for (uint32_t i = 0; i < n_nodes; i++) {
-        graph->nodes[i] = create_node(nodes[i], ctx, tensor_ptrs, tensor_map);
+        int64_t id;
+        memcpy(&id, &nodes[i], sizeof(id));
+        graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map);
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     // output serialization format: | status (1 byte) |
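graph_compute gets the matching fix on the read side: the 64-bit node ids sit at a 4-byte offset in the request buffer, so each one is now copied into an aligned local via memcpy before being passed to create_node, rather than read through a misaligned pointer. A sketch of the load-side pattern, assuming an illustrative read_u64 helper that is not llama.cpp API:

```cpp
#include <cstdint>
#include <cstring>

// Illustrative helper (not llama.cpp API): load a 64-bit value from a
// possibly unaligned byte pointer. Dereferencing a misaligned uint64_t*
// would be undefined behavior; copying into an aligned local is portable.
static uint64_t read_u64(const uint8_t * p) {
    uint64_t v;
    std::memcpy(&v, p, sizeof(v));
    return v;
}

int main() {
    // A 12-byte message: 4-byte count, then one 64-bit id at offset 4.
    uint8_t msg[12] = {0};
    uint64_t id = 0xDEADBEEFCAFEF00DULL;
    std::memcpy(msg + 4, &id, sizeof(id));
    return read_u64(msg + 4) == id ? 0 : 1;
}
```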