llama_cpp 0.16.1 → 0.16.2

@@ -73,9 +73,13 @@ struct rpc_tensor {
     uint64_t view_offs;
     uint64_t data;
     char name[GGML_MAX_NAME];
+
+    char padding[4];
 };
 #pragma pack(pop)
 
+static_assert(sizeof(rpc_tensor) % 8 == 0, "rpc_tensor size must be multiple of 8");
+
 // RPC commands
 enum rpc_cmd {
     ALLOC_BUFFER = 0,
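
This release pulls in upstream ggml RPC alignment fixes. The hunk above pads rpc_tensor so its size stays a multiple of 8 even under #pragma pack(push, 1), and the static_assert turns any future violation into a compile error. A minimal sketch of the same invariant, using a hypothetical packed_example struct rather than the real rpc_tensor fields:

    // Sketch only: packed_example and its fields are hypothetical, not the
    // real rpc_tensor layout; the point is the padding + static_assert idiom.
    #include <cstdint>

    #pragma pack(push, 1)           // no implicit padding between fields
    struct packed_example {
        uint64_t id;                // 8 bytes
        uint32_t type;              // 4 bytes -> 12 total, not a multiple of 8
        char     padding[4];        // explicit padding -> 16 total
    };
    #pragma pack(pop)

    // Compile-time guard: fails if a future field change breaks the invariant.
    static_assert(sizeof(packed_example) % 8 == 0,
                  "packed_example size must be multiple of 8");
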
@@ -599,9 +603,8 @@ static void serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & o
     int output_size = sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(rpc_tensor);
     output.resize(output_size, 0);
     memcpy(output.data(), &n_nodes, sizeof(n_nodes));
-    uint64_t * out_nodes = (uint64_t *)(output.data() + sizeof(n_nodes));
     for (uint32_t i = 0; i < n_nodes; i++) {
-        out_nodes[i] = reinterpret_cast<uint64_t>(cgraph->nodes[i]);
+        memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
     }
     uint32_t * out_ntensors = (uint32_t *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t));
     *out_ntensors = n_tensors;
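
The removed code cast output.data() + sizeof(n_nodes) to uint64_t * and stored through it; since that offset is only 4 bytes, the stores were misaligned, which is undefined behavior and can fault on strict-alignment targets. Copying each value with memcpy into the byte buffer is the portable fix. A self-contained sketch of the pattern, with a hypothetical serialize_ids helper:

    // Hypothetical helper mirroring the fixed pattern: write a uint32_t count
    // followed by 64-bit values into a byte buffer without ever forming a
    // misaligned uint64_t pointer.
    #include <cstdint>
    #include <cstring>
    #include <vector>

    static void serialize_ids(const std::vector<uint64_t> & ids,
                              std::vector<uint8_t> & output) {
        const uint32_t n = (uint32_t)ids.size();
        output.resize(sizeof(n) + n * sizeof(uint64_t), 0);
        memcpy(output.data(), &n, sizeof(n));
        for (uint32_t i = 0; i < n; i++) {
            // output.data() + 4 + i * 8 is only 4-byte aligned, so a
            // uint64_t store through a cast pointer would be UB; memcpy is not.
            memcpy(output.data() + sizeof(n) + i * sizeof(uint64_t),
                   &ids[i], sizeof(uint64_t));
        }
    }
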
@@ -1036,7 +1039,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<u
     }
     std::unordered_map<uint64_t, ggml_tensor*> tensor_map;
     for (uint32_t i = 0; i < n_nodes; i++) {
-        graph->nodes[i] = create_node(nodes[i], ctx, tensor_ptrs, tensor_map);
+        int64_t id;
+        memcpy(&id, &nodes[i], sizeof(id));
+        graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map);
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     // output serialization format: | status (1 byte) |
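
The server side mirrors the fix: each node id is memcpy'd out of the request buffer into a local before create_node is called, instead of being read through a possibly misaligned uint64_t pointer. A hypothetical deserialize_ids helper sketching the receive-side pattern (real code would also validate the input length):

    // Hypothetical receive-side helper: copy each 64-bit id out of the byte
    // buffer with memcpy instead of dereferencing a misaligned pointer.
    // A real implementation must first check input.size() against the count.
    #include <cstdint>
    #include <cstring>
    #include <vector>

    static std::vector<uint64_t> deserialize_ids(const std::vector<uint8_t> & input) {
        uint32_t n = 0;
        memcpy(&n, input.data(), sizeof(n));
        std::vector<uint64_t> ids(n);
        for (uint32_t i = 0; i < n; i++) {
            uint64_t id;
            memcpy(&id, input.data() + sizeof(n) + i * sizeof(uint64_t), sizeof(id));
            ids[i] = id;
        }
        return ids;
    }
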