llama_cpp 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1167,7 +1167,7 @@ size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct g
1167
1167
  return 0;
1168
1168
  }
1169
1169
 
1170
- void ggml_cl_transform_tensor(ggml_tensor * tensor) {
1170
+ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
1171
1171
  const int64_t ne0 = tensor->ne[0];
1172
1172
  const int64_t ne1 = tensor->ne[1];
1173
1173
  const int64_t ne2 = tensor->ne[2];
@@ -1179,6 +1179,7 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
1179
1179
  size_t q_size;
1180
1180
  cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size);
1181
1181
 
1182
+ tensor->data = data;
1182
1183
  // copy tensor to device
1183
1184
  for (int64_t i3 = 0; i3 < ne3; i3++) {
1184
1185
  for (int64_t i2 = 0; i2 < ne2; i2++) {
@@ -1190,35 +1191,5 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
1190
1191
  CL_CHECK(clFinish(queue));
1191
1192
 
1192
1193
  tensor->data = dst;
1193
- tensor->backend = GGML_BACKEND_GPU;
1194
- }
1195
-
1196
- void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, const size_t offset) {
1197
- cl_int err;
1198
- FILE * fp = fopen(fname, "rb");
1199
-
1200
- const size_t size = ggml_nbytes(tensor);
1201
-
1202
- cl_mem dst;
1203
- CL_CHECK((dst = clCreateBuffer(context, CL_MEM_READ_ONLY, size, nullptr, &err), err));
1204
- void * buf_host = malloc(size);
1205
-
1206
- #ifdef _WIN32
1207
- int ret = _fseeki64(fp, (__int64) offset, SEEK_SET);
1208
- #else
1209
- int ret = fseek(fp, (long) offset, SEEK_SET);
1210
- #endif
1211
- GGML_ASSERT(ret == 0); // same
1212
-
1213
- size_t ret2 = fread(buf_host, size, 1, fp);
1214
- if (ret2 != 1) {
1215
- fprintf(stderr, "unexpectedly reached end of file");
1216
- exit(1);
1217
- }
1218
-
1219
- clEnqueueWriteBuffer(queue, dst, CL_TRUE, 0, size, buf_host, 0, nullptr, nullptr);
1220
-
1221
- tensor->data = dst;
1222
- free(buf_host);
1223
- fclose(fp);
1194
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
1224
1195
  }
@@ -18,8 +18,7 @@ void ggml_cl_host_free(void * ptr);
18
18
 
19
19
  void ggml_cl_free_data(const struct ggml_tensor* tensor);
20
20
 
21
- void ggml_cl_transform_tensor(struct ggml_tensor * tensor);
22
- void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, size_t offset);
21
+ void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
23
22
 
24
23
  #ifdef __cplusplus
25
24
  }