llama_cpp 0.2.0 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -1167,7 +1167,7 @@ size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct g
1167
1167
  return 0;
1168
1168
  }
1169
1169
 
1170
- void ggml_cl_transform_tensor(ggml_tensor * tensor) {
1170
+ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
1171
1171
  const int64_t ne0 = tensor->ne[0];
1172
1172
  const int64_t ne1 = tensor->ne[1];
1173
1173
  const int64_t ne2 = tensor->ne[2];
@@ -1179,6 +1179,7 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
1179
1179
  size_t q_size;
1180
1180
  cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size);
1181
1181
 
1182
+ tensor->data = data;
1182
1183
  // copy tensor to device
1183
1184
  for (int64_t i3 = 0; i3 < ne3; i3++) {
1184
1185
  for (int64_t i2 = 0; i2 < ne2; i2++) {
@@ -1190,35 +1191,5 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
1190
1191
  CL_CHECK(clFinish(queue));
1191
1192
 
1192
1193
  tensor->data = dst;
1193
- tensor->backend = GGML_BACKEND_GPU;
1194
- }
1195
-
1196
- void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, const size_t offset) {
1197
- cl_int err;
1198
- FILE * fp = fopen(fname, "rb");
1199
-
1200
- const size_t size = ggml_nbytes(tensor);
1201
-
1202
- cl_mem dst;
1203
- CL_CHECK((dst = clCreateBuffer(context, CL_MEM_READ_ONLY, size, nullptr, &err), err));
1204
- void * buf_host = malloc(size);
1205
-
1206
- #ifdef _WIN32
1207
- int ret = _fseeki64(fp, (__int64) offset, SEEK_SET);
1208
- #else
1209
- int ret = fseek(fp, (long) offset, SEEK_SET);
1210
- #endif
1211
- GGML_ASSERT(ret == 0); // same
1212
-
1213
- size_t ret2 = fread(buf_host, size, 1, fp);
1214
- if (ret2 != 1) {
1215
- fprintf(stderr, "unexpectedly reached end of file");
1216
- exit(1);
1217
- }
1218
-
1219
- clEnqueueWriteBuffer(queue, dst, CL_TRUE, 0, size, buf_host, 0, nullptr, nullptr);
1220
-
1221
- tensor->data = dst;
1222
- free(buf_host);
1223
- fclose(fp);
1194
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
1224
1195
  }
@@ -18,8 +18,7 @@ void ggml_cl_host_free(void * ptr);
18
18
 
19
19
  void ggml_cl_free_data(const struct ggml_tensor* tensor);
20
20
 
21
- void ggml_cl_transform_tensor(struct ggml_tensor * tensor);
22
- void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, size_t offset);
21
+ void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
23
22
 
24
23
  #ifdef __cplusplus
25
24
  }