llama_cpp 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
@@ -1167,7 +1167,7 @@ size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct g
|
|
1167
1167
|
return 0;
|
1168
1168
|
}
|
1169
1169
|
|
1170
|
-
void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
1170
|
+
void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
|
1171
1171
|
const int64_t ne0 = tensor->ne[0];
|
1172
1172
|
const int64_t ne1 = tensor->ne[1];
|
1173
1173
|
const int64_t ne2 = tensor->ne[2];
|
@@ -1179,6 +1179,7 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
|
1179
1179
|
size_t q_size;
|
1180
1180
|
cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size);
|
1181
1181
|
|
1182
|
+
tensor->data = data;
|
1182
1183
|
// copy tensor to device
|
1183
1184
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
1184
1185
|
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
@@ -1190,35 +1191,5 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
|
1190
1191
|
CL_CHECK(clFinish(queue));
|
1191
1192
|
|
1192
1193
|
tensor->data = dst;
|
1193
|
-
tensor->backend = GGML_BACKEND_GPU;
|
1194
|
-
}
|
1195
|
-
|
1196
|
-
void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, const size_t offset) {
|
1197
|
-
cl_int err;
|
1198
|
-
FILE * fp = fopen(fname, "rb");
|
1199
|
-
|
1200
|
-
const size_t size = ggml_nbytes(tensor);
|
1201
|
-
|
1202
|
-
cl_mem dst;
|
1203
|
-
CL_CHECK((dst = clCreateBuffer(context, CL_MEM_READ_ONLY, size, nullptr, &err), err));
|
1204
|
-
void * buf_host = malloc(size);
|
1205
|
-
|
1206
|
-
#ifdef _WIN32
|
1207
|
-
int ret = _fseeki64(fp, (__int64) offset, SEEK_SET);
|
1208
|
-
#else
|
1209
|
-
int ret = fseek(fp, (long) offset, SEEK_SET);
|
1210
|
-
#endif
|
1211
|
-
GGML_ASSERT(ret == 0); // same
|
1212
|
-
|
1213
|
-
size_t ret2 = fread(buf_host, size, 1, fp);
|
1214
|
-
if (ret2 != 1) {
|
1215
|
-
fprintf(stderr, "unexpectedly reached end of file");
|
1216
|
-
exit(1);
|
1217
|
-
}
|
1218
|
-
|
1219
|
-
clEnqueueWriteBuffer(queue, dst, CL_TRUE, 0, size, buf_host, 0, nullptr, nullptr);
|
1220
|
-
|
1221
|
-
tensor->data = dst;
|
1222
|
-
free(buf_host);
|
1223
|
-
fclose(fp);
|
1194
|
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
1224
1195
|
}
|
@@ -18,8 +18,7 @@ void ggml_cl_host_free(void * ptr);
|
|
18
18
|
|
19
19
|
void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
20
20
|
|
21
|
-
void ggml_cl_transform_tensor(struct ggml_tensor * tensor);
|
22
|
-
void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, size_t offset);
|
21
|
+
void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
23
22
|
|
24
23
|
#ifdef __cplusplus
|
25
24
|
}
|