llama_cpp 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
@@ -1167,7 +1167,7 @@ size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct g
|
|
1167
1167
|
return 0;
|
1168
1168
|
}
|
1169
1169
|
|
1170
|
-
void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
1170
|
+
void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
|
1171
1171
|
const int64_t ne0 = tensor->ne[0];
|
1172
1172
|
const int64_t ne1 = tensor->ne[1];
|
1173
1173
|
const int64_t ne2 = tensor->ne[2];
|
@@ -1179,6 +1179,7 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
|
1179
1179
|
size_t q_size;
|
1180
1180
|
cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size);
|
1181
1181
|
|
1182
|
+
tensor->data = data;
|
1182
1183
|
// copy tensor to device
|
1183
1184
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
1184
1185
|
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
@@ -1190,35 +1191,5 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
|
1190
1191
|
CL_CHECK(clFinish(queue));
|
1191
1192
|
|
1192
1193
|
tensor->data = dst;
|
1193
|
-
tensor->backend
|
1194
|
-
}
|
1195
|
-
|
1196
|
-
void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, const size_t offset) {
|
1197
|
-
cl_int err;
|
1198
|
-
FILE * fp = fopen(fname, "rb");
|
1199
|
-
|
1200
|
-
const size_t size = ggml_nbytes(tensor);
|
1201
|
-
|
1202
|
-
cl_mem dst;
|
1203
|
-
CL_CHECK((dst = clCreateBuffer(context, CL_MEM_READ_ONLY, size, nullptr, &err), err));
|
1204
|
-
void * buf_host = malloc(size);
|
1205
|
-
|
1206
|
-
#ifdef _WIN32
|
1207
|
-
int ret = _fseeki64(fp, (__int64) offset, SEEK_SET);
|
1208
|
-
#else
|
1209
|
-
int ret = fseek(fp, (long) offset, SEEK_SET);
|
1210
|
-
#endif
|
1211
|
-
GGML_ASSERT(ret == 0); // same
|
1212
|
-
|
1213
|
-
size_t ret2 = fread(buf_host, size, 1, fp);
|
1214
|
-
if (ret2 != 1) {
|
1215
|
-
fprintf(stderr, "unexpectedly reached end of file");
|
1216
|
-
exit(1);
|
1217
|
-
}
|
1218
|
-
|
1219
|
-
clEnqueueWriteBuffer(queue, dst, CL_TRUE, 0, size, buf_host, 0, nullptr, nullptr);
|
1220
|
-
|
1221
|
-
tensor->data = dst;
|
1222
|
-
free(buf_host);
|
1223
|
-
fclose(fp);
|
1194
|
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
1224
1195
|
}
|
@@ -18,8 +18,7 @@ void ggml_cl_host_free(void * ptr);
|
|
18
18
|
|
19
19
|
void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
20
20
|
|
21
|
-
void ggml_cl_transform_tensor(struct ggml_tensor * tensor);
|
22
|
-
void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, size_t offset);
|
21
|
+
void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
23
22
|
|
24
23
|
#ifdef __cplusplus
|
25
24
|
}
|