llama_cpp 0.10.2 → 0.10.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/llama_cpp/src/ggml-alloc.c +1 -1
- data/ext/llama_cpp/src/ggml-backend.c +6 -10
- data/ext/llama_cpp/src/ggml-cuda.cu +510 -372
- data/ext/llama_cpp/src/ggml-quants.c +25 -344
- data/ext/llama_cpp/src/ggml.c +7 -8
- data/ext/llama_cpp/src/ggml.h +2 -0
- data/ext/llama_cpp/src/llama.cpp +432 -39
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e679eaf867f62033f7d586a8ef131f2126cb3efb2fde49af7c0be17492a66edf
|
4
|
+
data.tar.gz: da1e9828c456677dc877db6b9754e961ceff27ecfc93c48abd7624d9bb8cdd29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1fd0737acaa229493e2cbacc79f5b0b6b91233d40e26b57ab7005945ddba79ea3f44e2cca8a0d75df3695373f8eaa2fdfd4ff766a166a688c051beb2acfb126
|
7
|
+
data.tar.gz: '01889a0ff9ebabd400fa374066659686ee84d4afab973cdd55b36ce5588bded1ed424a88296c1a26acc413f1e4f98f9f6e36eebaf7f37874b91a335dd147d3f4'
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## [[0.10.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.2...v0.10.3)] - 2023-12-29
|
2
|
+
|
3
|
+
- Bump bundled llama.cpp from b1686 to b1710.
|
4
|
+
- Add document comment and type declaration of `n_batch` method in `Context`.
|
5
|
+
|
1
6
|
## [[0.10.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.1...v0.10.2)] - 2023-12-23
|
2
7
|
|
3
8
|
- Bump bundled llama.cpp from b1641 to b1686.
|
@@ -72,7 +72,7 @@ static void remove_allocated_tensor(ggml_tallocr_t alloc, struct ggml_tensor * t
|
|
72
72
|
|
73
73
|
// check if a tensor is allocated by this buffer
|
74
74
|
static bool ggml_tallocr_is_own(ggml_tallocr_t alloc, const struct ggml_tensor * tensor) {
|
75
|
-
return tensor->buffer == alloc->buffer;
|
75
|
+
return tensor->buffer == alloc->buffer && (!tensor->view_src || tensor->view_src->buffer == alloc->buffer);
|
76
76
|
}
|
77
77
|
|
78
78
|
static bool ggml_is_view(struct ggml_tensor * t) {
|
@@ -297,7 +297,7 @@ static void ggml_backend_registry_init(void) {
|
|
297
297
|
void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
298
298
|
GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
|
299
299
|
|
300
|
-
|
300
|
+
size_t id = ggml_backend_registry_count;
|
301
301
|
|
302
302
|
ggml_backend_registry[id] = (struct ggml_backend_reg) {
|
303
303
|
/* .name = */ {0},
|
@@ -330,6 +330,8 @@ size_t ggml_backend_reg_find_by_name(const char * name) {
|
|
330
330
|
return i;
|
331
331
|
}
|
332
332
|
}
|
333
|
+
|
334
|
+
// not found
|
333
335
|
return SIZE_MAX;
|
334
336
|
}
|
335
337
|
|
@@ -340,15 +342,15 @@ ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str)
|
|
340
342
|
const char * params = strchr(backend_str, ':');
|
341
343
|
char backend_name[128];
|
342
344
|
if (params == NULL) {
|
343
|
-
|
345
|
+
snprintf(backend_name, sizeof(backend_name), "%s", backend_str);
|
344
346
|
params = "";
|
345
347
|
} else {
|
346
|
-
|
347
|
-
backend_name[params - backend_str] = '\0';
|
348
|
+
snprintf(backend_name, sizeof(backend_name), "%.*s", (int)(params - backend_str), backend_str);
|
348
349
|
params++;
|
349
350
|
}
|
350
351
|
|
351
352
|
size_t backend_i = ggml_backend_reg_find_by_name(backend_name);
|
353
|
+
|
352
354
|
if (backend_i == SIZE_MAX) {
|
353
355
|
fprintf(stderr, "%s: backend %s not found\n", __func__, backend_name);
|
354
356
|
return NULL;
|
@@ -396,18 +398,12 @@ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
396
398
|
}
|
397
399
|
|
398
400
|
static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
399
|
-
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
400
|
-
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
401
|
-
|
402
401
|
memcpy((char *)tensor->data + offset, data, size);
|
403
402
|
|
404
403
|
GGML_UNUSED(buffer);
|
405
404
|
}
|
406
405
|
|
407
406
|
static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
408
|
-
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
409
|
-
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
410
|
-
|
411
407
|
memcpy(data, (const char *)tensor->data + offset, size);
|
412
408
|
|
413
409
|
GGML_UNUSED(buffer);
|