llama_cpp 0.10.2 → 0.10.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/llama_cpp/src/ggml-alloc.c +1 -1
- data/ext/llama_cpp/src/ggml-backend.c +6 -10
- data/ext/llama_cpp/src/ggml-cuda.cu +510 -372
- data/ext/llama_cpp/src/ggml-quants.c +25 -344
- data/ext/llama_cpp/src/ggml.c +7 -8
- data/ext/llama_cpp/src/ggml.h +2 -0
- data/ext/llama_cpp/src/llama.cpp +432 -39
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e679eaf867f62033f7d586a8ef131f2126cb3efb2fde49af7c0be17492a66edf
+  data.tar.gz: da1e9828c456677dc877db6b9754e961ceff27ecfc93c48abd7624d9bb8cdd29
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b1fd0737acaa229493e2cbacc79f5b0b6b91233d40e26b57ab7005945ddba79ea3f44e2cca8a0d75df3695373f8eaa2fdfd4ff766a166a688c051beb2acfb126
+  data.tar.gz: '01889a0ff9ebabd400fa374066659686ee84d4afab973cdd55b36ce5588bded1ed424a88296c1a26acc413f1e4f98f9f6e36eebaf7f37874b91a335dd147d3f4'
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
+## [[0.10.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.2...v0.10.3)] - 2023-12-29
+
+- Bump bundled llama.cpp from b1686 to b1710.
+- Add document comment and type declaration of `n_batch` method in `Context`.
+
 ## [[0.10.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.1...v0.10.2)] - 2023-12-23
 
 - Bump bundled llama.cpp from b1641 to b1686.
data/ext/llama_cpp/src/ggml-alloc.c
CHANGED
@@ -72,7 +72,7 @@ static void remove_allocated_tensor(ggml_tallocr_t alloc, struct ggml_tensor * t
 
 // check if a tensor is allocated by this buffer
 static bool ggml_tallocr_is_own(ggml_tallocr_t alloc, const struct ggml_tensor * tensor) {
-    return tensor->buffer == alloc->buffer;
+    return tensor->buffer == alloc->buffer && (!tensor->view_src || tensor->view_src->buffer == alloc->buffer);
 }
 
 static bool ggml_is_view(struct ggml_tensor * t) {
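Note: the extra `view_src` condition means a tensor that merely views another tensor's data is only treated as "owned" by this allocator when the viewed tensor's storage also lives in the same buffer. A minimal standalone sketch of that predicate, using simplified stand-in structs rather than the real ggml types:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for ggml's buffer/tensor types (illustration only). */
struct buffer { int id; };

struct tensor {
    struct buffer * buffer;    /* buffer holding this tensor's data            */
    struct tensor * view_src;  /* non-NULL if this tensor is a view of another */
};

/* A tensor "belongs" to a buffer if its data lives there and, when it is a
 * view, the viewed tensor's data lives there too. */
static bool is_own(const struct buffer * buf, const struct tensor * t) {
    return t->buffer == buf && (!t->view_src || t->view_src->buffer == buf);
}

int main(void) {
    struct buffer a = {1}, b = {2};
    struct tensor base    = { .buffer = &a, .view_src = NULL   };
    struct tensor view_ok = { .buffer = &a, .view_src = &base  };
    struct tensor other   = { .buffer = &b, .view_src = NULL   };
    struct tensor view_no = { .buffer = &a, .view_src = &other }; /* data in a, source in b */

    printf("%d %d %d\n", is_own(&a, &view_ok), is_own(&a, &view_no), is_own(&a, &other));
    /* prints: 1 0 0 */
    return 0;
}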
data/ext/llama_cpp/src/ggml-backend.c
CHANGED
@@ -297,7 +297,7 @@ static void ggml_backend_registry_init(void) {
 void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
     GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
 
-
+    size_t id = ggml_backend_registry_count;
 
     ggml_backend_registry[id] = (struct ggml_backend_reg) {
         /* .name = */ {0},
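Aside from the retyped index on the added line (the removed line's text is not preserved in this diff view), the surrounding code registers a backend into a fixed-capacity table guarded by an assert. A generic sketch of that fixed-size registry pattern, with made-up names and sizes rather than the ggml ones:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_REG 16

struct reg_entry {
    char   name[128];
    void * user_data;
};

static struct reg_entry registry[MAX_REG];
static size_t registry_count = 0;

/* Append an entry to the fixed-capacity registry; the assert mirrors
 * GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG). */
static void registry_add(const char * name, void * user_data) {
    assert(registry_count < MAX_REG && "registry full");

    size_t id = registry_count;
    snprintf(registry[id].name, sizeof(registry[id].name), "%s", name);
    registry[id].user_data = user_data;

    registry_count++;
}

int main(void) {
    registry_add("CPU", NULL);
    return 0;
}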
@@ -330,6 +330,8 @@ size_t ggml_backend_reg_find_by_name(const char * name) {
             return i;
         }
     }
+
+    // not found
     return SIZE_MAX;
 }
 
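The new comment documents an existing contract: the lookup returns `SIZE_MAX` as its not-found sentinel, which callers test explicitly (as the `backend_i == SIZE_MAX` check in the next hunk does). A self-contained sketch of the same pattern on a plain string array:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return the index of `name` in `names`, or SIZE_MAX if it is not present.
 * Same convention as ggml_backend_reg_find_by_name, shown here on a simple
 * string array for illustration. */
static size_t find_by_name(const char * const * names, size_t count, const char * name) {
    for (size_t i = 0; i < count; i++) {
        if (strcmp(names[i], name) == 0) {
            return i;
        }
    }
    // not found
    return SIZE_MAX;
}

int main(void) {
    const char * backends[] = { "CPU", "CUDA", "Metal" };

    size_t i = find_by_name(backends, 3, "OpenCL");
    if (i == SIZE_MAX) {
        fprintf(stderr, "backend not found\n");
    }
    return 0;
}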
@@ -340,15 +342,15 @@ ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str)
     const char * params = strchr(backend_str, ':');
     char backend_name[128];
     if (params == NULL) {
-
+        snprintf(backend_name, sizeof(backend_name), "%s", backend_str);
         params = "";
     } else {
-
-        backend_name[params - backend_str] = '\0';
+        snprintf(backend_name, sizeof(backend_name), "%.*s", (int)(params - backend_str), backend_str);
         params++;
     }
 
     size_t backend_i = ggml_backend_reg_find_by_name(backend_name);
+
     if (backend_i == SIZE_MAX) {
         fprintf(stderr, "%s: backend %s not found\n", __func__, backend_name);
         return NULL;
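Both copies into `backend_name` now go through `snprintf`, which truncates to the destination size and always NUL-terminates; the `"%.*s"` form additionally limits the read to the prefix before `':'`. A small sketch of the same parsing, with a hypothetical `parse_backend_str` helper and an illustrative input string:

#include <stdio.h>
#include <string.h>

/* Split "name:params" into a bounded, NUL-terminated name plus a params
 * pointer. Hypothetical helper mirroring the pattern in the hunk above. */
static const char * parse_backend_str(const char * backend_str, char * name, size_t name_size) {
    const char * params = strchr(backend_str, ':');
    if (params == NULL) {
        /* whole string is the name; snprintf truncates and NUL-terminates */
        snprintf(name, name_size, "%s", backend_str);
        return "";
    }
    /* read at most (params - backend_str) bytes, truncated to name_size - 1 */
    snprintf(name, name_size, "%.*s", (int)(params - backend_str), backend_str);
    return params + 1;
}

int main(void) {
    char name[16];
    const char * params = parse_backend_str("cpu:extra", name, sizeof(name));
    printf("name='%s' params='%s'\n", name, params); /* name='cpu' params='extra' */
    return 0;
}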
@@ -396,18 +398,12 @@ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
 }
 
 static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
-    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
-
     memcpy((char *)tensor->data + offset, data, size);
 
     GGML_UNUSED(buffer);
 }
 
 static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
-    GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
-    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
-
     memcpy(data, (const char *)tensor->data + offset, size);
 
     GGML_UNUSED(buffer);
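The CPU buffer's set/get callbacks drop their per-call bounds and allocation asserts, leaving a bare `memcpy`; presumably the equivalent validation happens once in the generic `ggml_backend_tensor_set`/`ggml_backend_tensor_get` entry points, which this hunk does not show. The guarded-copy pattern that was removed here looks roughly like this sketch (generic names, not the ggml API):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Bounded write into a tensor-like buffer: the caller-facing entry point
 * validates offset/size once, so a per-backend callback can be a bare
 * memcpy. Illustrative only; buf/buf_size stand in for a ggml tensor. */
static void buffer_set(uint8_t * buf, size_t buf_size, size_t offset, const void * data, size_t size) {
    assert(buf != NULL && "buffer not allocated");
    assert(offset + size <= buf_size && "write out of bounds");
    memcpy(buf + offset, data, size);
}

int main(void) {
    uint8_t storage[64] = {0};
    const float x = 1.0f;
    buffer_set(storage, sizeof(storage), 16, &x, sizeof(x));
    return 0;
}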