llama_cpp 0.5.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +6 -5
- data/examples/chat.rb +13 -13
- data/examples/embedding.rb +9 -9
- data/ext/llama_cpp/llama_cpp.cpp +583 -262
- data/ext/llama_cpp/src/ggml-alloc.c +8 -2
- data/ext/llama_cpp/src/ggml-alloc.h +1 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +326 -149
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +167 -89
- data/ext/llama_cpp/src/ggml-metal.metal +130 -40
- data/ext/llama_cpp/src/ggml-opencl.cpp +119 -53
- data/ext/llama_cpp/src/ggml.c +2355 -1166
- data/ext/llama_cpp/src/ggml.h +129 -35
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/llama.cpp +1766 -671
- data/ext/llama_cpp/src/llama.h +321 -120
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -10
- data/sig/llama_cpp.rbs +70 -34
- metadata +4 -3
@@ -77,7 +77,7 @@ struct free_block {
     size_t size;
 };
 
-#define MAX_FREE_BLOCKS 128
+#define MAX_FREE_BLOCKS 256
 
 struct ggml_allocr {
     void * data;
|
@@ -187,6 +187,7 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
     }
 
     tensor->data = addr;
+    AT_PRINTF("%s: allocated data at %p\n", __func__, tensor->data);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     add_allocated_tensor(alloc, tensor);
@@ -218,7 +219,8 @@ static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tens
 
     size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
     size = aligned_offset(NULL, size, alloc->alignment);
-    AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: freeing %s at %p (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, ptr, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: alloc->data = %p alloc->data+alloc->size = %p alloc->data+alloc->max_size = %p\n", __func__, alloc->data, (char*)alloc->data + alloc->size, (char*)alloc->data + alloc->max_size);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     remove_allocated_tensor(alloc, tensor);
|
@@ -631,3 +633,7 @@ static size_t ggml_allocr_alloc_graph_tensors_n(
 size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
     return ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
 }
+
+size_t ggml_allocr_max_size(struct ggml_allocr * alloc) {
+    return alloc->max_size;
+}
|
@@ -19,6 +19,7 @@ GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
 GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
+GGML_API size_t ggml_allocr_max_size(struct ggml_allocr * alloc);
 
 
 #ifdef __cplusplus