cui-llama.rn 1.2.6 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/android/src/main/CMakeLists.txt +26 -6
- package/android/src/main/java/com/rnllama/LlamaContext.java +115 -27
- package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
- package/android/src/main/jni.cpp +228 -40
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
- package/cpp/amx/amx.cpp +196 -0
- package/cpp/amx/amx.h +20 -0
- package/cpp/amx/common.h +101 -0
- package/cpp/amx/mmq.cpp +2524 -0
- package/cpp/amx/mmq.h +16 -0
- package/cpp/common.cpp +118 -251
- package/cpp/common.h +53 -30
- package/cpp/ggml-aarch64.c +46 -3395
- package/cpp/ggml-aarch64.h +0 -20
- package/cpp/ggml-alloc.c +6 -8
- package/cpp/ggml-backend-impl.h +33 -11
- package/cpp/ggml-backend-reg.cpp +423 -0
- package/cpp/ggml-backend.cpp +14 -676
- package/cpp/ggml-backend.h +46 -9
- package/cpp/ggml-common.h +6 -0
- package/cpp/ggml-cpu-aarch64.c +3823 -0
- package/cpp/ggml-cpu-aarch64.h +32 -0
- package/cpp/ggml-cpu-impl.h +14 -242
- package/cpp/ggml-cpu-quants.c +10835 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu.c +13971 -13720
- package/cpp/ggml-cpu.cpp +715 -0
- package/cpp/ggml-cpu.h +65 -63
- package/cpp/ggml-impl.h +285 -25
- package/cpp/ggml-metal.h +8 -8
- package/cpp/ggml-metal.m +1221 -728
- package/cpp/ggml-quants.c +189 -10681
- package/cpp/ggml-quants.h +78 -125
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +12 -0
- package/cpp/ggml.c +688 -1460
- package/cpp/ggml.h +58 -244
- package/cpp/json-schema-to-grammar.cpp +1045 -1045
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-sampling.cpp +5 -2
- package/cpp/llama.cpp +409 -123
- package/cpp/llama.h +8 -4
- package/cpp/rn-llama.hpp +89 -25
- package/cpp/sampling.cpp +42 -3
- package/cpp/sampling.h +22 -1
- package/cpp/sgemm.cpp +608 -0
- package/cpp/speculative.cpp +270 -0
- package/cpp/speculative.h +28 -0
- package/cpp/unicode.cpp +11 -0
- package/ios/RNLlama.mm +43 -20
- package/ios/RNLlamaContext.h +9 -3
- package/ios/RNLlamaContext.mm +146 -33
- package/jest/mock.js +0 -1
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +4 -2
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +52 -15
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +2 -1
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +51 -15
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +122 -8
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts +5 -6
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +15 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -1
- package/src/NativeRNLlama.ts +135 -13
- package/src/grammar.ts +10 -8
- package/src/index.ts +104 -28
package/cpp/ggml-backend.cpp
CHANGED
```diff
@@ -252,6 +252,7 @@ void lm_ggml_backend_tensor_get_async(lm_ggml_backend_t backend, const struct lm
 }
 
 void lm_ggml_backend_tensor_set(struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+    LM_GGML_ASSERT(tensor);
     lm_ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
     if (size == 0) {
```
```diff
@@ -266,6 +267,7 @@ void lm_ggml_backend_tensor_set(struct lm_ggml_tensor * tensor, const void * dat
 }
 
 void lm_ggml_backend_tensor_get(const struct lm_ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+    LM_GGML_ASSERT(tensor);
     lm_ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
     if (size == 0) {
```
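The two hunks above make the public tensor I/O entry points fail fast on a NULL tensor. A minimal caller sketch (the helper name and float buffers are illustrative, not part of the package):

```c
#include <assert.h>
#include "ggml.h"
#include "ggml-backend.h"

// Hypothetical helper: round-trip a tensor's data through the backend copy API.
// After this change, passing a NULL tensor trips LM_GGML_ASSERT inside
// lm_ggml_backend_tensor_set/get instead of dereferencing NULL further down.
static void roundtrip(struct lm_ggml_tensor * t, const float * src, float * dst) {
    assert(t != NULL); // the backend now enforces this internally as well
    lm_ggml_backend_tensor_set(t, src, 0, lm_ggml_nbytes(t));
    lm_ggml_backend_tensor_get(t, dst, 0, lm_ggml_nbytes(t));
}
```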
```diff
@@ -279,7 +281,7 @@ void lm_ggml_backend_tensor_get(const struct lm_ggml_tensor * tensor, void * dat
     buf->iface.get_tensor(buf, tensor, data, offset, size);
 }
 
-LM_GGML_API void lm_ggml_backend_tensor_memset(struct lm_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
+void lm_ggml_backend_tensor_memset(struct lm_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
     lm_ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
     if (size == 0) {
```
```diff
@@ -525,197 +527,6 @@ void * lm_ggml_backend_reg_get_proc_address(lm_ggml_backend_reg_t reg, const cha
     return reg->iface.get_proc_address(reg, name);
 }
 
-// Backend registry
-
-#ifdef LM_GGML_USE_CUDA
-#include "ggml-cuda.h"
-#endif
-
-#ifdef LM_GGML_USE_METAL
-#include "ggml-metal.h"
-#endif
-
-#ifdef LM_GGML_USE_SYCL
-#include "ggml-sycl.h"
-#endif
-
-#ifdef LM_GGML_USE_VULKAN
-#include "ggml-vulkan.h"
-#endif
-
-#ifdef LM_GGML_USE_BLAS
-#include "ggml-blas.h"
-#endif
-
-#ifdef LM_GGML_USE_RPC
-#include "ggml-rpc.h"
-#endif
-
-#ifndef __AMX_INT8__
-#undef LM_GGML_USE_AMX
-#endif
-
-#ifdef LM_GGML_USE_AMX
-#  include "ggml-amx.h"
-#endif
-
-#ifdef LM_GGML_USE_CANN
-#include "ggml-cann.h"
-#endif
-
-#ifdef LM_GGML_USE_KOMPUTE
-#include "ggml-kompute.h"
-#endif
-
-#include "ggml-cpu.h"
-
-struct lm_ggml_backend_registry {
-    std::vector<lm_ggml_backend_reg_t> backends;
-    std::vector<lm_ggml_backend_dev_t> devices;
-
-    lm_ggml_backend_registry() {
-#ifdef LM_GGML_USE_CUDA
-        register_backend(lm_ggml_backend_cuda_reg());
-#endif
-#ifdef LM_GGML_USE_METAL
-        register_backend(lm_ggml_backend_metal_reg());
-#endif
-#ifdef LM_GGML_USE_SYCL
-        register_backend(lm_ggml_backend_sycl_reg());
-#endif
-#ifdef LM_GGML_USE_VULKAN
-        register_backend(lm_ggml_backend_vk_reg());
-#endif
-#ifdef LM_GGML_USE_CANN
-        register_backend(lm_ggml_backend_cann_reg());
-#endif
-#ifdef LM_GGML_USE_BLAS
-        register_backend(lm_ggml_backend_blas_reg());
-#endif
-#ifdef LM_GGML_USE_RPC
-        register_backend(lm_ggml_backend_rpc_reg());
-#endif
-#ifdef LM_GGML_USE_AMX
-        register_backend(lm_ggml_backend_amx_reg());
-#endif
-#ifdef LM_GGML_USE_KOMPUTE
-        register_backend(lm_ggml_backend_kompute_reg());
-#endif
-
-        register_backend(lm_ggml_backend_cpu_reg());
-    }
-
-    void register_backend(lm_ggml_backend_reg_t reg) {
-#ifndef NDEBUG
-        LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
-            __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
-#endif
-        backends.push_back(reg);
-        for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
-            register_device(lm_ggml_backend_reg_dev_get(reg, i));
-        }
-    }
-
-    void register_device(lm_ggml_backend_dev_t device) {
-#ifndef NDEBUG
-        LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
-#endif
-        devices.push_back(device);
-    }
-};
-
-static lm_ggml_backend_registry & get_reg() {
-    static lm_ggml_backend_registry reg;
-    return reg;
-}
-
-// Internal API
-void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
-    get_reg().register_backend(reg);
-}
-
-void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
-    get_reg().register_device(device);
-}
-
-// Backend (reg) enumeration
-size_t lm_ggml_backend_reg_count() {
-    return get_reg().backends.size();
-}
-
-lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
-    LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
-    return get_reg().backends[index];
-}
-
-lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
-    for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
-        lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
-        if (strcmp(lm_ggml_backend_reg_name(reg), name) == 0) {
-            return reg;
-        }
-    }
-    return NULL;
-}
-
-// Device enumeration
-size_t lm_ggml_backend_dev_count() {
-    return get_reg().devices.size();
-}
-
-lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
-    LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
-    return get_reg().devices[index];
-}
-
-lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
-    for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
-        lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
-        if (strcmp(lm_ggml_backend_dev_name(dev), name) == 0) {
-            return dev;
-        }
-    }
-    return NULL;
-}
-
-lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
-    for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
-        lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
-        if (lm_ggml_backend_dev_type(dev) == type) {
-            return dev;
-        }
-    }
-    return NULL;
-}
-
-// Convenience functions
-lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
-    lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
-    if (!dev) {
-        return NULL;
-    }
-    return lm_ggml_backend_dev_init(dev, params);
-}
-
-lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
-    lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
-    if (!dev) {
-        return NULL;
-    }
-    return lm_ggml_backend_dev_init(dev, params);
-}
-
-lm_ggml_backend_t lm_ggml_backend_init_best(void) {
-    lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
-    if (!dev) {
-        dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
-    }
-    if (!dev) {
-        return NULL;
-    }
-    return lm_ggml_backend_dev_init(dev, NULL);
-}
-
 // multi-buffer buffer
 
 struct lm_ggml_backend_multi_buffer_context {
```
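The registry block removed here is not dropped from the package: per the file list above it moves into the new package/cpp/ggml-backend-reg.cpp, and the public enumeration API keeps the same shape. A hedged sketch of how a caller enumerates registered devices and falls back to the best available backend (the helper name is illustrative):

```c
#include <stdio.h>
#include "ggml-backend.h"

// List every registered device, then initialize the "best" one:
// a GPU device if one was compiled in and registered, otherwise CPU.
static lm_ggml_backend_t pick_backend(void) {
    for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
        lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
        fprintf(stderr, "device %zu: %s (%s)\n",
                i, lm_ggml_backend_dev_name(dev), lm_ggml_backend_dev_description(dev));
    }
    return lm_ggml_backend_init_best();
}
```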
```diff
@@ -880,7 +691,7 @@ static int lm_ggml_backend_sched_backend_id(lm_ggml_backend_sched_t sched, lm_gg
 }
 
 static int lm_ggml_backend_sched_backend_from_buffer(lm_ggml_backend_sched_t sched, const struct lm_ggml_tensor * tensor, const struct lm_ggml_tensor * op) {
-    lm_ggml_backend_buffer_t buffer = tensor->buffer;
+    lm_ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
     if (buffer == NULL) {
         return -1;
     }
```
```diff
@@ -913,8 +724,6 @@ static char causes[LM_GGML_DEFAULT_GRAPH_SIZE*16 + LM_GGML_SCHED_MAX_SPLITS_DEBU
 
 // returns the backend that should be used for the node based on the current locations
 static int lm_ggml_backend_sched_backend_id_from_cur(lm_ggml_backend_sched_t sched, struct lm_ggml_tensor * tensor) {
-    // TODO: use supports_op to check if the backend supports the op
-
     // assign pre-allocated nodes to their backend
     int cur_backend_id = lm_ggml_backend_sched_backend_from_buffer(sched, tensor, tensor);
     if (cur_backend_id != -1) {
```
```diff
@@ -933,7 +742,8 @@ static int lm_ggml_backend_sched_backend_id_from_cur(lm_ggml_backend_sched_t sch
 
     if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
         // since the tensor is pre-allocated, it cannot be moved to another backend
-        LM_GGML_ABORT("pre-allocated tensor in a backend that cannot run the operation");
+        lm_ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
+        LM_GGML_ABORT("pre-allocated tensor (%s) in a buffer (%s) that cannot run the operation (%s)", tensor->name, lm_ggml_backend_buffer_name(buffer), lm_ggml_op_name(tensor->op));
     }
 
     // graph input
```
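Both scheduler hunks apply the same rule the tensor I/O functions already use: a view tensor can have tensor->buffer == NULL while its storage belongs to the parent, so buffer lookups must go through view_src first. The pattern in isolation (the helper name is illustrative):

```c
#include "ggml.h"
#include "ggml-backend.h"

// Resolve the buffer that actually owns a tensor's storage: for views this
// is the parent's buffer, not the (possibly NULL) buffer of the view itself.
static lm_ggml_backend_buffer_t owning_buffer(const struct lm_ggml_tensor * t) {
    return t->view_src ? t->view_src->buffer : t->buffer;
}
```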
```diff
@@ -1640,7 +1450,7 @@ lm_ggml_backend_sched_t lm_ggml_backend_sched_new(
         bool parallel) {
     LM_GGML_ASSERT(n_backends > 0);
     LM_GGML_ASSERT(n_backends <= LM_GGML_SCHED_MAX_BACKENDS);
-    LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
+    LM_GGML_ASSERT(lm_ggml_backend_dev_type(lm_ggml_backend_get_device(backends[n_backends - 1])) == LM_GGML_BACKEND_DEVICE_TYPE_CPU);
 
     struct lm_ggml_backend_sched * sched = (lm_ggml_backend_sched *) calloc(1, sizeof(struct lm_ggml_backend_sched));
 
```
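The scheduler still requires the CPU backend last, but the check is now expressed through the device API rather than lm_ggml_backend_is_cpu(), which this diff removes from ggml-backend.cpp. A minimal construction sketch, assuming gpu_backend and cpu_backend were initialized elsewhere (e.g. via lm_ggml_backend_dev_init):

```c
#include "ggml.h"
#include "ggml-backend.h"

static lm_ggml_backend_sched_t make_sched(lm_ggml_backend_t gpu_backend,
                                          lm_ggml_backend_t cpu_backend) {
    // the CPU backend must come last; the new assert verifies this via
    // lm_ggml_backend_get_device() + lm_ggml_backend_dev_type()
    lm_ggml_backend_t backends[2] = { gpu_backend, cpu_backend };
    return lm_ggml_backend_sched_new(backends, /* bufts */ NULL, /* n_backends */ 2,
                                     /* graph_size */ LM_GGML_DEFAULT_GRAPH_SIZE,
                                     /* parallel */ false);
}
```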
```diff
@@ -1729,12 +1539,13 @@ bool lm_ggml_backend_sched_reserve(lm_ggml_backend_sched_t sched, struct lm_ggml
 
     lm_ggml_backend_sched_split_graph(sched, measure_graph);
 
+    lm_ggml_backend_sched_synchronize(sched);
+
     if (!lm_ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
         return false;
     }
 
     lm_ggml_backend_sched_reset(sched);
-    lm_ggml_backend_sched_synchronize(sched);
 
     return true;
 }
```
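The ordering matters here: reserve now synchronizes pending async work before measuring and allocating, rather than after the reset. The calling pattern is unchanged; a sketch of the usual reserve-then-compute flow (build_worst_case_graph is a hypothetical application helper, not part of the package):

```c
#include <stdbool.h>
#include "ggml.h"
#include "ggml-backend.h"

extern struct lm_ggml_cgraph * build_worst_case_graph(void); // hypothetical

static bool warm_up_and_run(lm_ggml_backend_sched_t sched, struct lm_ggml_cgraph * graph) {
    // one-time reservation against the largest graph we expect to run
    if (!lm_ggml_backend_sched_reserve(sched, build_worst_case_graph())) {
        return false;
    }
    // per-iteration compute reuses the reserved buffers
    return lm_ggml_backend_sched_graph_compute(sched, graph) == LM_GGML_STATUS_SUCCESS;
}
```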
```diff
@@ -2036,17 +1847,6 @@ bool lm_ggml_backend_compare_graph_backend(lm_ggml_backend_t backend1, lm_ggml_b
     return true;
 }
 
-
-
-#include "ggml-backend.h"
-#include "ggml-backend-impl.h"
-#include "ggml-cpu.h"
-#include "ggml-impl.h"
-#include <cctype>
-#include <string>
-
-// ggml-backend interface
-
 // CPU backend - buffer
 
 static void * lm_ggml_backend_cpu_buffer_get_base(lm_ggml_backend_buffer_t buffer) {
```
```diff
@@ -2120,7 +1920,9 @@ static const struct lm_ggml_backend_buffer_i lm_ggml_backend_cpu_buffer_from_ptr
     /* .reset = */ NULL,
 };
 
-// CPU backend
+// CPU backend buffer type
+
+// this buffer type is defined here to make it available to all backends
 
 static const char * lm_ggml_backend_cpu_buffer_type_get_name(lm_ggml_backend_buffer_type_t buft) {
     return "CPU";
```
```diff
@@ -2161,7 +1963,7 @@ lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_buffer_type(void) {
             /* .get_alloc_size = */ NULL, // defaults to lm_ggml_nbytes
             /* .is_host = */ lm_ggml_backend_cpu_buffer_type_is_host,
         },
-        /* .device = */ lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
+        /* .device = */ NULL, // FIXME lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
         /* .context = */ NULL,
     };
 
```
```diff
@@ -2184,478 +1986,14 @@ static lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_buffer_from_ptr_type(vo
             /* .get_alloc_size = */ NULL, // defaults to lm_ggml_nbytes
             /* .is_host = */ lm_ggml_backend_cpu_buffer_type_is_host,
         },
-        /* .device = */ lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
+        /* .device = */ NULL, // FIXME lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
         /* .context = */ NULL,
     };
 
     return &lm_ggml_backend_cpu_buffer_type;
 }
 
-#ifdef LM_GGML_USE_CPU_HBM
-
-// buffer type HBM
-
-#include <hbwmalloc.h>
-
-static const char * lm_ggml_backend_cpu_hbm_buffer_type_get_name(lm_ggml_backend_buffer_type_t buft) {
-    return "CPU_HBM";
-
-    LM_GGML_UNUSED(buft);
-}
-
-static void lm_ggml_backend_cpu_hbm_buffer_free_buffer(lm_ggml_backend_buffer_t buffer) {
-    hbw_free(buffer->context);
-}
-
-static lm_ggml_backend_buffer_t lm_ggml_backend_cpu_hbm_buffer_type_alloc_buffer(lm_ggml_backend_buffer_type_t buft, size_t size) {
-    void * ptr;
-    int result = hbw_posix_memalign(&ptr, lm_ggml_backend_cpu_buffer_type_get_alignment(buft), size);
-    if (result != 0) {
-        LM_GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
-        return NULL;
-    }
-
-    lm_ggml_backend_buffer_t buffer = lm_ggml_backend_cpu_buffer_from_ptr(ptr, size);
-    buffer->buft = buft;
-    buffer->iface.free_buffer = lm_ggml_backend_cpu_hbm_buffer_free_buffer;
-
-    return buffer;
-}
-
-lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_hbm_buffer_type(void) {
-    static struct lm_ggml_backend_buffer_type lm_ggml_backend_cpu_buffer_type_hbm = {
-        /* .iface = */ {
-            /* .get_name = */ lm_ggml_backend_cpu_hbm_buffer_type_get_name,
-            /* .alloc_buffer = */ lm_ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
-            /* .get_alignment = */ lm_ggml_backend_cpu_buffer_type_get_alignment,
-            /* .get_max_size = */ NULL, // defaults to SIZE_MAX
-            /* .get_alloc_size = */ NULL, // defaults to lm_ggml_nbytes
-            /* .is_host = */ lm_ggml_backend_cpu_buffer_type_is_host,
-        },
-        /* .context = */ NULL,
-    };
-
-    return &lm_ggml_backend_cpu_buffer_type_hbm;
-}
-#endif
-
-static lm_ggml_backend_buffer_type_t * lm_ggml_backend_cpu_get_extra_bufts(lm_ggml_backend_dev_t device) {
-    static lm_ggml_backend_buffer_type_t bufts[] = {
-#ifdef LM_GGML_USE_CPU_HBM
-        lm_ggml_backend_cpu_hbm_buffer_type(),
-#endif
-        NULL
-    };
-
-    return bufts;
-
-    LM_GGML_UNUSED(device);
-}
-
-// CPU backend - backend (stream)
-
-struct lm_ggml_backend_cpu_context {
-    int n_threads;
-    lm_ggml_threadpool_t threadpool;
-
-    uint8_t * work_data;
-    size_t work_size;
-
-    lm_ggml_abort_callback abort_callback;
-    void * abort_callback_data;
-};
-
-static const char * lm_ggml_backend_cpu_get_name(lm_ggml_backend_t backend) {
-    return "CPU";
-
-    LM_GGML_UNUSED(backend);
-}
-
-static void lm_ggml_backend_cpu_free(lm_ggml_backend_t backend) {
-    struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
-    delete[] cpu_ctx->work_data;
-    delete cpu_ctx;
-    delete backend;
-}
-
-struct lm_ggml_backend_plan_cpu {
-    struct lm_ggml_cplan cplan;
-    struct lm_ggml_cgraph cgraph;
-};
-
-static lm_ggml_backend_graph_plan_t lm_ggml_backend_cpu_graph_plan_create(lm_ggml_backend_t backend, const struct lm_ggml_cgraph * cgraph) {
-    struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
-
-    struct lm_ggml_backend_plan_cpu * cpu_plan = new lm_ggml_backend_plan_cpu;
-
-    cpu_plan->cplan = lm_ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
-    cpu_plan->cgraph = *cgraph; // FIXME: deep copy
-
-    if (cpu_plan->cplan.work_size > 0) {
-        cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
-        if (cpu_plan->cplan.work_data == NULL) {
-            delete cpu_plan;
-            return NULL;
-        }
-    }
-
-    cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
-    cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
-
-    return cpu_plan;
-}
-
-static void lm_ggml_backend_cpu_graph_plan_free(lm_ggml_backend_t backend, lm_ggml_backend_graph_plan_t plan) {
-    struct lm_ggml_backend_plan_cpu * cpu_plan = (struct lm_ggml_backend_plan_cpu *)plan;
-
-    delete[] cpu_plan->cplan.work_data;
-    delete cpu_plan;
-
-    LM_GGML_UNUSED(backend);
-}
-
-static enum lm_ggml_status lm_ggml_backend_cpu_graph_plan_compute(lm_ggml_backend_t backend, lm_ggml_backend_graph_plan_t plan) {
-    struct lm_ggml_backend_plan_cpu * cpu_plan = (struct lm_ggml_backend_plan_cpu *)plan;
-
-    return lm_ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
-
-    LM_GGML_UNUSED(backend);
-}
-
-static enum lm_ggml_status lm_ggml_backend_cpu_graph_compute(lm_ggml_backend_t backend, struct lm_ggml_cgraph * cgraph) {
-    struct lm_ggml_backend_cpu_context * cpu_ctx = (struct lm_ggml_backend_cpu_context *)backend->context;
-
-    struct lm_ggml_cplan cplan = lm_ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
-
-    if (cpu_ctx->work_size < cplan.work_size) {
-        delete[] cpu_ctx->work_data;
-        cpu_ctx->work_data = new uint8_t[cplan.work_size];
-        if (cpu_ctx->work_data == NULL) {
-            cpu_ctx->work_size = 0;
-            return LM_GGML_STATUS_ALLOC_FAILED;
-        }
-        cpu_ctx->work_size = cplan.work_size;
-    }
-    cplan.work_data = (uint8_t *)cpu_ctx->work_data;
-
-    cplan.abort_callback = cpu_ctx->abort_callback;
-    cplan.abort_callback_data = cpu_ctx->abort_callback_data;
-
-    return lm_ggml_graph_compute(cgraph, &cplan);
-}
-
-static const struct lm_ggml_backend_i lm_ggml_backend_cpu_i = {
-    /* .get_name = */ lm_ggml_backend_cpu_get_name,
-    /* .free = */ lm_ggml_backend_cpu_free,
-    /* .set_tensor_async = */ NULL,
-    /* .get_tensor_async = */ NULL,
-    /* .cpy_tensor_async = */ NULL,
-    /* .synchronize = */ NULL,
-    /* .graph_plan_create = */ lm_ggml_backend_cpu_graph_plan_create,
-    /* .graph_plan_free = */ lm_ggml_backend_cpu_graph_plan_free,
-    /* .graph_plan_update = */ NULL,
-    /* .graph_plan_compute = */ lm_ggml_backend_cpu_graph_plan_compute,
-    /* .graph_compute = */ lm_ggml_backend_cpu_graph_compute,
-    /* .event_record = */ NULL,
-    /* .event_wait = */ NULL,
-};
-
-static lm_ggml_guid_t lm_ggml_backend_cpu_guid(void) {
-    static lm_ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
-    return &guid;
-}
-
-lm_ggml_backend_t lm_ggml_backend_cpu_init(void) {
-    // initialize CPU backend now to avoid slowing the first graph computation
-    lm_ggml_cpu_init();
-
-    struct lm_ggml_backend_cpu_context * ctx = new lm_ggml_backend_cpu_context;
-    if (ctx == NULL) {
-        return NULL;
-    }
-
-    ctx->n_threads = LM_GGML_DEFAULT_N_THREADS;
-    ctx->threadpool = NULL;
-    ctx->work_data = NULL;
-    ctx->work_size = 0;
-    ctx->abort_callback = NULL;
-    ctx->abort_callback_data = NULL;
-
-    lm_ggml_backend_t cpu_backend = new lm_ggml_backend {
-        /* .guid = */ lm_ggml_backend_cpu_guid(),
-        /* .interface = */ lm_ggml_backend_cpu_i,
-        /* .device = */ lm_ggml_backend_reg_dev_get(lm_ggml_backend_cpu_reg(), 0),
-        /* .context = */ ctx,
-    };
-
-    if (cpu_backend == NULL) {
-        delete ctx;
-        return NULL;
-    }
-
-    return cpu_backend;
-}
-
-bool lm_ggml_backend_is_cpu(lm_ggml_backend_t backend) {
-    return backend != NULL && lm_ggml_guid_matches(backend->guid, lm_ggml_backend_cpu_guid());
-}
-
-void lm_ggml_backend_cpu_set_n_threads(lm_ggml_backend_t backend_cpu, int n_threads) {
-    LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
-
-    struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
-    ctx->n_threads = n_threads;
-}
-
-void lm_ggml_backend_cpu_set_threadpool(lm_ggml_backend_t backend_cpu, lm_ggml_threadpool_t threadpool) {
-    LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
-
-    struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
-
-    if (ctx->threadpool && ctx->threadpool != threadpool) {
-        // already had a different threadpool, pause/suspend it before switching
-        lm_ggml_threadpool_pause(ctx->threadpool);
-    }
-    ctx->threadpool = threadpool;
-}
-
-void lm_ggml_backend_cpu_set_abort_callback(lm_ggml_backend_t backend_cpu, lm_ggml_abort_callback abort_callback, void * abort_callback_data) {
-    LM_GGML_ASSERT(lm_ggml_backend_is_cpu(backend_cpu));
-
-    struct lm_ggml_backend_cpu_context * ctx = (struct lm_ggml_backend_cpu_context *)backend_cpu->context;
-    ctx->abort_callback = abort_callback;
-    ctx->abort_callback_data = abort_callback_data;
-}
-
 lm_ggml_backend_buffer_t lm_ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) {
     LM_GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned");
     return lm_ggml_backend_buffer_init(lm_ggml_backend_cpu_buffer_from_ptr_type(), lm_ggml_backend_cpu_buffer_from_ptr_i, ptr, size);
 }
-
-// CPU backend - device
-
-struct lm_ggml_backend_cpu_device_context {
-    std::string description = "CPU";
-
-    lm_ggml_backend_cpu_device_context() {
-#ifdef __APPLE__
-        size_t len = 0;
-        if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
-            description.resize(len);
-            sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
-        }
-#elif defined(__linux__)
-        FILE * f = fopen("/proc/cpuinfo", "r");
-        if (f) {
-            char buf[1024];
-            while (fgets(buf, sizeof(buf), f)) {
-                if (strncmp(buf, "model name", 10) == 0) {
-                    char * p = strchr(buf, ':');
-                    if (p) {
-                        p++;
-                        while (std::isspace(*p)) {
-                            p++;
-                        }
-                        while (std::isspace(p[strlen(p) - 1])) {
-                            p[strlen(p) - 1] = '\0';
-                        }
-                        description = p;
-                        break;
-                    }
-                }
-            }
-            fclose(f);
-        }
-#elif defined(_WIN32)
-        HKEY hKey;
-        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
-                        TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
-                        0,
-                        KEY_READ,
-                        &hKey) == ERROR_SUCCESS) {
-            DWORD cpu_brand_size = 0;
-            if (RegQueryValueExA(hKey,
-                                TEXT("ProcessorNameString"),
-                                NULL,
-                                NULL,
-                                NULL,
-                                &cpu_brand_size) == ERROR_SUCCESS) {
-                description.resize(cpu_brand_size);
-                if (RegQueryValueExA(hKey,
-                                    TEXT("ProcessorNameString"),
-                                    NULL,
-                                    NULL,
-                                    (LPBYTE)&description[0], // NOLINT
-                                    &cpu_brand_size) == ERROR_SUCCESS) {
-                    if (description.find('\0') != std::string::npos) {
-                        description.resize(description.find('\0'));
-                    }
-                }
-            }
-            RegCloseKey(hKey);
-        }
-#endif
-    }
-};
-
-static const char * lm_ggml_backend_cpu_device_get_name(lm_ggml_backend_dev_t dev) {
-    return "CPU";
-
-    LM_GGML_UNUSED(dev);
-}
-
-static const char * lm_ggml_backend_cpu_device_get_description(lm_ggml_backend_dev_t dev) {
-    struct lm_ggml_backend_cpu_device_context * ctx = (struct lm_ggml_backend_cpu_device_context *)dev->context;
-
-    return ctx->description.c_str();
-}
-
-static void lm_ggml_backend_cpu_device_get_memory(lm_ggml_backend_dev_t dev, size_t * free, size_t * total) {
-    // TODO
-    *free = 0;
-    *total = 0;
-
-    LM_GGML_UNUSED(dev);
-}
-
-static enum lm_ggml_backend_dev_type lm_ggml_backend_cpu_device_get_type(lm_ggml_backend_dev_t dev) {
-    return LM_GGML_BACKEND_DEVICE_TYPE_CPU;
-
-    LM_GGML_UNUSED(dev);
-}
-
-static void lm_ggml_backend_cpu_device_get_props(lm_ggml_backend_dev_t dev, struct lm_ggml_backend_dev_props * props) {
-    props->name = lm_ggml_backend_cpu_device_get_name(dev);
-    props->description = lm_ggml_backend_cpu_device_get_description(dev);
-    props->type = lm_ggml_backend_cpu_device_get_type(dev);
-    lm_ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
-    props->caps = {
-        /* .async = */ false,
-        /* .host_buffer = */ false,
-        /* .buffer_from_host_ptr = */ true,
-        /* .events = */ false,
-    };
-}
-
-static lm_ggml_backend_t lm_ggml_backend_cpu_device_init_backend(lm_ggml_backend_dev_t dev, const char * params) {
-    return lm_ggml_backend_cpu_init();
-
-    LM_GGML_UNUSED(dev);
-    LM_GGML_UNUSED(params);
-}
-
-static lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_device_get_buffer_type(lm_ggml_backend_dev_t dev) {
-    return lm_ggml_backend_cpu_buffer_type();
-
-    LM_GGML_UNUSED(dev);
-}
-
-static lm_ggml_backend_buffer_t lm_ggml_backend_cpu_device_buffer_from_host_ptr(lm_ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
-    return lm_ggml_backend_cpu_buffer_from_ptr(ptr, size);
-
-    LM_GGML_UNUSED(dev);
-    LM_GGML_UNUSED(max_tensor_size);
-}
-
-static bool lm_ggml_backend_cpu_device_supports_op(lm_ggml_backend_dev_t dev, const struct lm_ggml_tensor * op) {
-    switch (op->op) {
-        case LM_GGML_OP_CPY:
-            return
-                op->type != LM_GGML_TYPE_IQ2_XXS &&
-                op->type != LM_GGML_TYPE_IQ2_XS &&
-                op->type != LM_GGML_TYPE_IQ1_S &&
-                op->type != LM_GGML_TYPE_IQ1_M; // missing type_traits.from_float
-        case LM_GGML_OP_MUL_MAT:
-            return op->src[1]->type == LM_GGML_TYPE_F32;// FIXME || op->src[1]->type == lm_ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
-        case LM_GGML_OP_ROPE_BACK:
-            return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
-        case LM_GGML_OP_IM2COL_BACK:
-            return op->src[0]->type == LM_GGML_TYPE_F32 && op->src[1]->type == LM_GGML_TYPE_F32;
-        case LM_GGML_OP_OUT_PROD:
-            return (op->src[0]->type == LM_GGML_TYPE_F32 || lm_ggml_is_quantized(op->src[0]->type)) && op->src[1]->type == LM_GGML_TYPE_F32;
-        default:
-            return true;
-    }
-
-    LM_GGML_UNUSED(dev);
-}
-
-static bool lm_ggml_backend_cpu_device_supports_buft(lm_ggml_backend_dev_t dev, lm_ggml_backend_buffer_type_t buft) {
-    return lm_ggml_backend_buft_is_host(buft);
-
-    LM_GGML_UNUSED(dev);
-}
-
-static const struct lm_ggml_backend_device_i lm_ggml_backend_cpu_device_i = {
-    /* .get_name = */ lm_ggml_backend_cpu_device_get_name,
-    /* .get_description = */ lm_ggml_backend_cpu_device_get_description,
-    /* .get_memory = */ lm_ggml_backend_cpu_device_get_memory,
-    /* .get_type = */ lm_ggml_backend_cpu_device_get_type,
-    /* .get_props = */ lm_ggml_backend_cpu_device_get_props,
-    /* .init_backend = */ lm_ggml_backend_cpu_device_init_backend,
-    /* .get_buffer_type = */ lm_ggml_backend_cpu_device_get_buffer_type,
-    /* .get_host_buffer_type = */ NULL,
-    /* .buffer_from_host_ptr = */ lm_ggml_backend_cpu_device_buffer_from_host_ptr,
-    /* .supports_op = */ lm_ggml_backend_cpu_device_supports_op,
-    /* .supports_buft = */ lm_ggml_backend_cpu_device_supports_buft,
-    /* .offload_op = */ NULL,
-    /* .event_new = */ NULL,
-    /* .event_free = */ NULL,
-    /* .event_synchronize = */ NULL,
-};
-
-// CPU backend - backend (reg)
-
-static const char * lm_ggml_backend_cpu_reg_get_name(lm_ggml_backend_reg_t reg) {
-    return "CPU";
-
-    LM_GGML_UNUSED(reg);
-}
-
-static size_t lm_ggml_backend_cpu_reg_get_device_count(lm_ggml_backend_reg_t reg) {
-    return 1;
-
-    LM_GGML_UNUSED(reg);
-}
-
-static lm_ggml_backend_dev_t lm_ggml_backend_cpu_reg_get_device(lm_ggml_backend_reg_t reg, size_t index) {
-    LM_GGML_ASSERT(index == 0);
-
-    static lm_ggml_backend_cpu_device_context ctx;
-    static lm_ggml_backend_device lm_ggml_backend_cpu_device = {
-        /* .iface = */ lm_ggml_backend_cpu_device_i,
-        /* .reg = */ reg,
-        /* .context = */ &ctx,
-    };
-
-    return &lm_ggml_backend_cpu_device;
-}
-
-static void * lm_ggml_backend_cpu_get_proc_address(lm_ggml_backend_reg_t reg, const char * name) {
-    if (strcmp(name, "lm_ggml_backend_set_n_threads") == 0) {
-        return (void *)lm_ggml_backend_cpu_set_n_threads;
-    }
-    if (strcmp(name, "lm_ggml_backend_dev_get_extra_bufts") == 0) {
-        return (void *)lm_ggml_backend_cpu_get_extra_bufts;
-    }
-
-    return NULL;
-
-    LM_GGML_UNUSED(reg);
-}
-
-static const struct lm_ggml_backend_reg_i lm_ggml_backend_cpu_reg_i = {
-    /* .get_name = */ lm_ggml_backend_cpu_reg_get_name,
-    /* .get_device_count = */ lm_ggml_backend_cpu_reg_get_device_count,
-    /* .get_device = */ lm_ggml_backend_cpu_reg_get_device,
-    /* .get_proc_address = */ lm_ggml_backend_cpu_get_proc_address,
-};
-
-lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void) {
-    static struct lm_ggml_backend_reg lm_ggml_backend_cpu_reg = {
-        /* .iface = */ lm_ggml_backend_cpu_reg_i,
-        /* .context = */ NULL,
-    };
-
-    return &lm_ggml_backend_cpu_reg;
-}
```