cui-llama.rn 1.2.6 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +3 -2
  2. package/android/src/main/CMakeLists.txt +26 -6
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +115 -27
  4. package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
  5. package/android/src/main/jni.cpp +228 -40
  6. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
  7. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
  8. package/cpp/amx/amx.cpp +196 -0
  9. package/cpp/amx/amx.h +20 -0
  10. package/cpp/amx/common.h +101 -0
  11. package/cpp/amx/mmq.cpp +2524 -0
  12. package/cpp/amx/mmq.h +16 -0
  13. package/cpp/common.cpp +118 -251
  14. package/cpp/common.h +53 -30
  15. package/cpp/ggml-aarch64.c +46 -3395
  16. package/cpp/ggml-aarch64.h +0 -20
  17. package/cpp/ggml-alloc.c +6 -8
  18. package/cpp/ggml-backend-impl.h +33 -11
  19. package/cpp/ggml-backend-reg.cpp +423 -0
  20. package/cpp/ggml-backend.cpp +14 -676
  21. package/cpp/ggml-backend.h +46 -9
  22. package/cpp/ggml-common.h +6 -0
  23. package/cpp/ggml-cpu-aarch64.c +3823 -0
  24. package/cpp/ggml-cpu-aarch64.h +32 -0
  25. package/cpp/ggml-cpu-impl.h +14 -242
  26. package/cpp/ggml-cpu-quants.c +10835 -0
  27. package/cpp/ggml-cpu-quants.h +63 -0
  28. package/cpp/ggml-cpu.c +13971 -13720
  29. package/cpp/ggml-cpu.cpp +715 -0
  30. package/cpp/ggml-cpu.h +65 -63
  31. package/cpp/ggml-impl.h +285 -25
  32. package/cpp/ggml-metal.h +8 -8
  33. package/cpp/ggml-metal.m +1221 -728
  34. package/cpp/ggml-quants.c +189 -10681
  35. package/cpp/ggml-quants.h +78 -125
  36. package/cpp/ggml-threading.cpp +12 -0
  37. package/cpp/ggml-threading.h +12 -0
  38. package/cpp/ggml.c +688 -1460
  39. package/cpp/ggml.h +58 -244
  40. package/cpp/json-schema-to-grammar.cpp +1045 -1045
  41. package/cpp/json.hpp +24766 -24766
  42. package/cpp/llama-sampling.cpp +5 -2
  43. package/cpp/llama.cpp +409 -123
  44. package/cpp/llama.h +8 -4
  45. package/cpp/rn-llama.hpp +89 -25
  46. package/cpp/sampling.cpp +42 -3
  47. package/cpp/sampling.h +22 -1
  48. package/cpp/sgemm.cpp +608 -0
  49. package/cpp/speculative.cpp +270 -0
  50. package/cpp/speculative.h +28 -0
  51. package/cpp/unicode.cpp +11 -0
  52. package/ios/RNLlama.mm +43 -20
  53. package/ios/RNLlamaContext.h +9 -3
  54. package/ios/RNLlamaContext.mm +146 -33
  55. package/jest/mock.js +0 -1
  56. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  57. package/lib/commonjs/grammar.js +4 -2
  58. package/lib/commonjs/grammar.js.map +1 -1
  59. package/lib/commonjs/index.js +52 -15
  60. package/lib/commonjs/index.js.map +1 -1
  61. package/lib/module/NativeRNLlama.js.map +1 -1
  62. package/lib/module/grammar.js +2 -1
  63. package/lib/module/grammar.js.map +1 -1
  64. package/lib/module/index.js +51 -15
  65. package/lib/module/index.js.map +1 -1
  66. package/lib/typescript/NativeRNLlama.d.ts +122 -8
  67. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  68. package/lib/typescript/grammar.d.ts +5 -6
  69. package/lib/typescript/grammar.d.ts.map +1 -1
  70. package/lib/typescript/index.d.ts +15 -6
  71. package/lib/typescript/index.d.ts.map +1 -1
  72. package/package.json +2 -1
  73. package/src/NativeRNLlama.ts +135 -13
  74. package/src/grammar.ts +10 -8
  75. package/src/index.ts +104 -28
package/cpp/ggml-aarch64.h CHANGED
@@ -1,9 +1,5 @@
1
- // SPDX-FileCopyrightText: Copyright 2024 Arm Ltd.
2
1
  #pragma once
3
2
 
4
- #define LM_GGML_COMMON_DECL_C
5
- #include "ggml-common.h"
6
-
7
3
  #include "ggml.h"
8
4
 
9
5
  // GGML internal header
@@ -12,27 +8,11 @@
12
8
  extern "C" {
13
9
  #endif
14
10
 
15
- // Quantization
16
- void quantize_q8_0_4x4(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT y, int64_t k);
17
- void quantize_q8_0_4x8(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT y, int64_t k);
18
-
19
- void quantize_mat_q8_0(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT y, int64_t nrows, int64_t n_per_row, int64_t blck_size_interleave);
20
-
21
11
  // Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
22
12
  size_t quantize_q4_0_4x4(const float * LM_GGML_RESTRICT src, void * LM_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
23
13
  size_t quantize_q4_0_4x8(const float * LM_GGML_RESTRICT src, void * LM_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
24
14
  size_t quantize_q4_0_8x8(const float * LM_GGML_RESTRICT src, void * LM_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
25
15
 
26
- // GEMV
27
- void lm_ggml_gemv_q4_0_4x4_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
28
- void lm_ggml_gemv_q4_0_4x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
29
- void lm_ggml_gemv_q4_0_8x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
30
-
31
- // GEMM
32
- void lm_ggml_gemm_q4_0_4x4_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
33
- void lm_ggml_gemm_q4_0_4x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
34
- void lm_ggml_gemm_q4_0_8x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
35
-
36
16
  #ifdef __cplusplus
37
17
  }
38
18
  #endif
package/cpp/ggml-alloc.c CHANGED
@@ -466,18 +466,12 @@ static bool lm_ggml_gallocr_is_own(lm_ggml_gallocr_t galloc, struct lm_ggml_tens
466
466
  return lm_ggml_gallocr_hash_get(galloc, t)->allocated;
467
467
  }
468
468
 
469
- static void lm_ggml_gallocr_set_node_offset(lm_ggml_gallocr_t galloc, struct lm_ggml_tensor * node, int buffer_id, size_t offset) {
470
- struct hash_node * hn = lm_ggml_gallocr_hash_get(galloc, node);
471
- hn->buffer_id = buffer_id;
472
- hn->offset = offset;
473
- hn->allocated = true;
474
- }
475
-
476
469
  static bool lm_ggml_gallocr_is_allocated(lm_ggml_gallocr_t galloc, struct lm_ggml_tensor * t) {
477
470
  return t->data != NULL || lm_ggml_gallocr_hash_get(galloc, t)->allocated;
478
471
  }
479
472
 
480
473
  static void lm_ggml_gallocr_allocate_node(lm_ggml_gallocr_t galloc, struct lm_ggml_tensor * node, int buffer_id) {
474
+ LM_GGML_ASSERT(buffer_id >= 0);
481
475
  struct hash_node * hn = lm_ggml_gallocr_hash_get(galloc, node);
482
476
 
483
477
  if (!lm_ggml_gallocr_is_allocated(galloc, node) && !lm_ggml_is_view(node)) {
@@ -816,7 +810,11 @@ static void lm_ggml_gallocr_init_tensor(lm_ggml_gallocr_t galloc, struct lm_ggml
816
810
  }
817
811
 
818
812
  static bool lm_ggml_gallocr_node_needs_realloc(lm_ggml_gallocr_t galloc, struct lm_ggml_tensor * node, struct tensor_alloc * talloc) {
819
- size_t node_size = (node->data || node->view_src) ? 0 : lm_ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
813
+ size_t node_size = 0;
814
+ if (!node->data && !node->view_src) {
815
+ LM_GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
816
+ node_size = lm_ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
817
+ }
820
818
  return talloc->size_max >= node_size;
821
819
  }
822
820
 
package/cpp/ggml-backend-impl.h CHANGED
@@ -8,6 +8,8 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
+ #define LM_GGML_BACKEND_API_VERSION 1
12
+
11
13
  //
12
14
  // Backend buffer type
13
15
  //
@@ -63,20 +65,20 @@ extern "C" {
63
65
  enum lm_ggml_backend_buffer_usage usage;
64
66
  };
65
67
 
66
- lm_ggml_backend_buffer_t lm_ggml_backend_buffer_init(
68
+ LM_GGML_API lm_ggml_backend_buffer_t lm_ggml_backend_buffer_init(
67
69
  lm_ggml_backend_buffer_type_t buft,
68
70
  struct lm_ggml_backend_buffer_i iface,
69
71
  void * context,
70
72
  size_t size);
71
73
 
72
74
  // do not use directly, use lm_ggml_backend_tensor_copy instead
73
- bool lm_ggml_backend_buffer_copy_tensor(const struct lm_ggml_tensor * src, struct lm_ggml_tensor * dst);
75
+ LM_GGML_API bool lm_ggml_backend_buffer_copy_tensor(const struct lm_ggml_tensor * src, struct lm_ggml_tensor * dst);
74
76
 
75
77
  // multi-buffer
76
78
  // buffer that contains a collection of buffers
77
- lm_ggml_backend_buffer_t lm_ggml_backend_multi_buffer_alloc_buffer(lm_ggml_backend_buffer_t * buffers, size_t n_buffers);
78
- bool lm_ggml_backend_buffer_is_multi_buffer(lm_ggml_backend_buffer_t buffer);
79
- void lm_ggml_backend_multi_buffer_set_usage(lm_ggml_backend_buffer_t buffer, enum lm_ggml_backend_buffer_usage usage);
79
+ LM_GGML_API lm_ggml_backend_buffer_t lm_ggml_backend_multi_buffer_alloc_buffer(lm_ggml_backend_buffer_t * buffers, size_t n_buffers);
80
+ LM_GGML_API bool lm_ggml_backend_buffer_is_multi_buffer(lm_ggml_backend_buffer_t buffer);
81
+ LM_GGML_API void lm_ggml_backend_multi_buffer_set_usage(lm_ggml_backend_buffer_t buffer, enum lm_ggml_backend_buffer_usage usage);
80
82
 
81
83
  //
82
84
  // Backend (stream)
@@ -199,17 +201,37 @@ extern "C" {
199
201
  };
200
202
 
201
203
  struct lm_ggml_backend_reg {
202
- // int api_version; // TODO: for dynamic loading
204
+ int api_version; // initialize to LM_GGML_BACKEND_API_VERSION
203
205
  struct lm_ggml_backend_reg_i iface;
204
206
  void * context;
205
207
  };
206
208
 
207
-
208
209
  // Internal backend registry API
209
- void lm_ggml_backend_register(lm_ggml_backend_reg_t reg);
210
- void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device);
211
- // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
212
- // typedef lm_ggml_backend_register_t * (*lm_ggml_backend_init)(void);
210
+ LM_GGML_API void lm_ggml_backend_register(lm_ggml_backend_reg_t reg);
211
+ LM_GGML_API void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device);
212
+
213
+ // Add backend dynamic loading support to the backend
214
+ typedef lm_ggml_backend_reg_t (*lm_ggml_backend_init_t)(void);
215
+
216
+ #ifdef LM_GGML_BACKEND_DL
217
+ #ifdef __cplusplus
218
+ # define LM_GGML_BACKEND_DL_IMPL(reg_fn) \
219
+ extern "C" { \
220
+ LM_GGML_BACKEND_API lm_ggml_backend_reg_t lm_ggml_backend_init(void); \
221
+ } \
222
+ lm_ggml_backend_reg_t lm_ggml_backend_init(void) { \
223
+ return reg_fn(); \
224
+ }
225
+ #else
226
+ # define LM_GGML_BACKEND_DL_IMPL(reg_fn) \
227
+ LM_GGML_BACKEND_API lm_ggml_backend_reg_t lm_ggml_backend_init(void); \
228
+ lm_ggml_backend_reg_t lm_ggml_backend_init(void) { \
229
+ return reg_fn(); \
230
+ }
231
+ #endif
232
+ #else
233
+ # define LM_GGML_BACKEND_DL_IMPL(reg_fn)
234
+ #endif
213
235
 
214
236
  #ifdef __cplusplus
215
237
  }
package/cpp/ggml-backend-reg.cpp ADDED
@@ -0,0 +1,423 @@
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include <algorithm>
5
+ #include <cstring>
6
+ #include <string>
7
+ #include <vector>
8
+
9
+ #ifdef _WIN32
10
+ # define WIN32_LEAN_AND_MEAN
11
+ # ifndef NOMINMAX
12
+ # define NOMINMAX
13
+ # endif
14
+ # include <windows.h>
15
+ #elif defined(__APPLE__)
16
+ # include <mach-o/dyld.h>
17
+ # include <dlfcn.h>
18
+ #else
19
+ # include <dlfcn.h>
20
+ # include <unistd.h>
21
+ #endif
22
+
23
+ // Backend registry
24
+ #ifdef LM_GGML_USE_CPU
25
+ #include "ggml-cpu.h"
26
+ #endif
27
+
28
+ #ifdef LM_GGML_USE_CUDA
29
+ #include "ggml-cuda.h"
30
+ #endif
31
+
32
+ #ifdef LM_GGML_USE_METAL
33
+ #include "ggml-metal.h"
34
+ #endif
35
+
36
+ #ifdef LM_GGML_USE_SYCL
37
+ #include "ggml-sycl.h"
38
+ #endif
39
+
40
+ #ifdef LM_GGML_USE_VULKAN
41
+ #include "ggml-vulkan.h"
42
+ #endif
43
+
44
+ #ifdef LM_GGML_USE_BLAS
45
+ #include "ggml-blas.h"
46
+ #endif
47
+
48
+ #ifdef LM_GGML_USE_RPC
49
+ #include "ggml-rpc.h"
50
+ #endif
51
+
52
+ #ifdef LM_GGML_USE_CANN
53
+ #include "ggml-cann.h"
54
+ #endif
55
+
56
+ #ifdef LM_GGML_USE_KOMPUTE
57
+ #include "ggml-kompute.h"
58
+ #endif
59
+
60
+ struct lm_ggml_backend_reg_entry { // one row of the registry: a backend registration and the library it came from
61
+ lm_ggml_backend_reg_t reg; // registration object passed to register_backend()
62
+ void * handle; // LoadLibraryA/dlopen handle set by load_backend(); nullptr when registered without a handle
63
+ };
64
+
65
+ struct lm_ggml_backend_registry {
66
+ std::vector<lm_ggml_backend_reg_entry> backends;
67
+ std::vector<lm_ggml_backend_dev_t> devices;
68
+
69
+ lm_ggml_backend_registry() {
70
+ #ifdef LM_GGML_USE_CUDA
71
+ register_backend(lm_ggml_backend_cuda_reg());
72
+ #endif
73
+ #ifdef LM_GGML_USE_METAL
74
+ register_backend(lm_ggml_backend_metal_reg());
75
+ #endif
76
+ #ifdef LM_GGML_USE_SYCL
77
+ register_backend(lm_ggml_backend_sycl_reg());
78
+ #endif
79
+ #ifdef LM_GGML_USE_VULKAN
80
+ register_backend(lm_ggml_backend_vk_reg());
81
+ #endif
82
+ #ifdef LM_GGML_USE_CANN
83
+ register_backend(lm_ggml_backend_cann_reg());
84
+ #endif
85
+ #ifdef LM_GGML_USE_BLAS
86
+ register_backend(lm_ggml_backend_blas_reg());
87
+ #endif
88
+ #ifdef LM_GGML_USE_RPC
89
+ register_backend(lm_ggml_backend_rpc_reg());
90
+ #endif
91
+ #ifdef LM_GGML_USE_KOMPUTE
92
+ register_backend(lm_ggml_backend_kompute_reg());
93
+ #endif
94
+ #ifdef LM_GGML_USE_CPU
95
+ register_backend(lm_ggml_backend_cpu_reg());
96
+ #endif
97
+ }
98
+
99
+ ~lm_ggml_backend_registry() {
100
+ while (!backends.empty()) {
101
+ // use silent since the log system may have been destroyed at this point
102
+ unload_backend(backends.back().reg, true);
103
+ }
104
+ }
105
+
106
+ void register_backend(lm_ggml_backend_reg_t reg, void * handle = nullptr) {
107
+ if (!reg) {
108
+ return;
109
+ }
110
+
111
+ #ifndef NDEBUG
112
+ LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
113
+ __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
114
+ #endif
115
+ backends.push_back({ reg, handle });
116
+ for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
117
+ register_device(lm_ggml_backend_reg_dev_get(reg, i));
118
+ }
119
+ }
120
+
121
+ void register_device(lm_ggml_backend_dev_t device) {
122
+ #ifndef NDEBUG
123
+ LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
124
+ #endif
125
+ devices.push_back(device);
126
+ }
127
+
128
+ lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
129
+ #ifdef _WIN32
130
+ // suppress error dialogs for missing DLLs
131
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
132
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
133
+
134
+ HMODULE handle = LoadLibraryA(path);
135
+
136
+ if (!handle) {
137
+ if (!silent) {
138
+ LM_GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
139
+ }
140
+ SetErrorMode(old_mode);
141
+ return nullptr;
142
+ }
143
+
144
+ lm_ggml_backend_init_t backend_init = (lm_ggml_backend_init_t) GetProcAddress(handle, "lm_ggml_backend_init");
145
+
146
+ SetErrorMode(old_mode);
147
+
148
+ if (!backend_init) {
149
+ if (!silent) {
150
+ LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
151
+ }
152
+ FreeLibrary(handle);
153
+ return nullptr;
154
+ }
155
+ #else
156
+ void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
157
+
158
+ if (!handle) {
159
+ if (!silent) {
160
+ LM_GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
161
+ }
162
+ return nullptr;
163
+ }
164
+
165
+ auto * backend_init = (lm_ggml_backend_init_t) dlsym(handle, "lm_ggml_backend_init");
166
+
167
+ if (!backend_init) {
168
+ if (!silent) {
169
+ LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %s\n", __func__, path, dlerror());
170
+ }
171
+ dlclose(handle);
172
+ return nullptr;
173
+ }
174
+ #endif
175
+ lm_ggml_backend_reg_t reg = backend_init();
176
+
177
+ if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
178
+ if (!silent) {
179
+ if (!reg) {
180
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
181
+ } else {
182
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
183
+ __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
184
+ }
185
+ }
186
+ #ifdef _WIN32
187
+ FreeLibrary(handle);
188
+ #else
189
+ dlclose(handle);
190
+ #endif
191
+ return nullptr;
192
+ }
193
+
194
+ LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
195
+ register_backend(reg, handle);
196
+ return reg;
197
+ }
198
+
199
+ void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
200
+ auto it = std::find_if(backends.begin(), backends.end(),
201
+ [reg](lm_ggml_backend_reg_entry entry) { return entry.reg == reg; });
202
+
203
+ if (it == backends.end()) {
204
+ if (!silent) {
205
+ LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
206
+ }
207
+ return;
208
+ }
209
+
210
+ if (!silent) {
211
+ LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
212
+ }
213
+
214
+ // remove devices
215
+ devices.erase(
216
+ std::remove_if(devices.begin(), devices.end(),
217
+ [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
218
+ devices.end());
219
+
220
+ // unload library
221
+ if (it->handle) {
222
+ #ifdef _WIN32
223
+ FreeLibrary((HMODULE) it->handle);
224
+ #else
225
+ dlclose(it->handle);
226
+ #endif
227
+ }
228
+
229
+ // remove backend
230
+ backends.erase(it);
231
+ }
232
+ };
233
+
234
+ static lm_ggml_backend_registry & get_reg() {
235
+ static lm_ggml_backend_registry reg;
236
+ return reg;
237
+ }
238
+
239
+ // Internal API
240
+ void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
241
+ get_reg().register_backend(reg);
242
+ }
243
+
244
+ void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
245
+ get_reg().register_device(device);
246
+ }
247
+
248
+ // Backend (reg) enumeration
249
// Case-insensitive equality of two NUL-terminated strings.
// Returns true only when both strings have the same length and every pair of
// characters compares equal after std::tolower. A null pointer never matches
// anything (including another null pointer).
static bool striequals(const char * a, const char * b) {
    if (a == nullptr || b == nullptr) {
        return false;
    }
    for (; *a && *b; a++, b++) {
        // std::tolower requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (non-ASCII byte) is undefined behaviour
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // equal only if both strings ended at the same position
    return *a == *b;
}
257
+
258
+ size_t lm_ggml_backend_reg_count() {
259
+ return get_reg().backends.size();
260
+ }
261
+
262
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
263
+ LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
264
+ return get_reg().backends[index].reg;
265
+ }
266
+
267
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
268
+ for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
269
+ lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
270
+ if (striequals(lm_ggml_backend_reg_name(reg), name)) {
271
+ return reg;
272
+ }
273
+ }
274
+ return nullptr;
275
+ }
276
+
277
+ // Device enumeration
278
+ size_t lm_ggml_backend_dev_count() {
279
+ return get_reg().devices.size();
280
+ }
281
+
282
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
283
+ LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
284
+ return get_reg().devices[index];
285
+ }
286
+
287
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
288
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
289
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
290
+ if (striequals(lm_ggml_backend_dev_name(dev), name)) {
291
+ return dev;
292
+ }
293
+ }
294
+ return nullptr;
295
+ }
296
+
297
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
298
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
299
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
300
+ if (lm_ggml_backend_dev_type(dev) == type) {
301
+ return dev;
302
+ }
303
+ }
304
+ return nullptr;
305
+ }
306
+
307
+ // Convenience functions
308
+ lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
309
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
310
+ if (!dev) {
311
+ return nullptr;
312
+ }
313
+ return lm_ggml_backend_dev_init(dev, params);
314
+ }
315
+
316
+ lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
317
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
318
+ if (!dev) {
319
+ return nullptr;
320
+ }
321
+ return lm_ggml_backend_dev_init(dev, params);
322
+ }
323
+
324
+ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
325
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
326
+ if (!dev) {
327
+ dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
328
+ }
329
+ if (!dev) {
330
+ return nullptr;
331
+ }
332
+ return lm_ggml_backend_dev_init(dev, nullptr);
333
+ }
334
+
335
+ // Dynamic loading
336
+ lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
337
+ return get_reg().load_backend(path, false);
338
+ }
339
+
340
+ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
341
+ get_reg().unload_backend(reg, true);
342
+ }
343
+
344
+ void lm_ggml_backend_load_all() {
345
+ std::vector<std::string> search_prefix;
346
+
347
+ // add the executable directory to the search path
348
+ // FIXME: this is convenient for development, but it should probably be disabled in production
349
+
350
+ #if defined(__APPLE__)
351
+ // get executable path
352
+ std::vector<char> path;
353
+ uint32_t size;
354
+ while (true) {
355
+ size = path.size();
356
+ if (_NSGetExecutablePath(path.data(), &size) == 0) {
357
+ break;
358
+ }
359
+ path.resize(size);
360
+ }
361
+ std::string base_path(path.data(), size);
362
+ // remove executable name
363
+ auto last_slash = base_path.find_last_of('/');
364
+ if (last_slash != std::string::npos) {
365
+ base_path = base_path.substr(0, last_slash);
366
+ }
367
+ search_prefix.push_back(base_path + "/");
368
+ #elif defined(__linux__)
369
+ std::string base_path = ".";
370
+ std::vector<char> path(1024);
371
+ while (true) {
372
+ // get executable path
373
+ ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
374
+ if (len == -1) {
375
+ break;
376
+ }
377
+ if (len < (ssize_t) path.size()) {
378
+ base_path = std::string(path.data(), len);
379
+ // remove executable name
380
+ auto last_slash = base_path.find_last_of('/');
381
+ if (last_slash != std::string::npos) {
382
+ base_path = base_path.substr(0, last_slash);
383
+ }
384
+ break;
385
+ }
386
+ path.resize(path.size() * 2);
387
+ }
388
+
389
+ search_prefix.push_back(base_path + "/");
390
+ #endif
391
+
392
+ auto & reg = get_reg();
393
+
394
+ auto try_load = [&](const std::string & name) {
395
+ std::string os_name;
396
+ #ifdef _WIN32
397
+ os_name = "ggml-" + name + ".dll";
398
+ #else
399
+ os_name = "libggml-" + name + ".so";
400
+ #endif
401
+ if (reg.load_backend(os_name.c_str(), true)) {
402
+ return;
403
+ }
404
+ for (const auto & prefix : search_prefix) {
405
+ if (reg.load_backend((prefix + os_name).c_str(), true)) {
406
+ return;
407
+ }
408
+ }
409
+ };
410
+
411
+ try_load("amx");
412
+ try_load("blas");
413
+ try_load("cann");
414
+ try_load("cuda");
415
+ try_load("hip");
416
+ try_load("kompute");
417
+ try_load("metal");
418
+ try_load("rpc");
419
+ try_load("sycl");
420
+ try_load("vulkan");
421
+ try_load("musa");
422
+ try_load("cpu");
423
+ }