@fugood/llama.node 0.4.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +4 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.ts +66 -6
- package/lib/index.js +59 -17
- package/lib/index.ts +74 -23
- package/package.json +1 -1
- package/src/DecodeAudioTokenWorker.cpp +40 -0
- package/src/DecodeAudioTokenWorker.h +22 -0
- package/src/EmbeddingWorker.cpp +7 -5
- package/src/LlamaCompletionWorker.cpp +68 -54
- package/src/LlamaCompletionWorker.h +7 -8
- package/src/LlamaContext.cpp +551 -235
- package/src/LlamaContext.h +26 -4
- package/src/LoadSessionWorker.cpp +4 -2
- package/src/SaveSessionWorker.cpp +10 -6
- package/src/TokenizeWorker.cpp +23 -14
- package/src/TokenizeWorker.h +2 -2
- package/src/addons.cc +8 -11
- package/src/common.hpp +129 -126
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
- package/src/tts_utils.cpp +342 -0
- package/src/tts_utils.h +62 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp

@@ -27,6 +27,7 @@
 #include <cmath>
 #include <memory>
 #include <charconv>
+#include <mutex>
 
 #undef MIN
 #undef MAX
@@ -74,6 +75,7 @@ struct ggml_cl_version {
     cl_uint minor = 0;
 };
 
+
 struct ggml_cl_compiler_version {
     ADRENO_CL_COMPILER_TYPE type;
     int major = -1;
@@ -91,6 +93,14 @@ struct ggml_cl_compiler_version {
     }
 };
 
+static size_t align_to(size_t value, size_t to_alignment) {
+    GGML_ASSERT(to_alignment && "Invalid alignment (must be non-zero)");
+    GGML_ASSERT((to_alignment & (to_alignment - 1)) == 0 && "to_alignment must be power-of-two");
+
+    return ((value + to_alignment - 1) / to_alignment) * to_alignment;
+}
+
+
 // Parses a version string of form "XX.YY ". On an error returns ggml_cl_version with all zeroes.
 static ggml_cl_version parse_cl_version(std::string_view str) {
     size_t major_str_begin = 0;
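Note: the align_to helper added above rounds a byte offset up to the next multiple of a power-of-two alignment. A minimal standalone sketch of the same round-up arithmetic (names here are illustrative, not from the package):

    #include <cassert>
    #include <cstddef>

    // Round value up to the next multiple of a power-of-two alignment,
    // the same arithmetic as the align_to helper in the hunk above.
    static size_t round_up(size_t value, size_t alignment) {
        assert(alignment && (alignment & (alignment - 1)) == 0);
        return ((value + alignment - 1) / alignment) * alignment;
    }

    int main() {
        assert(round_up(0, 128)   == 0);
        assert(round_up(1, 128)   == 128);
        assert(round_up(128, 128) == 128);
        assert(round_up(129, 128) == 256);
        return 0;
    }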
@@ -221,13 +231,25 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *driver_version) {
     return { type, major, minor, patch };
 }
 
+struct ggml_backend_opencl_context;
+
 // backend device context
 struct ggml_backend_opencl_device_context {
     cl_platform_id platform;
     std::string platform_name;
 
-    cl_device_id device;
-    std::string device_name;
+    cl_device_id device;
+    std::string device_name;
+    cl_device_type device_type;
+    std::string device_version;
+
+    // Initialized by ggml_cl2_init().
+    ggml_backend_opencl_context * backend_ctx = nullptr;
+
+    // Initialized by ggml_backend_opencl_device_get_buffer_type()
+    ggml_backend_buffer_type buffer_type;
+
+    cl_context context = nullptr;
 };
 
 // backend context
@@ -248,6 +270,8 @@ struct ggml_backend_opencl_context {
 
     int adreno_wave_size;
 
+    cl_bool non_uniform_workgroups;
+
     cl_context context;
     cl_command_queue queue;
 
@@ -344,15 +368,8 @@ struct ggml_backend_opencl_context {
 #endif // GGML_OPENCL_USE_ADRENO_KERNELS
 };
 
-
-static ggml_backend_opencl_device_context g_ggml_ctx_dev_main {
-    /*.platform =*/ nullptr,
-    /*.platform_nane =*/ "",
-    /*.device =*/ nullptr,
-    /*.device_name =*/ "",
-};
-
-static int ggml_backend_opencl_n_devices = 0;
+// All registered devices with a default device in the front.
+static std::vector<ggml_backend_device> g_ggml_backend_opencl_devices;
 
 // Profiling
 #ifdef GGML_OPENCL_PROFILING
@@ -1107,25 +1124,19 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_version opencl_c_version) {
     GGML_LOG_CONT("\n");
 }
 
-static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
-    static bool initialized = false;
-    static ggml_backend_opencl_context *backend_ctx = nullptr;
+// XXX static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
+// XXX static bool initialized = false;
+// XXX static ggml_backend_opencl_context *backend_ctx = nullptr;
 
-    if (initialized) {
-        return backend_ctx;
-    }
+static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev);
 
-
-
-
-    GGML_ASSERT(dev_ctx->device == nullptr);
-    GGML_ASSERT(backend_ctx == nullptr);
-
-    initialized = true;
-    backend_ctx = new ggml_backend_opencl_context();
-    backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN;
+namespace /* anonymous */ {
+extern struct ggml_backend_device_i ggml_backend_opencl_device_i;
+}
 
-
+// Look for available and suitable devices.
+static std::vector<ggml_backend_device> ggml_opencl_probe_devices(ggml_backend_reg * reg) {
+    std::vector<ggml_backend_device> found_devices;
 
 #ifdef GGML_OPENCL_PROFILING
     GGML_LOG_INFO("ggml_opencl: OpenCL profiling enabled\n");
@@ -1158,11 +1169,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     struct cl_device devices[NDEV];
     unsigned n_devices = 0;
     struct cl_device * default_device = NULL;
+    unsigned default_platform_number = 0;
 
     cl_platform_id platform_ids[NPLAT];
     if (clGetPlatformIDs(NPLAT, platform_ids, &n_platforms) != CL_SUCCESS) {
         GGML_LOG_ERROR("ggml_opencl: plaform IDs not available.\n");
-        return backend_ctx;
+        return found_devices;
     }
 
     for (unsigned i = 0; i < n_platforms; i++) {
@@ -1197,19 +1209,22 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
         }
 
         if (default_device == NULL && p->default_device != NULL) {
-            default_device = p->default_device;
+            default_device = p->default_device;
+            default_platform_number = i;
         }
     }
 
     if (n_devices == 0) {
         GGML_LOG_ERROR("ggml_opencl: could find any OpenCL devices.\n");
-        return backend_ctx;
+        return found_devices;
     }
 
-    char * user_platform_string = getenv("GGML_OPENCL_PLATFORM");
-    char * user_device_string = getenv("GGML_OPENCL_DEVICE");
-    int user_platform_number = -1;
-    int user_device_number = -1;
+    char * user_platform_string = getenv("GGML_OPENCL_PLATFORM");
+    char * user_device_string = getenv("GGML_OPENCL_DEVICE");
+    int user_platform_number = -1;
+    int user_device_number = -1;
+    cl_device * candidate_devices = nullptr;
+    unsigned n_candidate_devices = 0;
 
     unsigned n;
     if (user_platform_string != NULL && sscanf(user_platform_string, " %u", &n) == 1 && n < n_platforms) {
@@ -1224,12 +1239,11 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
             GGML_LOG_ERROR("ggml_opencl: invalid device number %d\n", user_device_number);
             exit(1);
         }
-        default_device = &platform->devices[user_device_number];
+        default_device = &platform->devices[user_device_number];
+        candidate_devices = platform->devices;
+        n_candidate_devices = platform->n_devices;
     } else {
-
-        struct cl_device * selected_devices = devices;
-        unsigned n_selected_devices = n_devices;
-
+        // Choose a platform by matching a substring.
         if (user_platform_number == -1 && user_platform_string != NULL && user_platform_string[0] != 0) {
             for (unsigned i = 0; i < n_platforms; i++) {
                 struct cl_platform * p = &platforms[i];
@@ -1244,20 +1258,20 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
                 exit(1);
             }
         }
-
-
-
-
-
-
-
-
-
+
+        int platform_idx = user_platform_number != -1 ? user_platform_number : default_platform_number;
+        struct cl_platform * p = &platforms[platform_idx];
+        candidate_devices = p->devices;
+        n_candidate_devices = p->n_devices;
+        default_device = p->default_device;
+        if (n_candidate_devices == 0) {
+            GGML_LOG_ERROR("ggml_opencl: selected platform '%s' does not have any devices.\n", p->name);
+            exit(1);
         }
 
         if (user_device_number == -1 && user_device_string != NULL && user_device_string[0] != 0) {
-            for (unsigned i = 0; i < n_selected_devices; i++) {
-                struct cl_device * d = &selected_devices[i];
+            for (unsigned i = 0; i < n_candidate_devices; i++) {
+                struct cl_device * d = &candidate_devices[i];
                 if (strstr(d->name, user_device_string) != NULL) {
                     user_device_number = d->number;
                     break;
@@ -1269,71 +1283,145 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
             }
         }
         if (user_device_number != -1) {
-            selected_devices = &devices[user_device_number];
-            n_selected_devices = 1;
-            default_device = &selected_devices[0];
+            candidate_devices = &devices[user_device_number];
+            n_candidate_devices = 1;
+            default_device = &candidate_devices[0];
         }
 
-        GGML_ASSERT(n_selected_devices > 0);
+        GGML_ASSERT(n_candidate_devices > 0);
 
         if (default_device == NULL) {
-            default_device = &selected_devices[0];
+            default_device = &candidate_devices[0];
         }
     }
 
-
-
-
-
+    GGML_ASSERT(n_candidate_devices != 0 && candidate_devices);
+
+    // Put the default device in front.
+    for (unsigned i = 1; i < n_candidate_devices; i++) {
+        if (&candidate_devices[i] == default_device) {
+            std::swap(candidate_devices[0], candidate_devices[i]);
+            default_device = &candidate_devices[0];
+            break;
+        }
     }
 
-
-
-
+    GGML_LOG_INFO("ggml_opencl: selected platform: '%s'\n", default_device->platform->name);
+
+    std::vector<cl_device_id> device_ids;
+    for (auto dev = candidate_devices, dev_end = candidate_devices + n_candidate_devices; dev != dev_end; dev++) {
+        device_ids.push_back(dev->id);
+    }
 
-    if (strstr(default_device->name, "Adreno") ||
-        strstr(default_device->name, "Qualcomm") ||
-        strstr(default_device->version, "Adreno")) {
+    cl_int err;
+    cl_context shared_context;
+    cl_context_properties properties[] = { (intptr_t) CL_CONTEXT_PLATFORM, (intptr_t) default_device->platform->id, 0 };
+
+    CL_CHECK(
+        (shared_context = clCreateContext(properties, device_ids.size(), device_ids.data(), NULL, NULL, &err), err));
+
+    for (auto dev = candidate_devices, dev_end = candidate_devices + n_candidate_devices; dev != dev_end; dev++) {
+        GGML_LOG_INFO("\nggml_opencl: device: '%s (%s)'\n", dev->name, dev->version);
+
+        auto dev_ctx = std::unique_ptr<ggml_backend_opencl_device_context>(new ggml_backend_opencl_device_context{
+            /*.platform =*/ dev->platform->id,
+            /*.platform_nane =*/ dev->platform->name,
+            /*.device =*/ dev->id,
+            /*.device_name =*/ dev->name,
+            /*.device_type =*/ dev->type,
+            /*.device_version =*/ dev->version,
+            /*.backend_ctx =*/ nullptr,
+            /*.buffer_type =*/ {},
+            /*.context =*/ shared_context,
+        });
+
+        found_devices.push_back(ggml_backend_device{
+            /* .iface = */ ggml_backend_opencl_device_i,
+            /* .reg = */ reg,
+            /* .context = */ dev_ctx.get(),
+        });
+
+        if (!ggml_cl2_init(&found_devices.back())) {
+            found_devices.pop_back();
+            GGML_LOG_INFO("ggml_opencl: drop unsupported device.\n");
+            continue;
+        }
+
+        dev_ctx.release();
+    }
+
+    if (found_devices.size()) {
+        auto * dev_ctx = static_cast<ggml_backend_opencl_device_context *>(found_devices.front().context);
+        GGML_LOG_INFO("ggml_opencl: default device: '%s (%s)'\n", dev_ctx->device_name.c_str(),
+                      dev_ctx->device_version.c_str());
+
+        if (dev_ctx->device_type != CL_DEVICE_TYPE_GPU) {
+            GGML_LOG_WARN("ggml_opencl: warning, the default device is not a GPU: '%s'.\n",
+                          dev_ctx->device_name.c_str());
+        }
+    }
+
+    return found_devices;
+}
+
+// Initialize device if it is supported (returns nullptr if it is not).
+static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
+    GGML_ASSERT(dev);
+    GGML_ASSERT(dev->context);
+
+    ggml_backend_opencl_device_context * dev_ctx = (ggml_backend_opencl_device_context *) dev->context;
+    GGML_ASSERT(dev_ctx->platform);
+    GGML_ASSERT(dev_ctx->device);
+
+    if (dev_ctx->backend_ctx) {
+        return dev_ctx->backend_ctx;
+    }
+
+    auto backend_ctx = std::make_unique<ggml_backend_opencl_context>();
+    backend_ctx->device = dev_ctx->device;
+    backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN;
+
+    if (strstr(dev_ctx->device_name.c_str(), "Adreno") ||
+        strstr(dev_ctx->device_name.c_str(), "Qualcomm") ||
+        strstr(dev_ctx->device_version.c_str(), "Adreno")) {
         backend_ctx->gpu_family = GPU_FAMILY::ADRENO;
         // Usually device version contains the detailed device name
-        backend_ctx->adreno_gen = get_adreno_gpu_gen(default_device->version);
+        backend_ctx->adreno_gen = get_adreno_gpu_gen(dev_ctx->device_version.c_str());
         if (backend_ctx->adreno_gen == ADRENO_GPU_GEN::ADRENO_UNKNOWN) {
-            backend_ctx->adreno_gen = get_adreno_gpu_gen(default_device->name);
+            backend_ctx->adreno_gen = get_adreno_gpu_gen(dev_ctx->device_name.c_str());
         }
 
         // Use wave size of 64 for all Adreno GPUs.
         backend_ctx->adreno_wave_size = 64;
-    } else if (strstr(default_device->name, "Intel")) {
+    } else if (strstr(dev_ctx->device_name.c_str(), "Intel")) {
         backend_ctx->gpu_family = GPU_FAMILY::INTEL;
     } else {
-        GGML_LOG_ERROR("Unsupported GPU: %s\n", default_device->name);
+        GGML_LOG_ERROR("Unsupported GPU: %s\n", dev_ctx->device_name.c_str());
         backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN;
-        return backend_ctx;
+        return nullptr;
    }
 
 #ifdef GGML_OPENCL_USE_ADRENO_KERNELS
     if (backend_ctx->gpu_family != GPU_FAMILY::ADRENO) {
         GGML_LOG_ERROR("ggml_opencl: Adreno-specific kernels should not be enabled for non-Adreno GPUs; "
             "run on an Adreno GPU or recompile with CMake option `-DGGML_OPENCL_USE_ADRENO_KERNELS=OFF`\n");
-        return backend_ctx;
+        return nullptr;
     }
 #endif
 
     // Populate backend device name
-
-    dev_ctx->device_name = default_device->name;
-    backend_ctx->device_name = default_device->name;
+    backend_ctx->device_name = dev_ctx->device_name;
 
     // A local ref of cl_device_id for convenience
     cl_device_id device = backend_ctx->device;
 
-    ggml_cl_version platform_version = get_opencl_platform_version(default_device->platform->id);
+    ggml_cl_version platform_version = get_opencl_platform_version(dev_ctx->platform);
 
     // Check device OpenCL version, OpenCL 2.0 or above is required
     ggml_cl_version opencl_c_version = get_opencl_c_version(platform_version, device);
     if (opencl_c_version.major < 2) {
         GGML_LOG_ERROR("ggml_opencl: OpenCL 2.0 or above is required\n");
-        return backend_ctx;
+        return nullptr;
     }
 
     // Check driver version
@@ -1364,7 +1452,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     // fp16 is required
     if (!backend_ctx->fp16_support) {
         GGML_LOG_ERROR("ggml_opencl: device does not support FP16\n");
-        return backend_ctx;
+        return nullptr;
     }
 
     // If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
@@ -1373,7 +1461,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
         strstr(ext_buffer, "cl_intel_subgroups") == NULL) {
         GGML_LOG_ERROR("ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) "
             "(note that subgroups is an optional feature in OpenCL 3.0)\n");
-        return backend_ctx;
+        return nullptr;
     }
 
     cl_uint base_align_in_bits;
@@ -1397,6 +1485,15 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
         svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
 
+    if (opencl_c_version.major >= 3) {
+        CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool),
+                                 &backend_ctx->non_uniform_workgroups, 0));
+    } else {
+        GGML_ASSERT(opencl_c_version.major == 2);
+        // Non-uniform workgroup sizes is mandatory feature in v2.x.
+        backend_ctx->non_uniform_workgroups = true;
+    }
+
     // Print out configurations
 #ifdef GGML_OPENCL_SOA_Q
     GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
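Note: CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT is only queryable on OpenCL 3.0 devices, while in OpenCL 2.x non-uniform work-groups are a mandatory feature, which is why the hunk above defaults the flag to true for 2.x. A hedged standalone sketch of the same query (first platform and device only, minimal error handling; not the package's code):

    #include <CL/cl.h>
    #include <cstdio>

    int main() {
        cl_platform_id platform;
        cl_device_id device;
        if (clGetPlatformIDs(1, &platform, NULL) != CL_SUCCESS) return 1;
        if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL) != CL_SUCCESS) return 1;

        cl_bool non_uniform = CL_FALSE;
    #ifdef CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT // requires OpenCL 3.0 headers
        clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
                        sizeof(non_uniform), &non_uniform, NULL);
    #endif
        printf("non-uniform work-groups: %s\n", non_uniform ? "yes" : "no");
        return 0;
    }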
@@ -1406,14 +1503,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
 #endif // GGML_OPENCL_USE_ADRENO_KERNELS
 
-    cl_context_properties properties[] = {
-        (intptr_t)CL_CONTEXT_PLATFORM, (intptr_t)dev_ctx->platform, 0
-    };
-
-    CL_CHECK((backend_ctx->context = clCreateContext(properties, 1, &device, NULL, NULL, &err), err));
+    cl_int err;
 
     // A local ref of cl_context for convenience
-    cl_context context = backend_ctx->context;
+    cl_context context = backend_ctx->context = dev_ctx->context;
 
     //CL_CHECK((queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err),
     //    (err != CL_INVALID_QUEUE_PROPERTIES && err != CL_INVALID_VALUE ? err :
@@ -1426,7 +1519,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     CL_CHECK((backend_ctx->queue = clCreateCommandQueue(context, device, command_queue_props, &err), err));
 
     // Load kernels
-    load_cl_kernels(backend_ctx, opencl_c_version);
+    load_cl_kernels(backend_ctx.get(), opencl_c_version);
 
 #ifdef GGML_OPENCL_USE_ADRENO_KERNELS
     // Allocate intermediate buffers and images
@@ -1456,10 +1549,8 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
     CL_CHECK((backend_ctx->B_d_max = clCreateBuffer(context, 0, max_B_d_bytes, NULL, &err), err));
 #endif // GGML_OPENCL_USE_ADRENO_KERNELS
 
-
-
-
-    return backend_ctx;
+    dev_ctx->backend_ctx = backend_ctx.release();
+    return dev_ctx->backend_ctx;
 }
 
 static void ggml_cl2_free(void) {
@@ -1664,10 +1755,46 @@ static void ggml_backend_opencl_synchronize(ggml_backend_t backend) {
     GGML_UNUSED(backend);
 }
 
+// Syncronizes the 'backend_ctx's device with others so that commands
+// enqueued to it won't start until commands in the other devices have
+// completed.
+static void sync_with_other_backends(ggml_backend_opencl_context * backend_ctx) {
+    if (g_ggml_backend_opencl_devices.size() < 2)
+        return; // No other devices to synchronize with.
+
+    std::vector<cl_event> events;
+    events.reserve(g_ggml_backend_opencl_devices.size());
+
+    for (ggml_backend_device & backend_dev : g_ggml_backend_opencl_devices) {
+        auto * other_backend_ctx = ggml_cl2_init(&backend_dev);
+        if (backend_ctx != other_backend_ctx) {
+            cl_event ev;
+            CL_CHECK(clEnqueueMarkerWithWaitList(other_backend_ctx->queue, 0, nullptr, &ev));
+            CL_CHECK(clFlush(other_backend_ctx->queue));
+            events.push_back(ev);
+        }
+    }
+
+    CL_CHECK(clEnqueueBarrierWithWaitList(backend_ctx->queue, events.size(), events.data(), nullptr));
+    for (auto ev : events) {
+        CL_CHECK(clReleaseEvent(ev));
+    }
+}
+
+static void sync_with_other_backends(ggml_backend_t backend) {
+    auto * backend_ctx = static_cast<ggml_backend_opencl_context *>(backend->context);
+    sync_with_other_backends(backend_ctx);
+}
+
 static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_nodes; i++) {
         ggml_tensor * node = cgraph->nodes[i];
 
+        // NOTE: this may oversynchronize by synchronizing with
+        // backends/devices which don't compute 'cgraph's
+        // dependencies.
+        sync_with_other_backends(backend);
+
         if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
             continue;
        }
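Note: the sync_with_other_backends helper added above uses the standard OpenCL pattern for ordering work across command queues: enqueue a marker on each other queue, flush so the markers are actually submitted, then gate this queue behind the collected events with a barrier. A minimal two-queue sketch of the same pattern (assumes both queues share one context; error checking omitted, names illustrative):

    #include <CL/cl.h>

    // Make commands enqueued to queue_b after this call wait for all
    // work currently enqueued on queue_a (marker + barrier pattern).
    void order_after(cl_command_queue queue_a, cl_command_queue queue_b) {
        cl_event ev;
        clEnqueueMarkerWithWaitList(queue_a, 0, NULL, &ev); // fires when queue_a drains
        clFlush(queue_a);                                   // ensure the marker is submitted
        clEnqueueBarrierWithWaitList(queue_b, 1, &ev, NULL);
        clReleaseEvent(ev);
    }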
@@ -2058,15 +2185,16 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
         // The original tensor memory is divided into scales and quants, i.e.,
         // we first store scales, then quants.
         // Create subbuffer for scales.
-        region.origin = extra_orig->offset + tensor->view_offs + offset;
+        region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment);
         region.size = size_d;
         extra->d = clCreateSubBuffer(
             extra_orig->data_device, CL_MEM_READ_WRITE,
             CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
         CL_CHECK(err);
+        auto previous_origin = region.origin;
 
         // Create subbuffer for quants.
-        region.origin = extra_orig->offset + tensor->view_offs + offset + size_d;
+        region.origin = align_to(previous_origin + size_d, backend_ctx->alignment);
         region.size = size_q;
         extra->q = clCreateSubBuffer(
             extra_orig->data_device, CL_MEM_READ_WRITE,
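Note: the align_to calls introduced above exist because clCreateSubBuffer requires the region origin to be a multiple of the device's base address alignment (CL_DEVICE_MEM_BASE_ADDR_ALIGN, reported in bits); a misaligned origin fails with CL_MISALIGNED_SUB_BUFFER_OFFSET. A hedged sketch of deriving the byte alignment that the diff keeps in backend_ctx->alignment (an assumption about how it is computed, not a copy of the package's code):

    #include <CL/cl.h>

    // Sub-buffer origins must be a multiple of the device's base address
    // alignment; the device reports the value in bits, so convert to bytes.
    size_t sub_buffer_alignment(cl_device_id device) {
        cl_uint align_bits = 0;
        clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                        sizeof(align_bits), &align_bits, NULL);
        return align_bits / 8;
    }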
@@ -2271,8 +2399,8 @@ static void ggml_backend_opencl_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     cl_context context = backend_ctx->context;
     cl_command_queue queue = backend_ctx->queue;
 
-    // Make sure all previously submitted commands are finished.
-    CL_CHECK(clFinish(queue));
+    // Make sure all previously submitted commands in other devices are finished.
+    sync_with_other_backends(backend_ctx);
 
 #ifdef GGML_OPENCL_SOA_Q
     // In end-to-end runs, get_tensor is usually used to get back the logits,
@@ -2376,13 +2504,8 @@ static ggml_backend_buffer_t ggml_backend_opencl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buffer_type, size_t size) {
 }
 
 static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_type_t buffer_type) {
-    // FIXME: not thread safe, device may not be initialized yet
-    static cl_uint alignment = -1;
-    if (alignment == (cl_uint)-1) {
-        ggml_backend_opencl_context * backend_ctx = ggml_cl2_init(buffer_type->device);
-        alignment = backend_ctx->alignment;
-    }
-    return alignment;
+    ggml_backend_opencl_context * backend_ctx = ggml_cl2_init(buffer_type->device);
+    return backend_ctx->alignment;
 }
 
 static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
@@ -2409,16 +2532,6 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
     /* .is_host = */ NULL,
 };
 
-ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type() {
-    static ggml_backend_buffer_type buffer_type = {
-        /* .iface = */ ggml_backend_opencl_buffer_type_interface,
-        /* .device = */ &g_ggml_backend_opencl_device,
-        /* .context = */ nullptr,
-    };
-
-    return &buffer_type;
-}
-
 //
 // backend device
 //
@@ -2476,9 +2589,15 @@ static ggml_backend_t ggml_backend_opencl_device_init(ggml_backend_dev_t dev, const char * params) {
 }
 
 static ggml_backend_buffer_type_t ggml_backend_opencl_device_get_buffer_type(ggml_backend_dev_t dev) {
-    return ggml_backend_opencl_buffer_type();
+    auto * dev_ctx = static_cast<ggml_backend_opencl_device_context *>(dev->context);
 
-    GGML_UNUSED(dev);
+    dev_ctx->buffer_type = ggml_backend_buffer_type{
+        /* .iface = */ ggml_backend_opencl_buffer_type_interface,
+        /* .device = */ dev,
+        /* .context = */ nullptr,
+    };
+
+    return &dev_ctx->buffer_type;
 }
 
 static ggml_backend_buffer_t ggml_backend_opencl_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
@@ -2494,12 +2613,21 @@ static bool ggml_backend_opencl_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
 }
 
 static bool ggml_backend_opencl_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return buft == ggml_backend_opencl_buffer_type();
+    // Check 'dev' and 'buffer_type' are not objects belonging to this backend.
+    if (dev->iface.get_name != ggml_backend_opencl_device_get_name ||
+        buft->iface.get_name != ggml_backend_opencl_buffer_type_get_name) {
+        return false;
+    }
 
-    GGML_UNUSED(dev);
+    // Check cl_context is the same. clEnqueue* commands may not use
+    // buffers from another cl_context.
+    ggml_backend_opencl_context * backend_ctx0 = ggml_cl2_init(dev);
+    ggml_backend_opencl_context * backend_ctx1 = ggml_cl2_init(buft->device);
+    return backend_ctx0->context == backend_ctx1->context;
 }
 
-static struct ggml_backend_device_i ggml_backend_opencl_device_i = {
+namespace /* anonymous */ {
+struct ggml_backend_device_i ggml_backend_opencl_device_i = {
     /* .get_name = */ ggml_backend_opencl_device_get_name,
     /* .get_description = */ ggml_backend_opencl_device_get_description,
     /* .get_memory = */ ggml_backend_opencl_device_get_memory,
@@ -2516,6 +2644,7 @@ static struct ggml_backend_device_i ggml_backend_opencl_device_i = {
     /* .event_free = */ NULL,
     /* .event_synchronize = */ NULL,
 };
+}
 
 // Backend registry
 
@@ -2526,15 +2655,15 @@ static const char * ggml_backend_opencl_reg_get_name(ggml_backend_reg_t reg) {
 }
 
 static size_t ggml_backend_opencl_reg_device_count(ggml_backend_reg_t reg) {
-    return ggml_backend_opencl_n_devices;
+    return g_ggml_backend_opencl_devices.size();
 
     GGML_UNUSED(reg);
 }
 
 static ggml_backend_dev_t ggml_backend_opencl_reg_device_get(ggml_backend_reg_t reg, size_t index) {
-    GGML_ASSERT(index == 0);
+    GGML_ASSERT(index < ggml_backend_opencl_reg_device_count(reg));
 
-    return &g_ggml_backend_opencl_device;
+    return &g_ggml_backend_opencl_devices[index];
 
     GGML_UNUSED(reg);
     GGML_UNUSED(index);
@@ -2548,27 +2677,23 @@ static struct ggml_backend_reg_i ggml_backend_opencl_reg_i = {
 };
 
 ggml_backend_reg_t ggml_backend_opencl_reg(void) {
-
+    static std::mutex mutex;
     static ggml_backend_reg reg;
     static bool initialized = false;
+    std::lock_guard<std::mutex> lock(mutex);
 
-    if (!initialized) {
-        reg = ggml_backend_reg {
-            /* .api_version = */ GGML_BACKEND_API_VERSION,
-            /* .iface = */ ggml_backend_opencl_reg_i,
-            /* .context = */ NULL,
-        };
-
-        g_ggml_backend_opencl_device = ggml_backend_device {
-            /* .iface = */ ggml_backend_opencl_device_i,
-            /* .reg = */ &reg,
-            /* .context = */ &g_ggml_ctx_dev_main,
-        };
+    if (initialized) {
+        return &reg;
+    }
+    initialized = true;
 
-        ggml_cl2_init(&g_ggml_backend_opencl_device);
+    g_ggml_backend_opencl_devices = ggml_opencl_probe_devices(&reg);
 
-        initialized = true;
-    }
+    reg = ggml_backend_reg{
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
+        /* .iface = */ ggml_backend_opencl_reg_i,
+        /* .context = */ NULL,
+    };
 
     return &reg;
 }
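Note: ggml_backend_opencl_reg now guards its one-time device probe with a function-local std::mutex (matching the <mutex> include added at the top of the file), so concurrent first calls cannot race on the static state. The shape of that pattern in isolation, as a hedged sketch rather than the package's code:

    #include <mutex>
    #include <vector>

    struct registry { std::vector<int> devices; };

    registry & get_registry() {
        static std::mutex mutex;
        static registry reg;
        static bool initialized = false;

        std::lock_guard<std::mutex> lock(mutex); // serializes every call
        if (!initialized) {
            initialized = true;
            reg.devices = {0, 1}; // expensive probing happens exactly once
        }
        return reg;
    }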
@@ -2942,14 +3067,19 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
         size_t global_work_size[] = {(size_t)n, 1, 1};
         size_t local_work_size[] = {64, 1, 1};
 
+        size_t * local_work_size_ptr = local_work_size;
+        if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+            local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+        }
+
 #ifdef GGML_OPENCL_PROFILING
         cl_event evt;
-        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
         g_profiling_info.emplace_back();
-        populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+        populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
     } else {
         unsigned int nth = MIN(64, ne0);
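Note: the same local_work_size_ptr fallback recurs in every element-wise kernel below (mul, silu, relu, clamp, scale, diag_mask_inf): when the global size is not a multiple of the preferred 64-wide work-group and the device lacks non-uniform work-group support, passing NULL lets the driver pick legal sizes. A condensed sketch of the decision (illustrative helper, not from the package):

    #include <CL/cl.h>
    #include <cstddef>

    // Enqueue a 1D kernel of n items with a preferred work-group of 64.
    cl_int enqueue_1d(cl_command_queue queue, cl_kernel kernel,
                      size_t n, cl_bool non_uniform_supported) {
        size_t global[3] = {n, 1, 1};
        size_t local[3]  = {64, 1, 1};
        // NULL local size = let the driver choose legal work-group sizes.
        size_t * local_ptr = (n % 64 == 0 || non_uniform_supported) ? local : NULL;
        return clEnqueueNDRangeKernel(queue, kernel, 3, NULL,
                                      global, local_ptr, 0, NULL, NULL);
    }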
@@ -3077,14 +3207,19 @@ static void ggml_cl_mul(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
         size_t global_work_size[] = {(size_t)n, 1, 1};
         size_t local_work_size[] = {64, 1, 1};
 
+        size_t * local_work_size_ptr = local_work_size;
+        if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+            local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+        }
+
 #ifdef GGML_OPENCL_PROFILING
         cl_event evt;
-        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
         g_profiling_info.emplace_back();
-        populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+        populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
     } else {
         unsigned int nth = MIN(64, ne0);
@@ -3233,14 +3368,19 @@ static void ggml_cl_silu(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     size_t global_work_size[] = {(size_t)n, 1, 1};
     size_t local_work_size[] = {64, 1, 1};
 
+    size_t * local_work_size_ptr = local_work_size;
+    if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+        local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+    }
+
 #ifdef GGML_OPENCL_PROFILING
     cl_event evt;
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
     g_profiling_info.emplace_back();
-    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
 }
 
@@ -3273,14 +3413,19 @@ static void ggml_cl_relu(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     size_t global_work_size[] = {(size_t)n, 1, 1};
     size_t local_work_size[] = {64, 1, 1};
 
+    size_t * local_work_size_ptr = local_work_size;
+    if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+        local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+    }
+
 #ifdef GGML_OPENCL_PROFILING
     cl_event evt;
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
     g_profiling_info.emplace_back();
-    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
 }
 
@@ -3320,14 +3465,19 @@ static void ggml_cl_clamp(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    size_t global_work_size[] = {(size_t)n, 1, 1};
    size_t local_work_size[] = {64, 1, 1};
 
+    size_t * local_work_size_ptr = local_work_size;
+    if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+        local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+    }
+
 #ifdef GGML_OPENCL_PROFILING
     cl_event evt;
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
     g_profiling_info.emplace_back();
-    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
 }
 
@@ -4230,14 +4380,19 @@ static void ggml_cl_scale(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     size_t global_work_size[] = {(size_t)n, 1, 1};
     size_t local_work_size[] = {64, 1, 1};
 
+    size_t * local_work_size_ptr = local_work_size;
+    if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+        local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+    }
+
 #ifdef GGML_OPENCL_PROFILING
     cl_event evt;
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
     g_profiling_info.emplace_back();
-    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
 }
 
@@ -4418,14 +4573,19 @@ static void ggml_cl_diag_mask_inf(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
         size_t global_work_size[] = {(size_t)ne00, (size_t)ne01, (size_t)ne02};
         size_t local_work_size[] = {64, 1, 1};
 
+        size_t * local_work_size_ptr = local_work_size;
+        if (ne00 % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+            local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+        }
+
 #ifdef GGML_OPENCL_PROFILING
         cl_event evt;
-        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
+        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
 
         g_profiling_info.emplace_back();
-        populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst);
+        populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
 #else
-        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+        CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
 #endif
     }
 }