llama_cpp 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/src/ggml-alloc.c +8 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1165 -721
- data/ext/llama_cpp/src/ggml-metal.m +39 -18
- data/ext/llama_cpp/src/ggml.c +396 -150
- data/ext/llama_cpp/src/ggml.h +113 -32
- data/ext/llama_cpp/src/llama-util.h +41 -1
- data/ext/llama_cpp/src/llama.cpp +214 -146
- data/ext/llama_cpp/src/llama.h +18 -1
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -183,6 +183,15 @@
|
|
183
183
|
# define GGML_API
|
184
184
|
#endif
|
185
185
|
|
186
|
+
// TODO: support for clang
|
187
|
+
#ifdef __GNUC__
|
188
|
+
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
|
189
|
+
#elif defined(_MSC_VER)
|
190
|
+
# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
|
191
|
+
#else
|
192
|
+
# define GGML_DEPRECATED(func, hint) func
|
193
|
+
#endif
|
194
|
+
|
186
195
|
#include <stdint.h>
|
187
196
|
#include <stddef.h>
|
188
197
|
#include <stdbool.h>
|
@@ -374,6 +383,10 @@ extern "C" {
|
|
374
383
|
GGML_OP_MAP_UNARY,
|
375
384
|
GGML_OP_MAP_BINARY,
|
376
385
|
|
386
|
+
GGML_OP_MAP_CUSTOM1_F32,
|
387
|
+
GGML_OP_MAP_CUSTOM2_F32,
|
388
|
+
GGML_OP_MAP_CUSTOM3_F32,
|
389
|
+
|
377
390
|
GGML_OP_MAP_CUSTOM1,
|
378
391
|
GGML_OP_MAP_CUSTOM2,
|
379
392
|
GGML_OP_MAP_CUSTOM3,
|
@@ -570,6 +583,8 @@ extern "C" {
|
|
570
583
|
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
571
584
|
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
572
585
|
|
586
|
+
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
587
|
+
|
573
588
|
// use this to compute the memory overhead of a tensor
|
574
589
|
GGML_API size_t ggml_tensor_overhead(void);
|
575
590
|
|
@@ -1240,7 +1255,7 @@ extern "C" {
|
|
1240
1255
|
|
1241
1256
|
// conv_1d with padding = half
|
1242
1257
|
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
1243
|
-
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
1258
|
+
GGML_API struct ggml_tensor * ggml_conv_1d_ph(
|
1244
1259
|
struct ggml_context * ctx,
|
1245
1260
|
struct ggml_tensor * a,
|
1246
1261
|
struct ggml_tensor * b,
|
@@ -1253,7 +1268,7 @@ extern "C" {
|
|
1253
1268
|
GGML_OP_POOL_COUNT,
|
1254
1269
|
};
|
1255
1270
|
|
1256
|
-
GGML_API struct ggml_tensor* ggml_pool_1d(
|
1271
|
+
GGML_API struct ggml_tensor * ggml_pool_1d(
|
1257
1272
|
struct ggml_context * ctx,
|
1258
1273
|
struct ggml_tensor * a,
|
1259
1274
|
enum ggml_op_pool op,
|
@@ -1261,7 +1276,7 @@ extern "C" {
|
|
1261
1276
|
int s0, // stride
|
1262
1277
|
int p0); // padding
|
1263
1278
|
|
1264
|
-
GGML_API struct ggml_tensor* ggml_pool_2d(
|
1279
|
+
GGML_API struct ggml_tensor * ggml_pool_2d(
|
1265
1280
|
struct ggml_context * ctx,
|
1266
1281
|
struct ggml_tensor * a,
|
1267
1282
|
enum ggml_op_pool op,
|
@@ -1315,15 +1330,6 @@ extern "C" {
|
|
1315
1330
|
int h0,
|
1316
1331
|
int w);
|
1317
1332
|
|
1318
|
-
// custom operators
|
1319
|
-
|
1320
|
-
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
1321
|
-
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1322
|
-
|
1323
|
-
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
1324
|
-
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1325
|
-
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1326
|
-
|
1327
1333
|
GGML_API struct ggml_tensor * ggml_unary(
|
1328
1334
|
struct ggml_context * ctx,
|
1329
1335
|
struct ggml_tensor * a,
|
@@ -1334,63 +1340,138 @@ extern "C" {
|
|
1334
1340
|
struct ggml_tensor * a,
|
1335
1341
|
enum ggml_unary_op op);
|
1336
1342
|
|
1337
|
-
|
1343
|
+
// custom operators
|
1344
|
+
|
1345
|
+
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
1346
|
+
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1347
|
+
|
1348
|
+
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
1349
|
+
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1350
|
+
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1351
|
+
|
1352
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
1338
1353
|
struct ggml_context * ctx,
|
1339
1354
|
struct ggml_tensor * a,
|
1340
|
-
ggml_unary_op_f32_t fun)
|
1355
|
+
ggml_unary_op_f32_t fun),
|
1356
|
+
"use ggml_map_custom1 instead");
|
1341
1357
|
|
1342
|
-
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
1358
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
1343
1359
|
struct ggml_context * ctx,
|
1344
1360
|
struct ggml_tensor * a,
|
1345
|
-
ggml_unary_op_f32_t fun)
|
1361
|
+
ggml_unary_op_f32_t fun),
|
1362
|
+
"use ggml_map_custom1_inplace instead");
|
1346
1363
|
|
1347
|
-
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
1364
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
1348
1365
|
struct ggml_context * ctx,
|
1349
1366
|
struct ggml_tensor * a,
|
1350
1367
|
struct ggml_tensor * b,
|
1351
|
-
ggml_binary_op_f32_t fun)
|
1368
|
+
ggml_binary_op_f32_t fun),
|
1369
|
+
"use ggml_map_custom2 instead");
|
1352
1370
|
|
1353
|
-
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
1371
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
1354
1372
|
struct ggml_context * ctx,
|
1355
1373
|
struct ggml_tensor * a,
|
1356
1374
|
struct ggml_tensor * b,
|
1357
|
-
ggml_binary_op_f32_t fun)
|
1375
|
+
ggml_binary_op_f32_t fun),
|
1376
|
+
"use ggml_map_custom2_inplace instead");
|
1358
1377
|
|
1359
|
-
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
1378
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
1360
1379
|
struct ggml_context * ctx,
|
1361
1380
|
struct ggml_tensor * a,
|
1362
|
-
ggml_custom1_op_f32_t fun)
|
1381
|
+
ggml_custom1_op_f32_t fun),
|
1382
|
+
"use ggml_map_custom1 instead");
|
1363
1383
|
|
1364
|
-
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
1384
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
1365
1385
|
struct ggml_context * ctx,
|
1366
1386
|
struct ggml_tensor * a,
|
1367
|
-
ggml_custom1_op_f32_t fun)
|
1387
|
+
ggml_custom1_op_f32_t fun),
|
1388
|
+
"use ggml_map_custom1_inplace instead");
|
1368
1389
|
|
1369
|
-
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
1390
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
1370
1391
|
struct ggml_context * ctx,
|
1371
1392
|
struct ggml_tensor * a,
|
1372
1393
|
struct ggml_tensor * b,
|
1373
|
-
ggml_custom2_op_f32_t fun)
|
1394
|
+
ggml_custom2_op_f32_t fun),
|
1395
|
+
"use ggml_map_custom2 instead");
|
1374
1396
|
|
1375
|
-
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
1397
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
1376
1398
|
struct ggml_context * ctx,
|
1377
1399
|
struct ggml_tensor * a,
|
1378
1400
|
struct ggml_tensor * b,
|
1379
|
-
ggml_custom2_op_f32_t fun)
|
1401
|
+
ggml_custom2_op_f32_t fun),
|
1402
|
+
"use ggml_map_custom2_inplace instead");
|
1380
1403
|
|
1381
|
-
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
1404
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
1382
1405
|
struct ggml_context * ctx,
|
1383
1406
|
struct ggml_tensor * a,
|
1384
1407
|
struct ggml_tensor * b,
|
1385
1408
|
struct ggml_tensor * c,
|
1386
|
-
ggml_custom3_op_f32_t fun)
|
1409
|
+
ggml_custom3_op_f32_t fun),
|
1410
|
+
"use ggml_map_custom3 instead");
|
1387
1411
|
|
1388
|
-
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
1412
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
1389
1413
|
struct ggml_context * ctx,
|
1390
1414
|
struct ggml_tensor * a,
|
1391
1415
|
struct ggml_tensor * b,
|
1392
1416
|
struct ggml_tensor * c,
|
1393
|
-
ggml_custom3_op_f32_t fun)
|
1417
|
+
ggml_custom3_op_f32_t fun),
|
1418
|
+
"use ggml_map_custom3_inplace instead");
|
1419
|
+
|
1420
|
+
// custom operators v2
|
1421
|
+
|
1422
|
+
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
1423
|
+
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
1424
|
+
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
1425
|
+
|
1426
|
+
#define GGML_N_TASKS_MAX -1
|
1427
|
+
|
1428
|
+
GGML_API struct ggml_tensor * ggml_map_custom1(
|
1429
|
+
struct ggml_context * ctx,
|
1430
|
+
struct ggml_tensor * a,
|
1431
|
+
ggml_custom1_op_t fun,
|
1432
|
+
int n_tasks,
|
1433
|
+
void * userdata);
|
1434
|
+
|
1435
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
|
1436
|
+
struct ggml_context * ctx,
|
1437
|
+
struct ggml_tensor * a,
|
1438
|
+
ggml_custom1_op_t fun,
|
1439
|
+
int n_tasks,
|
1440
|
+
void * userdata);
|
1441
|
+
|
1442
|
+
GGML_API struct ggml_tensor * ggml_map_custom2(
|
1443
|
+
struct ggml_context * ctx,
|
1444
|
+
struct ggml_tensor * a,
|
1445
|
+
struct ggml_tensor * b,
|
1446
|
+
ggml_custom2_op_t fun,
|
1447
|
+
int n_tasks,
|
1448
|
+
void * userdata);
|
1449
|
+
|
1450
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
|
1451
|
+
struct ggml_context * ctx,
|
1452
|
+
struct ggml_tensor * a,
|
1453
|
+
struct ggml_tensor * b,
|
1454
|
+
ggml_custom2_op_t fun,
|
1455
|
+
int n_tasks,
|
1456
|
+
void * userdata);
|
1457
|
+
|
1458
|
+
GGML_API struct ggml_tensor * ggml_map_custom3(
|
1459
|
+
struct ggml_context * ctx,
|
1460
|
+
struct ggml_tensor * a,
|
1461
|
+
struct ggml_tensor * b,
|
1462
|
+
struct ggml_tensor * c,
|
1463
|
+
ggml_custom3_op_t fun,
|
1464
|
+
int n_tasks,
|
1465
|
+
void * userdata);
|
1466
|
+
|
1467
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
|
1468
|
+
struct ggml_context * ctx,
|
1469
|
+
struct ggml_tensor * a,
|
1470
|
+
struct ggml_tensor * b,
|
1471
|
+
struct ggml_tensor * c,
|
1472
|
+
ggml_custom3_op_t fun,
|
1473
|
+
int n_tasks,
|
1474
|
+
void * userdata);
|
1394
1475
|
|
1395
1476
|
// loss function
|
1396
1477
|
|
data/ext/llama_cpp/src/llama-util.h
CHANGED
@@ -149,6 +149,46 @@ struct llama_file {
|
|
149
149
|
}
|
150
150
|
};
|
151
151
|
|
152
|
+
// llama_data_context
|
153
|
+
struct llama_data_context {
|
154
|
+
virtual void write(const void * src, size_t size) = 0;
|
155
|
+
virtual size_t get_size_written() = 0;
|
156
|
+
virtual ~llama_data_context() = default;
|
157
|
+
};
|
158
|
+
|
159
|
+
struct llama_data_buffer_context : llama_data_context {
|
160
|
+
uint8_t* ptr;
|
161
|
+
size_t size_written = 0;
|
162
|
+
|
163
|
+
llama_data_buffer_context(uint8_t * p) : ptr(p) {}
|
164
|
+
|
165
|
+
void write(const void * src, size_t size) override {
|
166
|
+
memcpy(ptr, src, size);
|
167
|
+
ptr += size;
|
168
|
+
size_written += size;
|
169
|
+
}
|
170
|
+
|
171
|
+
size_t get_size_written() override {
|
172
|
+
return size_written;
|
173
|
+
}
|
174
|
+
};
|
175
|
+
|
176
|
+
struct llama_data_file_context : llama_data_context {
|
177
|
+
llama_file* file;
|
178
|
+
size_t size_written = 0;
|
179
|
+
|
180
|
+
llama_data_file_context(llama_file * f) : file(f) {}
|
181
|
+
|
182
|
+
void write(const void * src, size_t size) override {
|
183
|
+
file->write_raw(src, size);
|
184
|
+
size_written += size;
|
185
|
+
}
|
186
|
+
|
187
|
+
size_t get_size_written() override {
|
188
|
+
return size_written;
|
189
|
+
}
|
190
|
+
};
|
191
|
+
|
152
192
|
#if defined(_WIN32)
|
153
193
|
static std::string llama_format_win_err(DWORD err) {
|
154
194
|
LPSTR buf;
|
@@ -179,7 +219,7 @@ struct llama_mmap {
|
|
179
219
|
// prefetch/readahead impairs performance on NUMA systems
|
180
220
|
if (numa) { prefetch = 0; }
|
181
221
|
#ifdef __linux__
|
182
|
-
if (prefetch) { flags |= MAP_POPULATE; }
|
222
|
+
if (prefetch >= file->size) { flags |= MAP_POPULATE; }
|
183
223
|
#endif
|
184
224
|
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
|
185
225
|
if (addr == MAP_FAILED) {
|