llama_cpp 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/src/ggml-alloc.c +8 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1165 -721
- data/ext/llama_cpp/src/ggml-metal.m +39 -18
- data/ext/llama_cpp/src/ggml.c +396 -150
- data/ext/llama_cpp/src/ggml.h +113 -32
- data/ext/llama_cpp/src/llama-util.h +41 -1
- data/ext/llama_cpp/src/llama.cpp +214 -146
- data/ext/llama_cpp/src/llama.h +18 -1
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -183,6 +183,15 @@
 # define GGML_API
 #endif
 
+// TODO: support for clang
+#ifdef __GNUC__
+#    define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+#    define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+#    define GGML_DEPRECATED(func, hint) func
+#endif
+
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
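The new GGML_DEPRECATED macro wraps an entire declaration together with a hint string, so one definition emits the right compiler-specific attribute. A minimal sketch of how it behaves (the old_add/new_add names are illustrative, not from the header):

    // Hypothetical declaration wrapped with the macro:
    GGML_DEPRECATED(int old_add(int a, int b), "use new_add instead");

    // GCC (and Clang, which also defines __GNUC__) expand this to:
    //   int old_add(int a, int b) __attribute__((deprecated("use new_add instead")));
    // MSVC expands it to:
    //   __declspec(deprecated("use new_add instead")) int old_add(int a, int b);
    // Other compilers get the plain declaration and no deprecation warning.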
@@ -374,6 +383,10 @@ extern "C" {
     GGML_OP_MAP_UNARY,
     GGML_OP_MAP_BINARY,
 
+    GGML_OP_MAP_CUSTOM1_F32,
+    GGML_OP_MAP_CUSTOM2_F32,
+    GGML_OP_MAP_CUSTOM3_F32,
+
     GGML_OP_MAP_CUSTOM1,
     GGML_OP_MAP_CUSTOM2,
     GGML_OP_MAP_CUSTOM3,
@@ -570,6 +583,8 @@ extern "C" {
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
 
+    GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+
     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
 
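With ggml_are_same_shape now in the public header, callers can check dimension compatibility before building element-wise ops. A minimal sketch, assuming a valid struct ggml_context * ctx:

    // Minimal sketch, assuming ctx was created with ggml_init().
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8);

    if (ggml_are_same_shape(a, b)) {
        // all four ne[] dimensions match, so an element-wise op
        // such as ggml_add(ctx, a, b) is valid
    }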
@@ -1240,7 +1255,7 @@ extern "C" {
 
     // conv_1d with padding = half
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+    GGML_API struct ggml_tensor * ggml_conv_1d_ph(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
@@ -1253,7 +1268,7 @@ extern "C" {
         GGML_OP_POOL_COUNT,
     };
 
-    GGML_API struct ggml_tensor* ggml_pool_1d(
+    GGML_API struct ggml_tensor * ggml_pool_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             enum ggml_op_pool     op,
@@ -1261,7 +1276,7 @@ extern "C" {
             int                   s0, // stride
             int                   p0); // padding
 
-    GGML_API struct ggml_tensor* ggml_pool_2d(
+    GGML_API struct ggml_tensor * ggml_pool_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             enum ggml_op_pool     op,
@@ -1315,15 +1330,6 @@ extern "C" {
             int                   h0,
             int                   w);
 
-    // custom operators
-
-    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
-    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
-    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
     GGML_API struct ggml_tensor * ggml_unary(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1334,63 +1340,138 @@ extern "C" {
             struct ggml_tensor  * a,
             enum ggml_unary_op    op);
 
-    GGML_API struct ggml_tensor * ggml_map_unary_f32(
+    // custom operators
+
+    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
+    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            ggml_unary_op_f32_t   fun);
+            ggml_unary_op_f32_t   fun),
+        "use ggml_map_custom1 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            ggml_unary_op_f32_t   fun);
+            ggml_unary_op_f32_t   fun),
+        "use ggml_map_custom1_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_binary_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            ggml_binary_op_f32_t  fun);
+            ggml_binary_op_f32_t  fun),
+        "use ggml_map_custom2 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            ggml_binary_op_f32_t  fun);
+            ggml_binary_op_f32_t  fun),
+        "use ggml_map_custom2_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
             struct ggml_context   * ctx,
             struct ggml_tensor    * a,
-            ggml_custom1_op_f32_t   fun);
+            ggml_custom1_op_f32_t   fun),
+        "use ggml_map_custom1 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
             struct ggml_context   * ctx,
             struct ggml_tensor    * a,
-            ggml_custom1_op_f32_t   fun);
+            ggml_custom1_op_f32_t   fun),
+        "use ggml_map_custom1_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
             struct ggml_context   * ctx,
             struct ggml_tensor    * a,
             struct ggml_tensor    * b,
-            ggml_custom2_op_f32_t   fun);
+            ggml_custom2_op_f32_t   fun),
+        "use ggml_map_custom2 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
             struct ggml_context   * ctx,
             struct ggml_tensor    * a,
             struct ggml_tensor    * b,
-            ggml_custom2_op_f32_t   fun);
+            ggml_custom2_op_f32_t   fun),
+        "use ggml_map_custom2_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
             struct ggml_context   * ctx,
             struct ggml_tensor    * a,
             struct ggml_tensor    * b,
             struct ggml_tensor    * c,
-            ggml_custom3_op_f32_t   fun);
+            ggml_custom3_op_f32_t   fun),
+        "use ggml_map_custom3 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
             struct ggml_context   * ctx,
             struct ggml_tensor    * a,
             struct ggml_tensor    * b,
             struct ggml_tensor    * c,
-            ggml_custom3_op_f32_t   fun);
+            ggml_custom3_op_f32_t   fun),
+        "use ggml_map_custom3_inplace instead");
+
+    // custom operators v2
+
+    typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
+    #define GGML_N_TASKS_MAX -1
+
+    GGML_API struct ggml_tensor * ggml_map_custom1(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            ggml_custom1_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            ggml_custom1_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            ggml_custom2_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            ggml_custom2_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            struct ggml_tensor    * c,
+            ggml_custom3_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            struct ggml_tensor    * c,
+            ggml_custom3_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
 
     // loss function
 
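The v2 custom-operator API passes a thread index (ith), thread count (nth), and an opaque userdata pointer to the callback, instead of the old fixed f32 signatures. A minimal sketch of how a caller might use ggml_map_custom1 (my_scale_op and the scale parameter are illustrative and assume contiguous F32 tensors):

    // Illustrative callback: dst[i] = a[i] * scale, split across nth threads.
    static void my_scale_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
                            int ith, int nth, void * userdata) {
        const float scale = *(const float *) userdata;
        const int n  = (int) ggml_nelements(dst);
        const int dr = (n + nth - 1) / nth;      // elements per thread
        const int i0 = dr * ith;
        const int i1 = i0 + dr < n ? i0 + dr : n;
        for (int i = i0; i < i1; ++i) {
            ((float *) dst->data)[i] = ((const float *) a->data)[i] * scale;
        }
    }

    // When building the graph (ctx and a assumed to exist):
    //   static float scale = 2.0f;
    //   struct ggml_tensor * out =
    //       ggml_map_custom1(ctx, a, my_scale_op, GGML_N_TASKS_MAX, &scale);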
data/ext/llama_cpp/src/llama-util.h
CHANGED
@@ -149,6 +149,46 @@ struct llama_file {
     }
 };
 
+// llama_context_data
+struct llama_data_context {
+    virtual void write(const void * src, size_t size) = 0;
+    virtual size_t get_size_written() = 0;
+    virtual ~llama_data_context() = default;
+};
+
+struct llama_data_buffer_context : llama_data_context {
+    uint8_t* ptr;
+    size_t size_written = 0;
+
+    llama_data_buffer_context(uint8_t * p) : ptr(p) {}
+
+    void write(const void * src, size_t size) override {
+        memcpy(ptr, src, size);
+        ptr += size;
+        size_written += size;
+    }
+
+    size_t get_size_written() override {
+        return size_written;
+    }
+};
+
+struct llama_data_file_context : llama_data_context {
+    llama_file* file;
+    size_t size_written = 0;
+
+    llama_data_file_context(llama_file * f) : file(f) {}
+
+    void write(const void * src, size_t size) override {
+        file->write_raw(src, size);
+        size_written += size;
+    }
+
+    size_t get_size_written() override {
+        return size_written;
+    }
+};
+
 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
     LPSTR buf;
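The two llama_data_context subclasses let state-serialization code write either into a caller-provided buffer or straight to a file through the same write()/get_size_written() interface. A minimal sketch of the pattern (copy_state_data is a hypothetical helper, not part of the diff):

    // Illustrative only: serialize a length-prefixed byte vector through the abstraction.
    static void copy_state_data(llama_data_context & data_ctx, const std::vector<uint8_t> & state) {
        const uint32_t n = (uint32_t) state.size();
        data_ctx.write(&n, sizeof(n));               // length prefix
        data_ctx.write(state.data(), state.size());  // payload
    }

    // Writing into a user buffer:
    //   llama_data_buffer_context buf_ctx(dst_ptr);
    //   copy_state_data(buf_ctx, state);
    //   size_t written = buf_ctx.get_size_written();
    //
    // Writing directly to disk uses llama_data_file_context with a llama_file *.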
@@ -179,7 +219,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
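MAP_POPULATE asks the kernel to pre-fault the entire mapping, so with this change it is only requested when the caller asked to prefetch at least the whole file; smaller prefetch values fall back to a plain read-only mapping. A sketch of the intent, with file_size, fd, and flags as local placeholders rather than the struct's actual members:

    // Sketch only: request full read-ahead when prefetch covers the whole file.
    int flags = MAP_SHARED;
    if (prefetch >= file_size) {
        flags |= MAP_POPULATE;   // kernel faults the whole file into the page cache up front
    }
    void * addr = mmap(NULL, file_size, PROT_READ, flags, fd, 0);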