llama_cpp 0.3.6 → 0.3.7

This diff compares the content of the publicly released 0.3.6 and 0.3.7 versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -183,6 +183,15 @@
 #    define GGML_API
 #endif
 
+// TODO: support for clang
+#ifdef __GNUC__
+#    define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+#    define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+#    define GGML_DEPRECATED(func, hint) func
+#endif
+
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
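For reference, a minimal sketch of how the new macro is meant to be used (my_old_fn and my_new_fn are hypothetical illustration names, not part of the header): on GCC-compatible compilers, including Clang, which defines __GNUC__, the declaration gains __attribute__((deprecated(hint))); MSVC prepends __declspec(deprecated(hint)); other compilers get the plain declaration.

    // Sketch only: my_old_fn/my_new_fn are illustration names, not ggml API.
    #include "ggml.h"

    GGML_DEPRECATED(int my_old_fn(int x), "use my_new_fn instead");

    int my_old_fn(int x) { return x + 1; }   // the definition itself is unaffected

    int main() {
        return my_old_fn(41);   // call site: the compiler emits a deprecation warning
    }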
@@ -374,6 +383,10 @@ extern "C" {
         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,
 
+        GGML_OP_MAP_CUSTOM1_F32,
+        GGML_OP_MAP_CUSTOM2_F32,
+        GGML_OP_MAP_CUSTOM3_F32,
+
         GGML_OP_MAP_CUSTOM1,
         GGML_OP_MAP_CUSTOM2,
         GGML_OP_MAP_CUSTOM3,
@@ -570,6 +583,8 @@ extern "C" {
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
 
+    GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+
     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
 
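A short, hedged sketch of where this predicate fits (t0, t1, and ctx are assumed to already exist in a ggml context): it reports whether all four ne dimensions match, which is the natural guard before an elementwise operation.

    // Sketch: guard an elementwise op on two existing tensors t0 and t1.
    if (ggml_are_same_shape(t0, t1)) {
        struct ggml_tensor * sum = ggml_add(ctx, t0, t1);  // shapes agree
    }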
@@ -1240,7 +1255,7 @@ extern "C" {
 
     // conv_1d with padding = half
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+    GGML_API struct ggml_tensor * ggml_conv_1d_ph(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
@@ -1253,7 +1268,7 @@ extern "C" {
         GGML_OP_POOL_COUNT,
     };
 
-    GGML_API struct ggml_tensor* ggml_pool_1d(
+    GGML_API struct ggml_tensor * ggml_pool_1d(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             enum ggml_op_pool op,
@@ -1261,7 +1276,7 @@ extern "C" {
             int s0, // stride
             int p0); // padding
 
-    GGML_API struct ggml_tensor* ggml_pool_2d(
+    GGML_API struct ggml_tensor * ggml_pool_2d(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             enum ggml_op_pool op,
@@ -1315,15 +1330,6 @@ extern "C" {
             int h0,
             int w);
 
-    // custom operators
-
-    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
-    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
-    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
     GGML_API struct ggml_tensor * ggml_unary(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
@@ -1334,63 +1340,138 @@ extern "C" {
             struct ggml_tensor * a,
             enum ggml_unary_op op);
 
-    GGML_API struct ggml_tensor * ggml_map_unary_f32(
+    // custom operators
+
+    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
+    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_unary_op_f32_t fun);
+            ggml_unary_op_f32_t fun),
+        "use ggml_map_custom1 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_unary_op_f32_t fun);
+            ggml_unary_op_f32_t fun),
+        "use ggml_map_custom1_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_binary_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_binary_op_f32_t fun);
+            ggml_binary_op_f32_t fun),
+        "use ggml_map_custom2 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_binary_op_f32_t fun);
+            ggml_binary_op_f32_t fun),
+        "use ggml_map_custom2_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_custom1_op_f32_t fun);
+            ggml_custom1_op_f32_t fun),
+        "use ggml_map_custom1 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_custom1_op_f32_t fun);
+            ggml_custom1_op_f32_t fun),
+        "use ggml_map_custom1_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_custom2_op_f32_t fun);
+            ggml_custom2_op_f32_t fun),
+        "use ggml_map_custom2 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_custom2_op_f32_t fun);
+            ggml_custom2_op_f32_t fun),
+        "use ggml_map_custom2_inplace instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
             struct ggml_tensor * c,
-            ggml_custom3_op_f32_t fun);
+            ggml_custom3_op_f32_t fun),
+        "use ggml_map_custom3 instead");
 
-    GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
             struct ggml_tensor * c,
-            ggml_custom3_op_f32_t fun);
+            ggml_custom3_op_f32_t fun),
+        "use ggml_map_custom3_inplace instead");
+
+    // custom operators v2
+
+    typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
+    #define GGML_N_TASKS_MAX -1
+
+    GGML_API struct ggml_tensor * ggml_map_custom1(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            ggml_custom1_op_t fun,
+            int n_tasks,
+            void * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            ggml_custom1_op_t fun,
+            int n_tasks,
+            void * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            ggml_custom2_op_t fun,
+            int n_tasks,
+            void * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            ggml_custom2_op_t fun,
+            int n_tasks,
+            void * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            struct ggml_tensor * c,
+            ggml_custom3_op_t fun,
+            int n_tasks,
+            void * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            struct ggml_tensor * c,
+            ggml_custom3_op_t fun,
+            int n_tasks,
+            void * userdata);
 
     // loss function
 
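To make the replacement API concrete, here is a hedged, self-contained sketch of ggml_map_custom1 (it assumes the graph API of this ggml revision, ggml_build_forward plus ggml_graph_compute_with_ctx; double_op is an illustration name). The ith/nth pair handed to the callback is the worker index and worker count, so the callback partitions the elements itself, and GGML_N_TASKS_MAX asks the scheduler to use as many tasks as there are threads.

    #include "ggml.h"
    #include <stdio.h>

    // ggml_map_custom1 callback: dst has the same shape as a. Each of the nth
    // workers handles the interleaved element slice starting at its index ith.
    static void double_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
                          int ith, int nth, void * userdata) {
        (void) userdata;  // unused in this sketch
        const float * src = (const float *) a->data;
        float * out = (float *) dst->data;
        const int64_t n = ggml_nelements(dst);
        for (int64_t i = ith; i < n; i += nth) {
            out[i] = 2.0f * src[i];
        }
    }

    int main() {
        struct ggml_init_params params = { /*mem_size*/ 16u*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        for (int i = 0; i < 8; ++i) {
            ggml_set_f32_1d(x, i, (float) i);
        }

        // GGML_N_TASKS_MAX: one task per available thread
        struct ggml_tensor * y = ggml_map_custom1(ctx, x, double_op, GGML_N_TASKS_MAX, NULL);

        struct ggml_cgraph gf = ggml_build_forward(y);
        ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads*/ 2);

        printf("y[3] = %.1f\n", ggml_get_f32_1d(y, 3));  // expect 6.0
        ggml_free(ctx);
        return 0;
    }

Compared with the deprecated _f32 variants, the v2 callbacks also receive a userdata pointer, so custom ops can be parameterized without resorting to global state.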
@@ -149,6 +149,46 @@ struct llama_file {
     }
 };
 
+// llama_context_data
+struct llama_data_context {
+    virtual void write(const void * src, size_t size) = 0;
+    virtual size_t get_size_written() = 0;
+    virtual ~llama_data_context() = default;
+};
+
+struct llama_data_buffer_context : llama_data_context {
+    uint8_t * ptr;
+    size_t size_written = 0;
+
+    llama_data_buffer_context(uint8_t * p) : ptr(p) {}
+
+    void write(const void * src, size_t size) override {
+        memcpy(ptr, src, size);
+        ptr += size;
+        size_written += size;
+    }
+
+    size_t get_size_written() override {
+        return size_written;
+    }
+};
+
+struct llama_data_file_context : llama_data_context {
+    llama_file * file;
+    size_t size_written = 0;
+
+    llama_data_file_context(llama_file * f) : file(f) {}
+
+    void write(const void * src, size_t size) override {
+        file->write_raw(src, size);
+        size_written += size;
+    }
+
+    size_t get_size_written() override {
+        return size_written;
+    }
+};
+
 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
     LPSTR buf;
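This hunk lands in the llama support code rather than ggml.h. The new llama_data_context interface abstracts the destination of state serialization: the same writer routine can fill a caller-supplied buffer or stream straight through a llama_file. A hedged sketch of the pattern (serialize_state and demo are hypothetical illustration names, not llama.cpp functions; the context types come from the hunk above):

    #include <cstdint>
    #include <cstring>

    // Hypothetical writer: everything goes through the common interface, so
    // the caller decides whether the bytes land in memory or on disk.
    static void serialize_state(llama_data_context & out) {
        const uint32_t n_tokens = 3;               // example payload
        const int32_t tokens[3] = { 10, 20, 30 };  // example payload
        out.write(&n_tokens, sizeof(n_tokens));
        out.write(tokens, sizeof(tokens));
    }

    static void demo() {
        // In-memory target: caller owns the buffer, the context tracks the cursor.
        uint8_t buf[256];
        llama_data_buffer_context buf_ctx(buf);
        serialize_state(buf_ctx);   // buf_ctx.get_size_written() == 16

        // File target: identical writer code, bytes go through llama_file::write_raw.
        llama_file file("state.bin", "wb");
        llama_data_file_context file_ctx(&file);
        serialize_state(file_ctx);  // same 16 bytes, streamed to disk
    }

Note the asymmetry the sketch makes visible: the buffer variant trusts the caller to have sized the destination (the memcpy is unbounded), while the file variant simply streams; both report progress through get_size_written().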
@@ -179,7 +219,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
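Net effect of this last hunk: MAP_POPULATE, which pre-faults the entire mapping up front, is now requested only when the prefetch argument covers at least the whole file, rather than whenever any nonzero prefetch was requested, so a partial prefetch no longer triggers a full-file read-ahead.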