llama_cpp 0.3.6 → 0.3.7

@@ -183,6 +183,15 @@
  # define GGML_API
  #endif

+ // TODO: support for clang
+ #ifdef __GNUC__
+ # define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+ #elif defined(_MSC_VER)
+ # define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+ #else
+ # define GGML_DEPRECATED(func, hint) func
+ #endif
+
  #include <stdint.h>
  #include <stddef.h>
  #include <stdbool.h>
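
The new GGML_DEPRECATED macro attaches a compiler-specific deprecation attribute to a declaration, so callers of the old map_* functions (wrapped further down in this diff) get a compile-time warning with a migration hint. A minimal sketch of the expansion, using a hypothetical function name:

    // Hypothetical declaration, for illustration only. Under GCC this expands to
    //   int old_scale(int x) __attribute__((deprecated("use new_scale instead")));
    // and under MSVC to
    //   __declspec(deprecated("use new_scale instead")) int old_scale(int x);
    GGML_DEPRECATED(int old_scale(int x), "use new_scale instead");

    int caller(void) {
        return old_scale(2); // warning: 'old_scale' is deprecated: use new_scale instead
    }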
@@ -374,6 +383,10 @@ extern "C" {
  GGML_OP_MAP_UNARY,
  GGML_OP_MAP_BINARY,

+ GGML_OP_MAP_CUSTOM1_F32,
+ GGML_OP_MAP_CUSTOM2_F32,
+ GGML_OP_MAP_CUSTOM3_F32,
+
  GGML_OP_MAP_CUSTOM1,
  GGML_OP_MAP_CUSTOM2,
  GGML_OP_MAP_CUSTOM3,
@@ -570,6 +583,8 @@ extern "C" {
  GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
  GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);

+ GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+
  // use this to compute the memory overhead of a tensor
  GGML_API size_t ggml_tensor_overhead(void);

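A small usage sketch of the newly exported shape predicate (ctx, x, and y are assumed to be an existing ggml context and two previously built tensors):

    // guard an element-wise op on tensors that must agree in shape
    GGML_ASSERT(ggml_are_same_shape(x, y));
    struct ggml_tensor * sum = ggml_add(ctx, x, y);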
@@ -1240,7 +1255,7 @@ extern "C" {

  // conv_1d with padding = half
  // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
- GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+ GGML_API struct ggml_tensor * ggml_conv_1d_ph(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
@@ -1253,7 +1268,7 @@ extern "C" {
  GGML_OP_POOL_COUNT,
  };

- GGML_API struct ggml_tensor* ggml_pool_1d(
+ GGML_API struct ggml_tensor * ggml_pool_1d(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  enum ggml_op_pool op,
@@ -1261,7 +1276,7 @@ extern "C" {
  int s0, // stride
  int p0); // padding

- GGML_API struct ggml_tensor* ggml_pool_2d(
+ GGML_API struct ggml_tensor * ggml_pool_2d(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  enum ggml_op_pool op,
@@ -1315,15 +1330,6 @@ extern "C" {
  int h0,
  int w);

- // custom operators
-
- typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
- typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
- typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
- typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
- typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
  GGML_API struct ggml_tensor * ggml_unary(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
@@ -1334,63 +1340,138 @@ extern "C" {
  struct ggml_tensor * a,
  enum ggml_unary_op op);

- GGML_API struct ggml_tensor * ggml_map_unary_f32(
+ // custom operators
+
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
+ typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
- ggml_unary_op_f32_t fun);
+ ggml_unary_op_f32_t fun),
+ "use ggml_map_custom1 instead");

- GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
- ggml_unary_op_f32_t fun);
+ ggml_unary_op_f32_t fun),
+ "use ggml_map_custom1_inplace instead");

- GGML_API struct ggml_tensor * ggml_map_binary_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
- ggml_binary_op_f32_t fun);
+ ggml_binary_op_f32_t fun),
+ "use ggml_map_custom2 instead");

- GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
- ggml_binary_op_f32_t fun);
+ ggml_binary_op_f32_t fun),
+ "use ggml_map_custom2_inplace instead");

- GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun);
+ ggml_custom1_op_f32_t fun),
+ "use ggml_map_custom1 instead");

- GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun);
+ ggml_custom1_op_f32_t fun),
+ "use ggml_map_custom1_inplace instead");

- GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun);
+ ggml_custom2_op_f32_t fun),
+ "use ggml_map_custom2 instead");

- GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun);
+ ggml_custom2_op_f32_t fun),
+ "use ggml_map_custom2_inplace instead");

- GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun);
+ ggml_custom3_op_f32_t fun),
+ "use ggml_map_custom3 instead");

- GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun);
+ ggml_custom3_op_f32_t fun),
+ "use ggml_map_custom3_inplace instead");
+
+ // custom operators v2
+
+ typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
+ typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+ typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
+ #define GGML_N_TASKS_MAX -1
+
+ GGML_API struct ggml_tensor * ggml_map_custom1(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata);

  // loss function

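The v2 interface replaces the typed f32 callbacks with a single callback that receives a thread index ith, a thread count nth, and an opaque userdata pointer; passing GGML_N_TASKS_MAX as n_tasks asks ggml to use as many tasks as it has threads. A minimal sketch, assuming contiguous F32 tensors (the function and variable names are illustrative, not part of the library):

    #include "ggml.h"

    // multiply every element of `a` by the factor passed via userdata;
    // each of the nth threads handles its own contiguous slice of elements
    static void scale_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
                         int ith, int nth, void * userdata) {
        const float factor = *(const float *) userdata;
        const int64_t n   = ggml_nelements(dst);
        const int64_t per = (n + nth - 1) / nth;          // ceil(n / nth)
        const int64_t i0  = per * ith;
        const int64_t i1  = i0 + per < n ? i0 + per : n;  // clamp last slice
        float       * d = (float *) dst->data;
        const float * s = (const float *) a->data;
        for (int64_t i = i0; i < i1; ++i) {
            d[i] = s[i] * factor;
        }
    }

    // usage, letting ggml choose the task count (ctx and x assumed to exist):
    // static float factor = 2.0f;
    // struct ggml_tensor * y = ggml_map_custom1(ctx, x, scale_op, GGML_N_TASKS_MAX, &factor);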
@@ -149,6 +149,46 @@ struct llama_file {
  }
  };

+ // llama_context_data
+ struct llama_data_context {
+ virtual void write(const void * src, size_t size) = 0;
+ virtual size_t get_size_written() = 0;
+ virtual ~llama_data_context() = default;
+ };
+
+ struct llama_data_buffer_context : llama_data_context {
+ uint8_t* ptr;
+ size_t size_written = 0;
+
+ llama_data_buffer_context(uint8_t * p) : ptr(p) {}
+
+ void write(const void * src, size_t size) override {
+ memcpy(ptr, src, size);
+ ptr += size;
+ size_written += size;
+ }
+
+ size_t get_size_written() override {
+ return size_written;
+ }
+ };
+
+ struct llama_data_file_context : llama_data_context {
+ llama_file* file;
+ size_t size_written = 0;
+
+ llama_data_file_context(llama_file * f) : file(f) {}
+
+ void write(const void * src, size_t size) override {
+ file->write_raw(src, size);
+ size_written += size;
+ }
+
+ size_t get_size_written() override {
+ return size_written;
+ }
+ };
+
  #if defined(_WIN32)
  static std::string llama_format_win_err(DWORD err) {
  LPSTR buf;
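
The llama_data_context hunk above adds two interchangeable write sinks: one that fills a caller-supplied buffer and one that streams straight to a llama_file, which avoids materializing the whole state in memory when saving to disk. A sketch of the pattern (serialize_example is a hypothetical helper, assuming the structs above are in scope; in the library the state-saving code writes through the same base-class interface):

    // write a small payload through the polymorphic sink
    static void serialize_example(llama_data_context & out) {
        const uint32_t n_tokens  = 3;
        const int32_t  tokens[3] = { 1, 2, 3 };
        out.write(&n_tokens, sizeof(n_tokens));
        out.write(tokens,    sizeof(tokens));
    }

    // usage with the in-memory sink (the buffer must be large enough,
    // since llama_data_buffer_context does no bounds checking):
    // uint8_t buf[64];
    // llama_data_buffer_context buf_ctx(buf);
    // serialize_example(buf_ctx);
    // size_t written = buf_ctx.get_size_written(); // 16 bytes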
@@ -179,7 +219,7 @@ struct llama_mmap {
  // prefetch/readahead impairs performance on NUMA systems
  if (numa) { prefetch = 0; }
  #ifdef __linux__
- if (prefetch) { flags |= MAP_POPULATE; }
+ if (prefetch >= file->size) { flags |= MAP_POPULATE; }
  #endif
  addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
  if (addr == MAP_FAILED) {
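
This change treats prefetch as a byte count rather than a boolean, so MAP_POPULATE (which pre-faults the mapping's pages at mmap time) is only requested when the caller wants at least the whole file resident. A standalone sketch of the semantics, not the library's code (Linux-only flag, error handling elided):

    #include <stddef.h>
    #include <sys/mman.h>

    // map `size` bytes of fd read-only, pre-faulting pages only when the
    // caller asked to prefetch the entire file
    static void * map_readonly(int fd, size_t size, size_t prefetch) {
        int flags = MAP_SHARED;
    #ifdef __linux__
        if (prefetch >= size) {
            flags |= MAP_POPULATE; // pre-fault all pages up front
        }
    #endif
        void * addr = mmap(NULL, size, PROT_READ, flags, fd, 0);
        return addr == MAP_FAILED ? NULL : addr;
    }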