llama_cpp 0.0.2 → 0.0.4

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only. All hunks below come from the bundled ggml.h header.
@@ -177,11 +177,12 @@ extern "C" {
 #include <stddef.h>
 #include <stdbool.h>

 #define GGML_MAX_DIMS 4
 #define GGML_MAX_NODES 4096
 #define GGML_MAX_PARAMS 16
 #define GGML_MAX_CONTEXTS 64
 #define GGML_MAX_OPT 4
+#define GGML_DEFAULT_N_THREADS 4

 #ifdef __ARM_NEON
 // we use the built-in 16-bit float type
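The only net change in this hunk is the new GGML_DEFAULT_N_THREADS constant, which replaces magic thread counts at call sites. A minimal caller-side sketch, assuming a context ctx and a final tensor output built elsewhere (the graph API itself is unchanged by this diff):

    // Use the new default instead of a hard-coded thread count.
    struct ggml_cgraph gf = {0};
    gf.n_threads = GGML_DEFAULT_N_THREADS;
    ggml_build_forward_expand(&gf, output);
    ggml_graph_compute(ctx, &gf);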
@@ -198,13 +199,14 @@ struct ggml_object;
 struct ggml_context;

 enum ggml_type {
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
+    // explicitly numbered values are used in llama.cpp files
+    GGML_TYPE_F32 = 0,
+    GGML_TYPE_F16 = 1,
+    GGML_TYPE_Q4_0 = 2,
+    GGML_TYPE_Q4_1 = 3,
     GGML_TYPE_I8,
     GGML_TYPE_I16,
     GGML_TYPE_I32,
-    GGML_TYPE_F16,
-    GGML_TYPE_F32,
     GGML_TYPE_COUNT,
 };

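Note that reordering enum ggml_type would otherwise be a silent break: GGML_TYPE_Q4_0 was 0 in 0.0.2 and is 2 in 0.0.4. Pinning explicit values keeps the type ids stable for the model files llama.cpp reads and writes. A hypothetical loader helper illustrating why the pinning matters (the helper is illustrative, not part of this diff):

    #include <stdbool.h>
    #include <stdint.h>

    // A type id stored in a model file must decode to the same enum value
    // across library versions; the "= 0..3" pins above guarantee that.
    static bool valid_wtype(int32_t wtype_id) {
        return wtype_id >= 0 && wtype_id < GGML_TYPE_COUNT;
    }
    // e.g. valid_wtype(2) accepts what is now, stably, GGML_TYPE_Q4_0.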
@@ -236,6 +238,7 @@ enum ggml_op {

     GGML_OP_SCALE,
     GGML_OP_CPY,
+    GGML_OP_CONT,
     GGML_OP_RESHAPE,
     GGML_OP_VIEW,
     GGML_OP_PERMUTE,
@@ -250,19 +253,35 @@ enum ggml_op {
     GGML_OP_FLASH_ATTN,
     GGML_OP_FLASH_FF,

+    GGML_OP_MAP_UNARY,
+    GGML_OP_MAP_BINARY,
+
     GGML_OP_COUNT,
 };

+
+// ggml object
+struct ggml_object {
+    size_t offs;
+    size_t size;
+
+    struct ggml_object * next;
+
+    char padding[8];
+};
+
+static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
+
 // n-dimensional tensor
 struct ggml_tensor {
     enum ggml_type type;

     int n_dims;
-    int     ne[GGML_MAX_DIMS]; // number of elements
-    size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
-                               // nb[0] = sizeof(type)
-                               // nb[1] = nb[0]   * ne[0] + padding
-                               // nb[i] = nb[i-1] * ne[i-1]
+    int64_t ne[GGML_MAX_DIMS]; // number of elements
+    size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                               // nb[0] = sizeof(type)
+                               // nb[1] = nb[0]   * ne[0] + padding
+                               // nb[i] = nb[i-1] * ne[i-1]

     // compute data
     enum ggml_op op;
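Widening ne[] from int to int64_t lifts the 2^31-1 element ceiling per tensor (ggml_nelements is widened to match in a later hunk). A sketch of the arithmetic these fields imply, following the nb[] comments above; tensor stands for any populated struct ggml_tensor pointer:

    // Element counts must accumulate in 64 bits: a 4096 x 4096 x 512 tensor
    // already holds 2^33 elements, which overflows a 32-bit int.
    int64_t nelements = 1;
    for (int i = 0; i < tensor->n_dims; i++) {
        nelements *= tensor->ne[i];
    }
    // Per the stride comments, for non-quantized layouts the total byte size
    // is the last dimension's stride times its length.
    size_t nbytes = tensor->ne[tensor->n_dims - 1] * tensor->nb[tensor->n_dims - 1];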
@@ -328,13 +347,15 @@ int64_t ggml_cycles_per_ms(void);
 void ggml_print_object (const struct ggml_object * obj);
 void ggml_print_objects(const struct ggml_context * ctx);

-int     ggml_nelements(const struct ggml_tensor * tensor);
-size_t  ggml_nbytes   (const struct ggml_tensor * tensor);
+int64_t ggml_nelements(const struct ggml_tensor * tensor);
+size_t  ggml_nbytes   (const struct ggml_tensor * tensor);

 int    ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
 float  ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float

+const char * ggml_type_name(enum ggml_type type);
+
 size_t ggml_element_size(const struct ggml_tensor * tensor);

 struct ggml_context * ggml_init(struct ggml_init_params params);
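ggml_type_name is a small quality-of-life addition for logging. A usage fragment (the tensor name string is hypothetical):

    printf("tensor '%s': type=%s, %lld elements, %zu bytes\n",
           "wq.weight", // hypothetical name
           ggml_type_name(tensor->type),
           (long long) ggml_nelements(tensor), // now int64_t, hence the cast
           ggml_nbytes(tensor));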
@@ -344,44 +365,37 @@ size_t ggml_used_mem(const struct ggml_context * ctx);

 size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);

-bool ggml_mlock_supported(void);
-bool ggml_mlock(
-        struct ggml_context * ctx,
-        const void *opt_extra_addr,
-        size_t opt_extra_len,
-        char **err_p);
-
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
-        const int *ne);
+        const int64_t *ne);

 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0);
+        int64_t ne0);

 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0,
-        int ne1);
+        int64_t ne0,
+        int64_t ne1);

 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0,
-        int ne1,
-        int ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);

 struct ggml_tensor * ggml_new_tensor_4d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0,
-        int ne1,
-        int ne2,
-        int ne3);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3);

 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
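Two changes land here: the ggml_mlock API is removed outright (0.0.2 callers that used it must drop those calls), and every tensor constructor now takes int64_t dimensions. A minimal allocation sketch against the new signatures; the 16 MiB pool size and the shape are illustrative:

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL, // NULL => ggml allocates the pool itself
    };
    struct ggml_context * ctx = ggml_init(params);

    // Dimensions are int64_t now, so shapes whose element count exceeds
    // INT32_MAX are representable.
    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1024, 1024);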
@@ -519,6 +533,11 @@ struct ggml_tensor * ggml_cpy(
         struct ggml_tensor * a,
         struct ggml_tensor * b);

+// make contiguous
+struct ggml_tensor * ggml_cont(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a);
+
 // return view(a), b specifies the new shape
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape(
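ggml_cont (the API for the new GGML_OP_CONT above) materializes a tensor into contiguous memory; ops that assume a contiguous layout need it after strided views such as permute. A sketch, where ctx and k are assumed to exist:

    // ggml_permute only swaps strides and returns a view; ggml_cont then
    // copies the data into a contiguous layout.
    struct ggml_tensor * kt = ggml_permute(ctx, k, 1, 0, 2, 3);
    struct ggml_tensor * kc = ggml_cont(ctx, kt);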
@@ -531,33 +550,43 @@ struct ggml_tensor * ggml_reshape(
 struct ggml_tensor * ggml_reshape_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1);
+        int64_t ne0,
+        int64_t ne1);

 // return view(a)
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape_3d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1,
-        int ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);

 // offset in bytes
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
+        int64_t ne0,
         size_t offset);

 struct ggml_tensor * ggml_view_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1,
+        int64_t ne0,
+        int64_t ne1,
         size_t nb1, // row stride in bytes
         size_t offset);

+struct ggml_tensor * ggml_view_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        size_t nb1, // row stride in bytes
+        size_t nb2, // slice stride in bytes
+        size_t offset);
+
 struct ggml_tensor * ggml_permute(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
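ggml_view_3d completes the view family: llama.cpp-style attention code can slice a 3-D window out of a larger buffer (e.g. a KV cache) without copying. A stride-arithmetic sketch following the nb[] convention documented in struct ggml_tensor; kv, head_dim, n_head, and n_past are assumed variables:

    // View the first n_past slices of a contiguous (head_dim, n_head, n_ctx)
    // tensor. Strides are in bytes: nb1 between rows, nb2 between slices.
    const size_t row_sz = head_dim * ggml_element_size(kv);
    struct ggml_tensor * win = ggml_view_3d(ctx, kv,
            head_dim, n_head, n_past,
            row_sz,          // nb1: row stride
            row_sz * n_head, // nb2: slice stride
            0);              // byte offset into kv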
@@ -628,6 +657,21 @@ struct ggml_tensor * ggml_flash_ff(
         struct ggml_tensor * c0,
         struct ggml_tensor * c1);

+// Mapping operations
+typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+struct ggml_tensor * ggml_map_unary_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        const ggml_unary_op_f32_t fun);
+
+struct ggml_tensor * ggml_map_binary_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        const ggml_binary_op_f32_t fun);
+
 //
 // automatic differentiation
 //
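The map operations splice an arbitrary element-wise float function into the graph without defining a new ggml_op. The callback receives an element count, a destination row, and the source row(s). A minimal sketch with a hypothetical leaky-ReLU (not part of ggml):

    // Matches ggml_unary_op_f32_t: (n, dst, src).
    static void leaky_relu_f32(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; i++) {
            dst[i] = src[i] > 0.0f ? src[i] : 0.01f * src[i];
        }
    }

    // In graph-building code, assuming ctx and x exist:
    struct ggml_tensor * y = ggml_map_unary_f32(ctx, x, leaky_relu_f32);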
@@ -773,6 +817,30 @@ int ggml_cpu_has_blas(void);
 int ggml_cpu_has_sse3(void);
 int ggml_cpu_has_vsx(void);

+
+//
+// Internal types and functions exposed for tests and benchmarks
+//
+
+#ifdef __cplusplus
+// restrict not standard in C++
+#define GGML_RESTRICT
+#else
+#define GGML_RESTRICT restrict
+#endif
+typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
+typedef void (*quantize_row_q_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
+typedef void (*vec_dot_q_t)(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+
+typedef struct {
+    dequantize_row_q_t dequantize_row_q;
+    quantize_row_q_t   quantize_row_q;
+    quantize_row_q_t   quantize_row_q_reference;
+    vec_dot_q_t        vec_dot_q;
+} quantize_fns_t;
+
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
+
 #ifdef __cplusplus
 }
 #endif
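Finally, the quantization kernels are exposed for tests and benchmarks. The index passed to ggml_internal_get_quantize_fn is the ggml_type value, which is another reason the enum values are now pinned. A hedged round-trip sketch (buffer sizes and fill values are illustrative; 64 values is a whole number of Q4_0 blocks):

    #include <stdint.h>

    float src[64], dst[64];
    uint8_t quantized[64]; // ample room: 64 Q4_0 values pack into 2 blocks
    for (int i = 0; i < 64; i++) src[i] = (float) i / 64.0f;

    quantize_fns_t fns = ggml_internal_get_quantize_fn(GGML_TYPE_Q4_0);
    fns.quantize_row_q(src, quantized, 64);
    fns.dequantize_row_q(quantized, dst, 64);
    // dst now holds the lossy reconstruction of src.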