llama_cpp 0.0.2 → 0.0.4

Diff of the bundled ggml.h header between the two releases:
@@ -177,11 +177,12 @@ extern "C" {
 #include <stddef.h>
 #include <stdbool.h>

-#define GGML_MAX_DIMS     4
-#define GGML_MAX_NODES    4096
-#define GGML_MAX_PARAMS   16
-#define GGML_MAX_CONTEXTS 64
-#define GGML_MAX_OPT      4
+#define GGML_MAX_DIMS          4
+#define GGML_MAX_NODES         4096
+#define GGML_MAX_PARAMS        16
+#define GGML_MAX_CONTEXTS      64
+#define GGML_MAX_OPT           4
+#define GGML_DEFAULT_N_THREADS 4

 #ifdef __ARM_NEON
 // we use the built-in 16-bit float type
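
The new GGML_DEFAULT_N_THREADS define gives callers a sensible default for the compute thread count. A minimal caller-side sketch, assuming this vintage's graph API in which ggml_cgraph carries an n_threads field:

    // 'out' is some previously built result tensor
    struct ggml_cgraph gf = ggml_build_forward(out);
    gf.n_threads = GGML_DEFAULT_N_THREADS; // 4, unless the caller overrides it
    ggml_graph_compute(ctx, &gf);
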
@@ -198,13 +199,14 @@ struct ggml_object;
 struct ggml_context;

 enum ggml_type {
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
+    // explicitly numbered values are used in llama.cpp files
+    GGML_TYPE_F32 = 0,
+    GGML_TYPE_F16 = 1,
+    GGML_TYPE_Q4_0 = 2,
+    GGML_TYPE_Q4_1 = 3,
     GGML_TYPE_I8,
     GGML_TYPE_I16,
     GGML_TYPE_I32,
-    GGML_TYPE_F16,
-    GGML_TYPE_F32,
     GGML_TYPE_COUNT,
 };

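The new comment explains the renumbering: llama.cpp model files store the tensor type as a raw integer, so GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1, GGML_TYPE_Q4_0 = 2 and GGML_TYPE_Q4_1 = 3 are part of the on-disk format and must never shift, even though the old enum listed Q4_0 first. A sketch of the read side (this fread-based loader is illustrative, not the actual llama.cpp code):

    int32_t ftype;
    fread(&ftype, sizeof(ftype), 1, fin);         // 'fin' is the open model file
    enum ggml_type type = (enum ggml_type) ftype; // stable: 0=F32, 1=F16, 2=Q4_0, 3=Q4_1
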
@@ -236,6 +238,7 @@ enum ggml_op {

     GGML_OP_SCALE,
     GGML_OP_CPY,
+    GGML_OP_CONT,
     GGML_OP_RESHAPE,
     GGML_OP_VIEW,
     GGML_OP_PERMUTE,
@@ -250,19 +253,35 @@ enum ggml_op {
     GGML_OP_FLASH_ATTN,
     GGML_OP_FLASH_FF,

+    GGML_OP_MAP_UNARY,
+    GGML_OP_MAP_BINARY,
+
     GGML_OP_COUNT,
 };

+
+// ggml object
+struct ggml_object {
+    size_t offs;
+    size_t size;
+
+    struct ggml_object * next;
+
+    char padding[8];
+};
+
+static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
+
 // n-dimensional tensor
 struct ggml_tensor {
     enum ggml_type type;

     int n_dims;
-    int    ne[GGML_MAX_DIMS]; // number of elements
-    size_t nb[GGML_MAX_DIMS]; // stride in bytes:
-                              // nb[0] = sizeof(type)
-                              // nb[1] = nb[0] * ne[0] + padding
-                              // nb[i] = nb[i-1] * ne[i-1]
+    int64_t ne[GGML_MAX_DIMS]; // number of elements
+    size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                               // nb[0] = sizeof(type)
+                               // nb[1] = nb[0] * ne[0] + padding
+                               // nb[i] = nb[i-1] * ne[i-1]

     // compute data
     enum ggml_op op;
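
Widening ne[] to int64_t lets a single tensor hold more than 2^31 elements. The stride comments can be made concrete; assuming a contiguous, unpadded 2-D F32 tensor of shape ne = {3, 4}:

    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 4);
    // nb[0] = sizeof(float) = 4  bytes between neighbors within a row
    // nb[1] = nb[0] * ne[0] = 12 bytes between rows
    // element (i0, i1) lives at data + i0*nb[0] + i1*nb[1]:
    float * p = (float *) ((char *) t->data + 2*t->nb[0] + 1*t->nb[1]); // (2, 1)
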
@@ -328,13 +347,15 @@ int64_t ggml_cycles_per_ms(void);
 void ggml_print_object (const struct ggml_object * obj);
 void ggml_print_objects(const struct ggml_context * ctx);

-int    ggml_nelements(const struct ggml_tensor * tensor);
-size_t ggml_nbytes   (const struct ggml_tensor * tensor);
+int64_t ggml_nelements(const struct ggml_tensor * tensor);
+size_t  ggml_nbytes   (const struct ggml_tensor * tensor);

 int    ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
 float  ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float

+const char * ggml_type_name(enum ggml_type type);
+
 size_t ggml_element_size(const struct ggml_tensor * tensor);

 struct ggml_context * ggml_init(struct ggml_init_params params);
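
ggml_nelements is widened to int64_t to match the new shape type, and the new ggml_type_name helper simplifies diagnostics. A small sketch (note the PRId64 format from <inttypes.h> rather than %d):

    #include <inttypes.h>

    printf("%s: %" PRId64 " elements, %zu bytes\n",
           ggml_type_name(t->type), ggml_nelements(t), ggml_nbytes(t));
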
@@ -344,44 +365,37 @@ size_t ggml_used_mem(const struct ggml_context * ctx);

 size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);

-bool ggml_mlock_supported(void);
-bool ggml_mlock(
-        struct ggml_context * ctx,
-        const void *opt_extra_addr,
-        size_t opt_extra_len,
-        char **err_p);
-
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
-        const int *ne);
+        const int64_t *ne);

 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0);
+        int64_t ne0);

 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0,
-        int ne1);
+        int64_t ne0,
+        int64_t ne1);

 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0,
-        int ne1,
-        int ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);

 struct ggml_tensor * ggml_new_tensor_4d(
         struct ggml_context * ctx,
         enum ggml_type type,
-        int ne0,
-        int ne1,
-        int ne2,
-        int ne3);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3);

 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
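
The constructors follow suit: every dimension parameter, and the shape array taken by the generic ggml_new_tensor, is now int64_t. For example:

    const int64_t ne[2] = { 4096, 4096 };
    struct ggml_tensor * w = ggml_new_tensor(ctx, GGML_TYPE_Q4_0, 2, ne);

Note also that ggml_mlock_supported and ggml_mlock are removed above: as of this range, memory locking is no longer part of ggml's public header.
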
@@ -519,6 +533,11 @@ struct ggml_tensor * ggml_cpy(
         struct ggml_tensor * a,
         struct ggml_tensor * b);

+// make contiguous
+struct ggml_tensor * ggml_cont(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a);
+
 // return view(a), b specifies the new shape
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape(
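
ggml_cont, backed by the new GGML_OP_CONT value, materializes a contiguous copy of a tensor whose strides no longer match its shape. The classic pairing is with ggml_permute, which only rewrites the nb[] strides and leaves the data in place; a sketch:

    struct ggml_tensor * v  = ggml_permute(ctx, a, 0, 2, 1, 3); // non-contiguous view
    struct ggml_tensor * vc = ggml_cont(ctx, v);                // contiguous copy for ops
                                                                // that need packed rows
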
@@ -531,33 +550,43 @@ struct ggml_tensor * ggml_reshape(
 struct ggml_tensor * ggml_reshape_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1);
+        int64_t ne0,
+        int64_t ne1);

 // return view(a)
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape_3d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1,
-        int ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);

 // offset in bytes
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
+        int64_t ne0,
         size_t offset);

 struct ggml_tensor * ggml_view_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1,
+        int64_t ne0,
+        int64_t ne1,
         size_t nb1, // row stride in bytes
         size_t offset);

+struct ggml_tensor * ggml_view_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        size_t nb1, // row stride in bytes
+        size_t nb2, // slice stride in bytes
+        size_t offset);
+
 struct ggml_tensor * ggml_permute(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
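
ggml_view_3d rounds out the view family. One plausible use is slicing a single attention head out of a (head_dim, n_tokens, n_head) tensor without copying; head_dim, n_tokens and h below are illustrative variables, not API names:

    struct ggml_tensor * head = ggml_view_3d(ctx, q,
            head_dim, n_tokens, 1, // ne0, ne1, ne2
            q->nb[1],              // row stride in bytes
            q->nb[2],              // slice stride in bytes
            h * q->nb[2]);         // byte offset of head h
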
@@ -628,6 +657,21 @@ struct ggml_tensor * ggml_flash_ff(
         struct ggml_tensor * c0,
         struct ggml_tensor * c1);

+// Mapping operations
+typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+struct ggml_tensor * ggml_map_unary_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        const ggml_unary_op_f32_t fun);
+
+struct ggml_tensor * ggml_map_binary_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        const ggml_binary_op_f32_t fun);
+
 //
 // automatic differentiation
 //
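
The mapping operations let an application run a custom element-wise function as a regular graph node without patching ggml itself. Judging from the typedefs, the callback receives an element count plus destination and source f32 buffers. A sketch:

    static void square_f32(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; ++i) {
            dst[i] = src[i] * src[i]; // custom element-wise op
        }
    }

    // later, during graph construction:
    struct ggml_tensor * y = ggml_map_unary_f32(ctx, x, square_f32);
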
@@ -773,6 +817,30 @@ int ggml_cpu_has_blas(void);
 int ggml_cpu_has_sse3(void);
 int ggml_cpu_has_vsx(void);

+
+//
+// Internal types and functions exposed for tests and benchmarks
+//
+
+#ifdef __cplusplus
+// restrict not standard in C++
+#define GGML_RESTRICT
+#else
+#define GGML_RESTRICT restrict
+#endif
+typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
+typedef void (*quantize_row_q_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
+typedef void (*vec_dot_q_t)(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+
+typedef struct {
+    dequantize_row_q_t dequantize_row_q;
+    quantize_row_q_t   quantize_row_q;
+    quantize_row_q_t   quantize_row_q_reference;
+    vec_dot_q_t        vec_dot_q;
+} quantize_fns_t;
+
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
+
 #ifdef __cplusplus
 }
 #endif
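
The block at the end exposes one quantization function table per type so tests and benchmarks can exercise the kernels directly. A sketch that round-trips one block of Q4_0 values; the block size of 32 is an assumption here (the QK constant of this ggml vintage), not something the header states:

    quantize_fns_t fns = ggml_internal_get_quantize_fn(GGML_TYPE_Q4_0); // index 2, per the enum

    float   src[32] = { 0 };             // one block of input values
    float   dst[32];
    uint8_t blk[32];                     // ample: a Q4_0 block packs 32 values into ~20 bytes
    fns.quantize_row_q  (src, blk, 32);  // f32 -> quantized block
    fns.dequantize_row_q(blk, dst, 32);  // quantized block -> f32
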