llama_cpp 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -177,11 +177,12 @@ extern "C" {
177
177
  #include <stddef.h>
178
178
  #include <stdbool.h>
179
179
 
180
- #define GGML_MAX_DIMS 4
181
- #define GGML_MAX_NODES 4096
182
- #define GGML_MAX_PARAMS 16
183
- #define GGML_MAX_CONTEXTS 64
184
- #define GGML_MAX_OPT 4
180
+ #define GGML_MAX_DIMS 4
181
+ #define GGML_MAX_NODES 4096
182
+ #define GGML_MAX_PARAMS 16
183
+ #define GGML_MAX_CONTEXTS 64
184
+ #define GGML_MAX_OPT 4
185
+ #define GGML_DEFAULT_N_THREADS 4
185
186
 
186
187
  #ifdef __ARM_NEON
187
188
  // we use the built-in 16-bit float type
@@ -198,13 +199,14 @@ struct ggml_object;
198
199
  struct ggml_context;
199
200
 
200
201
  enum ggml_type {
201
- GGML_TYPE_Q4_0,
202
- GGML_TYPE_Q4_1,
202
+ // explicitly numbered values are used in llama.cpp files
203
+ GGML_TYPE_F32 = 0,
204
+ GGML_TYPE_F16 = 1,
205
+ GGML_TYPE_Q4_0 = 2,
206
+ GGML_TYPE_Q4_1 = 3,
203
207
  GGML_TYPE_I8,
204
208
  GGML_TYPE_I16,
205
209
  GGML_TYPE_I32,
206
- GGML_TYPE_F16,
207
- GGML_TYPE_F32,
208
210
  GGML_TYPE_COUNT,
209
211
  };
210
212
 
@@ -236,6 +238,7 @@ enum ggml_op {
236
238
 
237
239
  GGML_OP_SCALE,
238
240
  GGML_OP_CPY,
241
+ GGML_OP_CONT,
239
242
  GGML_OP_RESHAPE,
240
243
  GGML_OP_VIEW,
241
244
  GGML_OP_PERMUTE,
@@ -250,9 +253,25 @@ enum ggml_op {
250
253
  GGML_OP_FLASH_ATTN,
251
254
  GGML_OP_FLASH_FF,
252
255
 
256
+ GGML_OP_MAP_UNARY,
257
+ GGML_OP_MAP_BINARY,
258
+
253
259
  GGML_OP_COUNT,
254
260
  };
255
261
 
262
+
263
+ // ggml object
264
+ struct ggml_object {
265
+ size_t offs;
266
+ size_t size;
267
+
268
+ struct ggml_object * next;
269
+
270
+ char padding[8];
271
+ };
272
+
273
+ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
274
+
256
275
  // n-dimensional tensor
257
276
  struct ggml_tensor {
258
277
  enum ggml_type type;
@@ -335,6 +354,8 @@ int ggml_blck_size (enum ggml_type type);
335
354
  size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
336
355
  float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
337
356
 
357
+ const char * ggml_type_name(enum ggml_type type);
358
+
338
359
  size_t ggml_element_size(const struct ggml_tensor * tensor);
339
360
 
340
361
  struct ggml_context * ggml_init(struct ggml_init_params params);
@@ -344,13 +365,6 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
344
365
 
345
366
  size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
346
367
 
347
- bool ggml_mlock_supported(void);
348
- bool ggml_mlock(
349
- struct ggml_context * ctx,
350
- const void *opt_extra_addr,
351
- size_t opt_extra_len,
352
- char **err_p);
353
-
354
368
  struct ggml_tensor * ggml_new_tensor(
355
369
  struct ggml_context * ctx,
356
370
  enum ggml_type type,
@@ -519,6 +533,11 @@ struct ggml_tensor * ggml_cpy(
519
533
  struct ggml_tensor * a,
520
534
  struct ggml_tensor * b);
521
535
 
536
+ // make contiguous
537
+ struct ggml_tensor * ggml_cont(
538
+ struct ggml_context * ctx,
539
+ struct ggml_tensor * a);
540
+
522
541
  // return view(a), b specifies the new shape
523
542
  // TODO: when we start computing gradient, make a copy instead of view
524
543
  struct ggml_tensor * ggml_reshape(
@@ -638,6 +657,21 @@ struct ggml_tensor * ggml_flash_ff(
638
657
  struct ggml_tensor * c0,
639
658
  struct ggml_tensor * c1);
640
659
 
660
+ // Mapping operations
661
+ typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
662
+ typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
663
+
664
+ struct ggml_tensor * ggml_map_unary_f32(
665
+ struct ggml_context * ctx,
666
+ struct ggml_tensor * a,
667
+ const ggml_unary_op_f32_t fun);
668
+
669
+ struct ggml_tensor * ggml_map_binary_f32(
670
+ struct ggml_context * ctx,
671
+ struct ggml_tensor * a,
672
+ struct ggml_tensor * b,
673
+ const ggml_binary_op_f32_t fun);
674
+
641
675
  //
642
676
  // automatic differentiation
643
677
  //
@@ -783,6 +817,30 @@ int ggml_cpu_has_blas(void);
783
817
  int ggml_cpu_has_sse3(void);
784
818
  int ggml_cpu_has_vsx(void);
785
819
 
820
+
821
+ //
822
+ // Internal types and functions exposed for tests and benchmarks
823
+ //
824
+
825
+ #ifdef __cplusplus
826
+ // restrict not standard in C++
827
+ #define GGML_RESTRICT
828
+ #else
829
+ #define GGML_RESTRICT restrict
830
+ #endif
831
+ typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
832
+ typedef void (*quantize_row_q_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
833
+ typedef void (*vec_dot_q_t)(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
834
+
835
+ typedef struct {
836
+ dequantize_row_q_t dequantize_row_q;
837
+ quantize_row_q_t quantize_row_q;
838
+ quantize_row_q_t quantize_row_q_reference;
839
+ vec_dot_q_t vec_dot_q;
840
+ } quantize_fns_t;
841
+
842
+ quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
843
+
786
844
  #ifdef __cplusplus
787
845
  }
788
846
  #endif