llama_cpp 0.0.1 → 0.0.3

This diff shows the changes between publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -258,11 +258,11 @@ struct ggml_tensor {
     enum ggml_type type;
 
     int    n_dims;
-    int    ne[GGML_MAX_DIMS]; // number of elements
-    size_t nb[GGML_MAX_DIMS]; // stride in bytes:
-                              // nb[0] = sizeof(type)
-                              // nb[1] = nb[0]   * ne[0] + padding
-                              // nb[i] = nb[i-1] * ne[i-1]
+    int64_t ne[GGML_MAX_DIMS]; // number of elements
+    size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                               // nb[0] = sizeof(type)
+                               // nb[1] = nb[0]   * ne[0] + padding
+                               // nb[i] = nb[i-1] * ne[i-1]
 
     // compute data
     enum ggml_op op;
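
Widening ne from int to int64_t lets per-dimension element counts, and products of them, exceed the 32-bit range. The stride rule in the comments translates directly into code; the following is a minimal sketch (compute_strides is a hypothetical helper, not part of ggml.h, and the padding mentioned for nb[1] is ignored):

    #include <stddef.h>
    #include <stdint.h>

    #define GGML_MAX_DIMS 4

    // Fill the byte strides nb[] from the element counts ne[] using the
    // rule documented above: nb[0] = sizeof(type), nb[i] = nb[i-1] * ne[i-1].
    static void compute_strides(size_t type_size,
                                const int64_t ne[GGML_MAX_DIMS],
                                size_t nb[GGML_MAX_DIMS]) {
        nb[0] = type_size;
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
            nb[i] = nb[i-1] * (size_t) ne[i-1];
        }
    }
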
@@ -316,6 +316,7 @@ struct ggml_init_params {
     // memory pool
     size_t mem_size;   // bytes
     void * mem_buffer; // if NULL, memory will be allocated internally
+    bool   no_alloc;   // don't allocate memory for the tensor data
 };
 
 void ggml_time_init(void); // call this once at the beginning of the program
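
A hedged usage sketch of the new no_alloc flag (assuming ggml_init as declared elsewhere in this header; the field's comment is the only documented behavior, and the interpretation that the caller then assigns tensor data pointers itself is an assumption):

    #include <stdbool.h>
    #include <stddef.h>

    // Sketch: with no_alloc set, the context pool only needs to hold tensor
    // metadata; tensor data is not allocated, leaving the caller free to
    // point tensors at externally managed memory.
    struct ggml_init_params params = {
        .mem_size   = 16 * 1024 * 1024, // bytes for the context pool
        .mem_buffer = NULL,             // NULL: ggml allocates the pool itself
        .no_alloc   = true,             // don't allocate memory for tensor data
    };
    struct ggml_context * ctx = ggml_init(params);
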
@@ -327,8 +328,8 @@ int64_t ggml_cycles_per_ms(void);
 void ggml_print_object (const struct ggml_object * obj);
 void ggml_print_objects(const struct ggml_context * ctx);
 
-int     ggml_nelements(const struct ggml_tensor * tensor);
-size_t  ggml_nbytes   (const struct ggml_tensor * tensor);
+int64_t ggml_nelements(const struct ggml_tensor * tensor);
+size_t  ggml_nbytes   (const struct ggml_tensor * tensor);
 
 int    ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
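
Because ggml_nelements now returns int64_t, callers that stored the count in an int should widen it and adjust printf formats to match. A small illustrative sketch (print_tensor_size is hypothetical):

    #include <inttypes.h>
    #include <stdio.h>

    // The element count of a large model tensor no longer fits in 32 bits,
    // so keep it in int64_t and print it with PRId64.
    void print_tensor_size(const struct ggml_tensor * t) {
        int64_t n = ggml_nelements(t);
        printf("elements: %" PRId64 ", bytes: %zu\n", n, ggml_nbytes(t));
    }
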
@@ -344,39 +345,43 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
 size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
 
 bool ggml_mlock_supported(void);
-bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
+bool ggml_mlock(
+        struct ggml_context * ctx,
+        const void *opt_extra_addr,
+        size_t opt_extra_len,
+        char **err_p);
 
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum   ggml_type type,
         int    n_dims,
-        const int *ne);
+        const int64_t *ne);
 
 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0);
+        int64_t ne0);
 
 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1);
+        int64_t ne0,
+        int64_t ne1);
 
 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1,
-        int    ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);
 
 struct ggml_tensor * ggml_new_tensor_4d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1,
-        int    ne2,
-        int    ne3);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3);
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
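
The reworked ggml_mlock takes an optional extra address range to pin alongside the context's own buffer, and the tensor constructors now take int64_t dimensions. A hedged sketch of the updated call sites (passing NULL/0 for the optional range is an assumption suggested by the opt_ prefix, not documented behavior; lock_and_alloc is a hypothetical wrapper):

    #include <stdint.h>
    #include <stdio.h>

    void lock_and_alloc(struct ggml_context * ctx) {
        char * err = NULL;
        // Assumption: NULL/0 means "no extra range, lock only the context pool".
        if (ggml_mlock_supported() && !ggml_mlock(ctx, NULL, 0, &err)) {
            fprintf(stderr, "ggml_mlock failed: %s\n", err);
        }

        // Dimensions are now 64-bit; the array form and the _2d helper
        // below create the same 4096 x 4096 F32 tensor.
        const int64_t ne[2] = { 4096, 4096 };
        struct ggml_tensor * a = ggml_new_tensor   (ctx, GGML_TYPE_F32, 2, ne);
        struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 4096);
        (void) a; (void) b;
    }
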
@@ -526,33 +531,43 @@ struct ggml_tensor * ggml_reshape(
 struct ggml_tensor * ggml_reshape_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
-        int    ne1);
+        int64_t ne0,
+        int64_t ne1);
 
 // return view(a)
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape_3d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
-        int    ne1,
-        int    ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);
 
 // offset in bytes
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
+        int64_t ne0,
         size_t offset);
 
 struct ggml_tensor * ggml_view_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
-        int    ne1,
+        int64_t ne0,
+        int64_t ne1,
         size_t nb1, // row stride in bytes
         size_t offset);
 
+struct ggml_tensor * ggml_view_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        size_t nb1, // row stride in bytes
+        size_t nb2, // slice stride in bytes
+        size_t offset);
+
 struct ggml_tensor * ggml_permute(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
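
The new ggml_view_3d mirrors the 1-D and 2-D variants, adding a slice stride. A sketch of carving a 3-D view out of an existing tensor, reusing the parent's strides so the view walks the same memory layout (view_as_3d is illustrative, not part of the package):

    // View the whole of tensor t as 3-D: the extents come from t->ne, the
    // row and slice strides from t->nb, and the byte offset is 0.
    struct ggml_tensor * view_as_3d(struct ggml_context * ctx,
                                    struct ggml_tensor * t) {
        return ggml_view_3d(ctx, t,
                t->ne[0], t->ne[1], t->ne[2],
                t->nb[1],  // row stride in bytes
                t->nb[2],  // slice stride in bytes
                0);        // offset in bytes
    }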