llama_cpp 0.9.3 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9611,10 +9611,12 @@ static void ggml_compute_forward_out_prod_f32(
9611
9611
  const int ith = params->ith;
9612
9612
  const int nth = params->nth;
9613
9613
 
9614
+ GGML_ASSERT(ne0 == ne00);
9615
+ GGML_ASSERT(ne1 == ne10);
9616
+ GGML_ASSERT(ne2 == ne02);
9614
9617
  GGML_ASSERT(ne02 == ne12);
9615
- GGML_ASSERT(ne03 == ne13);
9616
- GGML_ASSERT(ne2 == ne12);
9617
9618
  GGML_ASSERT(ne3 == ne13);
9619
+ GGML_ASSERT(ne03 == ne13);
9618
9620
 
9619
9621
  // we don't support permuted src0 or src1
9620
9622
  GGML_ASSERT(nb00 == sizeof(float));
@@ -9625,18 +9627,25 @@ static void ggml_compute_forward_out_prod_f32(
9625
9627
  // GGML_ASSERT(nb1 <= nb2);
9626
9628
  // GGML_ASSERT(nb2 <= nb3);
9627
9629
 
9628
- GGML_ASSERT(ne0 == ne00);
9629
- GGML_ASSERT(ne1 == ne10);
9630
- GGML_ASSERT(ne2 == ne02);
9631
- GGML_ASSERT(ne3 == ne03);
9632
-
9633
9630
  // nb01 >= nb00 - src0 is not transposed
9634
9631
  // compute by src0 rows
9635
9632
 
9636
9633
  // TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
9637
- // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
9634
+ // TODO: #if defined(GGML_USE_CLBLAST)
9635
+
9636
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
9637
+ bool use_blas = ggml_is_matrix(src0) &&
9638
+ ggml_is_matrix(src1) &&
9639
+ ggml_is_contiguous(src0) &&
9640
+ (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
9641
+ #endif
9638
9642
 
9639
9643
  if (params->type == GGML_TASK_INIT) {
9644
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
9645
+ if (use_blas) {
9646
+ return;
9647
+ }
9648
+ #endif
9640
9649
  ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
9641
9650
  return;
9642
9651
  }
@@ -9645,6 +9654,50 @@ static void ggml_compute_forward_out_prod_f32(
9645
9654
  return;
9646
9655
  }
9647
9656
 
9657
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
9658
+ if (use_blas) {
9659
+ if (params->ith != 0) { // All threads other than the first do no work.
9660
+ return;
9661
+ }
9662
+ // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
9663
+ // src0: (k,n)
9664
+ // src1: (k,m)
9665
+ // dst: (m,n)
9666
+ //
9667
+ // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
9668
+ // Also expressed as (major,minor)
9669
+ // a: (m,k): so src1 transposed
9670
+ // b: (k,n): so src0
9671
+ // c: (m,n)
9672
+ //
9673
+ // However, if ggml_is_transposed(src1) is true, then
9674
+ // src1->data already contains a transposed version, so sgemm mustn't
9675
+ // transpose it further.
9676
+
9677
+ int n = src0->ne[0];
9678
+ int k = src0->ne[1];
9679
+ int m = src1->ne[0];
9680
+
9681
+ int transposeA, lda;
9682
+
9683
+ if (!ggml_is_transposed(src1)) {
9684
+ transposeA = CblasTrans;
9685
+ lda = m;
9686
+ } else {
9687
+ transposeA = CblasNoTrans;
9688
+ lda = k;
9689
+ }
9690
+
9691
+ float * a = (float *) ((char *) src1->data);
9692
+ float * b = (float *) ((char *) src0->data);
9693
+ float * c = (float *) ((char *) dst->data);
9694
+
9695
+ cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
9696
+
9697
+ return;
9698
+ }
9699
+ #endif
9700
+
9648
9701
  // dst[:,:,:,:] = 0
9649
9702
  // for i2,i3:
9650
9703
  // for i1:
@@ -18399,24 +18452,29 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
18399
18452
  }
18400
18453
 
18401
18454
  const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
18455
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18402
18456
  return ctx->kv[key_id].key.data;
18403
18457
  }
18404
18458
 
18405
18459
  enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
18460
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18406
18461
  return ctx->kv[key_id].type;
18407
18462
  }
18408
18463
 
18409
18464
  enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
18465
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18410
18466
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
18411
18467
  return ctx->kv[key_id].value.arr.type;
18412
18468
  }
18413
18469
 
18414
18470
  const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
18471
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18415
18472
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
18416
18473
  return ctx->kv[key_id].value.arr.data;
18417
18474
  }
18418
18475
 
18419
18476
  const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
18477
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18420
18478
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
18421
18479
  struct gguf_kv * kv = &ctx->kv[key_id];
18422
18480
  struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
@@ -18424,70 +18482,90 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i
18424
18482
  }
18425
18483
 
18426
18484
  int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
18485
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18427
18486
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
18428
18487
  return ctx->kv[key_id].value.arr.n;
18429
18488
  }
18430
18489
 
18431
18490
  uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
18491
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18432
18492
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
18433
18493
  return ctx->kv[key_id].value.uint8;
18434
18494
  }
18435
18495
 
18436
18496
  int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
18497
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18437
18498
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
18438
18499
  return ctx->kv[key_id].value.int8;
18439
18500
  }
18440
18501
 
18441
18502
  uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
18503
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18442
18504
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
18443
18505
  return ctx->kv[key_id].value.uint16;
18444
18506
  }
18445
18507
 
18446
18508
  int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
18509
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18447
18510
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
18448
18511
  return ctx->kv[key_id].value.int16;
18449
18512
  }
18450
18513
 
18451
18514
  uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
18515
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18452
18516
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
18453
18517
  return ctx->kv[key_id].value.uint32;
18454
18518
  }
18455
18519
 
18456
18520
  int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
18521
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18457
18522
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
18458
18523
  return ctx->kv[key_id].value.int32;
18459
18524
  }
18460
18525
 
18461
18526
  float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
18527
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18462
18528
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
18463
18529
  return ctx->kv[key_id].value.float32;
18464
18530
  }
18465
18531
 
18466
18532
  uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
18533
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18467
18534
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
18468
18535
  return ctx->kv[key_id].value.uint64;
18469
18536
  }
18470
18537
 
18471
18538
  int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
18539
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18472
18540
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
18473
18541
  return ctx->kv[key_id].value.int64;
18474
18542
  }
18475
18543
 
18476
18544
  double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
18545
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18477
18546
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
18478
18547
  return ctx->kv[key_id].value.float64;
18479
18548
  }
18480
18549
 
18481
18550
  bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
18551
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18482
18552
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
18483
18553
  return ctx->kv[key_id].value.bool_;
18484
18554
  }
18485
18555
 
18486
18556
  const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
18557
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18487
18558
  GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
18488
18559
  return ctx->kv[key_id].value.str.data;
18489
18560
  }
18490
18561
 
18562
+ const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
18563
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
18564
+ GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
18565
+ GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
18566
+ return &ctx->kv[key_id].value;
18567
+ }
18568
+
18491
18569
  int gguf_get_n_tensors(const struct gguf_context * ctx) {
18492
18570
  return ctx->header.n_tensors;
18493
18571
  }
@@ -2045,6 +2045,7 @@ extern "C" {
2045
2045
  GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
2046
2046
  GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
2047
2047
  GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
2048
+ GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
2048
2049
  GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
2049
2050
  GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2050
2051
  GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);