llama_cpp 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -7
- data/ext/llama_cpp/extconf.rb +1 -2
- data/ext/llama_cpp/src/ggml-opencl.cpp +1028 -0
- data/ext/llama_cpp/src/ggml-opencl.h +8 -10
- data/ext/llama_cpp/src/ggml.c +568 -57
- data/ext/llama_cpp/src/ggml.h +21 -2
- data/ext/llama_cpp/src/llama.cpp +37 -2
- data/ext/llama_cpp/src/llama.h +5 -0
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -3
- data/ext/llama_cpp/src/ggml-opencl.c +0 -474
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -186,10 +186,12 @@ typedef double ggml_float;
|
|
186
186
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
187
187
|
#include <intrin.h>
|
188
188
|
#else
|
189
|
+
#if !defined(__riscv)
|
189
190
|
#include <immintrin.h>
|
190
191
|
#endif
|
191
192
|
#endif
|
192
193
|
#endif
|
194
|
+
#endif
|
193
195
|
|
194
196
|
#ifdef __F16C__
|
195
197
|
|
@@ -3494,7 +3496,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
|
|
3494
3496
|
};
|
3495
3497
|
static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
|
3496
3498
|
|
3497
|
-
static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
|
3499
|
+
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
3498
3500
|
"NONE",
|
3499
3501
|
|
3500
3502
|
"DUP",
|
@@ -3749,6 +3751,9 @@ const char * ggml_type_name(enum ggml_type type) {
|
|
3749
3751
|
return GGML_TYPE_NAME[type];
|
3750
3752
|
}
|
3751
3753
|
|
3754
|
+
const char * ggml_op_name(enum ggml_op op) {
|
3755
|
+
return GGML_OP_NAME[op];
|
3756
|
+
}
|
3752
3757
|
|
3753
3758
|
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
3754
3759
|
return GGML_TYPE_SIZE[tensor->type];
|
@@ -3805,6 +3810,10 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
3805
3810
|
return wtype;
|
3806
3811
|
}
|
3807
3812
|
|
3813
|
+
size_t ggml_tensor_overhead(void) {
|
3814
|
+
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
|
3815
|
+
}
|
3816
|
+
|
3808
3817
|
static inline bool ggml_is_transposed(const struct ggml_tensor * tensor) {
|
3809
3818
|
return tensor->nb[0] > tensor->nb[1];
|
3810
3819
|
}
|
@@ -4017,6 +4026,18 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|
4017
4026
|
return result;
|
4018
4027
|
}
|
4019
4028
|
|
4029
|
+
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
|
4030
|
+
ctx->no_alloc = no_alloc;
|
4031
|
+
}
|
4032
|
+
|
4033
|
+
void * ggml_get_mem_buffer(struct ggml_context * ctx) {
|
4034
|
+
return ctx->mem_buffer;
|
4035
|
+
}
|
4036
|
+
|
4037
|
+
size_t ggml_get_mem_size(struct ggml_context * ctx) {
|
4038
|
+
return ctx->mem_size;
|
4039
|
+
}
|
4040
|
+
|
4020
4041
|
// IMPORTANT:
|
4021
4042
|
// when creating "opt" tensors, always save and load the scratch buffer
|
4022
4043
|
// this is an error prone process, but it is necessary to support inplace
|
@@ -4061,7 +4082,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4061
4082
|
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
4062
4083
|
|
4063
4084
|
if (ctx->scratch.data == NULL || data != NULL) {
|
4064
|
-
size_needed += sizeof(struct ggml_tensor);
|
4085
|
+
size_needed += GGML_TENSOR_SIZE;
|
4065
4086
|
|
4066
4087
|
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4067
4088
|
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
@@ -4077,14 +4098,15 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4077
4098
|
};
|
4078
4099
|
} else {
|
4079
4100
|
if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
|
4080
|
-
GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
|
4101
|
+
GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
|
4102
|
+
__func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
|
4081
4103
|
assert(false);
|
4082
4104
|
return NULL;
|
4083
4105
|
}
|
4084
4106
|
|
4085
|
-
if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4107
|
+
if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4086
4108
|
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
4087
|
-
__func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
|
4109
|
+
__func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
|
4088
4110
|
assert(false);
|
4089
4111
|
return NULL;
|
4090
4112
|
}
|
@@ -4093,7 +4115,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4093
4115
|
|
4094
4116
|
*obj_new = (struct ggml_object) {
|
4095
4117
|
.offs = cur_end + GGML_OBJECT_SIZE,
|
4096
|
-
.size = sizeof(struct ggml_tensor),
|
4118
|
+
.size = GGML_TENSOR_SIZE,
|
4097
4119
|
.next = NULL,
|
4098
4120
|
};
|
4099
4121
|
|
@@ -4509,6 +4531,23 @@ struct ggml_tensor * ggml_view_tensor(
|
|
4509
4531
|
return result;
|
4510
4532
|
}
|
4511
4533
|
|
4534
|
+
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
|
4535
|
+
struct ggml_object * obj = ctx->objects_begin;
|
4536
|
+
|
4537
|
+
char * const mem_buffer = ctx->mem_buffer;
|
4538
|
+
|
4539
|
+
while (obj != NULL) {
|
4540
|
+
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
4541
|
+
if (strcmp(cur->name, name) == 0) {
|
4542
|
+
return cur;
|
4543
|
+
}
|
4544
|
+
|
4545
|
+
obj = obj->next;
|
4546
|
+
}
|
4547
|
+
|
4548
|
+
return NULL;
|
4549
|
+
}
|
4550
|
+
|
4512
4551
|
////////////////////////////////////////////////////////////////////////////////
|
4513
4552
|
|
4514
4553
|
// ggml_dup
|
@@ -6303,7 +6342,7 @@ struct ggml_tensor * ggml_alibi(
|
|
6303
6342
|
|
6304
6343
|
ggml_scratch_save(ctx);
|
6305
6344
|
|
6306
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6345
|
+
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
6307
6346
|
|
6308
6347
|
((int32_t *) b->data)[0] = n_past;
|
6309
6348
|
((int32_t *) b->data)[1] = n_head;
|
@@ -9431,7 +9470,7 @@ static void ggml_compute_forward_rms_norm_back(
|
|
9431
9470
|
|
9432
9471
|
// ggml_compute_forward_mul_mat
|
9433
9472
|
|
9434
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
9473
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9435
9474
|
// helper function to determine if it is better to use BLAS or not
|
9436
9475
|
// for large matrices, BLAS is faster
|
9437
9476
|
static bool ggml_compute_forward_mul_mat_use_blas(
|
@@ -9472,7 +9511,7 @@ static void ggml_compute_forward_mul_mat_f32(
|
|
9472
9511
|
const int64_t ne02 = src0->ne[2];
|
9473
9512
|
const int64_t ne03 = src0->ne[3];
|
9474
9513
|
|
9475
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
9514
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9476
9515
|
const int64_t ne10 = src1->ne[0];
|
9477
9516
|
#endif
|
9478
9517
|
const int64_t ne11 = src1->ne[1];
|
@@ -9536,9 +9575,16 @@ static void ggml_compute_forward_mul_mat_f32(
|
|
9536
9575
|
}
|
9537
9576
|
return;
|
9538
9577
|
}
|
9578
|
+
#elif defined(GGML_USE_CLBLAST)
|
9579
|
+
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
9580
|
+
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
|
9581
|
+
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
9582
|
+
}
|
9583
|
+
return;
|
9584
|
+
}
|
9539
9585
|
#endif
|
9540
9586
|
|
9541
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
9587
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9542
9588
|
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
9543
9589
|
if (params->ith != 0) {
|
9544
9590
|
return;
|
@@ -9558,21 +9604,11 @@ static void ggml_compute_forward_mul_mat_f32(
|
|
9558
9604
|
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
|
9559
9605
|
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
9560
9606
|
|
9561
|
-
#if defined(GGML_USE_CLBLAST)
|
9562
|
-
// zT = y * xT
|
9563
|
-
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
|
9564
|
-
ne11, ne01, ne10,
|
9565
|
-
1.0f, y, ne10,
|
9566
|
-
x, ne10,
|
9567
|
-
0.0f, d, ne01,
|
9568
|
-
GGML_TYPE_F32);
|
9569
|
-
#else
|
9570
9607
|
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
9571
9608
|
ne11, ne01, ne10,
|
9572
9609
|
1.0f, y, ne10,
|
9573
9610
|
x, ne00,
|
9574
9611
|
0.0f, d, ne01);
|
9575
|
-
#endif
|
9576
9612
|
}
|
9577
9613
|
}
|
9578
9614
|
//printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
|
@@ -9711,9 +9747,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|
9711
9747
|
}
|
9712
9748
|
return;
|
9713
9749
|
}
|
9750
|
+
#elif defined(GGML_USE_CLBLAST)
|
9751
|
+
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
9752
|
+
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
|
9753
|
+
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
9754
|
+
}
|
9755
|
+
return;
|
9756
|
+
}
|
9714
9757
|
#endif
|
9715
9758
|
|
9716
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
9759
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9717
9760
|
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
9718
9761
|
GGML_ASSERT(nb10 == sizeof(float));
|
9719
9762
|
|
@@ -9743,20 +9786,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|
9743
9786
|
assert(id*sizeof(float) <= params->wsize);
|
9744
9787
|
}
|
9745
9788
|
|
9746
|
-
#if defined(GGML_USE_CLBLAST)
|
9747
|
-
const float * x = wdata;
|
9748
|
-
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
|
9749
|
-
|
9750
|
-
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
9751
|
-
|
9752
|
-
// zT = y * xT
|
9753
|
-
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
|
9754
|
-
ne11, ne01, ne10,
|
9755
|
-
1.0f, y, ne10,
|
9756
|
-
x, ne10,
|
9757
|
-
0.0f, d, ne01,
|
9758
|
-
GGML_TYPE_F32);
|
9759
|
-
#else
|
9760
9789
|
const float * x = wdata;
|
9761
9790
|
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
|
9762
9791
|
|
@@ -9768,7 +9797,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|
9768
9797
|
1.0f, y, ne10,
|
9769
9798
|
x, ne00,
|
9770
9799
|
0.0f, d, ne01);
|
9771
|
-
#endif
|
9772
9800
|
}
|
9773
9801
|
}
|
9774
9802
|
|
@@ -9931,9 +9959,16 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|
9931
9959
|
}
|
9932
9960
|
return;
|
9933
9961
|
}
|
9962
|
+
#elif defined(GGML_USE_CLBLAST)
|
9963
|
+
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
9964
|
+
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
|
9965
|
+
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
9966
|
+
}
|
9967
|
+
return;
|
9968
|
+
}
|
9934
9969
|
#endif
|
9935
9970
|
|
9936
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
9971
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9937
9972
|
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
9938
9973
|
if (params->ith != 0) {
|
9939
9974
|
return;
|
@@ -9956,9 +9991,6 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|
9956
9991
|
|
9957
9992
|
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
9958
9993
|
|
9959
|
-
#if defined(GGML_USE_CLBLAST)
|
9960
|
-
const void* x = (char *) src0->data + i03*nb03 + i02*nb02;
|
9961
|
-
#else
|
9962
9994
|
{
|
9963
9995
|
size_t id = 0;
|
9964
9996
|
for (int64_t i01 = 0; i01 < ne01; ++i01) {
|
@@ -9970,23 +10002,12 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|
9970
10002
|
}
|
9971
10003
|
|
9972
10004
|
const float * x = wdata;
|
9973
|
-
#endif
|
9974
10005
|
|
9975
|
-
#if defined(GGML_USE_CLBLAST)
|
9976
|
-
// zT = y * xT
|
9977
|
-
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
|
9978
|
-
ne11, ne01, ne10,
|
9979
|
-
1.0f, y, ne10,
|
9980
|
-
x, ne10,
|
9981
|
-
0.0f, d, ne01,
|
9982
|
-
type);
|
9983
|
-
#else
|
9984
10006
|
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
9985
10007
|
ne11, ne01, ne10,
|
9986
10008
|
1.0f, y, ne10,
|
9987
10009
|
x, ne00,
|
9988
10010
|
0.0f, d, ne01);
|
9989
|
-
#endif
|
9990
10011
|
}
|
9991
10012
|
}
|
9992
10013
|
|
@@ -13810,11 +13831,19 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
13810
13831
|
// reached a leaf node, not part of the gradient graph (e.g. a constant)
|
13811
13832
|
GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
|
13812
13833
|
|
13834
|
+
if (strlen(node->name) == 0) {
|
13835
|
+
snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
|
13836
|
+
}
|
13837
|
+
|
13813
13838
|
cgraph->leafs[cgraph->n_leafs] = node;
|
13814
13839
|
cgraph->n_leafs++;
|
13815
13840
|
} else {
|
13816
13841
|
GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
|
13817
13842
|
|
13843
|
+
if (strlen(node->name) == 0) {
|
13844
|
+
snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
|
13845
|
+
}
|
13846
|
+
|
13818
13847
|
cgraph->nodes[cgraph->n_nodes] = node;
|
13819
13848
|
cgraph->grads[cgraph->n_nodes] = node->grad;
|
13820
13849
|
cgraph->n_nodes++;
|
@@ -14165,9 +14194,16 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|
14165
14194
|
cur = ggml_cuda_mul_mat_get_wsize(node->src0, node->src1, node);
|
14166
14195
|
}
|
14167
14196
|
else
|
14197
|
+
#elif defined(GGML_USE_CLBLAST)
|
14198
|
+
if (ggml_cl_can_mul_mat(node->src0, node->src1, node)) {
|
14199
|
+
node->n_tasks = 1; // TODO: this actually is doing nothing
|
14200
|
+
// the threads are still spinning
|
14201
|
+
cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
|
14202
|
+
}
|
14203
|
+
else
|
14168
14204
|
#endif
|
14169
14205
|
if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) {
|
14170
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
14206
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14171
14207
|
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
14172
14208
|
node->n_tasks = 1; // TODO: this actually is doing nothing
|
14173
14209
|
// the threads are still spinning
|
@@ -14181,13 +14217,13 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|
14181
14217
|
#endif
|
14182
14218
|
} else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
|
14183
14219
|
cur = 0;
|
14184
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
14220
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14185
14221
|
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
14186
14222
|
node->n_tasks = 1;
|
14187
14223
|
}
|
14188
14224
|
#endif
|
14189
14225
|
} else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) {
|
14190
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
14226
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14191
14227
|
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
14192
14228
|
node->n_tasks = 1;
|
14193
14229
|
cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
|
@@ -14521,6 +14557,481 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
14521
14557
|
}
|
14522
14558
|
}
|
14523
14559
|
|
14560
|
+
struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
|
14561
|
+
for (int i = 0; i < cgraph->n_leafs; i++) {
|
14562
|
+
struct ggml_tensor * leaf = cgraph->leafs[i];
|
14563
|
+
|
14564
|
+
if (strcmp(leaf->name, name) == 0) {
|
14565
|
+
return leaf;
|
14566
|
+
}
|
14567
|
+
}
|
14568
|
+
|
14569
|
+
for (int i = 0; i < cgraph->n_nodes; i++) {
|
14570
|
+
struct ggml_tensor * node = cgraph->nodes[i];
|
14571
|
+
|
14572
|
+
if (strcmp(node->name, name) == 0) {
|
14573
|
+
return node;
|
14574
|
+
}
|
14575
|
+
}
|
14576
|
+
|
14577
|
+
return NULL;
|
14578
|
+
}
|
14579
|
+
|
14580
|
+
static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) {
|
14581
|
+
const int64_t * ne = tensor->ne;
|
14582
|
+
const size_t * nb = tensor->nb;
|
14583
|
+
|
14584
|
+
fprintf(fout, "%-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %16p %16s\n",
|
14585
|
+
ggml_type_name(tensor->type),
|
14586
|
+
ggml_op_name (tensor->op),
|
14587
|
+
tensor->n_dims,
|
14588
|
+
ne[0], ne[1], ne[2], ne[3],
|
14589
|
+
nb[0], nb[1], nb[2], nb[3],
|
14590
|
+
tensor->data,
|
14591
|
+
tensor->name);
|
14592
|
+
}
|
14593
|
+
|
14594
|
+
static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) {
|
14595
|
+
const int64_t * ne = tensor->ne;
|
14596
|
+
const size_t * nb = tensor->nb;
|
14597
|
+
|
14598
|
+
fprintf(fout, "%-6s %-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %8d %16p %16s\n",
|
14599
|
+
arg,
|
14600
|
+
ggml_type_name(tensor->type),
|
14601
|
+
ggml_op_name (tensor->op),
|
14602
|
+
tensor->n_dims,
|
14603
|
+
ne[0], ne[1], ne[2], ne[3],
|
14604
|
+
nb[0], nb[1], nb[2], nb[3],
|
14605
|
+
tensor->n_tasks,
|
14606
|
+
tensor->data,
|
14607
|
+
tensor->name);
|
14608
|
+
}
|
14609
|
+
|
14610
|
+
void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
14611
|
+
assert(cgraph->work == NULL);
|
14612
|
+
assert(cgraph->work_size == 0);
|
14613
|
+
|
14614
|
+
uint64_t size_eval = 0;
|
14615
|
+
|
14616
|
+
// compute size of intermediate results
|
14617
|
+
// TODO: does not take into account scratch buffers !!!!
|
14618
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
14619
|
+
size_eval += ggml_nbytes(cgraph->nodes[i]);
|
14620
|
+
}
|
14621
|
+
|
14622
|
+
// print
|
14623
|
+
{
|
14624
|
+
FILE * fout = stdout;
|
14625
|
+
|
14626
|
+
fprintf(fout, "\n");
|
14627
|
+
fprintf(fout, "%-16s %8x\n", "magic", GGML_FILE_MAGIC);
|
14628
|
+
fprintf(fout, "%-16s %8d\n", "version", GGML_FILE_VERSION);
|
14629
|
+
fprintf(fout, "%-16s %8d\n", "leafs", cgraph->n_leafs);
|
14630
|
+
fprintf(fout, "%-16s %8d\n", "nodes", cgraph->n_nodes);
|
14631
|
+
fprintf(fout, "%-16s %8llu\n", "eval", size_eval);
|
14632
|
+
|
14633
|
+
// header
|
14634
|
+
fprintf(fout, "\n");
|
14635
|
+
fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n",
|
14636
|
+
"TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME");
|
14637
|
+
|
14638
|
+
for (int i = 0; i < cgraph->n_leafs; ++i) {
|
14639
|
+
ggml_graph_export_leaf(cgraph->leafs[i], fout);
|
14640
|
+
|
14641
|
+
GGML_ASSERT(cgraph->leafs[i]->op == GGML_OP_NONE);
|
14642
|
+
GGML_ASSERT(cgraph->leafs[i]->src0 == NULL);
|
14643
|
+
GGML_ASSERT(cgraph->leafs[i]->src1 == NULL);
|
14644
|
+
}
|
14645
|
+
|
14646
|
+
// header
|
14647
|
+
fprintf(fout, "\n");
|
14648
|
+
fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n",
|
14649
|
+
"ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME");
|
14650
|
+
|
14651
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
14652
|
+
ggml_graph_export_node(cgraph->nodes[i], "DST", fout);
|
14653
|
+
|
14654
|
+
if (cgraph->nodes[i]->src0) {
|
14655
|
+
ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout);
|
14656
|
+
}
|
14657
|
+
|
14658
|
+
if (cgraph->nodes[i]->src1) {
|
14659
|
+
ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout);
|
14660
|
+
}
|
14661
|
+
|
14662
|
+
for (int j = 0; j < GGML_MAX_OPT; ++j) {
|
14663
|
+
if (cgraph->nodes[i]->opt[j]) {
|
14664
|
+
ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout);
|
14665
|
+
}
|
14666
|
+
}
|
14667
|
+
|
14668
|
+
fprintf(fout, "\n");
|
14669
|
+
}
|
14670
|
+
|
14671
|
+
fprintf(fout, "\n");
|
14672
|
+
}
|
14673
|
+
|
14674
|
+
// write binary data
|
14675
|
+
{
|
14676
|
+
FILE * fout = fopen(fname, "wb");
|
14677
|
+
|
14678
|
+
if (!fout) {
|
14679
|
+
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
|
14680
|
+
return;
|
14681
|
+
}
|
14682
|
+
|
14683
|
+
// header
|
14684
|
+
{
|
14685
|
+
const uint32_t magic = GGML_FILE_MAGIC;
|
14686
|
+
const uint32_t version = GGML_FILE_VERSION;
|
14687
|
+
const uint32_t n_leafs = cgraph->n_leafs;
|
14688
|
+
const uint32_t nodes = cgraph->n_nodes;
|
14689
|
+
|
14690
|
+
fwrite(&magic, sizeof(uint32_t), 1, fout);
|
14691
|
+
fwrite(&version, sizeof(uint32_t), 1, fout);
|
14692
|
+
fwrite(&n_leafs, sizeof(uint32_t), 1, fout);
|
14693
|
+
fwrite(&nodes, sizeof(uint32_t), 1, fout);
|
14694
|
+
fwrite(&size_eval, sizeof(uint64_t), 1, fout);
|
14695
|
+
}
|
14696
|
+
|
14697
|
+
// leafs
|
14698
|
+
{
|
14699
|
+
for (int i = 0; i < cgraph->n_leafs; ++i) {
|
14700
|
+
const struct ggml_tensor * tensor = cgraph->leafs[i];
|
14701
|
+
|
14702
|
+
const uint32_t type = tensor->type;
|
14703
|
+
const uint32_t op = tensor->op;
|
14704
|
+
const uint32_t n_dims = tensor->n_dims;
|
14705
|
+
|
14706
|
+
fwrite(&type, sizeof(uint32_t), 1, fout);
|
14707
|
+
fwrite(&op, sizeof(uint32_t), 1, fout);
|
14708
|
+
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
14709
|
+
|
14710
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14711
|
+
const uint64_t ne = tensor->ne[j];
|
14712
|
+
const uint64_t nb = tensor->nb[j];
|
14713
|
+
|
14714
|
+
fwrite(&ne, sizeof(uint64_t), 1, fout);
|
14715
|
+
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
14716
|
+
}
|
14717
|
+
|
14718
|
+
// store the pointer address
|
14719
|
+
{
|
14720
|
+
const uint64_t ptr = (uint64_t) tensor->data;
|
14721
|
+
|
14722
|
+
fwrite(&ptr, sizeof(uint64_t), 1, fout);
|
14723
|
+
}
|
14724
|
+
|
14725
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
14726
|
+
|
14727
|
+
// dump the data
|
14728
|
+
// TODO: pad this to 32 byte boundary
|
14729
|
+
{
|
14730
|
+
const size_t size = ggml_nbytes(tensor);
|
14731
|
+
|
14732
|
+
fwrite(tensor->data, sizeof(char), size, fout);
|
14733
|
+
}
|
14734
|
+
}
|
14735
|
+
}
|
14736
|
+
|
14737
|
+
// nodes
|
14738
|
+
{
|
14739
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
14740
|
+
const struct ggml_tensor * tensor = cgraph->nodes[i];
|
14741
|
+
|
14742
|
+
const uint32_t type = tensor->type;
|
14743
|
+
const uint32_t op = tensor->op;
|
14744
|
+
const uint32_t n_dims = tensor->n_dims;
|
14745
|
+
|
14746
|
+
fwrite(&type, sizeof(uint32_t), 1, fout);
|
14747
|
+
fwrite(&op, sizeof(uint32_t), 1, fout);
|
14748
|
+
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
14749
|
+
|
14750
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14751
|
+
const uint64_t ne = tensor->ne[j];
|
14752
|
+
const uint64_t nb = tensor->nb[j];
|
14753
|
+
|
14754
|
+
fwrite(&ne, sizeof(uint64_t), 1, fout);
|
14755
|
+
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
14756
|
+
}
|
14757
|
+
|
14758
|
+
// store the pointer address
|
14759
|
+
{
|
14760
|
+
const uint64_t ptr = (uint64_t) tensor->data;
|
14761
|
+
|
14762
|
+
fwrite(&ptr, sizeof(uint64_t), 1, fout);
|
14763
|
+
}
|
14764
|
+
|
14765
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
14766
|
+
|
14767
|
+
// output the op arguments
|
14768
|
+
{
|
14769
|
+
struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
|
14770
|
+
|
14771
|
+
args[0] = tensor->src0;
|
14772
|
+
args[1] = tensor->src1;
|
14773
|
+
|
14774
|
+
for (int j = 0; j < GGML_MAX_OPT; ++j) {
|
14775
|
+
args[2 + j] = tensor->opt[j];
|
14776
|
+
}
|
14777
|
+
|
14778
|
+
for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
|
14779
|
+
if (args[j]) {
|
14780
|
+
int32_t idx = -1;
|
14781
|
+
|
14782
|
+
// check if leaf
|
14783
|
+
{
|
14784
|
+
for (int k = 0; k < cgraph->n_leafs; ++k) {
|
14785
|
+
if (args[j] == cgraph->leafs[k]) {
|
14786
|
+
idx = k;
|
14787
|
+
break;
|
14788
|
+
}
|
14789
|
+
}
|
14790
|
+
}
|
14791
|
+
|
14792
|
+
// check if node
|
14793
|
+
if (idx == -1) {
|
14794
|
+
for (int k = 0; k < cgraph->n_nodes; ++k) {
|
14795
|
+
if (args[j] == cgraph->nodes[k]) {
|
14796
|
+
idx = GGML_MAX_NODES + k;
|
14797
|
+
break;
|
14798
|
+
}
|
14799
|
+
}
|
14800
|
+
}
|
14801
|
+
|
14802
|
+
if (idx == -1) {
|
14803
|
+
fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
|
14804
|
+
return;
|
14805
|
+
}
|
14806
|
+
|
14807
|
+
fwrite(&idx, sizeof(int32_t), 1, fout);
|
14808
|
+
} else {
|
14809
|
+
const int32_t nul = -1;
|
14810
|
+
|
14811
|
+
fwrite(&nul, sizeof(int32_t), 1, fout);
|
14812
|
+
}
|
14813
|
+
}
|
14814
|
+
}
|
14815
|
+
}
|
14816
|
+
}
|
14817
|
+
|
14818
|
+
fclose(fout);
|
14819
|
+
}
|
14820
|
+
}
|
14821
|
+
|
14822
|
+
struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
|
14823
|
+
assert(*ctx_data == NULL);
|
14824
|
+
assert(*ctx_eval == NULL);
|
14825
|
+
|
14826
|
+
struct ggml_cgraph result = { 0 };
|
14827
|
+
|
14828
|
+
struct ggml_tensor * data = NULL;
|
14829
|
+
|
14830
|
+
// read file into data
|
14831
|
+
{
|
14832
|
+
FILE * fin = fopen(fname, "rb");
|
14833
|
+
|
14834
|
+
if (!fin) {
|
14835
|
+
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
|
14836
|
+
return result;
|
14837
|
+
}
|
14838
|
+
|
14839
|
+
size_t fsize = 0;
|
14840
|
+
|
14841
|
+
fseek(fin, 0, SEEK_END);
|
14842
|
+
fsize = ftell(fin);
|
14843
|
+
fseek(fin, 0, SEEK_SET);
|
14844
|
+
|
14845
|
+
// create the data context
|
14846
|
+
{
|
14847
|
+
const size_t overhead = 1*ggml_tensor_overhead();
|
14848
|
+
|
14849
|
+
struct ggml_init_params params = {
|
14850
|
+
.mem_size = fsize + overhead,
|
14851
|
+
.mem_buffer = NULL,
|
14852
|
+
.no_alloc = false,
|
14853
|
+
};
|
14854
|
+
|
14855
|
+
*ctx_data = ggml_init(params);
|
14856
|
+
|
14857
|
+
if (!*ctx_data) {
|
14858
|
+
fprintf(stderr, "%s: failed to create ggml context\n", __func__);
|
14859
|
+
return result;
|
14860
|
+
}
|
14861
|
+
}
|
14862
|
+
|
14863
|
+
data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize);
|
14864
|
+
|
14865
|
+
fread(data->data, sizeof(char), fsize, fin);
|
14866
|
+
|
14867
|
+
fclose(fin);
|
14868
|
+
}
|
14869
|
+
|
14870
|
+
// populate result
|
14871
|
+
{
|
14872
|
+
char * ptr = (char *) data->data;
|
14873
|
+
|
14874
|
+
const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic);
|
14875
|
+
|
14876
|
+
if (magic != GGML_FILE_MAGIC) {
|
14877
|
+
fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic);
|
14878
|
+
return result;
|
14879
|
+
}
|
14880
|
+
|
14881
|
+
const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version);
|
14882
|
+
|
14883
|
+
if (version != GGML_FILE_VERSION) {
|
14884
|
+
fprintf(stderr, "%s: invalid version number\n", __func__);
|
14885
|
+
return result;
|
14886
|
+
}
|
14887
|
+
|
14888
|
+
const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
|
14889
|
+
const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
|
14890
|
+
const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
|
14891
|
+
|
14892
|
+
result.n_leafs = n_leafs;
|
14893
|
+
result.n_nodes = n_nodes;
|
14894
|
+
|
14895
|
+
// create the data context
|
14896
|
+
{
|
14897
|
+
const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
|
14898
|
+
|
14899
|
+
struct ggml_init_params params = {
|
14900
|
+
.mem_size = size_eval + overhead,
|
14901
|
+
.mem_buffer = NULL,
|
14902
|
+
.no_alloc = true,
|
14903
|
+
};
|
14904
|
+
|
14905
|
+
*ctx_eval = ggml_init(params);
|
14906
|
+
|
14907
|
+
if (!*ctx_eval) {
|
14908
|
+
fprintf(stderr, "%s: failed to create ggml context\n", __func__);
|
14909
|
+
return result;
|
14910
|
+
}
|
14911
|
+
}
|
14912
|
+
|
14913
|
+
// leafs
|
14914
|
+
{
|
14915
|
+
uint32_t type;
|
14916
|
+
uint32_t op;
|
14917
|
+
uint32_t n_dims;
|
14918
|
+
|
14919
|
+
for (uint32_t i = 0; i < n_leafs; ++i) {
|
14920
|
+
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
14921
|
+
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
14922
|
+
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
14923
|
+
|
14924
|
+
int64_t ne[GGML_MAX_DIMS];
|
14925
|
+
size_t nb[GGML_MAX_DIMS];
|
14926
|
+
|
14927
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14928
|
+
uint64_t ne_cur;
|
14929
|
+
uint64_t nb_cur;
|
14930
|
+
|
14931
|
+
ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
|
14932
|
+
nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
|
14933
|
+
|
14934
|
+
ne[j] = ne_cur;
|
14935
|
+
nb[j] = nb_cur;
|
14936
|
+
}
|
14937
|
+
|
14938
|
+
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
14939
|
+
|
14940
|
+
tensor->op = (enum ggml_op) op;
|
14941
|
+
|
14942
|
+
uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
|
14943
|
+
|
14944
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
14945
|
+
|
14946
|
+
tensor->data = (void *) ptr;
|
14947
|
+
|
14948
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14949
|
+
tensor->nb[j] = nb[j];
|
14950
|
+
}
|
14951
|
+
|
14952
|
+
result.leafs[i] = tensor;
|
14953
|
+
|
14954
|
+
ptr += ggml_nbytes(tensor);
|
14955
|
+
|
14956
|
+
fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
14957
|
+
}
|
14958
|
+
}
|
14959
|
+
|
14960
|
+
ggml_set_no_alloc(*ctx_eval, false);
|
14961
|
+
|
14962
|
+
// nodes
|
14963
|
+
{
|
14964
|
+
uint32_t type;
|
14965
|
+
uint32_t op;
|
14966
|
+
uint32_t n_dims;
|
14967
|
+
|
14968
|
+
for (uint32_t i = 0; i < n_nodes; ++i) {
|
14969
|
+
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
14970
|
+
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
14971
|
+
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
14972
|
+
|
14973
|
+
int64_t ne[GGML_MAX_DIMS];
|
14974
|
+
size_t nb[GGML_MAX_DIMS];
|
14975
|
+
|
14976
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14977
|
+
uint64_t ne_cur;
|
14978
|
+
uint64_t nb_cur;
|
14979
|
+
|
14980
|
+
ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
|
14981
|
+
nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
|
14982
|
+
|
14983
|
+
ne[j] = ne_cur;
|
14984
|
+
nb[j] = nb_cur;
|
14985
|
+
}
|
14986
|
+
|
14987
|
+
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
14988
|
+
|
14989
|
+
tensor->op = (enum ggml_op) op;
|
14990
|
+
|
14991
|
+
uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
|
14992
|
+
|
14993
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
14994
|
+
|
14995
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14996
|
+
tensor->nb[j] = nb[j];
|
14997
|
+
}
|
14998
|
+
|
14999
|
+
// parse args
|
15000
|
+
{
|
15001
|
+
struct ggml_tensor ** args[2 + GGML_MAX_OPT] = {
|
15002
|
+
&tensor->src0,
|
15003
|
+
&tensor->src1,
|
15004
|
+
};
|
15005
|
+
|
15006
|
+
for (int j = 0; j < GGML_MAX_OPT; ++j) {
|
15007
|
+
args[2 + j] = &tensor->opt[j];
|
15008
|
+
}
|
15009
|
+
|
15010
|
+
for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
|
15011
|
+
const int32_t arg_idx = *(const int32_t *) ptr; ptr += sizeof(arg_idx);
|
15012
|
+
|
15013
|
+
if (arg_idx == -1) {
|
15014
|
+
continue;
|
15015
|
+
}
|
15016
|
+
|
15017
|
+
if (arg_idx < GGML_MAX_NODES) {
|
15018
|
+
*args[j] = result.leafs[arg_idx];
|
15019
|
+
} else {
|
15020
|
+
*args[j] = result.nodes[arg_idx - GGML_MAX_NODES];
|
15021
|
+
}
|
15022
|
+
}
|
15023
|
+
}
|
15024
|
+
|
15025
|
+
result.nodes[i] = tensor;
|
15026
|
+
|
15027
|
+
fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
15028
|
+
}
|
15029
|
+
}
|
15030
|
+
}
|
15031
|
+
|
15032
|
+
return result;
|
15033
|
+
}
|
15034
|
+
|
14524
15035
|
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
14525
15036
|
int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
|
14526
15037
|
|
@@ -14538,7 +15049,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
14538
15049
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
14539
15050
|
i,
|
14540
15051
|
node->ne[0], node->ne[1], node->ne[2],
|
14541
|
-
|
15052
|
+
GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
14542
15053
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
14543
15054
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
14544
15055
|
(double) node->perf_time_us / 1000.0,
|
@@ -14552,7 +15063,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
14552
15063
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
14553
15064
|
i,
|
14554
15065
|
node->ne[0], node->ne[1],
|
14555
|
-
|
15066
|
+
GGML_OP_NAME[node->op]);
|
14556
15067
|
}
|
14557
15068
|
|
14558
15069
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -14560,7 +15071,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
14560
15071
|
continue;
|
14561
15072
|
}
|
14562
15073
|
|
14563
|
-
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_LABEL[i], (double) perf_total_per_op_us[i] / 1000.0);
|
15074
|
+
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0);
|
14564
15075
|
}
|
14565
15076
|
|
14566
15077
|
GGML_PRINT("========================================\n");
|