llama_cpp 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -7
- data/ext/llama_cpp/extconf.rb +1 -2
- data/ext/llama_cpp/src/ggml-opencl.cpp +1028 -0
- data/ext/llama_cpp/src/ggml-opencl.h +8 -10
- data/ext/llama_cpp/src/ggml.c +568 -57
- data/ext/llama_cpp/src/ggml.h +21 -2
- data/ext/llama_cpp/src/llama.cpp +37 -2
- data/ext/llama_cpp/src/llama.h +5 -0
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -3
- data/ext/llama_cpp/src/ggml-opencl.c +0 -474
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -186,10 +186,12 @@ typedef double ggml_float;
|
|
186
186
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
187
187
|
#include <intrin.h>
|
188
188
|
#else
|
189
|
+
#if !defined(__riscv)
|
189
190
|
#include <immintrin.h>
|
190
191
|
#endif
|
191
192
|
#endif
|
192
193
|
#endif
|
194
|
+
#endif
|
193
195
|
|
194
196
|
#ifdef __F16C__
|
195
197
|
|
@@ -3494,7 +3496,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
|
|
3494
3496
|
};
|
3495
3497
|
static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
|
3496
3498
|
|
3497
|
-
static const char *
|
3499
|
+
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
3498
3500
|
"NONE",
|
3499
3501
|
|
3500
3502
|
"DUP",
|
@@ -3749,6 +3751,9 @@ const char * ggml_type_name(enum ggml_type type) {
|
|
3749
3751
|
return GGML_TYPE_NAME[type];
|
3750
3752
|
}
|
3751
3753
|
|
3754
|
+
const char * ggml_op_name(enum ggml_op op) {
|
3755
|
+
return GGML_OP_NAME[op];
|
3756
|
+
}
|
3752
3757
|
|
3753
3758
|
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
3754
3759
|
return GGML_TYPE_SIZE[tensor->type];
|
@@ -3805,6 +3810,10 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
3805
3810
|
return wtype;
|
3806
3811
|
}
|
3807
3812
|
|
3813
|
+
size_t ggml_tensor_overhead(void) {
|
3814
|
+
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
|
3815
|
+
}
|
3816
|
+
|
3808
3817
|
static inline bool ggml_is_transposed(const struct ggml_tensor * tensor) {
|
3809
3818
|
return tensor->nb[0] > tensor->nb[1];
|
3810
3819
|
}
|
@@ -4017,6 +4026,18 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|
4017
4026
|
return result;
|
4018
4027
|
}
|
4019
4028
|
|
4029
|
+
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
|
4030
|
+
ctx->no_alloc = no_alloc;
|
4031
|
+
}
|
4032
|
+
|
4033
|
+
void * ggml_get_mem_buffer(struct ggml_context * ctx) {
|
4034
|
+
return ctx->mem_buffer;
|
4035
|
+
}
|
4036
|
+
|
4037
|
+
size_t ggml_get_mem_size(struct ggml_context * ctx) {
|
4038
|
+
return ctx->mem_size;
|
4039
|
+
}
|
4040
|
+
|
4020
4041
|
// IMPORTANT:
|
4021
4042
|
// when creating "opt" tensors, always save and load the scratch buffer
|
4022
4043
|
// this is an error prone process, but it is necessary to support inplace
|
@@ -4061,7 +4082,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4061
4082
|
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
4062
4083
|
|
4063
4084
|
if (ctx->scratch.data == NULL || data != NULL) {
|
4064
|
-
size_needed +=
|
4085
|
+
size_needed += GGML_TENSOR_SIZE;
|
4065
4086
|
|
4066
4087
|
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4067
4088
|
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
@@ -4077,14 +4098,15 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4077
4098
|
};
|
4078
4099
|
} else {
|
4079
4100
|
if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
|
4080
|
-
GGML_PRINT("%s: not enough space in the scratch memory\n",
|
4101
|
+
GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
|
4102
|
+
__func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
|
4081
4103
|
assert(false);
|
4082
4104
|
return NULL;
|
4083
4105
|
}
|
4084
4106
|
|
4085
|
-
if (cur_end +
|
4107
|
+
if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4086
4108
|
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
4087
|
-
__func__, cur_end +
|
4109
|
+
__func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
|
4088
4110
|
assert(false);
|
4089
4111
|
return NULL;
|
4090
4112
|
}
|
@@ -4093,7 +4115,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4093
4115
|
|
4094
4116
|
*obj_new = (struct ggml_object) {
|
4095
4117
|
.offs = cur_end + GGML_OBJECT_SIZE,
|
4096
|
-
.size =
|
4118
|
+
.size = GGML_TENSOR_SIZE,
|
4097
4119
|
.next = NULL,
|
4098
4120
|
};
|
4099
4121
|
|
@@ -4509,6 +4531,23 @@ struct ggml_tensor * ggml_view_tensor(
|
|
4509
4531
|
return result;
|
4510
4532
|
}
|
4511
4533
|
|
4534
|
+
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
|
4535
|
+
struct ggml_object * obj = ctx->objects_begin;
|
4536
|
+
|
4537
|
+
char * const mem_buffer = ctx->mem_buffer;
|
4538
|
+
|
4539
|
+
while (obj != NULL) {
|
4540
|
+
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
4541
|
+
if (strcmp(cur->name, name) == 0) {
|
4542
|
+
return cur;
|
4543
|
+
}
|
4544
|
+
|
4545
|
+
obj = obj->next;
|
4546
|
+
}
|
4547
|
+
|
4548
|
+
return NULL;
|
4549
|
+
}
|
4550
|
+
|
4512
4551
|
////////////////////////////////////////////////////////////////////////////////
|
4513
4552
|
|
4514
4553
|
// ggml_dup
|
@@ -6303,7 +6342,7 @@ struct ggml_tensor * ggml_alibi(
|
|
6303
6342
|
|
6304
6343
|
ggml_scratch_save(ctx);
|
6305
6344
|
|
6306
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32,
|
6345
|
+
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
6307
6346
|
|
6308
6347
|
((int32_t *) b->data)[0] = n_past;
|
6309
6348
|
((int32_t *) b->data)[1] = n_head;
|
@@ -9431,7 +9470,7 @@ static void ggml_compute_forward_rms_norm_back(
|
|
9431
9470
|
|
9432
9471
|
// ggml_compute_forward_mul_mat
|
9433
9472
|
|
9434
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9473
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9435
9474
|
// helper function to determine if it is better to use BLAS or not
|
9436
9475
|
// for large matrices, BLAS is faster
|
9437
9476
|
static bool ggml_compute_forward_mul_mat_use_blas(
|
@@ -9472,7 +9511,7 @@ static void ggml_compute_forward_mul_mat_f32(
|
|
9472
9511
|
const int64_t ne02 = src0->ne[2];
|
9473
9512
|
const int64_t ne03 = src0->ne[3];
|
9474
9513
|
|
9475
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9514
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9476
9515
|
const int64_t ne10 = src1->ne[0];
|
9477
9516
|
#endif
|
9478
9517
|
const int64_t ne11 = src1->ne[1];
|
@@ -9536,9 +9575,16 @@ static void ggml_compute_forward_mul_mat_f32(
|
|
9536
9575
|
}
|
9537
9576
|
return;
|
9538
9577
|
}
|
9578
|
+
#elif defined(GGML_USE_CLBLAST)
|
9579
|
+
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
9580
|
+
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
|
9581
|
+
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
9582
|
+
}
|
9583
|
+
return;
|
9584
|
+
}
|
9539
9585
|
#endif
|
9540
9586
|
|
9541
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9587
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9542
9588
|
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
9543
9589
|
if (params->ith != 0) {
|
9544
9590
|
return;
|
@@ -9558,21 +9604,11 @@ static void ggml_compute_forward_mul_mat_f32(
|
|
9558
9604
|
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
|
9559
9605
|
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
9560
9606
|
|
9561
|
-
#if defined(GGML_USE_CLBLAST)
|
9562
|
-
// zT = y * xT
|
9563
|
-
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
|
9564
|
-
ne11, ne01, ne10,
|
9565
|
-
1.0f, y, ne10,
|
9566
|
-
x, ne10,
|
9567
|
-
0.0f, d, ne01,
|
9568
|
-
GGML_TYPE_F32);
|
9569
|
-
#else
|
9570
9607
|
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
9571
9608
|
ne11, ne01, ne10,
|
9572
9609
|
1.0f, y, ne10,
|
9573
9610
|
x, ne00,
|
9574
9611
|
0.0f, d, ne01);
|
9575
|
-
#endif
|
9576
9612
|
}
|
9577
9613
|
}
|
9578
9614
|
//printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
|
@@ -9711,9 +9747,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|
9711
9747
|
}
|
9712
9748
|
return;
|
9713
9749
|
}
|
9750
|
+
#elif defined(GGML_USE_CLBLAST)
|
9751
|
+
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
9752
|
+
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
|
9753
|
+
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
9754
|
+
}
|
9755
|
+
return;
|
9756
|
+
}
|
9714
9757
|
#endif
|
9715
9758
|
|
9716
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9759
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9717
9760
|
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
9718
9761
|
GGML_ASSERT(nb10 == sizeof(float));
|
9719
9762
|
|
@@ -9743,20 +9786,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|
9743
9786
|
assert(id*sizeof(float) <= params->wsize);
|
9744
9787
|
}
|
9745
9788
|
|
9746
|
-
#if defined(GGML_USE_CLBLAST)
|
9747
|
-
const float * x = wdata;
|
9748
|
-
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
|
9749
|
-
|
9750
|
-
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
9751
|
-
|
9752
|
-
// zT = y * xT
|
9753
|
-
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
|
9754
|
-
ne11, ne01, ne10,
|
9755
|
-
1.0f, y, ne10,
|
9756
|
-
x, ne10,
|
9757
|
-
0.0f, d, ne01,
|
9758
|
-
GGML_TYPE_F32);
|
9759
|
-
#else
|
9760
9789
|
const float * x = wdata;
|
9761
9790
|
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
|
9762
9791
|
|
@@ -9768,7 +9797,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|
9768
9797
|
1.0f, y, ne10,
|
9769
9798
|
x, ne00,
|
9770
9799
|
0.0f, d, ne01);
|
9771
|
-
#endif
|
9772
9800
|
}
|
9773
9801
|
}
|
9774
9802
|
|
@@ -9931,9 +9959,16 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|
9931
9959
|
}
|
9932
9960
|
return;
|
9933
9961
|
}
|
9962
|
+
#elif defined(GGML_USE_CLBLAST)
|
9963
|
+
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
9964
|
+
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
|
9965
|
+
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
9966
|
+
}
|
9967
|
+
return;
|
9968
|
+
}
|
9934
9969
|
#endif
|
9935
9970
|
|
9936
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9971
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9937
9972
|
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
9938
9973
|
if (params->ith != 0) {
|
9939
9974
|
return;
|
@@ -9956,9 +9991,6 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|
9956
9991
|
|
9957
9992
|
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
9958
9993
|
|
9959
|
-
#if defined(GGML_USE_CLBLAST)
|
9960
|
-
const void* x = (char *) src0->data + i03*nb03 + i02*nb02;
|
9961
|
-
#else
|
9962
9994
|
{
|
9963
9995
|
size_t id = 0;
|
9964
9996
|
for (int64_t i01 = 0; i01 < ne01; ++i01) {
|
@@ -9970,23 +10002,12 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|
9970
10002
|
}
|
9971
10003
|
|
9972
10004
|
const float * x = wdata;
|
9973
|
-
#endif
|
9974
10005
|
|
9975
|
-
#if defined(GGML_USE_CLBLAST)
|
9976
|
-
// zT = y * xT
|
9977
|
-
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
|
9978
|
-
ne11, ne01, ne10,
|
9979
|
-
1.0f, y, ne10,
|
9980
|
-
x, ne10,
|
9981
|
-
0.0f, d, ne01,
|
9982
|
-
type);
|
9983
|
-
#else
|
9984
10006
|
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
9985
10007
|
ne11, ne01, ne10,
|
9986
10008
|
1.0f, y, ne10,
|
9987
10009
|
x, ne00,
|
9988
10010
|
0.0f, d, ne01);
|
9989
|
-
#endif
|
9990
10011
|
}
|
9991
10012
|
}
|
9992
10013
|
|
@@ -13810,11 +13831,19 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
13810
13831
|
// reached a leaf node, not part of the gradient graph (e.g. a constant)
|
13811
13832
|
GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
|
13812
13833
|
|
13834
|
+
if (strlen(node->name) == 0) {
|
13835
|
+
snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
|
13836
|
+
}
|
13837
|
+
|
13813
13838
|
cgraph->leafs[cgraph->n_leafs] = node;
|
13814
13839
|
cgraph->n_leafs++;
|
13815
13840
|
} else {
|
13816
13841
|
GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
|
13817
13842
|
|
13843
|
+
if (strlen(node->name) == 0) {
|
13844
|
+
snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
|
13845
|
+
}
|
13846
|
+
|
13818
13847
|
cgraph->nodes[cgraph->n_nodes] = node;
|
13819
13848
|
cgraph->grads[cgraph->n_nodes] = node->grad;
|
13820
13849
|
cgraph->n_nodes++;
|
@@ -14165,9 +14194,16 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|
14165
14194
|
cur = ggml_cuda_mul_mat_get_wsize(node->src0, node->src1, node);
|
14166
14195
|
}
|
14167
14196
|
else
|
14197
|
+
#elif defined(GGML_USE_CLBLAST)
|
14198
|
+
if (ggml_cl_can_mul_mat(node->src0, node->src1, node)) {
|
14199
|
+
node->n_tasks = 1; // TODO: this actually is doing nothing
|
14200
|
+
// the threads are still spinning
|
14201
|
+
cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
|
14202
|
+
}
|
14203
|
+
else
|
14168
14204
|
#endif
|
14169
14205
|
if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) {
|
14170
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14206
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14171
14207
|
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
14172
14208
|
node->n_tasks = 1; // TODO: this actually is doing nothing
|
14173
14209
|
// the threads are still spinning
|
@@ -14181,13 +14217,13 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|
14181
14217
|
#endif
|
14182
14218
|
} else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
|
14183
14219
|
cur = 0;
|
14184
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14220
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14185
14221
|
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
14186
14222
|
node->n_tasks = 1;
|
14187
14223
|
}
|
14188
14224
|
#endif
|
14189
14225
|
} else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) {
|
14190
|
-
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14226
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
14191
14227
|
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
14192
14228
|
node->n_tasks = 1;
|
14193
14229
|
cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
|
@@ -14521,6 +14557,481 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
14521
14557
|
}
|
14522
14558
|
}
|
14523
14559
|
|
14560
|
+
struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
|
14561
|
+
for (int i = 0; i < cgraph->n_leafs; i++) {
|
14562
|
+
struct ggml_tensor * leaf = cgraph->leafs[i];
|
14563
|
+
|
14564
|
+
if (strcmp(leaf->name, name) == 0) {
|
14565
|
+
return leaf;
|
14566
|
+
}
|
14567
|
+
}
|
14568
|
+
|
14569
|
+
for (int i = 0; i < cgraph->n_nodes; i++) {
|
14570
|
+
struct ggml_tensor * node = cgraph->nodes[i];
|
14571
|
+
|
14572
|
+
if (strcmp(node->name, name) == 0) {
|
14573
|
+
return node;
|
14574
|
+
}
|
14575
|
+
}
|
14576
|
+
|
14577
|
+
return NULL;
|
14578
|
+
}
|
14579
|
+
|
14580
|
+
static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) {
|
14581
|
+
const int64_t * ne = tensor->ne;
|
14582
|
+
const size_t * nb = tensor->nb;
|
14583
|
+
|
14584
|
+
fprintf(fout, "%-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %16p %16s\n",
|
14585
|
+
ggml_type_name(tensor->type),
|
14586
|
+
ggml_op_name (tensor->op),
|
14587
|
+
tensor->n_dims,
|
14588
|
+
ne[0], ne[1], ne[2], ne[3],
|
14589
|
+
nb[0], nb[1], nb[2], nb[3],
|
14590
|
+
tensor->data,
|
14591
|
+
tensor->name);
|
14592
|
+
}
|
14593
|
+
|
14594
|
+
static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) {
|
14595
|
+
const int64_t * ne = tensor->ne;
|
14596
|
+
const size_t * nb = tensor->nb;
|
14597
|
+
|
14598
|
+
fprintf(fout, "%-6s %-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %8d %16p %16s\n",
|
14599
|
+
arg,
|
14600
|
+
ggml_type_name(tensor->type),
|
14601
|
+
ggml_op_name (tensor->op),
|
14602
|
+
tensor->n_dims,
|
14603
|
+
ne[0], ne[1], ne[2], ne[3],
|
14604
|
+
nb[0], nb[1], nb[2], nb[3],
|
14605
|
+
tensor->n_tasks,
|
14606
|
+
tensor->data,
|
14607
|
+
tensor->name);
|
14608
|
+
}
|
14609
|
+
|
14610
|
+
void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
14611
|
+
assert(cgraph->work == NULL);
|
14612
|
+
assert(cgraph->work_size == 0);
|
14613
|
+
|
14614
|
+
uint64_t size_eval = 0;
|
14615
|
+
|
14616
|
+
// compute size of intermediate results
|
14617
|
+
// TODO: does not take into account scratch buffers !!!!
|
14618
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
14619
|
+
size_eval += ggml_nbytes(cgraph->nodes[i]);
|
14620
|
+
}
|
14621
|
+
|
14622
|
+
// print
|
14623
|
+
{
|
14624
|
+
FILE * fout = stdout;
|
14625
|
+
|
14626
|
+
fprintf(fout, "\n");
|
14627
|
+
fprintf(fout, "%-16s %8x\n", "magic", GGML_FILE_MAGIC);
|
14628
|
+
fprintf(fout, "%-16s %8d\n", "version", GGML_FILE_VERSION);
|
14629
|
+
fprintf(fout, "%-16s %8d\n", "leafs", cgraph->n_leafs);
|
14630
|
+
fprintf(fout, "%-16s %8d\n", "nodes", cgraph->n_nodes);
|
14631
|
+
fprintf(fout, "%-16s %8llu\n", "eval", size_eval);
|
14632
|
+
|
14633
|
+
// header
|
14634
|
+
fprintf(fout, "\n");
|
14635
|
+
fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n",
|
14636
|
+
"TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME");
|
14637
|
+
|
14638
|
+
for (int i = 0; i < cgraph->n_leafs; ++i) {
|
14639
|
+
ggml_graph_export_leaf(cgraph->leafs[i], fout);
|
14640
|
+
|
14641
|
+
GGML_ASSERT(cgraph->leafs[i]->op == GGML_OP_NONE);
|
14642
|
+
GGML_ASSERT(cgraph->leafs[i]->src0 == NULL);
|
14643
|
+
GGML_ASSERT(cgraph->leafs[i]->src1 == NULL);
|
14644
|
+
}
|
14645
|
+
|
14646
|
+
// header
|
14647
|
+
fprintf(fout, "\n");
|
14648
|
+
fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n",
|
14649
|
+
"ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME");
|
14650
|
+
|
14651
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
14652
|
+
ggml_graph_export_node(cgraph->nodes[i], "DST", fout);
|
14653
|
+
|
14654
|
+
if (cgraph->nodes[i]->src0) {
|
14655
|
+
ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout);
|
14656
|
+
}
|
14657
|
+
|
14658
|
+
if (cgraph->nodes[i]->src1) {
|
14659
|
+
ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout);
|
14660
|
+
}
|
14661
|
+
|
14662
|
+
for (int j = 0; j < GGML_MAX_OPT; ++j) {
|
14663
|
+
if (cgraph->nodes[i]->opt[j]) {
|
14664
|
+
ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout);
|
14665
|
+
}
|
14666
|
+
}
|
14667
|
+
|
14668
|
+
fprintf(fout, "\n");
|
14669
|
+
}
|
14670
|
+
|
14671
|
+
fprintf(fout, "\n");
|
14672
|
+
}
|
14673
|
+
|
14674
|
+
// write binary data
|
14675
|
+
{
|
14676
|
+
FILE * fout = fopen(fname, "wb");
|
14677
|
+
|
14678
|
+
if (!fout) {
|
14679
|
+
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
|
14680
|
+
return;
|
14681
|
+
}
|
14682
|
+
|
14683
|
+
// header
|
14684
|
+
{
|
14685
|
+
const uint32_t magic = GGML_FILE_MAGIC;
|
14686
|
+
const uint32_t version = GGML_FILE_VERSION;
|
14687
|
+
const uint32_t n_leafs = cgraph->n_leafs;
|
14688
|
+
const uint32_t nodes = cgraph->n_nodes;
|
14689
|
+
|
14690
|
+
fwrite(&magic, sizeof(uint32_t), 1, fout);
|
14691
|
+
fwrite(&version, sizeof(uint32_t), 1, fout);
|
14692
|
+
fwrite(&n_leafs, sizeof(uint32_t), 1, fout);
|
14693
|
+
fwrite(&nodes, sizeof(uint32_t), 1, fout);
|
14694
|
+
fwrite(&size_eval, sizeof(uint64_t), 1, fout);
|
14695
|
+
}
|
14696
|
+
|
14697
|
+
// leafs
|
14698
|
+
{
|
14699
|
+
for (int i = 0; i < cgraph->n_leafs; ++i) {
|
14700
|
+
const struct ggml_tensor * tensor = cgraph->leafs[i];
|
14701
|
+
|
14702
|
+
const uint32_t type = tensor->type;
|
14703
|
+
const uint32_t op = tensor->op;
|
14704
|
+
const uint32_t n_dims = tensor->n_dims;
|
14705
|
+
|
14706
|
+
fwrite(&type, sizeof(uint32_t), 1, fout);
|
14707
|
+
fwrite(&op, sizeof(uint32_t), 1, fout);
|
14708
|
+
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
14709
|
+
|
14710
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14711
|
+
const uint64_t ne = tensor->ne[j];
|
14712
|
+
const uint64_t nb = tensor->nb[j];
|
14713
|
+
|
14714
|
+
fwrite(&ne, sizeof(uint64_t), 1, fout);
|
14715
|
+
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
14716
|
+
}
|
14717
|
+
|
14718
|
+
// store the pointer address
|
14719
|
+
{
|
14720
|
+
const uint64_t ptr = (uint64_t) tensor->data;
|
14721
|
+
|
14722
|
+
fwrite(&ptr, sizeof(uint64_t), 1, fout);
|
14723
|
+
}
|
14724
|
+
|
14725
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
14726
|
+
|
14727
|
+
// dump the data
|
14728
|
+
// TODO: pad this to 32 byte boundary
|
14729
|
+
{
|
14730
|
+
const size_t size = ggml_nbytes(tensor);
|
14731
|
+
|
14732
|
+
fwrite(tensor->data, sizeof(char), size, fout);
|
14733
|
+
}
|
14734
|
+
}
|
14735
|
+
}
|
14736
|
+
|
14737
|
+
// nodes
|
14738
|
+
{
|
14739
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
14740
|
+
const struct ggml_tensor * tensor = cgraph->nodes[i];
|
14741
|
+
|
14742
|
+
const uint32_t type = tensor->type;
|
14743
|
+
const uint32_t op = tensor->op;
|
14744
|
+
const uint32_t n_dims = tensor->n_dims;
|
14745
|
+
|
14746
|
+
fwrite(&type, sizeof(uint32_t), 1, fout);
|
14747
|
+
fwrite(&op, sizeof(uint32_t), 1, fout);
|
14748
|
+
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
14749
|
+
|
14750
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14751
|
+
const uint64_t ne = tensor->ne[j];
|
14752
|
+
const uint64_t nb = tensor->nb[j];
|
14753
|
+
|
14754
|
+
fwrite(&ne, sizeof(uint64_t), 1, fout);
|
14755
|
+
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
14756
|
+
}
|
14757
|
+
|
14758
|
+
// store the pointer address
|
14759
|
+
{
|
14760
|
+
const uint64_t ptr = (uint64_t) tensor->data;
|
14761
|
+
|
14762
|
+
fwrite(&ptr, sizeof(uint64_t), 1, fout);
|
14763
|
+
}
|
14764
|
+
|
14765
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
14766
|
+
|
14767
|
+
// output the op arguments
|
14768
|
+
{
|
14769
|
+
struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
|
14770
|
+
|
14771
|
+
args[0] = tensor->src0;
|
14772
|
+
args[1] = tensor->src1;
|
14773
|
+
|
14774
|
+
for (int j = 0; j < GGML_MAX_OPT; ++j) {
|
14775
|
+
args[2 + j] = tensor->opt[j];
|
14776
|
+
}
|
14777
|
+
|
14778
|
+
for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
|
14779
|
+
if (args[j]) {
|
14780
|
+
int32_t idx = -1;
|
14781
|
+
|
14782
|
+
// check if leaf
|
14783
|
+
{
|
14784
|
+
for (int k = 0; k < cgraph->n_leafs; ++k) {
|
14785
|
+
if (args[j] == cgraph->leafs[k]) {
|
14786
|
+
idx = k;
|
14787
|
+
break;
|
14788
|
+
}
|
14789
|
+
}
|
14790
|
+
}
|
14791
|
+
|
14792
|
+
// check if node
|
14793
|
+
if (idx == -1) {
|
14794
|
+
for (int k = 0; k < cgraph->n_nodes; ++k) {
|
14795
|
+
if (args[j] == cgraph->nodes[k]) {
|
14796
|
+
idx = GGML_MAX_NODES + k;
|
14797
|
+
break;
|
14798
|
+
}
|
14799
|
+
}
|
14800
|
+
}
|
14801
|
+
|
14802
|
+
if (idx == -1) {
|
14803
|
+
fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
|
14804
|
+
return;
|
14805
|
+
}
|
14806
|
+
|
14807
|
+
fwrite(&idx, sizeof(int32_t), 1, fout);
|
14808
|
+
} else {
|
14809
|
+
const int32_t nul = -1;
|
14810
|
+
|
14811
|
+
fwrite(&nul, sizeof(int32_t), 1, fout);
|
14812
|
+
}
|
14813
|
+
}
|
14814
|
+
}
|
14815
|
+
}
|
14816
|
+
}
|
14817
|
+
|
14818
|
+
fclose(fout);
|
14819
|
+
}
|
14820
|
+
}
|
14821
|
+
|
14822
|
+
struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
|
14823
|
+
assert(*ctx_data == NULL);
|
14824
|
+
assert(*ctx_eval == NULL);
|
14825
|
+
|
14826
|
+
struct ggml_cgraph result = { 0 };
|
14827
|
+
|
14828
|
+
struct ggml_tensor * data = NULL;
|
14829
|
+
|
14830
|
+
// read file into data
|
14831
|
+
{
|
14832
|
+
FILE * fin = fopen(fname, "rb");
|
14833
|
+
|
14834
|
+
if (!fin) {
|
14835
|
+
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
|
14836
|
+
return result;
|
14837
|
+
}
|
14838
|
+
|
14839
|
+
size_t fsize = 0;
|
14840
|
+
|
14841
|
+
fseek(fin, 0, SEEK_END);
|
14842
|
+
fsize = ftell(fin);
|
14843
|
+
fseek(fin, 0, SEEK_SET);
|
14844
|
+
|
14845
|
+
// create the data context
|
14846
|
+
{
|
14847
|
+
const size_t overhead = 1*ggml_tensor_overhead();
|
14848
|
+
|
14849
|
+
struct ggml_init_params params = {
|
14850
|
+
.mem_size = fsize + overhead,
|
14851
|
+
.mem_buffer = NULL,
|
14852
|
+
.no_alloc = false,
|
14853
|
+
};
|
14854
|
+
|
14855
|
+
*ctx_data = ggml_init(params);
|
14856
|
+
|
14857
|
+
if (!*ctx_data) {
|
14858
|
+
fprintf(stderr, "%s: failed to create ggml context\n", __func__);
|
14859
|
+
return result;
|
14860
|
+
}
|
14861
|
+
}
|
14862
|
+
|
14863
|
+
data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize);
|
14864
|
+
|
14865
|
+
fread(data->data, sizeof(char), fsize, fin);
|
14866
|
+
|
14867
|
+
fclose(fin);
|
14868
|
+
}
|
14869
|
+
|
14870
|
+
// populate result
|
14871
|
+
{
|
14872
|
+
char * ptr = (char *) data->data;
|
14873
|
+
|
14874
|
+
const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic);
|
14875
|
+
|
14876
|
+
if (magic != GGML_FILE_MAGIC) {
|
14877
|
+
fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic);
|
14878
|
+
return result;
|
14879
|
+
}
|
14880
|
+
|
14881
|
+
const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version);
|
14882
|
+
|
14883
|
+
if (version != GGML_FILE_VERSION) {
|
14884
|
+
fprintf(stderr, "%s: invalid version number\n", __func__);
|
14885
|
+
return result;
|
14886
|
+
}
|
14887
|
+
|
14888
|
+
const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
|
14889
|
+
const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
|
14890
|
+
const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
|
14891
|
+
|
14892
|
+
result.n_leafs = n_leafs;
|
14893
|
+
result.n_nodes = n_nodes;
|
14894
|
+
|
14895
|
+
// create the data context
|
14896
|
+
{
|
14897
|
+
const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
|
14898
|
+
|
14899
|
+
struct ggml_init_params params = {
|
14900
|
+
.mem_size = size_eval + overhead,
|
14901
|
+
.mem_buffer = NULL,
|
14902
|
+
.no_alloc = true,
|
14903
|
+
};
|
14904
|
+
|
14905
|
+
*ctx_eval = ggml_init(params);
|
14906
|
+
|
14907
|
+
if (!*ctx_eval) {
|
14908
|
+
fprintf(stderr, "%s: failed to create ggml context\n", __func__);
|
14909
|
+
return result;
|
14910
|
+
}
|
14911
|
+
}
|
14912
|
+
|
14913
|
+
// leafs
|
14914
|
+
{
|
14915
|
+
uint32_t type;
|
14916
|
+
uint32_t op;
|
14917
|
+
uint32_t n_dims;
|
14918
|
+
|
14919
|
+
for (uint32_t i = 0; i < n_leafs; ++i) {
|
14920
|
+
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
14921
|
+
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
14922
|
+
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
14923
|
+
|
14924
|
+
int64_t ne[GGML_MAX_DIMS];
|
14925
|
+
size_t nb[GGML_MAX_DIMS];
|
14926
|
+
|
14927
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14928
|
+
uint64_t ne_cur;
|
14929
|
+
uint64_t nb_cur;
|
14930
|
+
|
14931
|
+
ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
|
14932
|
+
nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
|
14933
|
+
|
14934
|
+
ne[j] = ne_cur;
|
14935
|
+
nb[j] = nb_cur;
|
14936
|
+
}
|
14937
|
+
|
14938
|
+
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
14939
|
+
|
14940
|
+
tensor->op = (enum ggml_op) op;
|
14941
|
+
|
14942
|
+
uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
|
14943
|
+
|
14944
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
14945
|
+
|
14946
|
+
tensor->data = (void *) ptr;
|
14947
|
+
|
14948
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14949
|
+
tensor->nb[j] = nb[j];
|
14950
|
+
}
|
14951
|
+
|
14952
|
+
result.leafs[i] = tensor;
|
14953
|
+
|
14954
|
+
ptr += ggml_nbytes(tensor);
|
14955
|
+
|
14956
|
+
fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
14957
|
+
}
|
14958
|
+
}
|
14959
|
+
|
14960
|
+
ggml_set_no_alloc(*ctx_eval, false);
|
14961
|
+
|
14962
|
+
// nodes
|
14963
|
+
{
|
14964
|
+
uint32_t type;
|
14965
|
+
uint32_t op;
|
14966
|
+
uint32_t n_dims;
|
14967
|
+
|
14968
|
+
for (uint32_t i = 0; i < n_nodes; ++i) {
|
14969
|
+
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
14970
|
+
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
14971
|
+
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
14972
|
+
|
14973
|
+
int64_t ne[GGML_MAX_DIMS];
|
14974
|
+
size_t nb[GGML_MAX_DIMS];
|
14975
|
+
|
14976
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14977
|
+
uint64_t ne_cur;
|
14978
|
+
uint64_t nb_cur;
|
14979
|
+
|
14980
|
+
ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
|
14981
|
+
nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
|
14982
|
+
|
14983
|
+
ne[j] = ne_cur;
|
14984
|
+
nb[j] = nb_cur;
|
14985
|
+
}
|
14986
|
+
|
14987
|
+
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
14988
|
+
|
14989
|
+
tensor->op = (enum ggml_op) op;
|
14990
|
+
|
14991
|
+
uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
|
14992
|
+
|
14993
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
14994
|
+
|
14995
|
+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
14996
|
+
tensor->nb[j] = nb[j];
|
14997
|
+
}
|
14998
|
+
|
14999
|
+
// parse args
|
15000
|
+
{
|
15001
|
+
struct ggml_tensor ** args[2 + GGML_MAX_OPT] = {
|
15002
|
+
&tensor->src0,
|
15003
|
+
&tensor->src1,
|
15004
|
+
};
|
15005
|
+
|
15006
|
+
for (int j = 0; j < GGML_MAX_OPT; ++j) {
|
15007
|
+
args[2 + j] = &tensor->opt[j];
|
15008
|
+
}
|
15009
|
+
|
15010
|
+
for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
|
15011
|
+
const int32_t arg_idx = *(const int32_t *) ptr; ptr += sizeof(arg_idx);
|
15012
|
+
|
15013
|
+
if (arg_idx == -1) {
|
15014
|
+
continue;
|
15015
|
+
}
|
15016
|
+
|
15017
|
+
if (arg_idx < GGML_MAX_NODES) {
|
15018
|
+
*args[j] = result.leafs[arg_idx];
|
15019
|
+
} else {
|
15020
|
+
*args[j] = result.nodes[arg_idx - GGML_MAX_NODES];
|
15021
|
+
}
|
15022
|
+
}
|
15023
|
+
}
|
15024
|
+
|
15025
|
+
result.nodes[i] = tensor;
|
15026
|
+
|
15027
|
+
fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
15028
|
+
}
|
15029
|
+
}
|
15030
|
+
}
|
15031
|
+
|
15032
|
+
return result;
|
15033
|
+
}
|
15034
|
+
|
14524
15035
|
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
14525
15036
|
int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
|
14526
15037
|
|
@@ -14538,7 +15049,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
14538
15049
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
14539
15050
|
i,
|
14540
15051
|
node->ne[0], node->ne[1], node->ne[2],
|
14541
|
-
|
15052
|
+
GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
14542
15053
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
14543
15054
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
14544
15055
|
(double) node->perf_time_us / 1000.0,
|
@@ -14552,7 +15063,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
14552
15063
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
14553
15064
|
i,
|
14554
15065
|
node->ne[0], node->ne[1],
|
14555
|
-
|
15066
|
+
GGML_OP_NAME[node->op]);
|
14556
15067
|
}
|
14557
15068
|
|
14558
15069
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -14560,7 +15071,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
14560
15071
|
continue;
|
14561
15072
|
}
|
14562
15073
|
|
14563
|
-
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
|
15074
|
+
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0);
|
14564
15075
|
}
|
14565
15076
|
|
14566
15077
|
GGML_PRINT("========================================\n");
|