llama_cpp 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -7
- data/ext/llama_cpp/extconf.rb +1 -2
- data/ext/llama_cpp/src/ggml-opencl.cpp +1028 -0
- data/ext/llama_cpp/src/ggml-opencl.h +8 -10
- data/ext/llama_cpp/src/ggml.c +568 -57
- data/ext/llama_cpp/src/ggml.h +21 -2
- data/ext/llama_cpp/src/llama.cpp +37 -2
- data/ext/llama_cpp/src/llama.h +5 -0
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -3
- data/ext/llama_cpp/src/ggml-opencl.c +0 -474
    
        data/ext/llama_cpp/src/ggml.c
    CHANGED
    
    | @@ -186,10 +186,12 @@ typedef double ggml_float; | |
| 186 186 | 
             
            #if defined(_MSC_VER) || defined(__MINGW32__)
         | 
| 187 187 | 
             
            #include <intrin.h>
         | 
| 188 188 | 
             
            #else
         | 
| 189 | 
            +
            #if !defined(__riscv)
         | 
| 189 190 | 
             
            #include <immintrin.h>
         | 
| 190 191 | 
             
            #endif
         | 
| 191 192 | 
             
            #endif
         | 
| 192 193 | 
             
            #endif
         | 
| 194 | 
            +
            #endif
         | 
| 193 195 |  | 
| 194 196 | 
             
            #ifdef __F16C__
         | 
| 195 197 |  | 
| @@ -3494,7 +3496,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = { | |
| 3494 3496 | 
             
            };
         | 
| 3495 3497 | 
             
            static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
         | 
| 3496 3498 |  | 
| 3497 | 
            -
            static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
         | 
| 3499 | 
            +
            static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
         | 
| 3498 3500 | 
             
                "NONE",
         | 
| 3499 3501 |  | 
| 3500 3502 | 
             
                "DUP",
         | 
| @@ -3749,6 +3751,9 @@ const char * ggml_type_name(enum ggml_type type) { | |
| 3749 3751 | 
             
                return GGML_TYPE_NAME[type];
         | 
| 3750 3752 | 
             
            }
         | 
| 3751 3753 |  | 
| 3754 | 
            +
            const char * ggml_op_name(enum ggml_op op) {
         | 
| 3755 | 
            +
                return GGML_OP_NAME[op];
         | 
| 3756 | 
            +
            }
         | 
| 3752 3757 |  | 
| 3753 3758 | 
             
            size_t ggml_element_size(const struct ggml_tensor * tensor) {
         | 
| 3754 3759 | 
             
                return GGML_TYPE_SIZE[tensor->type];
         | 
| @@ -3805,6 +3810,10 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { | |
| 3805 3810 | 
             
                return wtype;
         | 
| 3806 3811 | 
             
            }
         | 
| 3807 3812 |  | 
| 3813 | 
            +
            size_t ggml_tensor_overhead(void) {
         | 
| 3814 | 
            +
                return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
         | 
| 3815 | 
            +
            }
         | 
| 3816 | 
            +
             | 
| 3808 3817 | 
             
            static inline bool ggml_is_transposed(const struct ggml_tensor * tensor) {
         | 
| 3809 3818 | 
             
                return tensor->nb[0] > tensor->nb[1];
         | 
| 3810 3819 | 
             
            }
         | 
| @@ -4017,6 +4026,18 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) | |
| 4017 4026 | 
             
                return result;
         | 
| 4018 4027 | 
             
            }
         | 
| 4019 4028 |  | 
| 4029 | 
            +
            void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
         | 
| 4030 | 
            +
                ctx->no_alloc = no_alloc;
         | 
| 4031 | 
            +
            }
         | 
| 4032 | 
            +
             | 
| 4033 | 
            +
            void * ggml_get_mem_buffer(struct ggml_context * ctx) {
         | 
| 4034 | 
            +
                return ctx->mem_buffer;
         | 
| 4035 | 
            +
            }
         | 
| 4036 | 
            +
             | 
| 4037 | 
            +
            size_t ggml_get_mem_size(struct ggml_context * ctx) {
         | 
| 4038 | 
            +
                return ctx->mem_size;
         | 
| 4039 | 
            +
            }
         | 
| 4040 | 
            +
             | 
| 4020 4041 | 
             
            // IMPORTANT:
         | 
| 4021 4042 | 
             
            // when creating "opt" tensors, always save and load the scratch buffer
         | 
| 4022 4043 | 
             
            // this is an error prone process, but it is necessary to support inplace
         | 
| @@ -4061,7 +4082,7 @@ struct ggml_tensor * ggml_new_tensor_impl( | |
| 4061 4082 | 
             
                struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
         | 
| 4062 4083 |  | 
| 4063 4084 | 
             
                if (ctx->scratch.data == NULL || data != NULL) {
         | 
| 4064 | 
            -
                    size_needed += sizeof(struct ggml_tensor);
         | 
| 4085 | 
            +
                    size_needed += GGML_TENSOR_SIZE;
         | 
| 4065 4086 |  | 
| 4066 4087 | 
             
                    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
         | 
| 4067 4088 | 
             
                        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
         | 
| @@ -4077,14 +4098,15 @@ struct ggml_tensor * ggml_new_tensor_impl( | |
| 4077 4098 | 
             
                    };
         | 
| 4078 4099 | 
             
                } else {
         | 
| 4079 4100 | 
             
                    if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
         | 
| 4080 | 
            -
                        GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
         | 
| 4101 | 
            +
                        GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
         | 
| 4102 | 
            +
                                __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
         | 
| 4081 4103 | 
             
                        assert(false);
         | 
| 4082 4104 | 
             
                        return NULL;
         | 
| 4083 4105 | 
             
                    }
         | 
| 4084 4106 |  | 
| 4085 | 
            -
                    if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
         | 
| 4107 | 
            +
                    if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
         | 
| 4086 4108 | 
             
                        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
         | 
| 4087 | 
            -
                                __func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
         | 
| 4109 | 
            +
                                __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
         | 
| 4088 4110 | 
             
                        assert(false);
         | 
| 4089 4111 | 
             
                        return NULL;
         | 
| 4090 4112 | 
             
                    }
         | 
| @@ -4093,7 +4115,7 @@ struct ggml_tensor * ggml_new_tensor_impl( | |
| 4093 4115 |  | 
| 4094 4116 | 
             
                    *obj_new = (struct ggml_object) {
         | 
| 4095 4117 | 
             
                        .offs = cur_end + GGML_OBJECT_SIZE,
         | 
| 4096 | 
            -
                        .size = sizeof(struct ggml_tensor),
         | 
| 4118 | 
            +
                        .size = GGML_TENSOR_SIZE,
         | 
| 4097 4119 | 
             
                        .next = NULL,
         | 
| 4098 4120 | 
             
                    };
         | 
| 4099 4121 |  | 
| @@ -4509,6 +4531,23 @@ struct ggml_tensor * ggml_view_tensor( | |
| 4509 4531 | 
             
                return result;
         | 
| 4510 4532 | 
             
            }
         | 
| 4511 4533 |  | 
| 4534 | 
            +
            struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
         | 
| 4535 | 
            +
                struct ggml_object * obj = ctx->objects_begin;
         | 
| 4536 | 
            +
             | 
| 4537 | 
            +
                char * const mem_buffer = ctx->mem_buffer;
         | 
| 4538 | 
            +
             | 
| 4539 | 
            +
                while (obj != NULL) {
         | 
| 4540 | 
            +
                    struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
         | 
| 4541 | 
            +
                    if (strcmp(cur->name, name) == 0) {
         | 
| 4542 | 
            +
                        return cur;
         | 
| 4543 | 
            +
                    }
         | 
| 4544 | 
            +
             | 
| 4545 | 
            +
                    obj = obj->next;
         | 
| 4546 | 
            +
                }
         | 
| 4547 | 
            +
             | 
| 4548 | 
            +
                return NULL;
         | 
| 4549 | 
            +
            }
         | 
| 4550 | 
            +
             | 
| 4512 4551 | 
             
            ////////////////////////////////////////////////////////////////////////////////
         | 
| 4513 4552 |  | 
| 4514 4553 | 
             
            // ggml_dup
         | 
| @@ -6303,7 +6342,7 @@ struct ggml_tensor * ggml_alibi( | |
| 6303 6342 |  | 
| 6304 6343 | 
             
                ggml_scratch_save(ctx);
         | 
| 6305 6344 |  | 
| 6306 | 
            -
                struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
         | 
| 6345 | 
            +
                struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
         | 
| 6307 6346 |  | 
| 6308 6347 | 
             
                ((int32_t *) b->data)[0] = n_past;
         | 
| 6309 6348 | 
             
                ((int32_t *) b->data)[1] = n_head;
         | 
| @@ -9431,7 +9470,7 @@ static void ggml_compute_forward_rms_norm_back( | |
| 9431 9470 |  | 
| 9432 9471 | 
             
            // ggml_compute_forward_mul_mat
         | 
| 9433 9472 |  | 
| 9434 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 9473 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 9435 9474 | 
             
            // helper function to determine if it is better to use BLAS or not
         | 
| 9436 9475 | 
             
            // for large matrices, BLAS is faster
         | 
| 9437 9476 | 
             
            static bool ggml_compute_forward_mul_mat_use_blas(
         | 
| @@ -9472,7 +9511,7 @@ static void ggml_compute_forward_mul_mat_f32( | |
| 9472 9511 | 
             
                const int64_t ne02 = src0->ne[2];
         | 
| 9473 9512 | 
             
                const int64_t ne03 = src0->ne[3];
         | 
| 9474 9513 |  | 
| 9475 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 9514 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 9476 9515 | 
             
                const int64_t ne10 = src1->ne[0];
         | 
| 9477 9516 | 
             
            #endif
         | 
| 9478 9517 | 
             
                const int64_t ne11 = src1->ne[1];
         | 
| @@ -9536,9 +9575,16 @@ static void ggml_compute_forward_mul_mat_f32( | |
| 9536 9575 | 
             
                    }
         | 
| 9537 9576 | 
             
                    return;
         | 
| 9538 9577 | 
             
                }
         | 
| 9578 | 
            +
            #elif defined(GGML_USE_CLBLAST)
         | 
| 9579 | 
            +
                if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         | 
| 9580 | 
            +
                    if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
         | 
| 9581 | 
            +
                        ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         | 
| 9582 | 
            +
                    }
         | 
| 9583 | 
            +
                    return;
         | 
| 9584 | 
            +
                }
         | 
| 9539 9585 | 
             
            #endif
         | 
| 9540 9586 |  | 
| 9541 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 9587 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 9542 9588 | 
             
                if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         | 
| 9543 9589 | 
             
                    if (params->ith != 0) {
         | 
| 9544 9590 | 
             
                        return;
         | 
| @@ -9558,21 +9604,11 @@ static void ggml_compute_forward_mul_mat_f32( | |
| 9558 9604 | 
             
                            const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
         | 
| 9559 9605 | 
             
                            float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
         | 
| 9560 9606 |  | 
| 9561 | 
            -
            #if defined(GGML_USE_CLBLAST)
         | 
| 9562 | 
            -
                            // zT = y * xT
         | 
| 9563 | 
            -
                            ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
         | 
| 9564 | 
            -
                                    ne11, ne01, ne10,
         | 
| 9565 | 
            -
                                    1.0f,    y, ne10,
         | 
| 9566 | 
            -
                                             x, ne10,
         | 
| 9567 | 
            -
                                    0.0f,    d, ne01,
         | 
| 9568 | 
            -
                                    GGML_TYPE_F32);
         | 
| 9569 | 
            -
            #else
         | 
| 9570 9607 | 
             
                            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
         | 
| 9571 9608 | 
             
                                    ne11, ne01, ne10,
         | 
| 9572 9609 | 
             
                                    1.0f,    y, ne10,
         | 
| 9573 9610 | 
             
                                             x, ne00,
         | 
| 9574 9611 | 
             
                                    0.0f,    d, ne01);
         | 
| 9575 | 
            -
            #endif
         | 
| 9576 9612 | 
             
                        }
         | 
| 9577 9613 | 
             
                    }
         | 
| 9578 9614 | 
             
                    //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
         | 
| @@ -9711,9 +9747,16 @@ static void ggml_compute_forward_mul_mat_f16_f32( | |
| 9711 9747 | 
             
                    }
         | 
| 9712 9748 | 
             
                    return;
         | 
| 9713 9749 | 
             
                }
         | 
| 9750 | 
            +
            #elif defined(GGML_USE_CLBLAST)
         | 
| 9751 | 
            +
                if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         | 
| 9752 | 
            +
                    if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
         | 
| 9753 | 
            +
                        ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         | 
| 9754 | 
            +
                    }
         | 
| 9755 | 
            +
                    return;
         | 
| 9756 | 
            +
                }
         | 
| 9714 9757 | 
             
            #endif
         | 
| 9715 9758 |  | 
| 9716 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 9759 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 9717 9760 | 
             
                if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         | 
| 9718 9761 | 
             
                    GGML_ASSERT(nb10 == sizeof(float));
         | 
| 9719 9762 |  | 
| @@ -9743,20 +9786,6 @@ static void ggml_compute_forward_mul_mat_f16_f32( | |
| 9743 9786 | 
             
                                assert(id*sizeof(float) <= params->wsize);
         | 
| 9744 9787 | 
             
                            }
         | 
| 9745 9788 |  | 
| 9746 | 
            -
            #if defined(GGML_USE_CLBLAST)
         | 
| 9747 | 
            -
                            const float * x = wdata;
         | 
| 9748 | 
            -
                            const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
         | 
| 9749 | 
            -
             | 
| 9750 | 
            -
                            float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
         | 
| 9751 | 
            -
             | 
| 9752 | 
            -
                            // zT = y * xT
         | 
| 9753 | 
            -
                            ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
         | 
| 9754 | 
            -
                                    ne11, ne01, ne10,
         | 
| 9755 | 
            -
                                    1.0f,    y, ne10,
         | 
| 9756 | 
            -
                                             x, ne10,
         | 
| 9757 | 
            -
                                    0.0f,    d, ne01,
         | 
| 9758 | 
            -
                                    GGML_TYPE_F32);
         | 
| 9759 | 
            -
            #else
         | 
| 9760 9789 | 
             
                            const float * x = wdata;
         | 
| 9761 9790 | 
             
                            const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
         | 
| 9762 9791 |  | 
| @@ -9768,7 +9797,6 @@ static void ggml_compute_forward_mul_mat_f16_f32( | |
| 9768 9797 | 
             
                                    1.0f,    y, ne10,
         | 
| 9769 9798 | 
             
                                             x, ne00,
         | 
| 9770 9799 | 
             
                                    0.0f,    d, ne01);
         | 
| 9771 | 
            -
            #endif
         | 
| 9772 9800 | 
             
                        }
         | 
| 9773 9801 | 
             
                    }
         | 
| 9774 9802 |  | 
| @@ -9931,9 +9959,16 @@ static void ggml_compute_forward_mul_mat_q_f32( | |
| 9931 9959 | 
             
                    }
         | 
| 9932 9960 | 
             
                    return;
         | 
| 9933 9961 | 
             
                }
         | 
| 9962 | 
            +
            #elif defined(GGML_USE_CLBLAST)
         | 
| 9963 | 
            +
                if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         | 
| 9964 | 
            +
                    if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
         | 
| 9965 | 
            +
                        ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         | 
| 9966 | 
            +
                    }
         | 
| 9967 | 
            +
                    return;
         | 
| 9968 | 
            +
                }
         | 
| 9934 9969 | 
             
            #endif
         | 
| 9935 9970 |  | 
| 9936 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 9971 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 9937 9972 | 
             
                if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         | 
| 9938 9973 | 
             
                    if (params->ith != 0) {
         | 
| 9939 9974 | 
             
                        return;
         | 
| @@ -9956,9 +9991,6 @@ static void ggml_compute_forward_mul_mat_q_f32( | |
| 9956 9991 |  | 
| 9957 9992 | 
             
                            float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
         | 
| 9958 9993 |  | 
| 9959 | 
            -
            #if defined(GGML_USE_CLBLAST)
         | 
| 9960 | 
            -
                            const void* x = (char *) src0->data + i03*nb03 + i02*nb02;
         | 
| 9961 | 
            -
            #else
         | 
| 9962 9994 | 
             
                            {
         | 
| 9963 9995 | 
             
                                size_t id = 0;
         | 
| 9964 9996 | 
             
                                for (int64_t i01 = 0; i01 < ne01; ++i01) {
         | 
| @@ -9970,23 +10002,12 @@ static void ggml_compute_forward_mul_mat_q_f32( | |
| 9970 10002 | 
             
                            }
         | 
| 9971 10003 |  | 
| 9972 10004 | 
             
                            const float * x = wdata;
         | 
| 9973 | 
            -
            #endif
         | 
| 9974 10005 |  | 
| 9975 | 
            -
            #if defined(GGML_USE_CLBLAST)
         | 
| 9976 | 
            -
                            // zT = y * xT
         | 
| 9977 | 
            -
                            ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
         | 
| 9978 | 
            -
                                    ne11, ne01, ne10,
         | 
| 9979 | 
            -
                                    1.0f,    y, ne10,
         | 
| 9980 | 
            -
                                             x, ne10,
         | 
| 9981 | 
            -
                                    0.0f,    d, ne01,
         | 
| 9982 | 
            -
                                    type);
         | 
| 9983 | 
            -
            #else
         | 
| 9984 10006 | 
             
                            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
         | 
| 9985 10007 | 
             
                                    ne11, ne01, ne10,
         | 
| 9986 10008 | 
             
                                    1.0f,    y, ne10,
         | 
| 9987 10009 | 
             
                                             x, ne00,
         | 
| 9988 10010 | 
             
                                    0.0f,    d, ne01);
         | 
| 9989 | 
            -
            #endif
         | 
| 9990 10011 | 
             
                        }
         | 
| 9991 10012 | 
             
                    }
         | 
| 9992 10013 |  | 
| @@ -13810,11 +13831,19 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * | |
| 13810 13831 | 
             
                    // reached a leaf node, not part of the gradient graph (e.g. a constant)
         | 
| 13811 13832 | 
             
                    GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
         | 
| 13812 13833 |  | 
| 13834 | 
            +
                    if (strlen(node->name) == 0) {
         | 
| 13835 | 
            +
                        snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
         | 
| 13836 | 
            +
                    }
         | 
| 13837 | 
            +
             | 
| 13813 13838 | 
             
                    cgraph->leafs[cgraph->n_leafs] = node;
         | 
| 13814 13839 | 
             
                    cgraph->n_leafs++;
         | 
| 13815 13840 | 
             
                } else {
         | 
| 13816 13841 | 
             
                    GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
         | 
| 13817 13842 |  | 
| 13843 | 
            +
                    if (strlen(node->name) == 0) {
         | 
| 13844 | 
            +
                        snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
         | 
| 13845 | 
            +
                    }
         | 
| 13846 | 
            +
             | 
| 13818 13847 | 
             
                    cgraph->nodes[cgraph->n_nodes] = node;
         | 
| 13819 13848 | 
             
                    cgraph->grads[cgraph->n_nodes] = node->grad;
         | 
| 13820 13849 | 
             
                    cgraph->n_nodes++;
         | 
| @@ -14165,9 +14194,16 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) | |
| 14165 14194 | 
             
                                        cur = ggml_cuda_mul_mat_get_wsize(node->src0, node->src1, node);
         | 
| 14166 14195 | 
             
                                    }
         | 
| 14167 14196 | 
             
                                    else
         | 
| 14197 | 
            +
            #elif defined(GGML_USE_CLBLAST)
         | 
| 14198 | 
            +
                                    if (ggml_cl_can_mul_mat(node->src0, node->src1, node)) {
         | 
| 14199 | 
            +
                                        node->n_tasks = 1; // TODO: this actually is doing nothing
         | 
| 14200 | 
            +
                                                            //       the threads are still spinning
         | 
| 14201 | 
            +
                                        cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
         | 
| 14202 | 
            +
                                    }
         | 
| 14203 | 
            +
                                    else
         | 
| 14168 14204 | 
             
            #endif
         | 
| 14169 14205 | 
             
                                    if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) {
         | 
| 14170 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 14206 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 14171 14207 | 
             
                                        if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
         | 
| 14172 14208 | 
             
                                            node->n_tasks = 1; // TODO: this actually is doing nothing
         | 
| 14173 14209 | 
             
                                                               //       the threads are still spinning
         | 
| @@ -14181,13 +14217,13 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) | |
| 14181 14217 | 
             
            #endif
         | 
| 14182 14218 | 
             
                                    } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
         | 
| 14183 14219 | 
             
                                        cur = 0;
         | 
| 14184 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 14220 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 14185 14221 | 
             
                                        if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
         | 
| 14186 14222 | 
             
                                            node->n_tasks = 1;
         | 
| 14187 14223 | 
             
                                        }
         | 
| 14188 14224 | 
             
            #endif
         | 
| 14189 14225 | 
             
                                    } else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) {
         | 
| 14190 | 
            -
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         | 
| 14226 | 
            +
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
         | 
| 14191 14227 | 
             
                                        if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
         | 
| 14192 14228 | 
             
                                            node->n_tasks = 1;
         | 
| 14193 14229 | 
             
                                            cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
         | 
| @@ -14521,6 +14557,481 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) { | |
| 14521 14557 | 
             
                }
         | 
| 14522 14558 | 
             
            }
         | 
| 14523 14559 |  | 
| 14560 | 
            +
            struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
         | 
| 14561 | 
            +
                for (int i = 0; i < cgraph->n_leafs; i++) {
         | 
| 14562 | 
            +
                    struct ggml_tensor * leaf = cgraph->leafs[i];
         | 
| 14563 | 
            +
             | 
| 14564 | 
            +
                    if (strcmp(leaf->name, name) == 0) {
         | 
| 14565 | 
            +
                        return leaf;
         | 
| 14566 | 
            +
                    }
         | 
| 14567 | 
            +
                }
         | 
| 14568 | 
            +
             | 
| 14569 | 
            +
                for (int i = 0; i < cgraph->n_nodes; i++) {
         | 
| 14570 | 
            +
                    struct ggml_tensor * node = cgraph->nodes[i];
         | 
| 14571 | 
            +
             | 
| 14572 | 
            +
                    if (strcmp(node->name, name) == 0) {
         | 
| 14573 | 
            +
                        return node;
         | 
| 14574 | 
            +
                    }
         | 
| 14575 | 
            +
                }
         | 
| 14576 | 
            +
             | 
| 14577 | 
            +
                return NULL;
         | 
| 14578 | 
            +
            }
         | 
| 14579 | 
            +
             | 
| 14580 | 
            +
            static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) {
         | 
| 14581 | 
            +
                const int64_t * ne = tensor->ne;
         | 
| 14582 | 
            +
                const size_t  * nb = tensor->nb;
         | 
| 14583 | 
            +
             | 
| 14584 | 
            +
                fprintf(fout, "%-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %16p %16s\n",
         | 
| 14585 | 
            +
                        ggml_type_name(tensor->type),
         | 
| 14586 | 
            +
                        ggml_op_name  (tensor->op),
         | 
| 14587 | 
            +
                        tensor->n_dims,
         | 
| 14588 | 
            +
                        ne[0], ne[1], ne[2], ne[3],
         | 
| 14589 | 
            +
                        nb[0], nb[1], nb[2], nb[3],
         | 
| 14590 | 
            +
                        tensor->data,
         | 
| 14591 | 
            +
                        tensor->name);
         | 
| 14592 | 
            +
            }
         | 
| 14593 | 
            +
             | 
| 14594 | 
            +
            static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) {
         | 
| 14595 | 
            +
                const int64_t * ne = tensor->ne;
         | 
| 14596 | 
            +
                const size_t  * nb = tensor->nb;
         | 
| 14597 | 
            +
             | 
| 14598 | 
            +
                fprintf(fout, "%-6s %-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %8d %16p %16s\n",
         | 
| 14599 | 
            +
                        arg,
         | 
| 14600 | 
            +
                        ggml_type_name(tensor->type),
         | 
| 14601 | 
            +
                        ggml_op_name  (tensor->op),
         | 
| 14602 | 
            +
                        tensor->n_dims,
         | 
| 14603 | 
            +
                        ne[0], ne[1], ne[2], ne[3],
         | 
| 14604 | 
            +
                        nb[0], nb[1], nb[2], nb[3],
         | 
| 14605 | 
            +
                        tensor->n_tasks,
         | 
| 14606 | 
            +
                        tensor->data,
         | 
| 14607 | 
            +
                        tensor->name);
         | 
| 14608 | 
            +
            }
         | 
| 14609 | 
            +
             | 
| 14610 | 
            +
            void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
         | 
| 14611 | 
            +
                assert(cgraph->work      == NULL);
         | 
| 14612 | 
            +
                assert(cgraph->work_size == 0);
         | 
| 14613 | 
            +
             | 
| 14614 | 
            +
                uint64_t size_eval = 0;
         | 
| 14615 | 
            +
             | 
| 14616 | 
            +
                // compute size of intermediate results
         | 
| 14617 | 
            +
                // TODO: does not take into account scratch buffers !!!!
         | 
| 14618 | 
            +
                for (int i = 0; i < cgraph->n_nodes; ++i) {
         | 
| 14619 | 
            +
                    size_eval += ggml_nbytes(cgraph->nodes[i]);
         | 
| 14620 | 
            +
                }
         | 
| 14621 | 
            +
             | 
| 14622 | 
            +
                // print
         | 
| 14623 | 
            +
                {
         | 
| 14624 | 
            +
                    FILE * fout = stdout;
         | 
| 14625 | 
            +
             | 
| 14626 | 
            +
                    fprintf(fout, "\n");
         | 
| 14627 | 
            +
                    fprintf(fout, "%-16s %8x\n",   "magic",   GGML_FILE_MAGIC);
         | 
| 14628 | 
            +
                    fprintf(fout, "%-16s %8d\n",   "version", GGML_FILE_VERSION);
         | 
| 14629 | 
            +
                    fprintf(fout, "%-16s %8d\n",   "leafs",   cgraph->n_leafs);
         | 
| 14630 | 
            +
                    fprintf(fout, "%-16s %8d\n",   "nodes",   cgraph->n_nodes);
         | 
| 14631 | 
            +
                    fprintf(fout, "%-16s %8llu\n", "eval",    size_eval);
         | 
| 14632 | 
            +
             | 
| 14633 | 
            +
                    // header
         | 
| 14634 | 
            +
                    fprintf(fout, "\n");
         | 
| 14635 | 
            +
                    fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n",
         | 
| 14636 | 
            +
                            "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME");
         | 
| 14637 | 
            +
             | 
| 14638 | 
            +
                    for (int i = 0; i < cgraph->n_leafs; ++i) {
         | 
| 14639 | 
            +
                        ggml_graph_export_leaf(cgraph->leafs[i], fout);
         | 
| 14640 | 
            +
             | 
| 14641 | 
            +
                        GGML_ASSERT(cgraph->leafs[i]->op   == GGML_OP_NONE);
         | 
| 14642 | 
            +
                        GGML_ASSERT(cgraph->leafs[i]->src0 == NULL);
         | 
| 14643 | 
            +
                        GGML_ASSERT(cgraph->leafs[i]->src1 == NULL);
         | 
| 14644 | 
            +
                    }
         | 
| 14645 | 
            +
             | 
| 14646 | 
            +
                    // header
         | 
| 14647 | 
            +
                    fprintf(fout, "\n");
         | 
| 14648 | 
            +
                    fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n",
         | 
| 14649 | 
            +
                            "ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME");
         | 
| 14650 | 
            +
             | 
| 14651 | 
            +
                    for (int i = 0; i < cgraph->n_nodes; ++i) {
         | 
| 14652 | 
            +
                        ggml_graph_export_node(cgraph->nodes[i], "DST", fout);
         | 
| 14653 | 
            +
             | 
| 14654 | 
            +
                        if (cgraph->nodes[i]->src0) {
         | 
| 14655 | 
            +
                            ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout);
         | 
| 14656 | 
            +
                        }
         | 
| 14657 | 
            +
             | 
| 14658 | 
            +
                        if (cgraph->nodes[i]->src1) {
         | 
| 14659 | 
            +
                            ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout);
         | 
| 14660 | 
            +
                        }
         | 
| 14661 | 
            +
             | 
| 14662 | 
            +
                        for (int j = 0; j < GGML_MAX_OPT; ++j) {
         | 
| 14663 | 
            +
                            if (cgraph->nodes[i]->opt[j]) {
         | 
| 14664 | 
            +
                                ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout);
         | 
| 14665 | 
            +
                            }
         | 
| 14666 | 
            +
                        }
         | 
| 14667 | 
            +
             | 
| 14668 | 
            +
                        fprintf(fout, "\n");
         | 
| 14669 | 
            +
                    }
         | 
| 14670 | 
            +
             | 
| 14671 | 
            +
                    fprintf(fout, "\n");
         | 
| 14672 | 
            +
                }
         | 
| 14673 | 
            +
             | 
| 14674 | 
            +
                // write binary data
         | 
| 14675 | 
            +
                {
         | 
| 14676 | 
            +
                    FILE * fout = fopen(fname, "wb");
         | 
| 14677 | 
            +
             | 
| 14678 | 
            +
                    if (!fout) {
         | 
| 14679 | 
            +
                        fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
         | 
| 14680 | 
            +
                        return;
         | 
| 14681 | 
            +
                    }
         | 
| 14682 | 
            +
             | 
| 14683 | 
            +
                    // header
         | 
| 14684 | 
            +
                    {
         | 
| 14685 | 
            +
                        const uint32_t magic   = GGML_FILE_MAGIC;
         | 
| 14686 | 
            +
                        const uint32_t version = GGML_FILE_VERSION;
         | 
| 14687 | 
            +
                        const uint32_t n_leafs = cgraph->n_leafs;
         | 
| 14688 | 
            +
                        const uint32_t nodes   = cgraph->n_nodes;
         | 
| 14689 | 
            +
             | 
| 14690 | 
            +
                        fwrite(&magic,     sizeof(uint32_t), 1, fout);
         | 
| 14691 | 
            +
                        fwrite(&version,   sizeof(uint32_t), 1, fout);
         | 
| 14692 | 
            +
                        fwrite(&n_leafs,   sizeof(uint32_t), 1, fout);
         | 
| 14693 | 
            +
                        fwrite(&nodes,     sizeof(uint32_t), 1, fout);
         | 
| 14694 | 
            +
                        fwrite(&size_eval, sizeof(uint64_t), 1, fout);
         | 
| 14695 | 
            +
                    }
         | 
| 14696 | 
            +
             | 
| 14697 | 
            +
                    // leafs
         | 
| 14698 | 
            +
                    {
         | 
| 14699 | 
            +
                        for (int i = 0; i < cgraph->n_leafs; ++i) {
         | 
| 14700 | 
            +
                            const struct ggml_tensor * tensor = cgraph->leafs[i];
         | 
| 14701 | 
            +
             | 
| 14702 | 
            +
                            const uint32_t type   = tensor->type;
         | 
| 14703 | 
            +
                            const uint32_t op     = tensor->op;
         | 
| 14704 | 
            +
                            const uint32_t n_dims = tensor->n_dims;
         | 
| 14705 | 
            +
             | 
| 14706 | 
            +
                            fwrite(&type,   sizeof(uint32_t), 1, fout);
         | 
| 14707 | 
            +
                            fwrite(&op,     sizeof(uint32_t), 1, fout);
         | 
| 14708 | 
            +
                            fwrite(&n_dims, sizeof(uint32_t), 1, fout);
         | 
| 14709 | 
            +
             | 
| 14710 | 
            +
                            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
         | 
| 14711 | 
            +
                                const uint64_t ne = tensor->ne[j];
         | 
| 14712 | 
            +
                                const uint64_t nb = tensor->nb[j];
         | 
| 14713 | 
            +
             | 
| 14714 | 
            +
                                fwrite(&ne, sizeof(uint64_t), 1, fout);
         | 
| 14715 | 
            +
                                fwrite(&nb, sizeof(uint64_t), 1, fout);
         | 
| 14716 | 
            +
                            }
         | 
| 14717 | 
            +
             | 
| 14718 | 
            +
                            // store the pointer address
         | 
| 14719 | 
            +
                            {
         | 
| 14720 | 
            +
                                const uint64_t ptr = (uint64_t) tensor->data;
         | 
| 14721 | 
            +
             | 
| 14722 | 
            +
                                fwrite(&ptr, sizeof(uint64_t), 1, fout);
         | 
| 14723 | 
            +
                            }
         | 
| 14724 | 
            +
             | 
| 14725 | 
            +
                            fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
         | 
| 14726 | 
            +
             | 
| 14727 | 
            +
                            // dump the data
         | 
| 14728 | 
            +
                            // TODO: pad this to 32 byte boundary
         | 
| 14729 | 
            +
                            {
         | 
| 14730 | 
            +
                                const size_t size = ggml_nbytes(tensor);
         | 
| 14731 | 
            +
             | 
| 14732 | 
            +
                                fwrite(tensor->data, sizeof(char), size, fout);
         | 
| 14733 | 
            +
                            }
         | 
| 14734 | 
            +
                        }
         | 
| 14735 | 
            +
                    }
         | 
| 14736 | 
            +
             | 
| 14737 | 
            +
                    // nodes
         | 
| 14738 | 
            +
                    {
         | 
| 14739 | 
            +
                        for (int i = 0; i < cgraph->n_nodes; ++i) {
         | 
| 14740 | 
            +
                            const struct ggml_tensor * tensor = cgraph->nodes[i];
         | 
| 14741 | 
            +
             | 
| 14742 | 
            +
                            const uint32_t type   = tensor->type;
         | 
| 14743 | 
            +
                            const uint32_t op     = tensor->op;
         | 
| 14744 | 
            +
                            const uint32_t n_dims = tensor->n_dims;
         | 
| 14745 | 
            +
             | 
| 14746 | 
            +
                            fwrite(&type,   sizeof(uint32_t), 1, fout);
         | 
| 14747 | 
            +
                            fwrite(&op,     sizeof(uint32_t), 1, fout);
         | 
| 14748 | 
            +
                            fwrite(&n_dims, sizeof(uint32_t), 1, fout);
         | 
| 14749 | 
            +
             | 
| 14750 | 
            +
                            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
         | 
| 14751 | 
            +
                                const uint64_t ne = tensor->ne[j];
         | 
| 14752 | 
            +
                                const uint64_t nb = tensor->nb[j];
         | 
| 14753 | 
            +
             | 
| 14754 | 
            +
                                fwrite(&ne, sizeof(uint64_t), 1, fout);
         | 
| 14755 | 
            +
                                fwrite(&nb, sizeof(uint64_t), 1, fout);
         | 
| 14756 | 
            +
                            }
         | 
| 14757 | 
            +
             | 
| 14758 | 
            +
                            // store the pointer address
         | 
| 14759 | 
            +
                            {
         | 
| 14760 | 
            +
                                const uint64_t ptr = (uint64_t) tensor->data;
         | 
| 14761 | 
            +
             | 
| 14762 | 
            +
                                fwrite(&ptr, sizeof(uint64_t), 1, fout);
         | 
| 14763 | 
            +
                            }
         | 
| 14764 | 
            +
             | 
| 14765 | 
            +
                            fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
         | 
| 14766 | 
            +
             | 
| 14767 | 
            +
                            // output the op arguments
         | 
| 14768 | 
            +
                            {
         | 
| 14769 | 
            +
                                struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
         | 
| 14770 | 
            +
             | 
| 14771 | 
            +
                                args[0] = tensor->src0;
         | 
| 14772 | 
            +
                                args[1] = tensor->src1;
         | 
| 14773 | 
            +
             | 
| 14774 | 
            +
                                for (int j = 0; j < GGML_MAX_OPT; ++j) {
         | 
| 14775 | 
            +
                                    args[2 + j] = tensor->opt[j];
         | 
| 14776 | 
            +
                                }
         | 
| 14777 | 
            +
             | 
| 14778 | 
            +
                                for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
         | 
| 14779 | 
            +
                                    if (args[j]) {
         | 
| 14780 | 
            +
                                        int32_t idx = -1;
         | 
| 14781 | 
            +
             | 
| 14782 | 
            +
                                        // check if leaf
         | 
| 14783 | 
            +
                                        {
         | 
| 14784 | 
            +
                                            for (int k = 0; k < cgraph->n_leafs; ++k) {
         | 
| 14785 | 
            +
                                                if (args[j] == cgraph->leafs[k]) {
         | 
| 14786 | 
            +
                                                    idx = k;
         | 
| 14787 | 
            +
                                                    break;
         | 
| 14788 | 
            +
                                                }
         | 
| 14789 | 
            +
                                            }
         | 
| 14790 | 
            +
                                        }
         | 
| 14791 | 
            +
             | 
| 14792 | 
            +
                                        // check if node
         | 
| 14793 | 
            +
                                        if (idx == -1) {
         | 
| 14794 | 
            +
                                            for (int k = 0; k < cgraph->n_nodes; ++k) {
         | 
| 14795 | 
            +
                                                if (args[j] == cgraph->nodes[k]) {
         | 
| 14796 | 
            +
                                                    idx = GGML_MAX_NODES + k;
         | 
| 14797 | 
            +
                                                    break;
         | 
| 14798 | 
            +
                                                }
         | 
| 14799 | 
            +
                                            }
         | 
| 14800 | 
            +
                                        }
         | 
| 14801 | 
            +
             | 
| 14802 | 
            +
                                        if (idx == -1) {
         | 
| 14803 | 
            +
                                            fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
         | 
| 14804 | 
            +
                                            return;
         | 
| 14805 | 
            +
                                        }
         | 
| 14806 | 
            +
             | 
| 14807 | 
            +
                                        fwrite(&idx, sizeof(int32_t), 1, fout);
         | 
| 14808 | 
            +
                                    } else {
         | 
| 14809 | 
            +
                                        const int32_t nul = -1;
         | 
| 14810 | 
            +
             | 
| 14811 | 
            +
                                        fwrite(&nul, sizeof(int32_t), 1, fout);
         | 
| 14812 | 
            +
                                    }
         | 
| 14813 | 
            +
                                }
         | 
| 14814 | 
            +
                            }
         | 
| 14815 | 
            +
                        }
         | 
| 14816 | 
            +
                    }
         | 
| 14817 | 
            +
             | 
| 14818 | 
            +
                    fclose(fout);
         | 
| 14819 | 
            +
                }
         | 
| 14820 | 
            +
            }
         | 
| 14821 | 
            +
             | 
| 14822 | 
            +
            struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
         | 
| 14823 | 
            +
                assert(*ctx_data == NULL);
         | 
| 14824 | 
            +
                assert(*ctx_eval == NULL);
         | 
| 14825 | 
            +
             | 
| 14826 | 
            +
                struct ggml_cgraph result = { 0 };
         | 
| 14827 | 
            +
             | 
| 14828 | 
            +
                struct ggml_tensor * data = NULL;
         | 
| 14829 | 
            +
             | 
| 14830 | 
            +
                // read file into data
         | 
| 14831 | 
            +
                {
         | 
| 14832 | 
            +
                    FILE * fin = fopen(fname, "rb");
         | 
| 14833 | 
            +
             | 
| 14834 | 
            +
                    if (!fin) {
         | 
| 14835 | 
            +
                        fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
         | 
| 14836 | 
            +
                        return result;
         | 
| 14837 | 
            +
                    }
         | 
| 14838 | 
            +
             | 
| 14839 | 
            +
                    size_t fsize = 0;
         | 
| 14840 | 
            +
             | 
| 14841 | 
            +
                    fseek(fin, 0, SEEK_END);
         | 
| 14842 | 
            +
                    fsize = ftell(fin);
         | 
| 14843 | 
            +
                    fseek(fin, 0, SEEK_SET);
         | 
| 14844 | 
            +
             | 
| 14845 | 
            +
                    // create the data context
         | 
| 14846 | 
            +
                    {
         | 
| 14847 | 
            +
                        const size_t overhead = 1*ggml_tensor_overhead();
         | 
| 14848 | 
            +
             | 
| 14849 | 
            +
                        struct ggml_init_params params = {
         | 
| 14850 | 
            +
                            .mem_size   = fsize + overhead,
         | 
| 14851 | 
            +
                            .mem_buffer = NULL,
         | 
| 14852 | 
            +
                            .no_alloc   = false,
         | 
| 14853 | 
            +
                        };
         | 
| 14854 | 
            +
             | 
| 14855 | 
            +
                        *ctx_data = ggml_init(params);
         | 
| 14856 | 
            +
             | 
| 14857 | 
            +
                        if (!*ctx_data) {
         | 
| 14858 | 
            +
                            fprintf(stderr, "%s: failed to create ggml context\n", __func__);
         | 
| 14859 | 
            +
                            return result;
         | 
| 14860 | 
            +
                        }
         | 
| 14861 | 
            +
                    }
         | 
| 14862 | 
            +
             | 
| 14863 | 
            +
                    data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize);
         | 
| 14864 | 
            +
             | 
| 14865 | 
            +
                    fread(data->data, sizeof(char), fsize, fin);
         | 
| 14866 | 
            +
             | 
| 14867 | 
            +
                    fclose(fin);
         | 
| 14868 | 
            +
                }
         | 
| 14869 | 
            +
             | 
| 14870 | 
            +
                // populate result
         | 
| 14871 | 
            +
                {
         | 
| 14872 | 
            +
                    char * ptr = (char *) data->data;
         | 
| 14873 | 
            +
             | 
| 14874 | 
            +
                    const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic);
         | 
| 14875 | 
            +
             | 
| 14876 | 
            +
                    if (magic != GGML_FILE_MAGIC) {
         | 
| 14877 | 
            +
                        fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic);
         | 
| 14878 | 
            +
                        return result;
         | 
| 14879 | 
            +
                    }
         | 
| 14880 | 
            +
             | 
| 14881 | 
            +
                    const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version);
         | 
| 14882 | 
            +
             | 
| 14883 | 
            +
                    if (version != GGML_FILE_VERSION) {
         | 
| 14884 | 
            +
                        fprintf(stderr, "%s: invalid version number\n", __func__);
         | 
| 14885 | 
            +
                        return result;
         | 
| 14886 | 
            +
                    }
         | 
| 14887 | 
            +
             | 
| 14888 | 
            +
                    const uint32_t n_leafs   = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
         | 
| 14889 | 
            +
                    const uint32_t n_nodes   = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
         | 
| 14890 | 
            +
                    const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
         | 
| 14891 | 
            +
             | 
| 14892 | 
            +
                    result.n_leafs = n_leafs;
         | 
| 14893 | 
            +
                    result.n_nodes = n_nodes;
         | 
| 14894 | 
            +
             | 
| 14895 | 
            +
                    // create the data context
         | 
| 14896 | 
            +
                    {
         | 
| 14897 | 
            +
                        const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
         | 
| 14898 | 
            +
             | 
| 14899 | 
            +
                        struct ggml_init_params params = {
         | 
| 14900 | 
            +
                            .mem_size   = size_eval + overhead,
         | 
| 14901 | 
            +
                            .mem_buffer = NULL,
         | 
| 14902 | 
            +
                            .no_alloc   = true,
         | 
| 14903 | 
            +
                        };
         | 
| 14904 | 
            +
             | 
| 14905 | 
            +
                        *ctx_eval = ggml_init(params);
         | 
| 14906 | 
            +
             | 
| 14907 | 
            +
                        if (!*ctx_eval) {
         | 
| 14908 | 
            +
                            fprintf(stderr, "%s: failed to create ggml context\n", __func__);
         | 
| 14909 | 
            +
                            return result;
         | 
| 14910 | 
            +
                        }
         | 
| 14911 | 
            +
                    }
         | 
| 14912 | 
            +
             | 
| 14913 | 
            +
                    // leafs
         | 
| 14914 | 
            +
                    {
         | 
| 14915 | 
            +
                        uint32_t type;
         | 
| 14916 | 
            +
                        uint32_t op;
         | 
| 14917 | 
            +
                        uint32_t n_dims;
         | 
| 14918 | 
            +
             | 
| 14919 | 
            +
                        for (uint32_t i = 0; i < n_leafs; ++i) {
         | 
| 14920 | 
            +
                            type   = *(const uint32_t *) ptr; ptr += sizeof(type);
         | 
| 14921 | 
            +
                            op     = *(const uint32_t *) ptr; ptr += sizeof(op);
         | 
| 14922 | 
            +
                            n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
         | 
| 14923 | 
            +
             | 
| 14924 | 
            +
                            int64_t ne[GGML_MAX_DIMS];
         | 
| 14925 | 
            +
                            size_t  nb[GGML_MAX_DIMS];
         | 
| 14926 | 
            +
             | 
| 14927 | 
            +
                            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
         | 
| 14928 | 
            +
                                uint64_t ne_cur;
         | 
| 14929 | 
            +
                                uint64_t nb_cur;
         | 
| 14930 | 
            +
             | 
| 14931 | 
            +
                                ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
         | 
| 14932 | 
            +
                                nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
         | 
| 14933 | 
            +
             | 
| 14934 | 
            +
                                ne[j] = ne_cur;
         | 
| 14935 | 
            +
                                nb[j] = nb_cur;
         | 
| 14936 | 
            +
                            }
         | 
| 14937 | 
            +
             | 
| 14938 | 
            +
                            struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
         | 
| 14939 | 
            +
             | 
| 14940 | 
            +
                            tensor->op = (enum ggml_op) op;
         | 
| 14941 | 
            +
             | 
| 14942 | 
            +
                            uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
         | 
| 14943 | 
            +
             | 
| 14944 | 
            +
                            memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
         | 
| 14945 | 
            +
             | 
| 14946 | 
            +
                            tensor->data = (void *) ptr;
         | 
| 14947 | 
            +
             | 
| 14948 | 
            +
                            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
         | 
| 14949 | 
            +
                                tensor->nb[j] = nb[j];
         | 
| 14950 | 
            +
                            }
         | 
| 14951 | 
            +
             | 
| 14952 | 
            +
                            result.leafs[i] = tensor;
         | 
| 14953 | 
            +
             | 
| 14954 | 
            +
                            ptr += ggml_nbytes(tensor);
         | 
| 14955 | 
            +
             | 
| 14956 | 
            +
                            fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
         | 
| 14957 | 
            +
                        }
         | 
| 14958 | 
            +
                    }
         | 
| 14959 | 
            +
             | 
| 14960 | 
            +
                    ggml_set_no_alloc(*ctx_eval, false);
         | 
| 14961 | 
            +
             | 
| 14962 | 
            +
                    // nodes
         | 
| 14963 | 
            +
                    {
         | 
| 14964 | 
            +
                        uint32_t type;
         | 
| 14965 | 
            +
                        uint32_t op;
         | 
| 14966 | 
            +
                        uint32_t n_dims;
         | 
| 14967 | 
            +
             | 
| 14968 | 
            +
                        for (uint32_t i = 0; i < n_nodes; ++i) {
         | 
| 14969 | 
            +
                            type   = *(const uint32_t *) ptr; ptr += sizeof(type);
         | 
| 14970 | 
            +
                            op     = *(const uint32_t *) ptr; ptr += sizeof(op);
         | 
| 14971 | 
            +
                            n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
         | 
| 14972 | 
            +
             | 
| 14973 | 
            +
                            int64_t ne[GGML_MAX_DIMS];
         | 
| 14974 | 
            +
                            size_t  nb[GGML_MAX_DIMS];
         | 
| 14975 | 
            +
             | 
| 14976 | 
            +
                            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
         | 
| 14977 | 
            +
                                uint64_t ne_cur;
         | 
| 14978 | 
            +
                                uint64_t nb_cur;
         | 
| 14979 | 
            +
             | 
| 14980 | 
            +
                                ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
         | 
| 14981 | 
            +
                                nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
         | 
| 14982 | 
            +
             | 
| 14983 | 
            +
                                ne[j] = ne_cur;
         | 
| 14984 | 
            +
                                nb[j] = nb_cur;
         | 
| 14985 | 
            +
                            }
         | 
| 14986 | 
            +
             | 
| 14987 | 
            +
                            struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
         | 
| 14988 | 
            +
             | 
| 14989 | 
            +
                            tensor->op = (enum ggml_op) op;
         | 
| 14990 | 
            +
             | 
| 14991 | 
            +
                            uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
         | 
| 14992 | 
            +
             | 
| 14993 | 
            +
                            memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
         | 
| 14994 | 
            +
             | 
| 14995 | 
            +
                            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
         | 
| 14996 | 
            +
                                tensor->nb[j] = nb[j];
         | 
| 14997 | 
            +
                            }
         | 
| 14998 | 
            +
             | 
| 14999 | 
            +
                            // parse args
         | 
| 15000 | 
            +
                            {
         | 
| 15001 | 
            +
                                struct ggml_tensor ** args[2 + GGML_MAX_OPT] = {
         | 
| 15002 | 
            +
                                    &tensor->src0,
         | 
| 15003 | 
            +
                                    &tensor->src1,
         | 
| 15004 | 
            +
                                };
         | 
| 15005 | 
            +
             | 
| 15006 | 
            +
                                for (int j = 0; j < GGML_MAX_OPT; ++j) {
         | 
| 15007 | 
            +
                                    args[2 + j] = &tensor->opt[j];
         | 
| 15008 | 
            +
                                }
         | 
| 15009 | 
            +
             | 
| 15010 | 
            +
                                for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
         | 
| 15011 | 
            +
                                    const int32_t arg_idx = *(const int32_t *) ptr; ptr += sizeof(arg_idx);
         | 
| 15012 | 
            +
             | 
| 15013 | 
            +
                                    if (arg_idx == -1) {
         | 
| 15014 | 
            +
                                        continue;
         | 
| 15015 | 
            +
                                    }
         | 
| 15016 | 
            +
             | 
| 15017 | 
            +
                                    if (arg_idx < GGML_MAX_NODES) {
         | 
| 15018 | 
            +
                                        *args[j] = result.leafs[arg_idx];
         | 
| 15019 | 
            +
                                    } else {
         | 
| 15020 | 
            +
                                        *args[j] = result.nodes[arg_idx - GGML_MAX_NODES];
         | 
| 15021 | 
            +
                                    }
         | 
| 15022 | 
            +
                                }
         | 
| 15023 | 
            +
                            }
         | 
| 15024 | 
            +
             | 
| 15025 | 
            +
                            result.nodes[i] = tensor;
         | 
| 15026 | 
            +
             | 
| 15027 | 
            +
                            fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
         | 
| 15028 | 
            +
                        }
         | 
| 15029 | 
            +
                    }
         | 
| 15030 | 
            +
                }
         | 
| 15031 | 
            +
             | 
| 15032 | 
            +
                return result;
         | 
| 15033 | 
            +
            }
         | 
| 15034 | 
            +
             | 
| 14524 15035 | 
             
            void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         | 
| 14525 15036 | 
             
                int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
         | 
| 14526 15037 |  | 
| @@ -14538,7 +15049,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { | |
| 14538 15049 | 
             
                    GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
         | 
| 14539 15050 | 
             
                            i,
         | 
| 14540 15051 | 
             
                            node->ne[0], node->ne[1], node->ne[2],
         | 
| 14541 | 
            -
                             | 
| 15052 | 
            +
                            GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
         | 
| 14542 15053 | 
             
                            (double) node->perf_cycles  / (double) ggml_cycles_per_ms(),
         | 
| 14543 15054 | 
             
                            (double) node->perf_cycles  / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
         | 
| 14544 15055 | 
             
                            (double) node->perf_time_us / 1000.0,
         | 
| @@ -14552,7 +15063,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { | |
| 14552 15063 | 
             
                    GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
         | 
| 14553 15064 | 
             
                            i,
         | 
| 14554 15065 | 
             
                            node->ne[0], node->ne[1],
         | 
| 14555 | 
            -
                             | 
| 15066 | 
            +
                            GGML_OP_NAME[node->op]);
         | 
| 14556 15067 | 
             
                }
         | 
| 14557 15068 |  | 
| 14558 15069 | 
             
                for (int i = 0; i < GGML_OP_COUNT; i++) {
         | 
| @@ -14560,7 +15071,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { | |
| 14560 15071 | 
             
                        continue;
         | 
| 14561 15072 | 
             
                    }
         | 
| 14562 15073 |  | 
| 14563 | 
            -
                    GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",  | 
| 15074 | 
            +
                    GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0);
         | 
| 14564 15075 | 
             
                }
         | 
| 14565 15076 |  | 
| 14566 15077 | 
             
                GGML_PRINT("========================================\n");
         |