llama_cpp 0.12.7 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +24 -0
 - data/ext/llama_cpp/llama_cpp.cpp +131 -288
 - data/lib/llama_cpp/version.rb +2 -2
 - data/sig/llama_cpp.rbs +29 -29
 - data/vendor/tmp/llama.cpp/Makefile +10 -6
 - data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -3
 - data/vendor/tmp/llama.cpp/ggml-backend.c +32 -23
 - data/vendor/tmp/llama.cpp/ggml-backend.h +17 -16
 - data/vendor/tmp/llama.cpp/ggml-cuda.cu +949 -168
 - data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
 - data/vendor/tmp/llama.cpp/ggml-metal.m +159 -22
 - data/vendor/tmp/llama.cpp/ggml-metal.metal +1195 -139
 - data/vendor/tmp/llama.cpp/ggml-opencl.cpp +27 -27
 - data/vendor/tmp/llama.cpp/ggml-quants.c +1971 -271
 - data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
 - data/vendor/tmp/llama.cpp/ggml-sycl.cpp +3586 -1201
 - data/vendor/tmp/llama.cpp/ggml-sycl.h +5 -0
 - data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +39336 -43461
 - data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1391 -825
 - data/vendor/tmp/llama.cpp/ggml-vulkan.h +1 -0
 - data/vendor/tmp/llama.cpp/ggml.c +545 -210
 - data/vendor/tmp/llama.cpp/ggml.h +65 -23
 - data/vendor/tmp/llama.cpp/llama.cpp +1458 -763
 - data/vendor/tmp/llama.cpp/llama.h +81 -75
 - data/vendor/tmp/llama.cpp/unicode.h +310 -1
 - metadata +2 -2
 
    
        data/vendor/tmp/llama.cpp/ggml.c
    CHANGED
    
    | 
         @@ -320,6 +320,17 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16]; 
     | 
|
| 
       320 
320 
     | 
    
         
             
            // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
         
     | 
| 
       321 
321 
     | 
    
         
             
            float ggml_table_f32_f16[1 << 16];
         
     | 
| 
       322 
322 
     | 
    
         | 
| 
      
 323 
     | 
    
         
            +
            const char * ggml_status_to_string(enum ggml_status status) {
         
     | 
| 
      
 324 
     | 
    
         
            +
                switch (status) {
         
     | 
| 
      
 325 
     | 
    
         
            +
                    case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
         
     | 
| 
      
 326 
     | 
    
         
            +
                    case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
         
     | 
| 
      
 327 
     | 
    
         
            +
                    case GGML_STATUS_SUCCESS:      return "GGML status: success";
         
     | 
| 
      
 328 
     | 
    
         
            +
                    case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
         
     | 
| 
      
 329 
     | 
    
         
            +
                }
         
     | 
| 
      
 330 
     | 
    
         
            +
             
     | 
| 
      
 331 
     | 
    
         
            +
                return "GGML status: unknown";
         
     | 
| 
      
 332 
     | 
    
         
            +
            }
         
     | 
| 
      
 333 
     | 
    
         
            +
             
     | 
| 
       323 
334 
     | 
    
         
             
            // note: do not use these inside ggml.c
         
     | 
| 
       324 
335 
     | 
    
         
             
            // these are meant to be used via the ggml.h API
         
     | 
| 
       325 
336 
     | 
    
         
             
            float ggml_fp16_to_fp32(ggml_fp16_t x) {
         
     | 
| 
         @@ -355,6 +366,10 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) { 
     | 
|
| 
       355 
366 
     | 
    
         
             
                }
         
     | 
| 
       356 
367 
     | 
    
         
             
            }
         
     | 
| 
       357 
368 
     | 
    
         | 
| 
      
 369 
     | 
    
         
            +
            bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
         
     | 
| 
      
 370 
     | 
    
         
            +
                return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
         
     | 
| 
      
 371 
     | 
    
         
            +
            }
         
     | 
| 
      
 372 
     | 
    
         
            +
             
     | 
| 
       358 
373 
     | 
    
         
             
            //
         
     | 
| 
       359 
374 
     | 
    
         
             
            // timing
         
     | 
| 
       360 
375 
     | 
    
         
             
            //
         
     | 
| 
         @@ -678,6 +693,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { 
     | 
|
| 
       678 
693 
     | 
    
         
             
                    .vec_dot_type             = GGML_TYPE_Q8_K,
         
     | 
| 
       679 
694 
     | 
    
         
             
                    .nrows                    = 1,
         
     | 
| 
       680 
695 
     | 
    
         
             
                },
         
     | 
| 
      
 696 
     | 
    
         
            +
                [GGML_TYPE_IQ3_S] = {
         
     | 
| 
      
 697 
     | 
    
         
            +
                    .type_name                = "iq3_s",
         
     | 
| 
      
 698 
     | 
    
         
            +
                    .blck_size                = QK_K,
         
     | 
| 
      
 699 
     | 
    
         
            +
                    .type_size                = sizeof(block_iq3_s),
         
     | 
| 
      
 700 
     | 
    
         
            +
                    .is_quantized             = true,
         
     | 
| 
      
 701 
     | 
    
         
            +
                    .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
         
     | 
| 
      
 702 
     | 
    
         
            +
                    .from_float               = quantize_row_iq3_s,
         
     | 
| 
      
 703 
     | 
    
         
            +
                    .from_float_reference     = (ggml_from_float_t)quantize_row_iq3_s_reference,
         
     | 
| 
      
 704 
     | 
    
         
            +
                    .vec_dot                  = ggml_vec_dot_iq3_s_q8_K,
         
     | 
| 
      
 705 
     | 
    
         
            +
                    .vec_dot_type             = GGML_TYPE_Q8_K,
         
     | 
| 
      
 706 
     | 
    
         
            +
                    .nrows                    = 1,
         
     | 
| 
      
 707 
     | 
    
         
            +
                },
         
     | 
| 
      
 708 
     | 
    
         
            +
                [GGML_TYPE_IQ2_S] = {
         
     | 
| 
      
 709 
     | 
    
         
            +
                    .type_name                = "iq2_s",
         
     | 
| 
      
 710 
     | 
    
         
            +
                    .blck_size                = QK_K,
         
     | 
| 
      
 711 
     | 
    
         
            +
                    .type_size                = sizeof(block_iq2_s),
         
     | 
| 
      
 712 
     | 
    
         
            +
                    .is_quantized             = true,
         
     | 
| 
      
 713 
     | 
    
         
            +
                    .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
         
     | 
| 
      
 714 
     | 
    
         
            +
                    .from_float               = quantize_row_iq2_s,
         
     | 
| 
      
 715 
     | 
    
         
            +
                    .from_float_reference     = (ggml_from_float_t)quantize_row_iq2_s_reference,
         
     | 
| 
      
 716 
     | 
    
         
            +
                    .vec_dot                  = ggml_vec_dot_iq2_s_q8_K,
         
     | 
| 
      
 717 
     | 
    
         
            +
                    .vec_dot_type             = GGML_TYPE_Q8_K,
         
     | 
| 
      
 718 
     | 
    
         
            +
                    .nrows                    = 1,
         
     | 
| 
      
 719 
     | 
    
         
            +
                },
         
     | 
| 
       681 
720 
     | 
    
         
             
                [GGML_TYPE_IQ1_S] = {
         
     | 
| 
       682 
721 
     | 
    
         
             
                    .type_name                = "iq1_s",
         
     | 
| 
       683 
722 
     | 
    
         
             
                    .blck_size                = QK_K,
         
     | 
| 
         @@ -702,6 +741,26 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { 
     | 
|
| 
       702 
741 
     | 
    
         
             
                    .vec_dot_type             = GGML_TYPE_Q8_0,
         
     | 
| 
       703 
742 
     | 
    
         
             
                    .nrows                    = 1,
         
     | 
| 
       704 
743 
     | 
    
         
             
                },
         
     | 
| 
      
 744 
     | 
    
         
            +
                [GGML_TYPE_IQ4_XS] = {
         
     | 
| 
      
 745 
     | 
    
         
            +
                    .type_name                = "iq4_xs",
         
     | 
| 
      
 746 
     | 
    
         
            +
            #if QK_K == 64
         
     | 
| 
      
 747 
     | 
    
         
            +
                    .blck_size                = QK4_NL,
         
     | 
| 
      
 748 
     | 
    
         
            +
            #else
         
     | 
| 
      
 749 
     | 
    
         
            +
                    .blck_size                = QK_K,
         
     | 
| 
      
 750 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 751 
     | 
    
         
            +
                    .type_size                = sizeof(block_iq4_xs),
         
     | 
| 
      
 752 
     | 
    
         
            +
                    .is_quantized             = true,
         
     | 
| 
      
 753 
     | 
    
         
            +
                    .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
         
     | 
| 
      
 754 
     | 
    
         
            +
                    .from_float               = quantize_row_iq4_xs,
         
     | 
| 
      
 755 
     | 
    
         
            +
                    .from_float_reference     = (ggml_from_float_t)quantize_row_iq4_xs_reference,
         
     | 
| 
      
 756 
     | 
    
         
            +
                    .vec_dot                  = ggml_vec_dot_iq4_xs_q8_K,
         
     | 
| 
      
 757 
     | 
    
         
            +
            #if QK_K == 64
         
     | 
| 
      
 758 
     | 
    
         
            +
                    .vec_dot_type             = GGML_TYPE_Q8_0,
         
     | 
| 
      
 759 
     | 
    
         
            +
            #else
         
     | 
| 
      
 760 
     | 
    
         
            +
                    .vec_dot_type             = GGML_TYPE_Q8_K,
         
     | 
| 
      
 761 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 762 
     | 
    
         
            +
                    .nrows                    = 1,
         
     | 
| 
      
 763 
     | 
    
         
            +
                },
         
     | 
| 
       705 
764 
     | 
    
         
             
                [GGML_TYPE_Q8_K] = {
         
     | 
| 
       706 
765 
     | 
    
         
             
                    .type_name                = "q8_K",
         
     | 
| 
       707 
766 
     | 
    
         
             
                    .blck_size                = QK_K,
         
     | 
| 
         @@ -1560,9 +1619,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp 
     | 
|
| 
       1560 
1619 
     | 
    
         
             
            inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
         
     | 
| 
       1561 
1620 
     | 
    
         
             
                uint16_t t;
         
     | 
| 
       1562 
1621 
     | 
    
         
             
                for (int i = 0; i < n; ++i) {
         
     | 
| 
       1563 
     | 
    
         
            -
                     
     | 
| 
       1564 
     | 
    
         
            -
             
     | 
| 
       1565 
     | 
    
         
            -
                     
     | 
| 
      
 1622 
     | 
    
         
            +
                    if (x[i] <= -10.0f) {
         
     | 
| 
      
 1623 
     | 
    
         
            +
                        y[i] = 0.0f;
         
     | 
| 
      
 1624 
     | 
    
         
            +
                    } else if (x[i] >= 10.0f) {
         
     | 
| 
      
 1625 
     | 
    
         
            +
                        y[i] = x[i];
         
     | 
| 
      
 1626 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 1627 
     | 
    
         
            +
                        ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
         
     | 
| 
      
 1628 
     | 
    
         
            +
                        memcpy(&t, &fp16, sizeof(uint16_t));
         
     | 
| 
      
 1629 
     | 
    
         
            +
                        y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
         
     | 
| 
      
 1630 
     | 
    
         
            +
                    }
         
     | 
| 
       1566 
1631 
     | 
    
         
             
                }
         
     | 
| 
       1567 
1632 
     | 
    
         
             
            }
         
     | 
| 
       1568 
1633 
     | 
    
         
             
            #else
         
     | 
| 
         @@ -1768,6 +1833,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { 
     | 
|
| 
       1768 
1833 
     | 
    
         
             
                "POOL_2D",
         
     | 
| 
       1769 
1834 
     | 
    
         
             
                "UPSCALE",
         
     | 
| 
       1770 
1835 
     | 
    
         
             
                "PAD",
         
     | 
| 
      
 1836 
     | 
    
         
            +
                "ARANGE",
         
     | 
| 
      
 1837 
     | 
    
         
            +
                "TIMESTEP_EMBEDDING",
         
     | 
| 
       1771 
1838 
     | 
    
         
             
                "ARGSORT",
         
     | 
| 
       1772 
1839 
     | 
    
         
             
                "LEAKY_RELU",
         
     | 
| 
       1773 
1840 
     | 
    
         | 
| 
         @@ -1796,7 +1863,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { 
     | 
|
| 
       1796 
1863 
     | 
    
         
             
                "CROSS_ENTROPY_LOSS_BACK",
         
     | 
| 
       1797 
1864 
     | 
    
         
             
            };
         
     | 
| 
       1798 
1865 
     | 
    
         | 
| 
       1799 
     | 
    
         
            -
            static_assert(GGML_OP_COUNT ==  
     | 
| 
      
 1866 
     | 
    
         
            +
            static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
         
     | 
| 
       1800 
1867 
     | 
    
         | 
| 
       1801 
1868 
     | 
    
         
             
            static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
         
     | 
| 
       1802 
1869 
     | 
    
         
             
                "none",
         
     | 
| 
         @@ -1854,6 +1921,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { 
     | 
|
| 
       1854 
1921 
     | 
    
         
             
                "pool_2d(x)",
         
     | 
| 
       1855 
1922 
     | 
    
         
             
                "upscale(x)",
         
     | 
| 
       1856 
1923 
     | 
    
         
             
                "pad(x)",
         
     | 
| 
      
 1924 
     | 
    
         
            +
                "arange(start, stop, step)",
         
     | 
| 
      
 1925 
     | 
    
         
            +
                "timestep_embedding(timesteps, dim, max_period)",
         
     | 
| 
       1857 
1926 
     | 
    
         
             
                "argsort(x)",
         
     | 
| 
       1858 
1927 
     | 
    
         
             
                "leaky_relu(x)",
         
     | 
| 
       1859 
1928 
     | 
    
         | 
| 
         @@ -1882,7 +1951,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { 
     | 
|
| 
       1882 
1951 
     | 
    
         
             
                "cross_entropy_loss_back(x,y)",
         
     | 
| 
       1883 
1952 
     | 
    
         
             
            };
         
     | 
| 
       1884 
1953 
     | 
    
         | 
| 
       1885 
     | 
    
         
            -
            static_assert(GGML_OP_COUNT ==  
     | 
| 
      
 1954 
     | 
    
         
            +
            static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
         
     | 
| 
       1886 
1955 
     | 
    
         | 
| 
       1887 
1956 
     | 
    
         
             
            static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
         
     | 
| 
       1888 
1957 
     | 
    
         | 
| 
         @@ -2085,7 +2154,10 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) { 
     | 
|
| 
       2085 
2154 
     | 
    
         
             
                getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
         
     | 
| 
       2086 
2155 
     | 
    
         
             
            #else
         
     | 
| 
       2087 
2156 
     | 
    
         
             
                // old glibc doesn't have a wrapper for this call. Fall back on direct syscall
         
     | 
| 
       2088 
     | 
    
         
            -
             
     | 
| 
      
 2157 
     | 
    
         
            +
            #   if !defined(SYS_getcpu) && defined(SYS_get_cpu)
         
     | 
| 
      
 2158 
     | 
    
         
            +
            #       define SYS_getcpu SYS_get_cpu // some older glibc versions use this name
         
     | 
| 
      
 2159 
     | 
    
         
            +
            #   endif
         
     | 
| 
      
 2160 
     | 
    
         
            +
                getcpu_ret = syscall(SYS_getcpu, &current_cpu, &g_state.numa.current_node);
         
     | 
| 
       2089 
2161 
     | 
    
         
             
            #endif
         
     | 
| 
       2090 
2162 
     | 
    
         | 
| 
       2091 
2163 
     | 
    
         
             
                if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
         
     | 
| 
         @@ -2304,6 +2376,9 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { 
     | 
|
| 
       2304 
2376 
     | 
    
         
             
                    case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
         
     | 
| 
       2305 
2377 
     | 
    
         
             
                    case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
         
     | 
| 
       2306 
2378 
     | 
    
         
             
                    case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
         
     | 
| 
      
 2379 
     | 
    
         
            +
                    case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
         
     | 
| 
      
 2380 
     | 
    
         
            +
                    case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
         
     | 
| 
      
 2381 
     | 
    
         
            +
                    case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
         
     | 
| 
       2307 
2382 
     | 
    
         
             
                    case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
         
     | 
| 
       2308 
2383 
     | 
    
         
             
                    case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
         
     | 
| 
       2309 
2384 
     | 
    
         
             
                }
         
     | 
| 
         @@ -2708,7 +2783,7 @@ static struct ggml_tensor * ggml_new_tensor_impl( 
     | 
|
| 
       2708 
2783 
     | 
    
         
             
                    }
         
     | 
| 
       2709 
2784 
     | 
    
         
             
                }
         
     | 
| 
       2710 
2785 
     | 
    
         | 
| 
       2711 
     | 
    
         
            -
                struct ggml_object * const obj_new = ggml_new_object(ctx,  
     | 
| 
      
 2786 
     | 
    
         
            +
                struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
         
     | 
| 
       2712 
2787 
     | 
    
         | 
| 
       2713 
2788 
     | 
    
         
             
                // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
         
     | 
| 
       2714 
2789 
     | 
    
         | 
| 
         @@ -2716,7 +2791,7 @@ static struct ggml_tensor * ggml_new_tensor_impl( 
     | 
|
| 
       2716 
2791 
     | 
    
         | 
| 
       2717 
2792 
     | 
    
         
             
                *result = (struct ggml_tensor) {
         
     | 
| 
       2718 
2793 
     | 
    
         
             
                    /*.type         =*/ type,
         
     | 
| 
       2719 
     | 
    
         
            -
                    /*.backend      =*/  
     | 
| 
      
 2794 
     | 
    
         
            +
                    /*.backend      =*/ GGML_BACKEND_TYPE_CPU,
         
     | 
| 
       2720 
2795 
     | 
    
         
             
                    /*.buffer       =*/ NULL,
         
     | 
| 
       2721 
2796 
     | 
    
         
             
                    /*.ne           =*/ { 1, 1, 1, 1 },
         
     | 
| 
       2722 
2797 
     | 
    
         
             
                    /*.nb           =*/ { 0, 0, 0, 0 },
         
     | 
| 
         @@ -2838,11 +2913,21 @@ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_ 
     | 
|
| 
       2838 
2913 
     | 
    
         
             
                return ((const int32_t *)(tensor->op_params))[i];
         
     | 
| 
       2839 
2914 
     | 
    
         
             
            }
         
     | 
| 
       2840 
2915 
     | 
    
         | 
| 
      
 2916 
     | 
    
         
            +
            static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
         
     | 
| 
      
 2917 
     | 
    
         
            +
                assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
         
     | 
| 
      
 2918 
     | 
    
         
            +
                return ((const float *)(tensor->op_params))[i];
         
     | 
| 
      
 2919 
     | 
    
         
            +
            }
         
     | 
| 
      
 2920 
     | 
    
         
            +
             
     | 
| 
       2841 
2921 
     | 
    
         
             
            static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
         
     | 
| 
       2842 
2922 
     | 
    
         
             
                assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
         
     | 
| 
       2843 
2923 
     | 
    
         
             
                ((int32_t *)(tensor->op_params))[i] = value;
         
     | 
| 
       2844 
2924 
     | 
    
         
             
            }
         
     | 
| 
       2845 
2925 
     | 
    
         | 
| 
      
 2926 
     | 
    
         
            +
            static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
         
     | 
| 
      
 2927 
     | 
    
         
            +
                assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
         
     | 
| 
      
 2928 
     | 
    
         
            +
                ((float *)(tensor->op_params))[i] = value;
         
     | 
| 
      
 2929 
     | 
    
         
            +
            }
         
     | 
| 
      
 2930 
     | 
    
         
            +
             
     | 
| 
       2846 
2931 
     | 
    
         
             
            struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
         
     | 
| 
       2847 
2932 
     | 
    
         
             
                memset(tensor->data, 0, ggml_nbytes(tensor));
         
     | 
| 
       2848 
2933 
     | 
    
         
             
                return tensor;
         
     | 
| 
         @@ -3289,7 +3374,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) { 
     | 
|
| 
       3289 
3374 
     | 
    
         
             
                char * const mem_buffer = ctx->mem_buffer;
         
     | 
| 
       3290 
3375 
     | 
    
         | 
| 
       3291 
3376 
     | 
    
         
             
                while (obj != NULL) {
         
     | 
| 
       3292 
     | 
    
         
            -
                    if (obj->type ==  
     | 
| 
      
 3377 
     | 
    
         
            +
                    if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
         
     | 
| 
       3293 
3378 
     | 
    
         
             
                        return (struct ggml_tensor *)(mem_buffer + obj->offs);
         
     | 
| 
       3294 
3379 
     | 
    
         
             
                    }
         
     | 
| 
       3295 
3380 
     | 
    
         | 
| 
         @@ -3306,7 +3391,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc 
     | 
|
| 
       3306 
3391 
     | 
    
         
             
                char * const mem_buffer = ctx->mem_buffer;
         
     | 
| 
       3307 
3392 
     | 
    
         | 
| 
       3308 
3393 
     | 
    
         
             
                while (obj != NULL) {
         
     | 
| 
       3309 
     | 
    
         
            -
                    if (obj->type ==  
     | 
| 
      
 3394 
     | 
    
         
            +
                    if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
         
     | 
| 
       3310 
3395 
     | 
    
         
             
                        return (struct ggml_tensor *)(mem_buffer + obj->offs);
         
     | 
| 
       3311 
3396 
     | 
    
         
             
                    }
         
     | 
| 
       3312 
3397 
     | 
    
         | 
| 
         @@ -3322,7 +3407,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam 
     | 
|
| 
       3322 
3407 
     | 
    
         
             
                char * const mem_buffer = ctx->mem_buffer;
         
     | 
| 
       3323 
3408 
     | 
    
         | 
| 
       3324 
3409 
     | 
    
         
             
                while (obj != NULL) {
         
     | 
| 
       3325 
     | 
    
         
            -
                    if (obj->type ==  
     | 
| 
      
 3410 
     | 
    
         
            +
                    if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
         
     | 
| 
       3326 
3411 
     | 
    
         
             
                        struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
         
     | 
| 
       3327 
3412 
     | 
    
         
             
                        if (strcmp(cur->name, name) == 0) {
         
     | 
| 
       3328 
3413 
     | 
    
         
             
                            return cur;
         
     | 
| 
         @@ -5729,11 +5814,13 @@ struct ggml_tensor * ggml_pool_1d( 
     | 
|
| 
       5729 
5814 
     | 
    
         
             
                    is_node = true;
         
     | 
| 
       5730 
5815 
     | 
    
         
             
                }
         
     | 
| 
       5731 
5816 
     | 
    
         | 
| 
       5732 
     | 
    
         
            -
                const int64_t ne[ 
     | 
| 
      
 5817 
     | 
    
         
            +
                const int64_t ne[4] = {
         
     | 
| 
       5733 
5818 
     | 
    
         
             
                    ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         
     | 
| 
       5734 
5819 
     | 
    
         
             
                    a->ne[1],
         
     | 
| 
      
 5820 
     | 
    
         
            +
                    a->ne[2],
         
     | 
| 
      
 5821 
     | 
    
         
            +
                    a->ne[3],
         
     | 
| 
       5735 
5822 
     | 
    
         
             
                };
         
     | 
| 
       5736 
     | 
    
         
            -
                struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,  
     | 
| 
      
 5823 
     | 
    
         
            +
                struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
         
     | 
| 
       5737 
5824 
     | 
    
         | 
| 
       5738 
5825 
     | 
    
         
             
                int32_t params[] = { op, k0, s0, p0 };
         
     | 
| 
       5739 
5826 
     | 
    
         
             
                ggml_set_op_params(result, params, sizeof(params));
         
     | 
| 
         @@ -5839,6 +5926,55 @@ struct ggml_tensor * ggml_upscale( 
     | 
|
| 
       5839 
5926 
     | 
    
         
             
                return ggml_upscale_impl(ctx, a, scale_factor);
         
     | 
| 
       5840 
5927 
     | 
    
         
             
            }
         
     | 
| 
       5841 
5928 
     | 
    
         | 
| 
      
 5929 
     | 
    
         
            +
            struct ggml_tensor * ggml_arange(
         
     | 
| 
      
 5930 
     | 
    
         
            +
                struct ggml_context * ctx,
         
     | 
| 
      
 5931 
     | 
    
         
            +
                float start,
         
     | 
| 
      
 5932 
     | 
    
         
            +
                float stop,
         
     | 
| 
      
 5933 
     | 
    
         
            +
                float step) {
         
     | 
| 
      
 5934 
     | 
    
         
            +
             
     | 
| 
      
 5935 
     | 
    
         
            +
                GGML_ASSERT(stop > start);
         
     | 
| 
      
 5936 
     | 
    
         
            +
             
     | 
| 
      
 5937 
     | 
    
         
            +
                const int64_t steps = (int64_t) ceilf((stop - start) / step);
         
     | 
| 
      
 5938 
     | 
    
         
            +
             
     | 
| 
      
 5939 
     | 
    
         
            +
                struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
         
     | 
| 
      
 5940 
     | 
    
         
            +
             
     | 
| 
      
 5941 
     | 
    
         
            +
                result->op = GGML_OP_ARANGE;
         
     | 
| 
      
 5942 
     | 
    
         
            +
                ggml_set_op_params_f32(result, 0, start);
         
     | 
| 
      
 5943 
     | 
    
         
            +
                ggml_set_op_params_f32(result, 1, stop);
         
     | 
| 
      
 5944 
     | 
    
         
            +
                ggml_set_op_params_f32(result, 2, step);
         
     | 
| 
      
 5945 
     | 
    
         
            +
             
     | 
| 
      
 5946 
     | 
    
         
            +
                return result;
         
     | 
| 
      
 5947 
     | 
    
         
            +
            }
         
     | 
| 
      
 5948 
     | 
    
         
            +
             
     | 
| 
      
 5949 
     | 
    
         
            +
            struct ggml_tensor * ggml_timestep_embedding(
         
     | 
| 
      
 5950 
     | 
    
         
            +
                        struct ggml_context * ctx,
         
     | 
| 
      
 5951 
     | 
    
         
            +
                        struct ggml_tensor  * timesteps,
         
     | 
| 
      
 5952 
     | 
    
         
            +
                        int                   dim,
         
     | 
| 
      
 5953 
     | 
    
         
            +
                        int                   max_period) {
         
     | 
| 
      
 5954 
     | 
    
         
            +
                bool is_node = false;
         
     | 
| 
      
 5955 
     | 
    
         
            +
             
     | 
| 
      
 5956 
     | 
    
         
            +
                if (timesteps->grad) {
         
     | 
| 
      
 5957 
     | 
    
         
            +
                    GGML_ASSERT(false); // TODO: implement backward
         
     | 
| 
      
 5958 
     | 
    
         
            +
                    is_node = true;
         
     | 
| 
      
 5959 
     | 
    
         
            +
                }
         
     | 
| 
      
 5960 
     | 
    
         
            +
             
     | 
| 
      
 5961 
     | 
    
         
            +
                int actual_dim = dim;
         
     | 
| 
      
 5962 
     | 
    
         
            +
                if (dim % 2 != 0) {
         
     | 
| 
      
 5963 
     | 
    
         
            +
                    actual_dim = dim + 1;
         
     | 
| 
      
 5964 
     | 
    
         
            +
                }
         
     | 
| 
      
 5965 
     | 
    
         
            +
             
     | 
| 
      
 5966 
     | 
    
         
            +
                struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
         
     | 
| 
      
 5967 
     | 
    
         
            +
             
     | 
| 
      
 5968 
     | 
    
         
            +
                result->op = GGML_OP_TIMESTEP_EMBEDDING;
         
     | 
| 
      
 5969 
     | 
    
         
            +
                ggml_set_op_params_i32(result, 0, dim);
         
     | 
| 
      
 5970 
     | 
    
         
            +
                ggml_set_op_params_i32(result, 1, max_period);
         
     | 
| 
      
 5971 
     | 
    
         
            +
             
     | 
| 
      
 5972 
     | 
    
         
            +
                result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
         
     | 
| 
      
 5973 
     | 
    
         
            +
                result->src[0] = timesteps;
         
     | 
| 
      
 5974 
     | 
    
         
            +
             
     | 
| 
      
 5975 
     | 
    
         
            +
                return result;
         
     | 
| 
      
 5976 
     | 
    
         
            +
            }
         
     | 
| 
      
 5977 
     | 
    
         
            +
             
     | 
| 
       5842 
5978 
     | 
    
         
             
            // ggml_argsort
         
     | 
| 
       5843 
5979 
     | 
    
         | 
| 
       5844 
5980 
     | 
    
         
             
            struct ggml_tensor * ggml_argsort(
         
     | 
| 
         @@ -5866,7 +6002,7 @@ struct ggml_tensor * ggml_top_k( 
     | 
|
| 
       5866 
6002 
     | 
    
         
             
                    int                   k) {
         
     | 
| 
       5867 
6003 
     | 
    
         
             
                GGML_ASSERT(a->ne[0] >= k);
         
     | 
| 
       5868 
6004 
     | 
    
         | 
| 
       5869 
     | 
    
         
            -
                struct ggml_tensor * result = ggml_argsort(ctx, a,  
     | 
| 
      
 6005 
     | 
    
         
            +
                struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
         
     | 
| 
       5870 
6006 
     | 
    
         | 
| 
       5871 
6007 
     | 
    
         
             
                result = ggml_view_4d(ctx, result,
         
     | 
| 
       5872 
6008 
     | 
    
         
             
                            k, result->ne[1], result->ne[2], result->ne[3],
         
     | 
| 
         @@ -6660,7 +6796,7 @@ static void ggml_compute_forward_dup_same_cont( 
     | 
|
| 
       6660 
6796 
     | 
    
         
             
                GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
         
     | 
| 
       6661 
6797 
     | 
    
         
             
                GGML_ASSERT(src0->type == dst->type);
         
     | 
| 
       6662 
6798 
     | 
    
         | 
| 
       6663 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 6799 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       6664 
6800 
     | 
    
         
             
                    return;
         
     | 
| 
       6665 
6801 
     | 
    
         
             
                }
         
     | 
| 
       6666 
6802 
     | 
    
         | 
| 
         @@ -6692,7 +6828,7 @@ static void ggml_compute_forward_dup_f16( 
     | 
|
| 
       6692 
6828 
     | 
    
         | 
| 
       6693 
6829 
     | 
    
         
             
                GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
         
     | 
| 
       6694 
6830 
     | 
    
         | 
| 
       6695 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 6831 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       6696 
6832 
     | 
    
         
             
                    return;
         
     | 
| 
       6697 
6833 
     | 
    
         
             
                }
         
     | 
| 
       6698 
6834 
     | 
    
         | 
| 
         @@ -6965,7 +7101,7 @@ static void ggml_compute_forward_dup_f32( 
     | 
|
| 
       6965 
7101 
     | 
    
         | 
| 
       6966 
7102 
     | 
    
         
             
                GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
         
     | 
| 
       6967 
7103 
     | 
    
         | 
| 
       6968 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7104 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       6969 
7105 
     | 
    
         
             
                    return;
         
     | 
| 
       6970 
7106 
     | 
    
         
             
                }
         
     | 
| 
       6971 
7107 
     | 
    
         | 
| 
         @@ -7218,7 +7354,7 @@ static void ggml_compute_forward_dup_bytes( 
     | 
|
| 
       7218 
7354 
     | 
    
         
             
                GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
         
     | 
| 
       7219 
7355 
     | 
    
         
             
                GGML_ASSERT(src0->type == dst->type);
         
     | 
| 
       7220 
7356 
     | 
    
         | 
| 
       7221 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7357 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7222 
7358 
     | 
    
         
             
                    return;
         
     | 
| 
       7223 
7359 
     | 
    
         
             
                }
         
     | 
| 
       7224 
7360 
     | 
    
         | 
| 
         @@ -7398,7 +7534,7 @@ static void ggml_compute_forward_add_f32( 
     | 
|
| 
       7398 
7534 
     | 
    
         | 
| 
       7399 
7535 
     | 
    
         
             
                GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       7400 
7536 
     | 
    
         | 
| 
       7401 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7537 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7402 
7538 
     | 
    
         
             
                    return;
         
     | 
| 
       7403 
7539 
     | 
    
         
             
                }
         
     | 
| 
       7404 
7540 
     | 
    
         | 
| 
         @@ -7406,7 +7542,7 @@ static void ggml_compute_forward_add_f32( 
     | 
|
| 
       7406 
7542 
     | 
    
         
             
                const int nth = params->nth;
         
     | 
| 
       7407 
7543 
     | 
    
         | 
| 
       7408 
7544 
     | 
    
         
             
            #ifdef GGML_USE_CLBLAST
         
     | 
| 
       7409 
     | 
    
         
            -
                if (src1->backend ==  
     | 
| 
      
 7545 
     | 
    
         
            +
                if (src1->backend == GGML_BACKEND_TYPE_GPU) {
         
     | 
| 
       7410 
7546 
     | 
    
         
             
                    // TODO: OpenCL kernel support full broadcast
         
     | 
| 
       7411 
7547 
     | 
    
         
             
                    GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
         
     | 
| 
       7412 
7548 
     | 
    
         
             
                    if (ith == 0) {
         
     | 
| 
         @@ -7488,7 +7624,7 @@ static void ggml_compute_forward_add_f16_f32( 
     | 
|
| 
       7488 
7624 
     | 
    
         | 
| 
       7489 
7625 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       7490 
7626 
     | 
    
         | 
| 
       7491 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7627 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7492 
7628 
     | 
    
         
             
                    return;
         
     | 
| 
       7493 
7629 
     | 
    
         
             
                }
         
     | 
| 
       7494 
7630 
     | 
    
         | 
| 
         @@ -7567,7 +7703,7 @@ static void ggml_compute_forward_add_f16_f16( 
     | 
|
| 
       7567 
7703 
     | 
    
         | 
| 
       7568 
7704 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       7569 
7705 
     | 
    
         | 
| 
       7570 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7706 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7571 
7707 
     | 
    
         
             
                    return;
         
     | 
| 
       7572 
7708 
     | 
    
         
             
                }
         
     | 
| 
       7573 
7709 
     | 
    
         | 
| 
         @@ -7623,7 +7759,7 @@ static void ggml_compute_forward_add_q_f32( 
     | 
|
| 
       7623 
7759 
     | 
    
         | 
| 
       7624 
7760 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       7625 
7761 
     | 
    
         | 
| 
       7626 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7762 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7627 
7763 
     | 
    
         
             
                    return;
         
     | 
| 
       7628 
7764 
     | 
    
         
             
                }
         
     | 
| 
       7629 
7765 
     | 
    
         | 
| 
         @@ -7738,6 +7874,9 @@ static void ggml_compute_forward_add( 
     | 
|
| 
       7738 
7874 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       7739 
7875 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       7740 
7876 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 7877 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 7878 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 7879 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       7741 
7880 
     | 
    
         
             
                        {
         
     | 
| 
       7742 
7881 
     | 
    
         
             
                            ggml_compute_forward_add_q_f32(params, dst);
         
     | 
| 
       7743 
7882 
     | 
    
         
             
                        } break;
         
     | 
| 
         @@ -7760,7 +7899,7 @@ static void ggml_compute_forward_add1_f32( 
     | 
|
| 
       7760 
7899 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       7761 
7900 
     | 
    
         
             
                GGML_ASSERT(ggml_is_scalar(src1));
         
     | 
| 
       7762 
7901 
     | 
    
         | 
| 
       7763 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7902 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7764 
7903 
     | 
    
         
             
                    return;
         
     | 
| 
       7765 
7904 
     | 
    
         
             
                }
         
     | 
| 
       7766 
7905 
     | 
    
         | 
| 
         @@ -7814,7 +7953,7 @@ static void ggml_compute_forward_add1_f16_f32( 
     | 
|
| 
       7814 
7953 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       7815 
7954 
     | 
    
         
             
                GGML_ASSERT(ggml_is_scalar(src1));
         
     | 
| 
       7816 
7955 
     | 
    
         | 
| 
       7817 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 7956 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7818 
7957 
     | 
    
         
             
                    return;
         
     | 
| 
       7819 
7958 
     | 
    
         
             
                }
         
     | 
| 
       7820 
7959 
     | 
    
         | 
| 
         @@ -7866,7 +8005,7 @@ static void ggml_compute_forward_add1_f16_f16( 
     | 
|
| 
       7866 
8005 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       7867 
8006 
     | 
    
         
             
                GGML_ASSERT(ggml_is_scalar(src1));
         
     | 
| 
       7868 
8007 
     | 
    
         | 
| 
       7869 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8008 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7870 
8009 
     | 
    
         
             
                    return;
         
     | 
| 
       7871 
8010 
     | 
    
         
             
                }
         
     | 
| 
       7872 
8011 
     | 
    
         | 
| 
         @@ -7918,7 +8057,7 @@ static void ggml_compute_forward_add1_q_f32( 
     | 
|
| 
       7918 
8057 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       7919 
8058 
     | 
    
         
             
                GGML_ASSERT(ggml_is_scalar(src1));
         
     | 
| 
       7920 
8059 
     | 
    
         | 
| 
       7921 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8060 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       7922 
8061 
     | 
    
         
             
                    return;
         
     | 
| 
       7923 
8062 
     | 
    
         
             
                }
         
     | 
| 
       7924 
8063 
     | 
    
         | 
| 
         @@ -8017,6 +8156,9 @@ static void ggml_compute_forward_add1( 
     | 
|
| 
       8017 
8156 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       8018 
8157 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       8019 
8158 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 8159 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 8160 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 8161 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       8020 
8162 
     | 
    
         
             
                        {
         
     | 
| 
       8021 
8163 
     | 
    
         
             
                            ggml_compute_forward_add1_q_f32(params, dst);
         
     | 
| 
       8022 
8164 
     | 
    
         
             
                        } break;
         
     | 
| 
         @@ -8047,7 +8189,7 @@ static void ggml_compute_forward_acc_f32( 
     | 
|
| 
       8047 
8189 
     | 
    
         
             
                size_t offset  = ((int32_t *) dst->op_params)[3];
         
     | 
| 
       8048 
8190 
     | 
    
         
             
                bool   inplace = (bool) ((int32_t *) dst->op_params)[4];
         
     | 
| 
       8049 
8191 
     | 
    
         | 
| 
       8050 
     | 
    
         
            -
                if (!inplace && (params->type ==  
     | 
| 
      
 8192 
     | 
    
         
            +
                if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
         
     | 
| 
       8051 
8193 
     | 
    
         
             
                    if (params->ith != 0) {
         
     | 
| 
       8052 
8194 
     | 
    
         
             
                        return;
         
     | 
| 
       8053 
8195 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -8059,7 +8201,7 @@ static void ggml_compute_forward_acc_f32( 
     | 
|
| 
       8059 
8201 
     | 
    
         
             
                        ggml_nbytes(dst));
         
     | 
| 
       8060 
8202 
     | 
    
         
             
                }
         
     | 
| 
       8061 
8203 
     | 
    
         | 
| 
       8062 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8204 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8063 
8205 
     | 
    
         
             
                    return;
         
     | 
| 
       8064 
8206 
     | 
    
         
             
                }
         
     | 
| 
       8065 
8207 
     | 
    
         | 
| 
         @@ -8141,6 +8283,9 @@ static void ggml_compute_forward_acc( 
     | 
|
| 
       8141 
8283 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       8142 
8284 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       8143 
8285 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 8286 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 8287 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 8288 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       8144 
8289 
     | 
    
         
             
                    default:
         
     | 
| 
       8145 
8290 
     | 
    
         
             
                        {
         
     | 
| 
       8146 
8291 
     | 
    
         
             
                            GGML_ASSERT(false);
         
     | 
| 
         @@ -8160,7 +8305,7 @@ static void ggml_compute_forward_sub_f32( 
     | 
|
| 
       8160 
8305 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8161 
8306 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       8162 
8307 
     | 
    
         | 
| 
       8163 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8308 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8164 
8309 
     | 
    
         
             
                    return;
         
     | 
| 
       8165 
8310 
     | 
    
         
             
                }
         
     | 
| 
       8166 
8311 
     | 
    
         | 
| 
         @@ -8241,14 +8386,14 @@ static void ggml_compute_forward_mul_f32( 
     | 
|
| 
       8241 
8386 
     | 
    
         | 
| 
       8242 
8387 
     | 
    
         
             
                GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       8243 
8388 
     | 
    
         | 
| 
       8244 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8389 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8245 
8390 
     | 
    
         
             
                    return;
         
     | 
| 
       8246 
8391 
     | 
    
         
             
                }
         
     | 
| 
       8247 
8392 
     | 
    
         
             
                const int ith = params->ith;
         
     | 
| 
       8248 
8393 
     | 
    
         
             
                const int nth = params->nth;
         
     | 
| 
       8249 
8394 
     | 
    
         | 
| 
       8250 
8395 
     | 
    
         
             
            #if defined(GGML_USE_CLBLAST)
         
     | 
| 
       8251 
     | 
    
         
            -
                if (src1->backend ==  
     | 
| 
      
 8396 
     | 
    
         
            +
                if (src1->backend == GGML_BACKEND_TYPE_GPU) {
         
     | 
| 
       8252 
8397 
     | 
    
         
             
                    // TODO: OpenCL kernel support full broadcast
         
     | 
| 
       8253 
8398 
     | 
    
         
             
                    GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
         
     | 
| 
       8254 
8399 
     | 
    
         
             
                    if (ith == 0) {
         
     | 
| 
         @@ -8349,7 +8494,7 @@ static void ggml_compute_forward_div_f32( 
     | 
|
| 
       8349 
8494 
     | 
    
         | 
| 
       8350 
8495 
     | 
    
         
             
                GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       8351 
8496 
     | 
    
         | 
| 
       8352 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8497 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8353 
8498 
     | 
    
         
             
                    return;
         
     | 
| 
       8354 
8499 
     | 
    
         
             
                }
         
     | 
| 
       8355 
8500 
     | 
    
         | 
| 
         @@ -8444,7 +8589,7 @@ static void ggml_compute_forward_sqr_f32( 
     | 
|
| 
       8444 
8589 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8445 
8590 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       8446 
8591 
     | 
    
         | 
| 
       8447 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8592 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8448 
8593 
     | 
    
         
             
                    return;
         
     | 
| 
       8449 
8594 
     | 
    
         
             
                }
         
     | 
| 
       8450 
8595 
     | 
    
         | 
| 
         @@ -8490,7 +8635,7 @@ static void ggml_compute_forward_sqrt_f32( 
     | 
|
| 
       8490 
8635 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8491 
8636 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       8492 
8637 
     | 
    
         | 
| 
       8493 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8638 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8494 
8639 
     | 
    
         
             
                    return;
         
     | 
| 
       8495 
8640 
     | 
    
         
             
                }
         
     | 
| 
       8496 
8641 
     | 
    
         | 
| 
         @@ -8536,7 +8681,7 @@ static void ggml_compute_forward_log_f32( 
     | 
|
| 
       8536 
8681 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       8537 
8682 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       8538 
8683 
     | 
    
         | 
| 
       8539 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8684 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8540 
8685 
     | 
    
         
             
                    return;
         
     | 
| 
       8541 
8686 
     | 
    
         
             
                }
         
     | 
| 
       8542 
8687 
     | 
    
         | 
| 
         @@ -8582,7 +8727,7 @@ static void ggml_compute_forward_sum_f32( 
     | 
|
| 
       8582 
8727 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8583 
8728 
     | 
    
         
             
                assert(ggml_is_scalar(dst));
         
     | 
| 
       8584 
8729 
     | 
    
         | 
| 
       8585 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8730 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8586 
8731 
     | 
    
         
             
                    return;
         
     | 
| 
       8587 
8732 
     | 
    
         
             
                }
         
     | 
| 
       8588 
8733 
     | 
    
         | 
| 
         @@ -8617,7 +8762,7 @@ static void ggml_compute_forward_sum_f16( 
     | 
|
| 
       8617 
8762 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8618 
8763 
     | 
    
         
             
                assert(ggml_is_scalar(dst));
         
     | 
| 
       8619 
8764 
     | 
    
         | 
| 
       8620 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8765 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8621 
8766 
     | 
    
         
             
                    return;
         
     | 
| 
       8622 
8767 
     | 
    
         
             
                }
         
     | 
| 
       8623 
8768 
     | 
    
         | 
| 
         @@ -8674,7 +8819,7 @@ static void ggml_compute_forward_sum_rows_f32( 
     | 
|
| 
       8674 
8819 
     | 
    
         | 
| 
       8675 
8820 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       8676 
8821 
     | 
    
         | 
| 
       8677 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8822 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8678 
8823 
     | 
    
         
             
                    return;
         
     | 
| 
       8679 
8824 
     | 
    
         
             
                }
         
     | 
| 
       8680 
8825 
     | 
    
         | 
| 
         @@ -8729,7 +8874,7 @@ static void ggml_compute_forward_mean_f32( 
     | 
|
| 
       8729 
8874 
     | 
    
         | 
| 
       8730 
8875 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8731 
8876 
     | 
    
         | 
| 
       8732 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8877 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8733 
8878 
     | 
    
         
             
                    return;
         
     | 
| 
       8734 
8879 
     | 
    
         
             
                }
         
     | 
| 
       8735 
8880 
     | 
    
         | 
| 
         @@ -8788,7 +8933,7 @@ static void ggml_compute_forward_argmax_f32( 
     | 
|
| 
       8788 
8933 
     | 
    
         | 
| 
       8789 
8934 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       8790 
8935 
     | 
    
         | 
| 
       8791 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8936 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8792 
8937 
     | 
    
         
             
                    return;
         
     | 
| 
       8793 
8938 
     | 
    
         
             
                }
         
     | 
| 
       8794 
8939 
     | 
    
         | 
| 
         @@ -8839,7 +8984,7 @@ static void ggml_compute_forward_repeat_f32( 
     | 
|
| 
       8839 
8984 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       8840 
8985 
     | 
    
         
             
                GGML_ASSERT(ggml_can_repeat(src0, dst));
         
     | 
| 
       8841 
8986 
     | 
    
         | 
| 
       8842 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 8987 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8843 
8988 
     | 
    
         
             
                    return;
         
     | 
| 
       8844 
8989 
     | 
    
         
             
                }
         
     | 
| 
       8845 
8990 
     | 
    
         | 
| 
         @@ -8884,7 +9029,7 @@ static void ggml_compute_forward_repeat_f16( 
     | 
|
| 
       8884 
9029 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       8885 
9030 
     | 
    
         
             
                GGML_ASSERT(ggml_can_repeat(src0, dst));
         
     | 
| 
       8886 
9031 
     | 
    
         | 
| 
       8887 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9032 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8888 
9033 
     | 
    
         
             
                    return;
         
     | 
| 
       8889 
9034 
     | 
    
         
             
                }
         
     | 
| 
       8890 
9035 
     | 
    
         | 
| 
         @@ -8958,7 +9103,7 @@ static void ggml_compute_forward_repeat_back_f32( 
     | 
|
| 
       8958 
9103 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       8959 
9104 
     | 
    
         
             
                GGML_ASSERT(ggml_can_repeat(dst, src0));
         
     | 
| 
       8960 
9105 
     | 
    
         | 
| 
       8961 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9106 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       8962 
9107 
     | 
    
         
             
                    return;
         
     | 
| 
       8963 
9108 
     | 
    
         
             
                }
         
     | 
| 
       8964 
9109 
     | 
    
         | 
| 
         @@ -9035,7 +9180,7 @@ static void ggml_compute_forward_concat_f32( 
     | 
|
| 
       9035 
9180 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       9036 
9181 
     | 
    
         
             
                const struct ggml_tensor * src1 = dst->src[1];
         
     | 
| 
       9037 
9182 
     | 
    
         | 
| 
       9038 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9183 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9039 
9184 
     | 
    
         
             
                    return;
         
     | 
| 
       9040 
9185 
     | 
    
         
             
                }
         
     | 
| 
       9041 
9186 
     | 
    
         | 
| 
         @@ -9107,7 +9252,7 @@ static void ggml_compute_forward_abs_f32( 
     | 
|
| 
       9107 
9252 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9108 
9253 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9109 
9254 
     | 
    
         | 
| 
       9110 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9255 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9111 
9256 
     | 
    
         
             
                    return;
         
     | 
| 
       9112 
9257 
     | 
    
         
             
                }
         
     | 
| 
       9113 
9258 
     | 
    
         | 
| 
         @@ -9153,7 +9298,7 @@ static void ggml_compute_forward_sgn_f32( 
     | 
|
| 
       9153 
9298 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9154 
9299 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9155 
9300 
     | 
    
         | 
| 
       9156 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9301 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9157 
9302 
     | 
    
         
             
                    return;
         
     | 
| 
       9158 
9303 
     | 
    
         
             
                }
         
     | 
| 
       9159 
9304 
     | 
    
         | 
| 
         @@ -9199,7 +9344,7 @@ static void ggml_compute_forward_neg_f32( 
     | 
|
| 
       9199 
9344 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9200 
9345 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9201 
9346 
     | 
    
         | 
| 
       9202 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9347 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9203 
9348 
     | 
    
         
             
                    return;
         
     | 
| 
       9204 
9349 
     | 
    
         
             
                }
         
     | 
| 
       9205 
9350 
     | 
    
         | 
| 
         @@ -9245,7 +9390,7 @@ static void ggml_compute_forward_step_f32( 
     | 
|
| 
       9245 
9390 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9246 
9391 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9247 
9392 
     | 
    
         | 
| 
       9248 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9393 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9249 
9394 
     | 
    
         
             
                    return;
         
     | 
| 
       9250 
9395 
     | 
    
         
             
                }
         
     | 
| 
       9251 
9396 
     | 
    
         | 
| 
         @@ -9291,7 +9436,7 @@ static void ggml_compute_forward_tanh_f32( 
     | 
|
| 
       9291 
9436 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9292 
9437 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9293 
9438 
     | 
    
         | 
| 
       9294 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9439 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9295 
9440 
     | 
    
         
             
                    return;
         
     | 
| 
       9296 
9441 
     | 
    
         
             
                }
         
     | 
| 
       9297 
9442 
     | 
    
         | 
| 
         @@ -9337,7 +9482,7 @@ static void ggml_compute_forward_elu_f32( 
     | 
|
| 
       9337 
9482 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9338 
9483 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9339 
9484 
     | 
    
         | 
| 
       9340 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9485 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9341 
9486 
     | 
    
         
             
                    return;
         
     | 
| 
       9342 
9487 
     | 
    
         
             
                }
         
     | 
| 
       9343 
9488 
     | 
    
         | 
| 
         @@ -9383,7 +9528,7 @@ static void ggml_compute_forward_relu_f32( 
     | 
|
| 
       9383 
9528 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9384 
9529 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9385 
9530 
     | 
    
         | 
| 
       9386 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9531 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9387 
9532 
     | 
    
         
             
                    return;
         
     | 
| 
       9388 
9533 
     | 
    
         
             
                }
         
     | 
| 
       9389 
9534 
     | 
    
         | 
| 
         @@ -9430,7 +9575,7 @@ static void ggml_compute_forward_gelu_f32( 
     | 
|
| 
       9430 
9575 
     | 
    
         
             
                GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
         
     | 
| 
       9431 
9576 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9432 
9577 
     | 
    
         | 
| 
       9433 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9578 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9434 
9579 
     | 
    
         
             
                    return;
         
     | 
| 
       9435 
9580 
     | 
    
         
             
                }
         
     | 
| 
       9436 
9581 
     | 
    
         | 
| 
         @@ -9493,7 +9638,7 @@ static void ggml_compute_forward_gelu_quick_f32( 
     | 
|
| 
       9493 
9638 
     | 
    
         
             
                GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
         
     | 
| 
       9494 
9639 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9495 
9640 
     | 
    
         | 
| 
       9496 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9641 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9497 
9642 
     | 
    
         
             
                    return;
         
     | 
| 
       9498 
9643 
     | 
    
         
             
                }
         
     | 
| 
       9499 
9644 
     | 
    
         | 
| 
         @@ -9556,7 +9701,7 @@ static void ggml_compute_forward_silu_f32( 
     | 
|
| 
       9556 
9701 
     | 
    
         
             
                GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
         
     | 
| 
       9557 
9702 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9558 
9703 
     | 
    
         | 
| 
       9559 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9704 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9560 
9705 
     | 
    
         
             
                    return;
         
     | 
| 
       9561 
9706 
     | 
    
         
             
                }
         
     | 
| 
       9562 
9707 
     | 
    
         | 
| 
         @@ -9617,7 +9762,7 @@ static void ggml_compute_forward_leaky_relu_f32( 
     | 
|
| 
       9617 
9762 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9618 
9763 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9619 
9764 
     | 
    
         | 
| 
       9620 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9765 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9621 
9766 
     | 
    
         
             
                    return;
         
     | 
| 
       9622 
9767 
     | 
    
         
             
                }
         
     | 
| 
       9623 
9768 
     | 
    
         | 
| 
         @@ -9670,7 +9815,7 @@ static void ggml_compute_forward_silu_back_f32( 
     | 
|
| 
       9670 
9815 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9671 
9816 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, grad));
         
     | 
| 
       9672 
9817 
     | 
    
         | 
| 
       9673 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9818 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9674 
9819 
     | 
    
         
             
                    return;
         
     | 
| 
       9675 
9820 
     | 
    
         
             
                }
         
     | 
| 
       9676 
9821 
     | 
    
         | 
| 
         @@ -9732,7 +9877,7 @@ static void ggml_compute_forward_hardswish_f32( 
     | 
|
| 
       9732 
9877 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9733 
9878 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9734 
9879 
     | 
    
         | 
| 
       9735 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9880 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9736 
9881 
     | 
    
         
             
                    return;
         
     | 
| 
       9737 
9882 
     | 
    
         
             
                }
         
     | 
| 
       9738 
9883 
     | 
    
         | 
| 
         @@ -9775,7 +9920,7 @@ static void ggml_compute_forward_hardsigmoid_f32( 
     | 
|
| 
       9775 
9920 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       9776 
9921 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9777 
9922 
     | 
    
         | 
| 
       9778 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9923 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9779 
9924 
     | 
    
         
             
                    return;
         
     | 
| 
       9780 
9925 
     | 
    
         
             
                }
         
     | 
| 
       9781 
9926 
     | 
    
         | 
| 
         @@ -9821,7 +9966,7 @@ static void ggml_compute_forward_norm_f32( 
     | 
|
| 
       9821 
9966 
     | 
    
         | 
| 
       9822 
9967 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9823 
9968 
     | 
    
         | 
| 
       9824 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 9969 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9825 
9970 
     | 
    
         
             
                    return;
         
     | 
| 
       9826 
9971 
     | 
    
         
             
                }
         
     | 
| 
       9827 
9972 
     | 
    
         | 
| 
         @@ -9896,7 +10041,7 @@ static void ggml_compute_forward_rms_norm_f32( 
     | 
|
| 
       9896 
10041 
     | 
    
         | 
| 
       9897 
10042 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       9898 
10043 
     | 
    
         | 
| 
       9899 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10044 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9900 
10045 
     | 
    
         
             
                    return;
         
     | 
| 
       9901 
10046 
     | 
    
         
             
                }
         
     | 
| 
       9902 
10047 
     | 
    
         | 
| 
         @@ -9967,7 +10112,7 @@ static void ggml_compute_forward_rms_norm_back_f32( 
     | 
|
| 
       9967 
10112 
     | 
    
         | 
| 
       9968 
10113 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
         
     | 
| 
       9969 
10114 
     | 
    
         | 
| 
       9970 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10115 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       9971 
10116 
     | 
    
         
             
                    return;
         
     | 
| 
       9972 
10117 
     | 
    
         
             
                }
         
     | 
| 
       9973 
10118 
     | 
    
         | 
| 
         @@ -10145,7 +10290,7 @@ static void ggml_compute_forward_group_norm_f32( 
     | 
|
| 
       10145 
10290 
     | 
    
         | 
| 
       10146 
10291 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       10147 
10292 
     | 
    
         | 
| 
       10148 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10293 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       10149 
10294 
     | 
    
         
             
                    return;
         
     | 
| 
       10150 
10295 
     | 
    
         
             
                }
         
     | 
| 
       10151 
10296 
     | 
    
         | 
| 
         @@ -10163,7 +10308,7 @@ static void ggml_compute_forward_group_norm_f32( 
     | 
|
| 
       10163 
10308 
     | 
    
         
             
                int n_channels = src0->ne[2];
         
     | 
| 
       10164 
10309 
     | 
    
         
             
                int n_groups = dst->op_params[0];
         
     | 
| 
       10165 
10310 
     | 
    
         
             
                int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
         
     | 
| 
       10166 
     | 
    
         
            -
                for (int i = ith; i < n_groups; i+=nth) {
         
     | 
| 
      
 10311 
     | 
    
         
            +
                for (int i = ith; i < n_groups; i += nth) {
         
     | 
| 
       10167 
10312 
     | 
    
         
             
                    int start = i * n_channels_per_group;
         
     | 
| 
       10168 
10313 
     | 
    
         
             
                    int end = start + n_channels_per_group;
         
     | 
| 
       10169 
10314 
     | 
    
         
             
                    if (end > n_channels) {
         
     | 
| 
         @@ -10177,28 +10322,32 @@ static void ggml_compute_forward_group_norm_f32( 
     | 
|
| 
       10177 
10322 
     | 
    
         
             
                            for (int64_t i01 = 0; i01 < ne01; i01++) {
         
     | 
| 
       10178 
10323 
     | 
    
         
             
                                const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
         
     | 
| 
       10179 
10324 
     | 
    
         | 
| 
      
 10325 
     | 
    
         
            +
                                ggml_float sumr = 0.0;
         
     | 
| 
       10180 
10326 
     | 
    
         
             
                                for (int64_t i00 = 0; i00 < ne00; i00++) {
         
     | 
| 
       10181 
     | 
    
         
            -
                                     
     | 
| 
      
 10327 
     | 
    
         
            +
                                    sumr += (ggml_float)x[i00];
         
     | 
| 
       10182 
10328 
     | 
    
         
             
                                }
         
     | 
| 
      
 10329 
     | 
    
         
            +
                                sum += sumr;
         
     | 
| 
       10183 
10330 
     | 
    
         
             
                            }
         
     | 
| 
       10184 
10331 
     | 
    
         
             
                        }
         
     | 
| 
       10185 
     | 
    
         
            -
                        float mean = sum / (ne00 * ne01 * step);
         
     | 
| 
       10186 
     | 
    
         
            -
                        ggml_float sum2 = 0.0;
         
     | 
| 
      
 10332 
     | 
    
         
            +
                        const float mean = sum / (ne00 * ne01 * step);
         
     | 
| 
       10187 
10333 
     | 
    
         | 
| 
      
 10334 
     | 
    
         
            +
                        ggml_float sum2 = 0.0;
         
     | 
| 
       10188 
10335 
     | 
    
         
             
                        for (int64_t i02 = start; i02 < end; i02++) {
         
     | 
| 
       10189 
10336 
     | 
    
         
             
                            for (int64_t i01 = 0; i01 < ne01; i01++) {
         
     | 
| 
       10190 
10337 
     | 
    
         
             
                                const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
         
     | 
| 
       10191 
10338 
     | 
    
         | 
| 
       10192 
10339 
     | 
    
         
             
                                float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
         
     | 
| 
       10193 
10340 
     | 
    
         | 
| 
      
 10341 
     | 
    
         
            +
                                ggml_float sumr = 0.0;
         
     | 
| 
       10194 
10342 
     | 
    
         
             
                                for (int64_t i00 = 0; i00 < ne00; i00++) {
         
     | 
| 
       10195 
10343 
     | 
    
         
             
                                    float v = x[i00] - mean;
         
     | 
| 
       10196 
10344 
     | 
    
         
             
                                    y[i00] = v;
         
     | 
| 
       10197 
     | 
    
         
            -
                                     
     | 
| 
      
 10345 
     | 
    
         
            +
                                    sumr += (ggml_float)(v * v);
         
     | 
| 
       10198 
10346 
     | 
    
         
             
                                }
         
     | 
| 
      
 10347 
     | 
    
         
            +
                                sum2 += sumr;
         
     | 
| 
       10199 
10348 
     | 
    
         
             
                            }
         
     | 
| 
       10200 
10349 
     | 
    
         
             
                        }
         
     | 
| 
       10201 
     | 
    
         
            -
                        float variance = sum2 / (ne00 * ne01 * step);
         
     | 
| 
      
 10350 
     | 
    
         
            +
                        const float variance = sum2 / (ne00 * ne01 * step);
         
     | 
| 
       10202 
10351 
     | 
    
         
             
                        const float scale = 1.0f / sqrtf(variance + eps);
         
     | 
| 
       10203 
10352 
     | 
    
         | 
| 
       10204 
10353 
     | 
    
         
             
                        for (int64_t i02 = start; i02 < end; i02++) {
         
     | 
| 
         @@ -10312,7 +10461,7 @@ static void ggml_compute_forward_mul_mat( 
     | 
|
| 
       10312 
10461 
     | 
    
         | 
| 
       10313 
10462 
     | 
    
         
             
            #if defined(GGML_USE_CLBLAST)
         
     | 
| 
       10314 
10463 
     | 
    
         
             
                if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         
     | 
| 
       10315 
     | 
    
         
            -
                    if (params->ith == 0 && params->type ==  
     | 
| 
      
 10464 
     | 
    
         
            +
                    if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
         
     | 
| 
       10316 
10465 
     | 
    
         
             
                        ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         
     | 
| 
       10317 
10466 
     | 
    
         
             
                    }
         
     | 
| 
       10318 
10467 
     | 
    
         
             
                    return;
         
     | 
| 
         @@ -10325,7 +10474,7 @@ static void ggml_compute_forward_mul_mat( 
     | 
|
| 
       10325 
10474 
     | 
    
         
             
                    const size_t  desired_wsize = ne13*ne12*ne_plane*sizeof(float);
         
     | 
| 
       10326 
10475 
     | 
    
         
             
                    UNUSED(desired_wsize);
         
     | 
| 
       10327 
10476 
     | 
    
         | 
| 
       10328 
     | 
    
         
            -
                    if (params->type ==  
     | 
| 
      
 10477 
     | 
    
         
            +
                    if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       10329 
10478 
     | 
    
         
             
                        if (type != GGML_TYPE_F32) {
         
     | 
| 
       10330 
10479 
     | 
    
         
             
                            assert(params->wsize >= desired_wsize);
         
     | 
| 
       10331 
10480 
     | 
    
         
             
                            // parallelize by src0 rows
         
     | 
| 
         @@ -10348,7 +10497,7 @@ static void ggml_compute_forward_mul_mat( 
     | 
|
| 
       10348 
10497 
     | 
    
         
             
                        return;
         
     | 
| 
       10349 
10498 
     | 
    
         
             
                    }
         
     | 
| 
       10350 
10499 
     | 
    
         | 
| 
       10351 
     | 
    
         
            -
                    if (params->type ==  
     | 
| 
      
 10500 
     | 
    
         
            +
                    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       10352 
10501 
     | 
    
         
             
                        return;
         
     | 
| 
       10353 
10502 
     | 
    
         
             
                    }
         
     | 
| 
       10354 
10503 
     | 
    
         | 
| 
         @@ -10386,7 +10535,7 @@ static void ggml_compute_forward_mul_mat( 
     | 
|
| 
       10386 
10535 
     | 
    
         
             
                }
         
     | 
| 
       10387 
10536 
     | 
    
         
             
            #endif
         
     | 
| 
       10388 
10537 
     | 
    
         | 
| 
       10389 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10538 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       10390 
10539 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       10391 
10540 
     | 
    
         
             
                        return;
         
     | 
| 
       10392 
10541 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -10410,7 +10559,7 @@ static void ggml_compute_forward_mul_mat( 
     | 
|
| 
       10410 
10559 
     | 
    
         
             
                    return;
         
     | 
| 
       10411 
10560 
     | 
    
         
             
                }
         
     | 
| 
       10412 
10561 
     | 
    
         | 
| 
       10413 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10562 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       10414 
10563 
     | 
    
         
             
                    return;
         
     | 
| 
       10415 
10564 
     | 
    
         
             
                }
         
     | 
| 
       10416 
10565 
     | 
    
         | 
| 
         @@ -10567,7 +10716,7 @@ static void ggml_compute_forward_mul_mat_id( 
     | 
|
| 
       10567 
10716 
     | 
    
         | 
| 
       10568 
10717 
     | 
    
         
             
                #define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
         
     | 
| 
       10569 
10718 
     | 
    
         | 
| 
       10570 
     | 
    
         
            -
               if (params->type ==  
     | 
| 
      
 10719 
     | 
    
         
            +
               if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       10571 
10720 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       10572 
10721 
     | 
    
         
             
                        return;
         
     | 
| 
       10573 
10722 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -10604,7 +10753,7 @@ static void ggml_compute_forward_mul_mat_id( 
     | 
|
| 
       10604 
10753 
     | 
    
         
             
                    return;
         
     | 
| 
       10605 
10754 
     | 
    
         
             
                }
         
     | 
| 
       10606 
10755 
     | 
    
         | 
| 
       10607 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10756 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       10608 
10757 
     | 
    
         
             
                    return;
         
     | 
| 
       10609 
10758 
     | 
    
         
             
                }
         
     | 
| 
       10610 
10759 
     | 
    
         | 
| 
         @@ -10752,7 +10901,7 @@ static void ggml_compute_forward_out_prod_f32( 
     | 
|
| 
       10752 
10901 
     | 
    
         
             
                    (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
         
     | 
| 
       10753 
10902 
     | 
    
         
             
            #endif
         
     | 
| 
       10754 
10903 
     | 
    
         | 
| 
       10755 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10904 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       10756 
10905 
     | 
    
         
             
            #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
         
     | 
| 
       10757 
10906 
     | 
    
         
             
                    if (use_blas) {
         
     | 
| 
       10758 
10907 
     | 
    
         
             
                        return;
         
     | 
| 
         @@ -10765,7 +10914,7 @@ static void ggml_compute_forward_out_prod_f32( 
     | 
|
| 
       10765 
10914 
     | 
    
         
             
                    return;
         
     | 
| 
       10766 
10915 
     | 
    
         
             
                }
         
     | 
| 
       10767 
10916 
     | 
    
         | 
| 
       10768 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 10917 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       10769 
10918 
     | 
    
         
             
                    return;
         
     | 
| 
       10770 
10919 
     | 
    
         
             
                }
         
     | 
| 
       10771 
10920 
     | 
    
         | 
| 
         @@ -10945,7 +11094,7 @@ static void ggml_compute_forward_out_prod_q_f32( 
     | 
|
| 
       10945 
11094 
     | 
    
         
             
                // TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
         
     | 
| 
       10946 
11095 
     | 
    
         
             
                // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
         
     | 
| 
       10947 
11096 
     | 
    
         | 
| 
       10948 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11097 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       10949 
11098 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       10950 
11099 
     | 
    
         
             
                        return;
         
     | 
| 
       10951 
11100 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -10953,7 +11102,7 @@ static void ggml_compute_forward_out_prod_q_f32( 
     | 
|
| 
       10953 
11102 
     | 
    
         
             
                    return;
         
     | 
| 
       10954 
11103 
     | 
    
         
             
                }
         
     | 
| 
       10955 
11104 
     | 
    
         | 
| 
       10956 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11105 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       10957 
11106 
     | 
    
         
             
                    return;
         
     | 
| 
       10958 
11107 
     | 
    
         
             
                }
         
     | 
| 
       10959 
11108 
     | 
    
         | 
| 
         @@ -11039,6 +11188,9 @@ static void ggml_compute_forward_out_prod( 
     | 
|
| 
       11039 
11188 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       11040 
11189 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       11041 
11190 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 11191 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 11192 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 11193 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       11042 
11194 
     | 
    
         
             
                        {
         
     | 
| 
       11043 
11195 
     | 
    
         
             
                            ggml_compute_forward_out_prod_q_f32(params, dst);
         
     | 
| 
       11044 
11196 
     | 
    
         
             
                        } break;
         
     | 
| 
         @@ -11070,7 +11222,7 @@ static void ggml_compute_forward_scale_f32( 
     | 
|
| 
       11070 
11222 
     | 
    
         
             
                GGML_ASSERT(ggml_is_contiguous(dst));
         
     | 
| 
       11071 
11223 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       11072 
11224 
     | 
    
         | 
| 
       11073 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11225 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11074 
11226 
     | 
    
         
             
                    return;
         
     | 
| 
       11075 
11227 
     | 
    
         
             
                }
         
     | 
| 
       11076 
11228 
     | 
    
         | 
| 
         @@ -11142,7 +11294,7 @@ static void ggml_compute_forward_set_f32( 
     | 
|
| 
       11142 
11294 
     | 
    
         
             
                size_t offset  = ((int32_t *) dst->op_params)[3];
         
     | 
| 
       11143 
11295 
     | 
    
         
             
                bool   inplace = (bool) ((int32_t *) dst->op_params)[4];
         
     | 
| 
       11144 
11296 
     | 
    
         | 
| 
       11145 
     | 
    
         
            -
                if (!inplace && (params->type ==  
     | 
| 
      
 11297 
     | 
    
         
            +
                if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
         
     | 
| 
       11146 
11298 
     | 
    
         
             
                    if (params->ith != 0) {
         
     | 
| 
       11147 
11299 
     | 
    
         
             
                        return;
         
     | 
| 
       11148 
11300 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -11154,7 +11306,7 @@ static void ggml_compute_forward_set_f32( 
     | 
|
| 
       11154 
11306 
     | 
    
         
             
                        ggml_nbytes(dst));
         
     | 
| 
       11155 
11307 
     | 
    
         
             
                }
         
     | 
| 
       11156 
11308 
     | 
    
         | 
| 
       11157 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11309 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11158 
11310 
     | 
    
         
             
                    return;
         
     | 
| 
       11159 
11311 
     | 
    
         
             
                }
         
     | 
| 
       11160 
11312 
     | 
    
         | 
| 
         @@ -11227,6 +11379,9 @@ static void ggml_compute_forward_set( 
     | 
|
| 
       11227 
11379 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       11228 
11380 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       11229 
11381 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 11382 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 11383 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 11384 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       11230 
11385 
     | 
    
         
             
                    default:
         
     | 
| 
       11231 
11386 
     | 
    
         
             
                        {
         
     | 
| 
       11232 
11387 
     | 
    
         
             
                            GGML_ASSERT(false);
         
     | 
| 
         @@ -11301,7 +11456,7 @@ static void ggml_compute_forward_get_rows_q( 
     | 
|
| 
       11301 
11456 
     | 
    
         | 
| 
       11302 
11457 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       11303 
11458 
     | 
    
         | 
| 
       11304 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11459 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11305 
11460 
     | 
    
         
             
                    return;
         
     | 
| 
       11306 
11461 
     | 
    
         
             
                }
         
     | 
| 
       11307 
11462 
     | 
    
         | 
| 
         @@ -11341,7 +11496,7 @@ static void ggml_compute_forward_get_rows_f16( 
     | 
|
| 
       11341 
11496 
     | 
    
         | 
| 
       11342 
11497 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       11343 
11498 
     | 
    
         | 
| 
       11344 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11499 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11345 
11500 
     | 
    
         
             
                    return;
         
     | 
| 
       11346 
11501 
     | 
    
         
             
                }
         
     | 
| 
       11347 
11502 
     | 
    
         | 
| 
         @@ -11378,7 +11533,7 @@ static void ggml_compute_forward_get_rows_f32( 
     | 
|
| 
       11378 
11533 
     | 
    
         | 
| 
       11379 
11534 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       11380 
11535 
     | 
    
         | 
| 
       11381 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11536 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11382 
11537 
     | 
    
         
             
                    return;
         
     | 
| 
       11383 
11538 
     | 
    
         
             
                }
         
     | 
| 
       11384 
11539 
     | 
    
         | 
| 
         @@ -11429,6 +11584,9 @@ static void ggml_compute_forward_get_rows( 
     | 
|
| 
       11429 
11584 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       11430 
11585 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       11431 
11586 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 11587 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 11588 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 11589 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       11432 
11590 
     | 
    
         
             
                        {
         
     | 
| 
       11433 
11591 
     | 
    
         
             
                            ggml_compute_forward_get_rows_q(params, dst);
         
     | 
| 
       11434 
11592 
     | 
    
         
             
                        } break;
         
     | 
| 
         @@ -11480,14 +11638,14 @@ static void ggml_compute_forward_get_rows_back_f32_f16( 
     | 
|
| 
       11480 
11638 
     | 
    
         | 
| 
       11481 
11639 
     | 
    
         
             
                // ggml_compute_forward_dup_same_cont(params, opt0, dst);
         
     | 
| 
       11482 
11640 
     | 
    
         | 
| 
       11483 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11641 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       11484 
11642 
     | 
    
         
             
                    if (params->ith != 0) {
         
     | 
| 
       11485 
11643 
     | 
    
         
             
                        return;
         
     | 
| 
       11486 
11644 
     | 
    
         
             
                    }
         
     | 
| 
       11487 
11645 
     | 
    
         
             
                    memset(dst->data, 0, ggml_nbytes(dst));
         
     | 
| 
       11488 
11646 
     | 
    
         
             
                }
         
     | 
| 
       11489 
11647 
     | 
    
         | 
| 
       11490 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11648 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11491 
11649 
     | 
    
         
             
                    return;
         
     | 
| 
       11492 
11650 
     | 
    
         
             
                }
         
     | 
| 
       11493 
11651 
     | 
    
         | 
| 
         @@ -11519,14 +11677,14 @@ static void ggml_compute_forward_get_rows_back_f32( 
     | 
|
| 
       11519 
11677 
     | 
    
         | 
| 
       11520 
11678 
     | 
    
         
             
                // ggml_compute_forward_dup_same_cont(params, opt0, dst);
         
     | 
| 
       11521 
11679 
     | 
    
         | 
| 
       11522 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11680 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       11523 
11681 
     | 
    
         
             
                    if (params->ith != 0) {
         
     | 
| 
       11524 
11682 
     | 
    
         
             
                        return;
         
     | 
| 
       11525 
11683 
     | 
    
         
             
                    }
         
     | 
| 
       11526 
11684 
     | 
    
         
             
                    memset(dst->data, 0, ggml_nbytes(dst));
         
     | 
| 
       11527 
11685 
     | 
    
         
             
                }
         
     | 
| 
       11528 
11686 
     | 
    
         | 
| 
       11529 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11687 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11530 
11688 
     | 
    
         
             
                    return;
         
     | 
| 
       11531 
11689 
     | 
    
         
             
                }
         
     | 
| 
       11532 
11690 
     | 
    
         | 
| 
         @@ -11596,7 +11754,7 @@ static void ggml_compute_forward_diag_f32( 
     | 
|
| 
       11596 
11754 
     | 
    
         | 
| 
       11597 
11755 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       11598 
11756 
     | 
    
         | 
| 
       11599 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11757 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11600 
11758 
     | 
    
         
             
                    return;
         
     | 
| 
       11601 
11759 
     | 
    
         
             
                }
         
     | 
| 
       11602 
11760 
     | 
    
         | 
| 
         @@ -11665,7 +11823,7 @@ static void ggml_compute_forward_diag_mask_f32( 
     | 
|
| 
       11665 
11823 
     | 
    
         | 
| 
       11666 
11824 
     | 
    
         
             
                GGML_ASSERT(n_past >= 0);
         
     | 
| 
       11667 
11825 
     | 
    
         | 
| 
       11668 
     | 
    
         
            -
                if (!inplace && (params->type ==  
     | 
| 
      
 11826 
     | 
    
         
            +
                if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
         
     | 
| 
       11669 
11827 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       11670 
11828 
     | 
    
         
             
                        return;
         
     | 
| 
       11671 
11829 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -11679,7 +11837,7 @@ static void ggml_compute_forward_diag_mask_f32( 
     | 
|
| 
       11679 
11837 
     | 
    
         
             
                        ggml_nbytes(dst));
         
     | 
| 
       11680 
11838 
     | 
    
         
             
                }
         
     | 
| 
       11681 
11839 
     | 
    
         | 
| 
       11682 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11840 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11683 
11841 
     | 
    
         
             
                    return;
         
     | 
| 
       11684 
11842 
     | 
    
         
             
                }
         
     | 
| 
       11685 
11843 
     | 
    
         | 
| 
         @@ -11753,7 +11911,7 @@ static void ggml_compute_forward_soft_max_f32( 
     | 
|
| 
       11753 
11911 
     | 
    
         
             
                assert(ggml_is_contiguous(dst));
         
     | 
| 
       11754 
11912 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, dst));
         
     | 
| 
       11755 
11913 
     | 
    
         | 
| 
       11756 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 11914 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11757 
11915 
     | 
    
         
             
                    return;
         
     | 
| 
       11758 
11916 
     | 
    
         
             
                }
         
     | 
| 
       11759 
11917 
     | 
    
         | 
| 
         @@ -11891,7 +12049,7 @@ static void ggml_compute_forward_soft_max_back_f32( 
     | 
|
| 
       11891 
12049 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       11892 
12050 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src1, dst));
         
     | 
| 
       11893 
12051 
     | 
    
         | 
| 
       11894 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12052 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11895 
12053 
     | 
    
         
             
                    return;
         
     | 
| 
       11896 
12054 
     | 
    
         
             
                }
         
     | 
| 
       11897 
12055 
     | 
    
         | 
| 
         @@ -11985,7 +12143,7 @@ static void ggml_compute_forward_alibi_f32( 
     | 
|
| 
       11985 
12143 
     | 
    
         | 
| 
       11986 
12144 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       11987 
12145 
     | 
    
         | 
| 
       11988 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12146 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       11989 
12147 
     | 
    
         
             
                    return;
         
     | 
| 
       11990 
12148 
     | 
    
         
             
                }
         
     | 
| 
       11991 
12149 
     | 
    
         | 
| 
         @@ -12044,7 +12202,7 @@ static void ggml_compute_forward_alibi_f16( 
     | 
|
| 
       12044 
12202 
     | 
    
         | 
| 
       12045 
12203 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       12046 
12204 
     | 
    
         | 
| 
       12047 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12205 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12048 
12206 
     | 
    
         
             
                    return;
         
     | 
| 
       12049 
12207 
     | 
    
         
             
                }
         
     | 
| 
       12050 
12208 
     | 
    
         | 
| 
         @@ -12129,6 +12287,9 @@ static void ggml_compute_forward_alibi( 
     | 
|
| 
       12129 
12287 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       12130 
12288 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       12131 
12289 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 12290 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 12291 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 12292 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       12132 
12293 
     | 
    
         
             
                    case GGML_TYPE_Q8_K:
         
     | 
| 
       12133 
12294 
     | 
    
         
             
                    case GGML_TYPE_I8:
         
     | 
| 
       12134 
12295 
     | 
    
         
             
                    case GGML_TYPE_I16:
         
     | 
| 
         @@ -12150,7 +12311,7 @@ static void ggml_compute_forward_clamp_f32( 
     | 
|
| 
       12150 
12311 
     | 
    
         | 
| 
       12151 
12312 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       12152 
12313 
     | 
    
         | 
| 
       12153 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12314 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12154 
12315 
     | 
    
         
             
                    return;
         
     | 
| 
       12155 
12316 
     | 
    
         
             
                }
         
     | 
| 
       12156 
12317 
     | 
    
         | 
| 
         @@ -12212,6 +12373,9 @@ static void ggml_compute_forward_clamp( 
     | 
|
| 
       12212 
12373 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS:
         
     | 
| 
       12213 
12374 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       12214 
12375 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 12376 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 12377 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 12378 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       12215 
12379 
     | 
    
         
             
                    case GGML_TYPE_Q8_K:
         
     | 
| 
       12216 
12380 
     | 
    
         
             
                    case GGML_TYPE_I8:
         
     | 
| 
       12217 
12381 
     | 
    
         
             
                    case GGML_TYPE_I16:
         
     | 
| 
         @@ -12289,7 +12453,7 @@ static void ggml_compute_forward_rope_f32( 
     | 
|
| 
       12289 
12453 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       12290 
12454 
     | 
    
         
             
                const struct ggml_tensor * src1 = dst->src[1];
         
     | 
| 
       12291 
12455 
     | 
    
         | 
| 
       12292 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12456 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12293 
12457 
     | 
    
         
             
                    return;
         
     | 
| 
       12294 
12458 
     | 
    
         
             
                }
         
     | 
| 
       12295 
12459 
     | 
    
         | 
| 
         @@ -12467,7 +12631,7 @@ static void ggml_compute_forward_rope_f16( 
     | 
|
| 
       12467 
12631 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       12468 
12632 
     | 
    
         
             
                const struct ggml_tensor * src1 = dst->src[1];
         
     | 
| 
       12469 
12633 
     | 
    
         | 
| 
       12470 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12634 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12471 
12635 
     | 
    
         
             
                    return;
         
     | 
| 
       12472 
12636 
     | 
    
         
             
                }
         
     | 
| 
       12473 
12637 
     | 
    
         | 
| 
         @@ -12698,7 +12862,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32( 
     | 
|
| 
       12698 
12862 
     | 
    
         
             
                GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
         
     | 
| 
       12699 
12863 
     | 
    
         
             
                GGML_ASSERT(nb10 == sizeof(float));
         
     | 
| 
       12700 
12864 
     | 
    
         | 
| 
       12701 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12865 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       12702 
12866 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       12703 
12867 
     | 
    
         
             
                        return;
         
     | 
| 
       12704 
12868 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -12738,7 +12902,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32( 
     | 
|
| 
       12738 
12902 
     | 
    
         
             
                    return;
         
     | 
| 
       12739 
12903 
     | 
    
         
             
                }
         
     | 
| 
       12740 
12904 
     | 
    
         | 
| 
       12741 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12905 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12742 
12906 
     | 
    
         
             
                    return;
         
     | 
| 
       12743 
12907 
     | 
    
         
             
                }
         
     | 
| 
       12744 
12908 
     | 
    
         | 
| 
         @@ -12797,7 +12961,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32( 
     | 
|
| 
       12797 
12961 
     | 
    
         
             
                GGML_ASSERT(nb00 == sizeof(float));
         
     | 
| 
       12798 
12962 
     | 
    
         
             
                GGML_ASSERT(nb10 == sizeof(float));
         
     | 
| 
       12799 
12963 
     | 
    
         | 
| 
       12800 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 12964 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       12801 
12965 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       12802 
12966 
     | 
    
         
             
                        return;
         
     | 
| 
       12803 
12967 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -12837,7 +13001,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32( 
     | 
|
| 
       12837 
13001 
     | 
    
         
             
                    return;
         
     | 
| 
       12838 
13002 
     | 
    
         
             
                }
         
     | 
| 
       12839 
13003 
     | 
    
         | 
| 
       12840 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13004 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12841 
13005 
     | 
    
         
             
                    return;
         
     | 
| 
       12842 
13006 
     | 
    
         
             
                }
         
     | 
| 
       12843 
13007 
     | 
    
         | 
| 
         @@ -12941,11 +13105,11 @@ static void ggml_compute_forward_im2col_f32( 
     | 
|
| 
       12941 
13105 
     | 
    
         
             
                GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
         
     | 
| 
       12942 
13106 
     | 
    
         
             
                GGML_ASSERT(nb10 == sizeof(float));
         
     | 
| 
       12943 
13107 
     | 
    
         | 
| 
       12944 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13108 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       12945 
13109 
     | 
    
         
             
                    return;
         
     | 
| 
       12946 
13110 
     | 
    
         
             
                }
         
     | 
| 
       12947 
13111 
     | 
    
         | 
| 
       12948 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13112 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       12949 
13113 
     | 
    
         
             
                    return;
         
     | 
| 
       12950 
13114 
     | 
    
         
             
                }
         
     | 
| 
       12951 
13115 
     | 
    
         | 
| 
         @@ -13029,11 +13193,11 @@ static void ggml_compute_forward_im2col_f16( 
     | 
|
| 
       13029 
13193 
     | 
    
         
             
                GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
         
     | 
| 
       13030 
13194 
     | 
    
         
             
                GGML_ASSERT(nb10 == sizeof(float));
         
     | 
| 
       13031 
13195 
     | 
    
         | 
| 
       13032 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13196 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       13033 
13197 
     | 
    
         
             
                    return;
         
     | 
| 
       13034 
13198 
     | 
    
         
             
                }
         
     | 
| 
       13035 
13199 
     | 
    
         | 
| 
       13036 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13200 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13037 
13201 
     | 
    
         
             
                    return;
         
     | 
| 
       13038 
13202 
     | 
    
         
             
                }
         
     | 
| 
       13039 
13203 
     | 
    
         | 
| 
         @@ -13115,7 +13279,7 @@ static void ggml_compute_forward_conv_transpose_2d( 
     | 
|
| 
       13115 
13279 
     | 
    
         
             
                GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
         
     | 
| 
       13116 
13280 
     | 
    
         
             
                GGML_ASSERT(nb10 == sizeof(float));
         
     | 
| 
       13117 
13281 
     | 
    
         | 
| 
       13118 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13282 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       13119 
13283 
     | 
    
         
             
                    if (ith != 0) {
         
     | 
| 
       13120 
13284 
     | 
    
         
             
                        return;
         
     | 
| 
       13121 
13285 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -13157,7 +13321,7 @@ static void ggml_compute_forward_conv_transpose_2d( 
     | 
|
| 
       13157 
13321 
     | 
    
         
             
                    return;
         
     | 
| 
       13158 
13322 
     | 
    
         
             
                }
         
     | 
| 
       13159 
13323 
     | 
    
         | 
| 
       13160 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13324 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13161 
13325 
     | 
    
         
             
                    return;
         
     | 
| 
       13162 
13326 
     | 
    
         
             
                }
         
     | 
| 
       13163 
13327 
     | 
    
         | 
| 
         @@ -13209,7 +13373,7 @@ static void ggml_compute_forward_pool_1d_sk_p0( 
     | 
|
| 
       13209 
13373 
     | 
    
         
             
                assert(src->type == GGML_TYPE_F32);
         
     | 
| 
       13210 
13374 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       13211 
13375 
     | 
    
         | 
| 
       13212 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13376 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13213 
13377 
     | 
    
         
             
                    return;
         
     | 
| 
       13214 
13378 
     | 
    
         
             
                }
         
     | 
| 
       13215 
13379 
     | 
    
         | 
| 
         @@ -13278,7 +13442,7 @@ static void ggml_compute_forward_pool_2d( 
     | 
|
| 
       13278 
13442 
     | 
    
         
             
                GGML_ASSERT(src->type == GGML_TYPE_F32);
         
     | 
| 
       13279 
13443 
     | 
    
         
             
                GGML_ASSERT(params->ith == 0);
         
     | 
| 
       13280 
13444 
     | 
    
         | 
| 
       13281 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13445 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13282 
13446 
     | 
    
         
             
                    return;
         
     | 
| 
       13283 
13447 
     | 
    
         
             
                }
         
     | 
| 
       13284 
13448 
     | 
    
         | 
| 
         @@ -13351,7 +13515,7 @@ static void ggml_compute_forward_upscale_f32( 
     | 
|
| 
       13351 
13515 
     | 
    
         | 
| 
       13352 
13516 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       13353 
13517 
     | 
    
         | 
| 
       13354 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13518 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13355 
13519 
     | 
    
         
             
                    return;
         
     | 
| 
       13356 
13520 
     | 
    
         
             
                }
         
     | 
| 
       13357 
13521 
     | 
    
         | 
| 
         @@ -13411,7 +13575,7 @@ static void ggml_compute_forward_pad_f32( 
     | 
|
| 
       13411 
13575 
     | 
    
         | 
| 
       13412 
13576 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       13413 
13577 
     | 
    
         | 
| 
       13414 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13578 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13415 
13579 
     | 
    
         
             
                    return;
         
     | 
| 
       13416 
13580 
     | 
    
         
             
                }
         
     | 
| 
       13417 
13581 
     | 
    
         | 
| 
         @@ -13464,6 +13628,106 @@ static void ggml_compute_forward_pad( 
     | 
|
| 
       13464 
13628 
     | 
    
         
             
                }
         
     | 
| 
       13465 
13629 
     | 
    
         
             
            }
         
     | 
| 
       13466 
13630 
     | 
    
         | 
| 
      
 13631 
     | 
    
         
            +
             
     | 
| 
      
 13632 
     | 
    
         
            +
            // ggml_compute_forward_arange
         
     | 
| 
      
 13633 
     | 
    
         
            +
             
     | 
| 
      
 13634 
     | 
    
         
            +
            static void ggml_compute_forward_arange_f32(
         
     | 
| 
      
 13635 
     | 
    
         
            +
                const struct ggml_compute_params * params,
         
     | 
| 
      
 13636 
     | 
    
         
            +
                struct ggml_tensor * dst) {
         
     | 
| 
      
 13637 
     | 
    
         
            +
             
     | 
| 
      
 13638 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
      
 13639 
     | 
    
         
            +
                    return;
         
     | 
| 
      
 13640 
     | 
    
         
            +
                }
         
     | 
| 
      
 13641 
     | 
    
         
            +
             
     | 
| 
      
 13642 
     | 
    
         
            +
                GGML_ASSERT(dst->nb[0] == sizeof(float));
         
     | 
| 
      
 13643 
     | 
    
         
            +
             
     | 
| 
      
 13644 
     | 
    
         
            +
                const int ith = params->ith;
         
     | 
| 
      
 13645 
     | 
    
         
            +
                const int nth = params->nth;
         
     | 
| 
      
 13646 
     | 
    
         
            +
             
     | 
| 
      
 13647 
     | 
    
         
            +
                const float start = ggml_get_op_params_f32(dst, 0);
         
     | 
| 
      
 13648 
     | 
    
         
            +
                const float stop  = ggml_get_op_params_f32(dst, 1);
         
     | 
| 
      
 13649 
     | 
    
         
            +
                const float step  = ggml_get_op_params_f32(dst, 2);
         
     | 
| 
      
 13650 
     | 
    
         
            +
             
     | 
| 
      
 13651 
     | 
    
         
            +
                const int64_t steps = (int64_t) ceilf((stop - start) / step);
         
     | 
| 
      
 13652 
     | 
    
         
            +
             
     | 
| 
      
 13653 
     | 
    
         
            +
                GGML_ASSERT(ggml_nelements(dst) == steps);
         
     | 
| 
      
 13654 
     | 
    
         
            +
             
     | 
| 
      
 13655 
     | 
    
         
            +
                for (int64_t i = ith; i < steps; i+= nth) {
         
     | 
| 
      
 13656 
     | 
    
         
            +
                    float value = start + step * i;
         
     | 
| 
      
 13657 
     | 
    
         
            +
                    ((float *)dst->data)[i] = value;
         
     | 
| 
      
 13658 
     | 
    
         
            +
                }
         
     | 
| 
      
 13659 
     | 
    
         
            +
            }
         
     | 
| 
      
 13660 
     | 
    
         
            +
             
     | 
| 
      
 13661 
     | 
    
         
            +
            static void ggml_compute_forward_arange(
         
     | 
| 
      
 13662 
     | 
    
         
            +
                const struct ggml_compute_params * params,
         
     | 
| 
      
 13663 
     | 
    
         
            +
                struct ggml_tensor * dst) {
         
     | 
| 
      
 13664 
     | 
    
         
            +
                switch (dst->type) {
         
     | 
| 
      
 13665 
     | 
    
         
            +
                    case GGML_TYPE_F32:
         
     | 
| 
      
 13666 
     | 
    
         
            +
                        {
         
     | 
| 
      
 13667 
     | 
    
         
            +
                            ggml_compute_forward_arange_f32(params, dst);
         
     | 
| 
      
 13668 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 13669 
     | 
    
         
            +
                    default:
         
     | 
| 
      
 13670 
     | 
    
         
            +
                        {
         
     | 
| 
      
 13671 
     | 
    
         
            +
                            GGML_ASSERT(false);
         
     | 
| 
      
 13672 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 13673 
     | 
    
         
            +
                }
         
     | 
| 
      
 13674 
     | 
    
         
            +
            }
         
     | 
| 
      
 13675 
     | 
    
         
            +
             
     | 
| 
      
 13676 
     | 
    
         
            +
            static void ggml_compute_forward_timestep_embedding_f32(
         
     | 
| 
      
 13677 
     | 
    
         
            +
                const struct ggml_compute_params * params,
         
     | 
| 
      
 13678 
     | 
    
         
            +
                struct ggml_tensor * dst) {
         
     | 
| 
      
 13679 
     | 
    
         
            +
             
     | 
| 
      
 13680 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
      
 13681 
     | 
    
         
            +
                    return;
         
     | 
| 
      
 13682 
     | 
    
         
            +
                }
         
     | 
| 
      
 13683 
     | 
    
         
            +
             
     | 
| 
      
 13684 
     | 
    
         
            +
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
      
 13685 
     | 
    
         
            +
             
     | 
| 
      
 13686 
     | 
    
         
            +
                GGML_ASSERT(src0->nb[0] == sizeof(float));
         
     | 
| 
      
 13687 
     | 
    
         
            +
             
     | 
| 
      
 13688 
     | 
    
         
            +
                const int ith = params->ith;
         
     | 
| 
      
 13689 
     | 
    
         
            +
                const int nth = params->nth;
         
     | 
| 
      
 13690 
     | 
    
         
            +
             
     | 
| 
      
 13691 
     | 
    
         
            +
                GGML_TENSOR_UNARY_OP_LOCALS
         
     | 
| 
      
 13692 
     | 
    
         
            +
             
     | 
| 
      
 13693 
     | 
    
         
            +
                const int dim = ggml_get_op_params_i32(dst, 0);
         
     | 
| 
      
 13694 
     | 
    
         
            +
                const int max_period = ggml_get_op_params_i32(dst, 1);
         
     | 
| 
      
 13695 
     | 
    
         
            +
             
     | 
| 
      
 13696 
     | 
    
         
            +
                int half = dim / 2;
         
     | 
| 
      
 13697 
     | 
    
         
            +
             
     | 
| 
      
 13698 
     | 
    
         
            +
                for (int64_t i = 0; i < ne00; i++) {
         
     | 
| 
      
 13699 
     | 
    
         
            +
                    float * embed_data = (float *)((char *)  dst->data +  i*nb1);
         
     | 
| 
      
 13700 
     | 
    
         
            +
                    for (int64_t j = ith; j < half; j += nth) {
         
     | 
| 
      
 13701 
     | 
    
         
            +
                        float timestep = ((float *)src0->data)[i];
         
     | 
| 
      
 13702 
     | 
    
         
            +
                        float freq = (float)expf(-logf(max_period) * j / half);
         
     | 
| 
      
 13703 
     | 
    
         
            +
                        float arg = timestep * freq;
         
     | 
| 
      
 13704 
     | 
    
         
            +
                        embed_data[j] = cosf(arg);
         
     | 
| 
      
 13705 
     | 
    
         
            +
                        embed_data[j + half] = sinf(arg);
         
     | 
| 
      
 13706 
     | 
    
         
            +
                    }
         
     | 
| 
      
 13707 
     | 
    
         
            +
                    if (dim % 2 != 0 && ith == 0) {
         
     | 
| 
      
 13708 
     | 
    
         
            +
                        embed_data[dim] = 0.f;
         
     | 
| 
      
 13709 
     | 
    
         
            +
                    }
         
     | 
| 
      
 13710 
     | 
    
         
            +
                }
         
     | 
| 
      
 13711 
     | 
    
         
            +
            }
         
     | 
| 
      
 13712 
     | 
    
         
            +
             
     | 
| 
      
 13713 
     | 
    
         
            +
            static void ggml_compute_forward_timestep_embedding(
         
     | 
| 
      
 13714 
     | 
    
         
            +
                const struct ggml_compute_params * params,
         
     | 
| 
      
 13715 
     | 
    
         
            +
                struct ggml_tensor * dst) {
         
     | 
| 
      
 13716 
     | 
    
         
            +
             
     | 
| 
      
 13717 
     | 
    
         
            +
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
      
 13718 
     | 
    
         
            +
             
     | 
| 
      
 13719 
     | 
    
         
            +
                switch (src0->type) {
         
     | 
| 
      
 13720 
     | 
    
         
            +
                    case GGML_TYPE_F32:
         
     | 
| 
      
 13721 
     | 
    
         
            +
                        {
         
     | 
| 
      
 13722 
     | 
    
         
            +
                            ggml_compute_forward_timestep_embedding_f32(params, dst);
         
     | 
| 
      
 13723 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 13724 
     | 
    
         
            +
                    default:
         
     | 
| 
      
 13725 
     | 
    
         
            +
                        {
         
     | 
| 
      
 13726 
     | 
    
         
            +
                            GGML_ASSERT(false);
         
     | 
| 
      
 13727 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 13728 
     | 
    
         
            +
                }
         
     | 
| 
      
 13729 
     | 
    
         
            +
            }
         
     | 
| 
      
 13730 
     | 
    
         
            +
             
     | 
| 
       13467 
13731 
     | 
    
         
             
            // ggml_compute_forward_argsort
         
     | 
| 
       13468 
13732 
     | 
    
         | 
| 
       13469 
13733 
     | 
    
         
             
            static void ggml_compute_forward_argsort_f32(
         
     | 
| 
         @@ -13472,7 +13736,7 @@ static void ggml_compute_forward_argsort_f32( 
     | 
|
| 
       13472 
13736 
     | 
    
         | 
| 
       13473 
13737 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       13474 
13738 
     | 
    
         | 
| 
       13475 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13739 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13476 
13740 
     | 
    
         
             
                    return;
         
     | 
| 
       13477 
13741 
     | 
    
         
             
                }
         
     | 
| 
       13478 
13742 
     | 
    
         | 
| 
         @@ -13498,8 +13762,8 @@ static void ggml_compute_forward_argsort_f32( 
     | 
|
| 
       13498 
13762 
     | 
    
         
             
                    // C doesn't have a functional sort, so we do a bubble sort instead
         
     | 
| 
       13499 
13763 
     | 
    
         
             
                    for (int64_t j = 0; j < ne0; j++) {
         
     | 
| 
       13500 
13764 
     | 
    
         
             
                        for (int64_t k = j + 1; k < ne0; k++) {
         
     | 
| 
       13501 
     | 
    
         
            -
                            if ((order ==  
     | 
| 
       13502 
     | 
    
         
            -
                                (order ==  
     | 
| 
      
 13765 
     | 
    
         
            +
                            if ((order == GGML_SORT_ORDER_ASC  && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
         
     | 
| 
      
 13766 
     | 
    
         
            +
                                (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
         
     | 
| 
       13503 
13767 
     | 
    
         
             
                                int32_t tmp = dst_data[j];
         
     | 
| 
       13504 
13768 
     | 
    
         
             
                                dst_data[j] = dst_data[k];
         
     | 
| 
       13505 
13769 
     | 
    
         
             
                                dst_data[k] = tmp;
         
     | 
| 
         @@ -13582,11 +13846,11 @@ static void ggml_compute_forward_flash_attn_f32( 
     | 
|
| 
       13582 
13846 
     | 
    
         
             
                GGML_ASSERT(nb1 <= nb2);
         
     | 
| 
       13583 
13847 
     | 
    
         
             
                GGML_ASSERT(nb2 <= nb3);
         
     | 
| 
       13584 
13848 
     | 
    
         | 
| 
       13585 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13849 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       13586 
13850 
     | 
    
         
             
                    return;
         
     | 
| 
       13587 
13851 
     | 
    
         
             
                }
         
     | 
| 
       13588 
13852 
     | 
    
         | 
| 
       13589 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 13853 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13590 
13854 
     | 
    
         
             
                    return;
         
     | 
| 
       13591 
13855 
     | 
    
         
             
                }
         
     | 
| 
       13592 
13856 
     | 
    
         | 
| 
         @@ -13774,11 +14038,11 @@ static void ggml_compute_forward_flash_attn_f16( 
     | 
|
| 
       13774 
14038 
     | 
    
         
             
                GGML_ASSERT(nb1 <= nb2);
         
     | 
| 
       13775 
14039 
     | 
    
         
             
                GGML_ASSERT(nb2 <= nb3);
         
     | 
| 
       13776 
14040 
     | 
    
         | 
| 
       13777 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14041 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       13778 
14042 
     | 
    
         
             
                    return;
         
     | 
| 
       13779 
14043 
     | 
    
         
             
                }
         
     | 
| 
       13780 
14044 
     | 
    
         | 
| 
       13781 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14045 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       13782 
14046 
     | 
    
         
             
                    return;
         
     | 
| 
       13783 
14047 
     | 
    
         
             
                }
         
     | 
| 
       13784 
14048 
     | 
    
         | 
| 
         @@ -14033,11 +14297,11 @@ static void ggml_compute_forward_flash_ff_f16( 
     | 
|
| 
       14033 
14297 
     | 
    
         
             
                GGML_ASSERT(nb1 <= nb2);
         
     | 
| 
       14034 
14298 
     | 
    
         
             
                GGML_ASSERT(nb2 <= nb3);
         
     | 
| 
       14035 
14299 
     | 
    
         | 
| 
       14036 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14300 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       14037 
14301 
     | 
    
         
             
                    return;
         
     | 
| 
       14038 
14302 
     | 
    
         
             
                }
         
     | 
| 
       14039 
14303 
     | 
    
         | 
| 
       14040 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14304 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14041 
14305 
     | 
    
         
             
                    return;
         
     | 
| 
       14042 
14306 
     | 
    
         
             
                }
         
     | 
| 
       14043 
14307 
     | 
    
         | 
| 
         @@ -14192,14 +14456,14 @@ static void ggml_compute_forward_flash_attn_back_f32( 
     | 
|
| 
       14192 
14456 
     | 
    
         
             
                GGML_ASSERT(nb1 <= nb2);
         
     | 
| 
       14193 
14457 
     | 
    
         
             
                GGML_ASSERT(nb2 <= nb3);
         
     | 
| 
       14194 
14458 
     | 
    
         | 
| 
       14195 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14459 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       14196 
14460 
     | 
    
         
             
                    if (ith == 0) {
         
     | 
| 
       14197 
14461 
     | 
    
         
             
                        memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
         
     | 
| 
       14198 
14462 
     | 
    
         
             
                    }
         
     | 
| 
       14199 
14463 
     | 
    
         
             
                    return;
         
     | 
| 
       14200 
14464 
     | 
    
         
             
                }
         
     | 
| 
       14201 
14465 
     | 
    
         | 
| 
       14202 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14466 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14203 
14467 
     | 
    
         
             
                    return;
         
     | 
| 
       14204 
14468 
     | 
    
         
             
                }
         
     | 
| 
       14205 
14469 
     | 
    
         | 
| 
         @@ -14515,7 +14779,7 @@ static void ggml_compute_forward_win_part_f32( 
     | 
|
| 
       14515 
14779 
     | 
    
         | 
| 
       14516 
14780 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       14517 
14781 
     | 
    
         | 
| 
       14518 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14782 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14519 
14783 
     | 
    
         
             
                    return;
         
     | 
| 
       14520 
14784 
     | 
    
         
             
                }
         
     | 
| 
       14521 
14785 
     | 
    
         | 
| 
         @@ -14581,7 +14845,7 @@ static void ggml_compute_forward_win_unpart_f32( 
     | 
|
| 
       14581 
14845 
     | 
    
         | 
| 
       14582 
14846 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       14583 
14847 
     | 
    
         | 
| 
       14584 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14848 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14585 
14849 
     | 
    
         
             
                    return;
         
     | 
| 
       14586 
14850 
     | 
    
         
             
                }
         
     | 
| 
       14587 
14851 
     | 
    
         | 
| 
         @@ -14709,7 +14973,7 @@ static void ggml_compute_forward_get_rel_pos_f16( 
     | 
|
| 
       14709 
14973 
     | 
    
         | 
| 
       14710 
14974 
     | 
    
         
             
                const struct ggml_tensor * src0 = dst->src[0];
         
     | 
| 
       14711 
14975 
     | 
    
         | 
| 
       14712 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 14976 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14713 
14977 
     | 
    
         
             
                    return;
         
     | 
| 
       14714 
14978 
     | 
    
         
             
                }
         
     | 
| 
       14715 
14979 
     | 
    
         | 
| 
         @@ -14761,14 +15025,14 @@ static void ggml_compute_forward_add_rel_pos_f32( 
     | 
|
| 
       14761 
15025 
     | 
    
         
             
                const struct ggml_tensor * src2 = dst->src[2];
         
     | 
| 
       14762 
15026 
     | 
    
         | 
| 
       14763 
15027 
     | 
    
         
             
                const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
         
     | 
| 
       14764 
     | 
    
         
            -
                if (!inplace && params->type ==  
     | 
| 
      
 15028 
     | 
    
         
            +
                if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       14765 
15029 
     | 
    
         
             
                    if (params->ith != 0) {
         
     | 
| 
       14766 
15030 
     | 
    
         
             
                        return;
         
     | 
| 
       14767 
15031 
     | 
    
         
             
                    }
         
     | 
| 
       14768 
15032 
     | 
    
         
             
                    memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
         
     | 
| 
       14769 
15033 
     | 
    
         
             
                    return;
         
     | 
| 
       14770 
15034 
     | 
    
         
             
                }
         
     | 
| 
       14771 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15035 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14772 
15036 
     | 
    
         
             
                    return;
         
     | 
| 
       14773 
15037 
     | 
    
         
             
                }
         
     | 
| 
       14774 
15038 
     | 
    
         | 
| 
         @@ -14850,7 +15114,7 @@ static void ggml_compute_forward_map_unary_f32( 
     | 
|
| 
       14850 
15114 
     | 
    
         | 
| 
       14851 
15115 
     | 
    
         
             
                GGML_ASSERT(ggml_are_same_shape(src0, dst));
         
     | 
| 
       14852 
15116 
     | 
    
         | 
| 
       14853 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15117 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14854 
15118 
     | 
    
         
             
                    return;
         
     | 
| 
       14855 
15119 
     | 
    
         
             
                }
         
     | 
| 
       14856 
15120 
     | 
    
         | 
| 
         @@ -14899,7 +15163,7 @@ static void ggml_compute_forward_map_binary_f32( 
     | 
|
| 
       14899 
15163 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       14900 
15164 
     | 
    
         
             
                assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
         
     | 
| 
       14901 
15165 
     | 
    
         | 
| 
       14902 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15166 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14903 
15167 
     | 
    
         
             
                    return;
         
     | 
| 
       14904 
15168 
     | 
    
         
             
                }
         
     | 
| 
       14905 
15169 
     | 
    
         | 
| 
         @@ -14948,7 +15212,7 @@ static void ggml_compute_forward_map_custom1_f32( 
     | 
|
| 
       14948 
15212 
     | 
    
         | 
| 
       14949 
15213 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       14950 
15214 
     | 
    
         | 
| 
       14951 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15215 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14952 
15216 
     | 
    
         
             
                    return;
         
     | 
| 
       14953 
15217 
     | 
    
         
             
                }
         
     | 
| 
       14954 
15218 
     | 
    
         | 
| 
         @@ -14967,7 +15231,7 @@ static void ggml_compute_forward_map_custom2_f32( 
     | 
|
| 
       14967 
15231 
     | 
    
         | 
| 
       14968 
15232 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       14969 
15233 
     | 
    
         | 
| 
       14970 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15234 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14971 
15235 
     | 
    
         
             
                    return;
         
     | 
| 
       14972 
15236 
     | 
    
         
             
                }
         
     | 
| 
       14973 
15237 
     | 
    
         | 
| 
         @@ -14987,7 +15251,7 @@ static void ggml_compute_forward_map_custom3_f32( 
     | 
|
| 
       14987 
15251 
     | 
    
         | 
| 
       14988 
15252 
     | 
    
         
             
                assert(params->ith == 0);
         
     | 
| 
       14989 
15253 
     | 
    
         | 
| 
       14990 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15254 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       14991 
15255 
     | 
    
         
             
                    return;
         
     | 
| 
       14992 
15256 
     | 
    
         
             
                }
         
     | 
| 
       14993 
15257 
     | 
    
         | 
| 
         @@ -15002,13 +15266,14 @@ static void ggml_compute_forward_map_custom1( 
     | 
|
| 
       15002 
15266 
     | 
    
         | 
| 
       15003 
15267 
     | 
    
         
             
                const struct ggml_tensor * a = dst->src[0];
         
     | 
| 
       15004 
15268 
     | 
    
         | 
| 
       15005 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15269 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       15006 
15270 
     | 
    
         
             
                    return;
         
     | 
| 
       15007 
15271 
     | 
    
         
             
                }
         
     | 
| 
       15008 
15272 
     | 
    
         | 
| 
       15009 
     | 
    
         
            -
                struct ggml_map_custom1_op_params  
     | 
| 
      
 15273 
     | 
    
         
            +
                struct ggml_map_custom1_op_params p;
         
     | 
| 
      
 15274 
     | 
    
         
            +
                memcpy(&p, dst->op_params, sizeof(p));
         
     | 
| 
       15010 
15275 
     | 
    
         | 
| 
       15011 
     | 
    
         
            -
                p 
     | 
| 
      
 15276 
     | 
    
         
            +
                p.fun(dst, a, params->ith, params->nth, p.userdata);
         
     | 
| 
       15012 
15277 
     | 
    
         
             
            }
         
     | 
| 
       15013 
15278 
     | 
    
         | 
| 
       15014 
15279 
     | 
    
         
             
            // ggml_compute_forward_map_custom2
         
     | 
| 
         @@ -15020,13 +15285,14 @@ static void ggml_compute_forward_map_custom2( 
     | 
|
| 
       15020 
15285 
     | 
    
         
             
                const struct ggml_tensor * a = dst->src[0];
         
     | 
| 
       15021 
15286 
     | 
    
         
             
                const struct ggml_tensor * b = dst->src[1];
         
     | 
| 
       15022 
15287 
     | 
    
         | 
| 
       15023 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15288 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       15024 
15289 
     | 
    
         
             
                    return;
         
     | 
| 
       15025 
15290 
     | 
    
         
             
                }
         
     | 
| 
       15026 
15291 
     | 
    
         | 
| 
       15027 
     | 
    
         
            -
                struct ggml_map_custom2_op_params  
     | 
| 
      
 15292 
     | 
    
         
            +
                struct ggml_map_custom2_op_params p;
         
     | 
| 
      
 15293 
     | 
    
         
            +
                memcpy(&p, dst->op_params, sizeof(p));
         
     | 
| 
       15028 
15294 
     | 
    
         | 
| 
       15029 
     | 
    
         
            -
                p 
     | 
| 
      
 15295 
     | 
    
         
            +
                p.fun(dst, a, b, params->ith, params->nth, p.userdata);
         
     | 
| 
       15030 
15296 
     | 
    
         
             
            }
         
     | 
| 
       15031 
15297 
     | 
    
         | 
| 
       15032 
15298 
     | 
    
         
             
            // ggml_compute_forward_map_custom3
         
     | 
| 
         @@ -15039,13 +15305,14 @@ static void ggml_compute_forward_map_custom3( 
     | 
|
| 
       15039 
15305 
     | 
    
         
             
                const struct ggml_tensor * b = dst->src[1];
         
     | 
| 
       15040 
15306 
     | 
    
         
             
                const struct ggml_tensor * c = dst->src[2];
         
     | 
| 
       15041 
15307 
     | 
    
         | 
| 
       15042 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15308 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       15043 
15309 
     | 
    
         
             
                    return;
         
     | 
| 
       15044 
15310 
     | 
    
         
             
                }
         
     | 
| 
       15045 
15311 
     | 
    
         | 
| 
       15046 
     | 
    
         
            -
                struct ggml_map_custom3_op_params  
     | 
| 
      
 15312 
     | 
    
         
            +
                struct ggml_map_custom3_op_params p;
         
     | 
| 
      
 15313 
     | 
    
         
            +
                memcpy(&p, dst->op_params, sizeof(p));
         
     | 
| 
       15047 
15314 
     | 
    
         | 
| 
       15048 
     | 
    
         
            -
                p 
     | 
| 
      
 15315 
     | 
    
         
            +
                p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
         
     | 
| 
       15049 
15316 
     | 
    
         
             
            }
         
     | 
| 
       15050 
15317 
     | 
    
         | 
| 
       15051 
15318 
     | 
    
         
             
            // ggml_compute_forward_cross_entropy_loss
         
     | 
| 
         @@ -15073,14 +15340,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32( 
     | 
|
| 
       15073 
15340 
     | 
    
         | 
| 
       15074 
15341 
     | 
    
         
             
                GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
         
     | 
| 
       15075 
15342 
     | 
    
         | 
| 
       15076 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15343 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT) {
         
     | 
| 
       15077 
15344 
     | 
    
         
             
                    if (ith == 0) {
         
     | 
| 
       15078 
15345 
     | 
    
         
             
                        memset(sums, 0, sizeof(float) * (nth + nth * nc));
         
     | 
| 
       15079 
15346 
     | 
    
         
             
                    }
         
     | 
| 
       15080 
15347 
     | 
    
         
             
                    return;
         
     | 
| 
       15081 
15348 
     | 
    
         
             
                }
         
     | 
| 
       15082 
15349 
     | 
    
         | 
| 
       15083 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15350 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       15084 
15351 
     | 
    
         
             
                    if (ith == 0) {
         
     | 
| 
       15085 
15352 
     | 
    
         
             
                        float * dp = (float *) dst->data;
         
     | 
| 
       15086 
15353 
     | 
    
         
             
                        ggml_vec_sum_f32(nth, dp, sums);
         
     | 
| 
         @@ -15195,7 +15462,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32( 
     | 
|
| 
       15195 
15462 
     | 
    
         
             
                const int64_t ith = params->ith;
         
     | 
| 
       15196 
15463 
     | 
    
         
             
                const int64_t nth = params->nth;
         
     | 
| 
       15197 
15464 
     | 
    
         | 
| 
       15198 
     | 
    
         
            -
                if (params->type ==  
     | 
| 
      
 15465 
     | 
    
         
            +
                if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         
     | 
| 
       15199 
15466 
     | 
    
         
             
                    return;
         
     | 
| 
       15200 
15467 
     | 
    
         
             
                }
         
     | 
| 
       15201 
15468 
     | 
    
         | 
| 
         @@ -15302,8 +15569,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm 
     | 
|
| 
       15302 
15569 
     | 
    
         
             
                if (skip_cpu) {
         
     | 
| 
       15303 
15570 
     | 
    
         
             
                    return;
         
     | 
| 
       15304 
15571 
     | 
    
         
             
                }
         
     | 
| 
       15305 
     | 
    
         
            -
                GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==  
     | 
| 
       15306 
     | 
    
         
            -
                GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==  
     | 
| 
      
 15572 
     | 
    
         
            +
                GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
         
     | 
| 
      
 15573 
     | 
    
         
            +
                GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
         
     | 
| 
       15307 
15574 
     | 
    
         
             
            #elif defined(GGML_USE_VULKAN)
         
     | 
| 
       15308 
15575 
     | 
    
         
             
                const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
         
     | 
| 
       15309 
15576 
     | 
    
         
             
            #ifdef GGML_VULKAN_CHECK_RESULTS
         
     | 
| 
         @@ -15314,8 +15581,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm 
     | 
|
| 
       15314 
15581 
     | 
    
         
             
                if (skip_cpu) {
         
     | 
| 
       15315 
15582 
     | 
    
         
             
                    return;
         
     | 
| 
       15316 
15583 
     | 
    
         
             
                }
         
     | 
| 
       15317 
     | 
    
         
            -
                GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==  
     | 
| 
       15318 
     | 
    
         
            -
                GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==  
     | 
| 
      
 15584 
     | 
    
         
            +
                GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
         
     | 
| 
      
 15585 
     | 
    
         
            +
                GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
         
     | 
| 
       15319 
15586 
     | 
    
         
             
            #endif // GGML_USE_CUBLAS
         
     | 
| 
       15320 
15587 
     | 
    
         | 
| 
       15321 
15588 
     | 
    
         
             
            #ifdef GGML_USE_SYCL
         
     | 
| 
         @@ -15529,6 +15796,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm 
     | 
|
| 
       15529 
15796 
     | 
    
         
             
                        {
         
     | 
| 
       15530 
15797 
     | 
    
         
             
                            ggml_compute_forward_pad(params, tensor);
         
     | 
| 
       15531 
15798 
     | 
    
         
             
                        } break;
         
     | 
| 
      
 15799 
     | 
    
         
            +
                    case GGML_OP_ARANGE:
         
     | 
| 
      
 15800 
     | 
    
         
            +
                        {
         
     | 
| 
      
 15801 
     | 
    
         
            +
                            ggml_compute_forward_arange(params, tensor);
         
     | 
| 
      
 15802 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 15803 
     | 
    
         
            +
                    case GGML_OP_TIMESTEP_EMBEDDING:
         
     | 
| 
      
 15804 
     | 
    
         
            +
                        {
         
     | 
| 
      
 15805 
     | 
    
         
            +
                            ggml_compute_forward_timestep_embedding(params, tensor);
         
     | 
| 
      
 15806 
     | 
    
         
            +
                        } break;
         
     | 
| 
       15532 
15807 
     | 
    
         
             
                    case GGML_OP_ARGSORT:
         
     | 
| 
       15533 
15808 
     | 
    
         
             
                        {
         
     | 
| 
       15534 
15809 
     | 
    
         
             
                            ggml_compute_forward_argsort(params, tensor);
         
     | 
| 
         @@ -16531,6 +16806,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor 
     | 
|
| 
       16531 
16806 
     | 
    
         
             
                        {
         
     | 
| 
       16532 
16807 
     | 
    
         
             
                            GGML_ASSERT(false); // TODO: not implemented
         
     | 
| 
       16533 
16808 
     | 
    
         
             
                        } break;
         
     | 
| 
      
 16809 
     | 
    
         
            +
                    case GGML_OP_ARANGE:
         
     | 
| 
      
 16810 
     | 
    
         
            +
                        {
         
     | 
| 
      
 16811 
     | 
    
         
            +
                            GGML_ASSERT(false); // TODO: not implemented
         
     | 
| 
      
 16812 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 16813 
     | 
    
         
            +
                    case GGML_OP_TIMESTEP_EMBEDDING:
         
     | 
| 
      
 16814 
     | 
    
         
            +
                        {
         
     | 
| 
      
 16815 
     | 
    
         
            +
                            GGML_ASSERT(false); // TODO: not implemented
         
     | 
| 
      
 16816 
     | 
    
         
            +
                        } break;
         
     | 
| 
       16534 
16817 
     | 
    
         
             
                    case GGML_OP_ARGSORT:
         
     | 
| 
       16535 
16818 
     | 
    
         
             
                        {
         
     | 
| 
       16536 
16819 
     | 
    
         
             
                            GGML_ASSERT(false); // TODO: not implemented
         
     | 
| 
         @@ -16861,7 +17144,7 @@ size_t ggml_graph_overhead(void) { 
     | 
|
| 
       16861 
17144 
     | 
    
         | 
| 
       16862 
17145 
     | 
    
         
             
            struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
         
     | 
| 
       16863 
17146 
     | 
    
         
             
                const size_t obj_size = ggml_graph_nbytes(size, grads);
         
     | 
| 
       16864 
     | 
    
         
            -
                struct ggml_object * obj = ggml_new_object(ctx,  
     | 
| 
      
 17147 
     | 
    
         
            +
                struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
         
     | 
| 
       16865 
17148 
     | 
    
         
             
                struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
         
     | 
| 
       16866 
17149 
     | 
    
         | 
| 
       16867 
17150 
     | 
    
         
             
                struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
         
     | 
| 
         @@ -17131,6 +17414,7 @@ struct ggml_compute_state { 
     | 
|
| 
       17131 
17414 
     | 
    
         
             
                ggml_thread_t thrd;
         
     | 
| 
       17132 
17415 
     | 
    
         
             
                int ith;
         
     | 
| 
       17133 
17416 
     | 
    
         
             
                struct ggml_compute_state_shared * shared;
         
     | 
| 
      
 17417 
     | 
    
         
            +
                enum ggml_status ec;
         
     | 
| 
       17134 
17418 
     | 
    
         
             
            };
         
     | 
| 
       17135 
17419 
     | 
    
         | 
| 
       17136 
17420 
     | 
    
         
             
            static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
         
     | 
| 
         @@ -17282,6 +17566,14 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { 
     | 
|
| 
       17282 
17566 
     | 
    
         
             
                        {
         
     | 
| 
       17283 
17567 
     | 
    
         
             
                            n_tasks = n_threads;
         
     | 
| 
       17284 
17568 
     | 
    
         
             
                        } break;
         
     | 
| 
      
 17569 
     | 
    
         
            +
                    case GGML_OP_ARANGE:
         
     | 
| 
      
 17570 
     | 
    
         
            +
                        {
         
     | 
| 
      
 17571 
     | 
    
         
            +
                            n_tasks = n_threads;
         
     | 
| 
      
 17572 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 17573 
     | 
    
         
            +
                    case GGML_OP_TIMESTEP_EMBEDDING:
         
     | 
| 
      
 17574 
     | 
    
         
            +
                        {
         
     | 
| 
      
 17575 
     | 
    
         
            +
                            n_tasks = n_threads;
         
     | 
| 
      
 17576 
     | 
    
         
            +
                        } break;
         
     | 
| 
       17285 
17577 
     | 
    
         
             
                    case GGML_OP_ARGSORT:
         
     | 
| 
       17286 
17578 
     | 
    
         
             
                        {
         
     | 
| 
       17287 
17579 
     | 
    
         
             
                            n_tasks = n_threads;
         
     | 
| 
         @@ -17311,29 +17603,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { 
     | 
|
| 
       17311 
17603 
     | 
    
         
             
                        } break;
         
     | 
| 
       17312 
17604 
     | 
    
         
             
                    case GGML_OP_MAP_CUSTOM1:
         
     | 
| 
       17313 
17605 
     | 
    
         
             
                        {
         
     | 
| 
       17314 
     | 
    
         
            -
                            struct ggml_map_custom1_op_params  
     | 
| 
       17315 
     | 
    
         
            -
                             
     | 
| 
      
 17606 
     | 
    
         
            +
                            struct ggml_map_custom1_op_params p;
         
     | 
| 
      
 17607 
     | 
    
         
            +
                            memcpy(&p, node->op_params, sizeof(p));
         
     | 
| 
      
 17608 
     | 
    
         
            +
                            if (p.n_tasks == GGML_N_TASKS_MAX) {
         
     | 
| 
       17316 
17609 
     | 
    
         
             
                                n_tasks = n_threads;
         
     | 
| 
       17317 
17610 
     | 
    
         
             
                            } else {
         
     | 
| 
       17318 
     | 
    
         
            -
                                n_tasks = MIN(p 
     | 
| 
      
 17611 
     | 
    
         
            +
                                n_tasks = MIN(p.n_tasks, n_threads);
         
     | 
| 
       17319 
17612 
     | 
    
         
             
                            }
         
     | 
| 
       17320 
17613 
     | 
    
         
             
                        } break;
         
     | 
| 
       17321 
17614 
     | 
    
         
             
                    case GGML_OP_MAP_CUSTOM2:
         
     | 
| 
       17322 
17615 
     | 
    
         
             
                        {
         
     | 
| 
       17323 
     | 
    
         
            -
                            struct ggml_map_custom2_op_params  
     | 
| 
       17324 
     | 
    
         
            -
                             
     | 
| 
      
 17616 
     | 
    
         
            +
                            struct ggml_map_custom2_op_params p;
         
     | 
| 
      
 17617 
     | 
    
         
            +
                            memcpy(&p, node->op_params, sizeof(p));
         
     | 
| 
      
 17618 
     | 
    
         
            +
                            if (p.n_tasks == GGML_N_TASKS_MAX) {
         
     | 
| 
       17325 
17619 
     | 
    
         
             
                                n_tasks = n_threads;
         
     | 
| 
       17326 
17620 
     | 
    
         
             
                            } else {
         
     | 
| 
       17327 
     | 
    
         
            -
                                n_tasks = MIN(p 
     | 
| 
      
 17621 
     | 
    
         
            +
                                n_tasks = MIN(p.n_tasks, n_threads);
         
     | 
| 
       17328 
17622 
     | 
    
         
             
                            }
         
     | 
| 
       17329 
17623 
     | 
    
         
             
                        } break;
         
     | 
| 
       17330 
17624 
     | 
    
         
             
                    case GGML_OP_MAP_CUSTOM3:
         
     | 
| 
       17331 
17625 
     | 
    
         
             
                        {
         
     | 
| 
       17332 
     | 
    
         
            -
                            struct ggml_map_custom3_op_params  
     | 
| 
       17333 
     | 
    
         
            -
                             
     | 
| 
      
 17626 
     | 
    
         
            +
                            struct ggml_map_custom3_op_params p;
         
     | 
| 
      
 17627 
     | 
    
         
            +
                            memcpy(&p, node->op_params, sizeof(p));
         
     | 
| 
      
 17628 
     | 
    
         
            +
                            if (p.n_tasks == GGML_N_TASKS_MAX) {
         
     | 
| 
       17334 
17629 
     | 
    
         
             
                                n_tasks = n_threads;
         
     | 
| 
       17335 
17630 
     | 
    
         
             
                            } else {
         
     | 
| 
       17336 
     | 
    
         
            -
                                n_tasks = MIN(p 
     | 
| 
      
 17631 
     | 
    
         
            +
                                n_tasks = MIN(p.n_tasks, n_threads);
         
     | 
| 
       17337 
17632 
     | 
    
         
             
                            }
         
     | 
| 
       17338 
17633 
     | 
    
         
             
                        } break;
         
     | 
| 
       17339 
17634 
     | 
    
         
             
                    case GGML_OP_CROSS_ENTROPY_LOSS:
         
     | 
| 
         @@ -17408,19 +17703,20 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17408 
17703 
     | 
    
         
             
                set_numa_thread_affinity(state->ith);
         
     | 
| 
       17409 
17704 
     | 
    
         | 
| 
       17410 
17705 
     | 
    
         
             
                int node_n     = -1;
         
     | 
| 
       17411 
     | 
    
         
            -
                int task_phase =  
     | 
| 
      
 17706 
     | 
    
         
            +
                int task_phase = GGML_TASK_TYPE_FINALIZE;
         
     | 
| 
       17412 
17707 
     | 
    
         | 
| 
       17413 
17708 
     | 
    
         
             
                while (true) {
         
     | 
| 
       17414 
17709 
     | 
    
         
             
                    if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
         
     | 
| 
       17415 
17710 
     | 
    
         
             
                        state->shared->node_n += 1;
         
     | 
| 
       17416 
     | 
    
         
            -
                         
     | 
| 
      
 17711 
     | 
    
         
            +
                        state->ec = GGML_STATUS_ABORTED;
         
     | 
| 
      
 17712 
     | 
    
         
            +
                        return 0;
         
     | 
| 
       17417 
17713 
     | 
    
         
             
                    }
         
     | 
| 
       17418 
17714 
     | 
    
         | 
| 
       17419 
17715 
     | 
    
         
             
                    if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
         
     | 
| 
       17420 
17716 
     | 
    
         
             
                        // all other threads are finished and spinning
         
     | 
| 
       17421 
17717 
     | 
    
         
             
                        // do finalize and init here so we don't have synchronize again
         
     | 
| 
       17422 
17718 
     | 
    
         
             
                        struct ggml_compute_params params = {
         
     | 
| 
       17423 
     | 
    
         
            -
                            /*.type  =*/  
     | 
| 
      
 17719 
     | 
    
         
            +
                            /*.type  =*/ GGML_TASK_TYPE_FINALIZE,
         
     | 
| 
       17424 
17720 
     | 
    
         
             
                            /*.ith   =*/ 0,
         
     | 
| 
       17425 
17721 
     | 
    
         
             
                            /*.nth   =*/ 0,
         
     | 
| 
       17426 
17722 
     | 
    
         
             
                            /*.wsize =*/ cplan->work_size,
         
     | 
| 
         @@ -17451,17 +17747,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17451 
17747 
     | 
    
         
             
                            if (n_tasks == 1) {
         
     | 
| 
       17452 
17748 
     | 
    
         
             
                                /* INIT */
         
     | 
| 
       17453 
17749 
     | 
    
         
             
                                if (GGML_OP_HAS_INIT[node->op]) {
         
     | 
| 
       17454 
     | 
    
         
            -
                                    params.type =  
     | 
| 
      
 17750 
     | 
    
         
            +
                                    params.type = GGML_TASK_TYPE_INIT;
         
     | 
| 
       17455 
17751 
     | 
    
         
             
                                    ggml_compute_forward(&params, node);
         
     | 
| 
       17456 
17752 
     | 
    
         
             
                                }
         
     | 
| 
       17457 
17753 
     | 
    
         | 
| 
       17458 
17754 
     | 
    
         
             
                                // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
         
     | 
| 
       17459 
17755 
     | 
    
         
             
                                // they do something more efficient than spinning (?)
         
     | 
| 
       17460 
     | 
    
         
            -
                                params.type =  
     | 
| 
      
 17756 
     | 
    
         
            +
                                params.type = GGML_TASK_TYPE_COMPUTE;
         
     | 
| 
       17461 
17757 
     | 
    
         
             
                                ggml_compute_forward(&params, node);
         
     | 
| 
       17462 
17758 
     | 
    
         | 
| 
       17463 
17759 
     | 
    
         
             
                                if (GGML_OP_HAS_FINALIZE[node->op]) {
         
     | 
| 
       17464 
     | 
    
         
            -
                                    params.type =  
     | 
| 
      
 17760 
     | 
    
         
            +
                                    params.type = GGML_TASK_TYPE_FINALIZE;
         
     | 
| 
       17465 
17761 
     | 
    
         
             
                                    ggml_compute_forward(&params, node);
         
     | 
| 
       17466 
17762 
     | 
    
         
             
                                }
         
     | 
| 
       17467 
17763 
     | 
    
         | 
| 
         @@ -17475,7 +17771,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17475 
17771 
     | 
    
         
             
                            }
         
     | 
| 
       17476 
17772 
     | 
    
         
             
                        }
         
     | 
| 
       17477 
17773 
     | 
    
         | 
| 
       17478 
     | 
    
         
            -
                        task_phase =  
     | 
| 
      
 17774 
     | 
    
         
            +
                        task_phase = GGML_TASK_TYPE_INIT;
         
     | 
| 
       17479 
17775 
     | 
    
         
             
                        atomic_store(&state->shared->n_active,  n_threads);
         
     | 
| 
       17480 
17776 
     | 
    
         
             
                        atomic_store(&state->shared->node_n,    node_n);
         
     | 
| 
       17481 
17777 
     | 
    
         
             
                        atomic_store(&state->shared->node_task, task_phase);
         
     | 
| 
         @@ -17492,7 +17788,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17492 
17788 
     | 
    
         
             
                    const int n_tasks = ggml_get_n_tasks(node, n_threads);
         
     | 
| 
       17493 
17789 
     | 
    
         | 
| 
       17494 
17790 
     | 
    
         
             
                    struct ggml_compute_params params = {
         
     | 
| 
       17495 
     | 
    
         
            -
                        /*.type  =*/  
     | 
| 
      
 17791 
     | 
    
         
            +
                        /*.type  =*/ GGML_TASK_TYPE_INIT,
         
     | 
| 
       17496 
17792 
     | 
    
         
             
                        /*.ith   =*/ state->ith,
         
     | 
| 
       17497 
17793 
     | 
    
         
             
                        /*.nth   =*/ n_tasks,
         
     | 
| 
       17498 
17794 
     | 
    
         
             
                        /*.wsize =*/ cplan->work_size,
         
     | 
| 
         @@ -17506,7 +17802,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17506 
17802 
     | 
    
         
             
                    }
         
     | 
| 
       17507 
17803 
     | 
    
         | 
| 
       17508 
17804 
     | 
    
         
             
                    if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
         
     | 
| 
       17509 
     | 
    
         
            -
                        task_phase =  
     | 
| 
      
 17805 
     | 
    
         
            +
                        task_phase = GGML_TASK_TYPE_COMPUTE;
         
     | 
| 
       17510 
17806 
     | 
    
         
             
                        atomic_store(&state->shared->n_active,  n_threads);
         
     | 
| 
       17511 
17807 
     | 
    
         
             
                        atomic_store(&state->shared->node_task, task_phase);
         
     | 
| 
       17512 
17808 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -17521,12 +17817,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17521 
17817 
     | 
    
         
             
                    }
         
     | 
| 
       17522 
17818 
     | 
    
         | 
| 
       17523 
17819 
     | 
    
         
             
                    if (state->ith < n_tasks) {
         
     | 
| 
       17524 
     | 
    
         
            -
                        params.type =  
     | 
| 
      
 17820 
     | 
    
         
            +
                        params.type = GGML_TASK_TYPE_COMPUTE;
         
     | 
| 
       17525 
17821 
     | 
    
         
             
                        ggml_compute_forward(&params, node);
         
     | 
| 
       17526 
17822 
     | 
    
         
             
                    }
         
     | 
| 
       17527 
17823 
     | 
    
         | 
| 
       17528 
17824 
     | 
    
         
             
                    if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
         
     | 
| 
       17529 
     | 
    
         
            -
                        task_phase =  
     | 
| 
      
 17825 
     | 
    
         
            +
                        task_phase = GGML_TASK_TYPE_FINALIZE;
         
     | 
| 
       17530 
17826 
     | 
    
         
             
                        atomic_store(&state->shared->n_active,  n_threads);
         
     | 
| 
       17531 
17827 
     | 
    
         
             
                        atomic_store(&state->shared->node_task, task_phase);
         
     | 
| 
       17532 
17828 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -17535,7 +17831,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { 
     | 
|
| 
       17535 
17831 
     | 
    
         
             
                    }
         
     | 
| 
       17536 
17832 
     | 
    
         
             
                }
         
     | 
| 
       17537 
17833 
     | 
    
         | 
| 
       17538 
     | 
    
         
            -
                return  
     | 
| 
      
 17834 
     | 
    
         
            +
                return 0;
         
     | 
| 
       17539 
17835 
     | 
    
         
             
            }
         
     | 
| 
       17540 
17836 
     | 
    
         | 
| 
       17541 
17837 
     | 
    
         
             
            struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
         
     | 
| 
         @@ -17731,7 +18027,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa 
     | 
|
| 
       17731 
18027 
     | 
    
         
             
                return cplan;
         
     | 
| 
       17732 
18028 
     | 
    
         
             
            }
         
     | 
| 
       17733 
18029 
     | 
    
         | 
| 
       17734 
     | 
    
         
            -
             
     | 
| 
      
 18030 
     | 
    
         
            +
            enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         
     | 
| 
       17735 
18031 
     | 
    
         
             
                {
         
     | 
| 
       17736 
18032 
     | 
    
         
             
                    GGML_ASSERT(cplan);
         
     | 
| 
       17737 
18033 
     | 
    
         
             
                    GGML_ASSERT(cplan->n_threads > 0);
         
     | 
| 
         @@ -17762,7 +18058,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { 
     | 
|
| 
       17762 
18058 
     | 
    
         
             
                    /*.n_threads               =*/ n_threads,
         
     | 
| 
       17763 
18059 
     | 
    
         
             
                    /*.n_active                =*/ n_threads,
         
     | 
| 
       17764 
18060 
     | 
    
         
             
                    /*.node_n                  =*/ -1,
         
     | 
| 
       17765 
     | 
    
         
            -
                    /*.node_task               =*/  
     | 
| 
      
 18061 
     | 
    
         
            +
                    /*.node_task               =*/ GGML_TASK_TYPE_FINALIZE,
         
     | 
| 
       17766 
18062 
     | 
    
         
             
                    /*.abort_callback          =*/ NULL,
         
     | 
| 
       17767 
18063 
     | 
    
         
             
                    /*.abort_callback_data     =*/ NULL,
         
     | 
| 
       17768 
18064 
     | 
    
         
             
                };
         
     | 
| 
         @@ -17775,6 +18071,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { 
     | 
|
| 
       17775 
18071 
     | 
    
         
             
                            .thrd   = 0,
         
     | 
| 
       17776 
18072 
     | 
    
         
             
                            .ith = j,
         
     | 
| 
       17777 
18073 
     | 
    
         
             
                            .shared = &state_shared,
         
     | 
| 
      
 18074 
     | 
    
         
            +
                            .ec = GGML_STATUS_SUCCESS,
         
     | 
| 
       17778 
18075 
     | 
    
         
             
                        };
         
     | 
| 
       17779 
18076 
     | 
    
         | 
| 
       17780 
18077 
     | 
    
         
             
                        const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
         
     | 
| 
         @@ -17785,12 +18082,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { 
     | 
|
| 
       17785 
18082 
     | 
    
         | 
| 
       17786 
18083 
     | 
    
         
             
                workers[0].ith = 0;
         
     | 
| 
       17787 
18084 
     | 
    
         
             
                workers[0].shared = &state_shared;
         
     | 
| 
      
 18085 
     | 
    
         
            +
                workers[0].ec = GGML_STATUS_SUCCESS;
         
     | 
| 
       17788 
18086 
     | 
    
         | 
| 
       17789 
18087 
     | 
    
         
             
                const int64_t perf_start_cycles  = ggml_perf_cycles();
         
     | 
| 
       17790 
18088 
     | 
    
         
             
                const int64_t perf_start_time_us = ggml_perf_time_us();
         
     | 
| 
       17791 
18089 
     | 
    
         | 
| 
       17792 
18090 
     | 
    
         
             
                // this is a work thread too
         
     | 
| 
       17793 
     | 
    
         
            -
                 
     | 
| 
      
 18091 
     | 
    
         
            +
                ggml_graph_compute_thread(&workers[0]);
         
     | 
| 
      
 18092 
     | 
    
         
            +
                enum ggml_status compute_status = workers[0].ec;
         
     | 
| 
       17794 
18093 
     | 
    
         | 
| 
       17795 
18094 
     | 
    
         
             
                // don't leave affinity set on the main thread
         
     | 
| 
       17796 
18095 
     | 
    
         
             
                clear_numa_thread_affinity();
         
     | 
| 
         @@ -17800,6 +18099,8 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { 
     | 
|
| 
       17800 
18099 
     | 
    
         
             
                    for (int j = 1; j < n_threads; j++) {
         
     | 
| 
       17801 
18100 
     | 
    
         
             
                        const int rc = ggml_thread_join(workers[j].thrd, NULL);
         
     | 
| 
       17802 
18101 
     | 
    
         
             
                        GGML_ASSERT(rc == 0);
         
     | 
| 
      
 18102 
     | 
    
         
            +
                        if (workers[j].ec != GGML_STATUS_SUCCESS)
         
     | 
| 
      
 18103 
     | 
    
         
            +
                            compute_status = workers[j].ec;
         
     | 
| 
       17803 
18104 
     | 
    
         
             
                    }
         
     | 
| 
       17804 
18105 
     | 
    
         
             
                }
         
     | 
| 
       17805 
18106 
     | 
    
         | 
| 
         @@ -17827,14 +18128,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { 
     | 
|
| 
       17827 
18128 
     | 
    
         
             
                return compute_status;
         
     | 
| 
       17828 
18129 
     | 
    
         
             
            }
         
     | 
| 
       17829 
18130 
     | 
    
         | 
| 
       17830 
     | 
    
         
            -
             
     | 
| 
      
 18131 
     | 
    
         
            +
            enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
         
     | 
| 
       17831 
18132 
     | 
    
         
             
                struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
         
     | 
| 
       17832 
18133 
     | 
    
         | 
| 
       17833 
     | 
    
         
            -
                struct ggml_object * obj = ggml_new_object(ctx,  
     | 
| 
      
 18134 
     | 
    
         
            +
                struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
         
     | 
| 
       17834 
18135 
     | 
    
         | 
| 
       17835 
18136 
     | 
    
         
             
                cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
         
     | 
| 
       17836 
18137 
     | 
    
         | 
| 
       17837 
     | 
    
         
            -
                ggml_graph_compute(cgraph, &cplan);
         
     | 
| 
      
 18138 
     | 
    
         
            +
                return ggml_graph_compute(cgraph, &cplan);
         
     | 
| 
       17838 
18139 
     | 
    
         
             
            }
         
     | 
| 
       17839 
18140 
     | 
    
         | 
| 
       17840 
18141 
     | 
    
         
             
            struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
         
     | 
| 
         @@ -18638,7 +18939,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18638 
18939 
     | 
    
         
             
                float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
         
     | 
| 
       18639 
18940 
     | 
    
         | 
| 
       18640 
18941 
     | 
    
         
             
                struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
         
     | 
| 
       18641 
     | 
    
         
            -
                struct ggml_object * obj = ggml_new_object(ctx,  
     | 
| 
      
 18942 
     | 
    
         
            +
                struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
         
     | 
| 
       18642 
18943 
     | 
    
         
             
                cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
         
     | 
| 
       18643 
18944 
     | 
    
         | 
| 
       18644 
18945 
     | 
    
         
             
                bool cancel = false;
         
     | 
| 
         @@ -18650,7 +18951,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18650 
18951 
     | 
    
         
             
                    if (callback) {
         
     | 
| 
       18651 
18952 
     | 
    
         
             
                        callback(callback_data, accum_step, &sched, &cancel);
         
     | 
| 
       18652 
18953 
     | 
    
         
             
                        if (cancel) {
         
     | 
| 
       18653 
     | 
    
         
            -
                            return  
     | 
| 
      
 18954 
     | 
    
         
            +
                            return GGML_OPT_RESULT_CANCEL;
         
     | 
| 
       18654 
18955 
     | 
    
         
             
                        }
         
     | 
| 
       18655 
18956 
     | 
    
         
             
                    }
         
     | 
| 
       18656 
18957 
     | 
    
         
             
                    // ggml_graph_reset  (gf);
         
     | 
| 
         @@ -18741,7 +19042,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18741 
19042 
     | 
    
         
             
                        if (callback) {
         
     | 
| 
       18742 
19043 
     | 
    
         
             
                            callback(callback_data, accum_step, &sched, &cancel);
         
     | 
| 
       18743 
19044 
     | 
    
         
             
                            if (cancel) {
         
     | 
| 
       18744 
     | 
    
         
            -
                                return  
     | 
| 
      
 19045 
     | 
    
         
            +
                                return GGML_OPT_RESULT_CANCEL;;
         
     | 
| 
       18745 
19046 
     | 
    
         
             
                            }
         
     | 
| 
       18746 
19047 
     | 
    
         
             
                        }
         
     | 
| 
       18747 
19048 
     | 
    
         
             
                        // ggml_graph_reset  (gf);
         
     | 
| 
         @@ -18758,7 +19059,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18758 
19059 
     | 
    
         
             
                    if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
         
     | 
| 
       18759 
19060 
     | 
    
         
             
                        GGML_PRINT_DEBUG("converged\n");
         
     | 
| 
       18760 
19061 
     | 
    
         | 
| 
       18761 
     | 
    
         
            -
                        return  
     | 
| 
      
 19062 
     | 
    
         
            +
                        return GGML_OPT_RESULT_OK;
         
     | 
| 
       18762 
19063 
     | 
    
         
             
                    }
         
     | 
| 
       18763 
19064 
     | 
    
         | 
| 
       18764 
19065 
     | 
    
         
             
                    // delta-based convergence test
         
     | 
| 
         @@ -18768,7 +19069,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18768 
19069 
     | 
    
         
             
                            const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
         
     | 
| 
       18769 
19070 
     | 
    
         | 
| 
       18770 
19071 
     | 
    
         
             
                            if (fabsf(rate) < params.delta) {
         
     | 
| 
       18771 
     | 
    
         
            -
                                return  
     | 
| 
      
 19072 
     | 
    
         
            +
                                return GGML_OPT_RESULT_OK;
         
     | 
| 
       18772 
19073 
     | 
    
         
             
                            }
         
     | 
| 
       18773 
19074 
     | 
    
         
             
                        }
         
     | 
| 
       18774 
19075 
     | 
    
         | 
| 
         @@ -18784,7 +19085,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18784 
19085 
     | 
    
         
             
                            ++n_no_improvement[0];
         
     | 
| 
       18785 
19086 
     | 
    
         | 
| 
       18786 
19087 
     | 
    
         
             
                            if (n_no_improvement[0] >= params.max_no_improvement) {
         
     | 
| 
       18787 
     | 
    
         
            -
                                return  
     | 
| 
      
 19088 
     | 
    
         
            +
                                return GGML_OPT_RESULT_OK;
         
     | 
| 
       18788 
19089 
     | 
    
         
             
                            }
         
     | 
| 
       18789 
19090 
     | 
    
         
             
                        }
         
     | 
| 
       18790 
19091 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -18802,7 +19103,7 @@ static enum ggml_opt_result ggml_opt_adam( 
     | 
|
| 
       18802 
19103 
     | 
    
         
             
                    }
         
     | 
| 
       18803 
19104 
     | 
    
         
             
                }
         
     | 
| 
       18804 
19105 
     | 
    
         | 
| 
       18805 
     | 
    
         
            -
                return  
     | 
| 
      
 19106 
     | 
    
         
            +
                return GGML_OPT_RESULT_DID_NOT_CONVERGE;
         
     | 
| 
       18806 
19107 
     | 
    
         
             
            }
         
     | 
| 
       18807 
19108 
     | 
    
         | 
| 
       18808 
19109 
     | 
    
         
             
            //
         
     | 
| 
         @@ -18883,7 +19184,7 @@ static enum ggml_opt_result linesearch_backtracking( 
     | 
|
| 
       18883 
19184 
     | 
    
         
             
                                float sched = 0;
         
     | 
| 
       18884 
19185 
     | 
    
         
             
                                callback(callback_data, accum_step, &sched, cancel);
         
     | 
| 
       18885 
19186 
     | 
    
         
             
                                if (*cancel) {
         
     | 
| 
       18886 
     | 
    
         
            -
                                    return  
     | 
| 
      
 19187 
     | 
    
         
            +
                                    return GGML_OPT_RESULT_CANCEL;
         
     | 
| 
       18887 
19188 
     | 
    
         
             
                                }
         
     | 
| 
       18888 
19189 
     | 
    
         
             
                            }
         
     | 
| 
       18889 
19190 
     | 
    
         
             
                            // ggml_graph_reset  (gf);
         
     | 
| 
         @@ -18956,7 +19257,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       18956 
19257 
     | 
    
         
             
                if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
         
     | 
| 
       18957 
19258 
     | 
    
         
             
                    params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
         
     | 
| 
       18958 
19259 
     | 
    
         
             
                    if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
         
     | 
| 
       18959 
     | 
    
         
            -
                        return  
     | 
| 
      
 19260 
     | 
    
         
            +
                        return GGML_OPT_RESULT_INVALID_WOLFE;
         
     | 
| 
       18960 
19261 
     | 
    
         
             
                    }
         
     | 
| 
       18961 
19262 
     | 
    
         
             
                }
         
     | 
| 
       18962 
19263 
     | 
    
         | 
| 
         @@ -18985,7 +19286,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       18985 
19286 
     | 
    
         
             
                }
         
     | 
| 
       18986 
19287 
     | 
    
         | 
| 
       18987 
19288 
     | 
    
         
             
                struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
         
     | 
| 
       18988 
     | 
    
         
            -
                struct ggml_object * obj = ggml_new_object(ctx,  
     | 
| 
      
 19289 
     | 
    
         
            +
                struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
         
     | 
| 
       18989 
19290 
     | 
    
         
             
                cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
         
     | 
| 
       18990 
19291 
     | 
    
         | 
| 
       18991 
19292 
     | 
    
         
             
                float * x  = opt->lbfgs.x->data;  // current parameters
         
     | 
| 
         @@ -19026,7 +19327,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19026 
19327 
     | 
    
         
             
                            float sched = 0;
         
     | 
| 
       19027 
19328 
     | 
    
         
             
                            callback(callback_data, accum_step, &sched, &cancel);
         
     | 
| 
       19028 
19329 
     | 
    
         
             
                            if (cancel) {
         
     | 
| 
       19029 
     | 
    
         
            -
                                return  
     | 
| 
      
 19330 
     | 
    
         
            +
                                return GGML_OPT_RESULT_CANCEL;
         
     | 
| 
       19030 
19331 
     | 
    
         
             
                            }
         
     | 
| 
       19031 
19332 
     | 
    
         
             
                        }
         
     | 
| 
       19032 
19333 
     | 
    
         
             
                        // ggml_graph_reset  (gf);
         
     | 
| 
         @@ -19054,7 +19355,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19054 
19355 
     | 
    
         | 
| 
       19055 
19356 
     | 
    
         
             
                // already optimized
         
     | 
| 
       19056 
19357 
     | 
    
         
             
                if (gnorm/xnorm <= params.lbfgs.eps) {
         
     | 
| 
       19057 
     | 
    
         
            -
                    return  
     | 
| 
      
 19358 
     | 
    
         
            +
                    return GGML_OPT_RESULT_OK;
         
     | 
| 
       19058 
19359 
     | 
    
         
             
                }
         
     | 
| 
       19059 
19360 
     | 
    
         | 
| 
       19060 
19361 
     | 
    
         
             
                if (opt->just_initialized) {
         
     | 
| 
         @@ -19099,7 +19400,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19099 
19400 
     | 
    
         
             
                    //       way to test and don't want to break something with so many changes lined up
         
     | 
| 
       19100 
19401 
     | 
    
         
             
                    ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
         
     | 
| 
       19101 
19402 
     | 
    
         
             
                    if (cancel) {
         
     | 
| 
       19102 
     | 
    
         
            -
                        return  
     | 
| 
      
 19403 
     | 
    
         
            +
                        return GGML_OPT_RESULT_CANCEL;
         
     | 
| 
       19103 
19404 
     | 
    
         
             
                    }
         
     | 
| 
       19104 
19405 
     | 
    
         | 
| 
       19105 
19406 
     | 
    
         
             
                    if (ls < 0) {
         
     | 
| 
         @@ -19122,7 +19423,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19122 
19423 
     | 
    
         
             
                    }
         
     | 
| 
       19123 
19424 
     | 
    
         
             
                    if (gnorm/xnorm <= params.lbfgs.eps) {
         
     | 
| 
       19124 
19425 
     | 
    
         
             
                        // converged
         
     | 
| 
       19125 
     | 
    
         
            -
                        return  
     | 
| 
      
 19426 
     | 
    
         
            +
                        return GGML_OPT_RESULT_OK;
         
     | 
| 
       19126 
19427 
     | 
    
         
             
                    }
         
     | 
| 
       19127 
19428 
     | 
    
         | 
| 
       19128 
19429 
     | 
    
         
             
                    // delta-based convergence test
         
     | 
| 
         @@ -19132,7 +19433,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19132 
19433 
     | 
    
         
             
                            const float rate = (pf[k[0]%params.past] - fx)/fx;
         
     | 
| 
       19133 
19434 
     | 
    
         | 
| 
       19134 
19435 
     | 
    
         
             
                            if (fabsf(rate) < params.delta) {
         
     | 
| 
       19135 
     | 
    
         
            -
                                return  
     | 
| 
      
 19436 
     | 
    
         
            +
                                return GGML_OPT_RESULT_OK;
         
     | 
| 
       19136 
19437 
     | 
    
         
             
                            }
         
     | 
| 
       19137 
19438 
     | 
    
         
             
                        }
         
     | 
| 
       19138 
19439 
     | 
    
         | 
| 
         @@ -19148,14 +19449,14 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19148 
19449 
     | 
    
         
             
                            n_no_improvement[0]++;
         
     | 
| 
       19149 
19450 
     | 
    
         | 
| 
       19150 
19451 
     | 
    
         
             
                            if (n_no_improvement[0] >= params.max_no_improvement) {
         
     | 
| 
       19151 
     | 
    
         
            -
                                return  
     | 
| 
      
 19452 
     | 
    
         
            +
                                return GGML_OPT_RESULT_OK;
         
     | 
| 
       19152 
19453 
     | 
    
         
             
                            }
         
     | 
| 
       19153 
19454 
     | 
    
         
             
                        }
         
     | 
| 
       19154 
19455 
     | 
    
         
             
                    }
         
     | 
| 
       19155 
19456 
     | 
    
         | 
| 
       19156 
19457 
     | 
    
         
             
                    if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
         
     | 
| 
       19157 
19458 
     | 
    
         
             
                        // reached the maximum number of iterations
         
     | 
| 
       19158 
     | 
    
         
            -
                        return  
     | 
| 
      
 19459 
     | 
    
         
            +
                        return GGML_OPT_RESULT_DID_NOT_CONVERGE;
         
     | 
| 
       19159 
19460 
     | 
    
         
             
                    }
         
     | 
| 
       19160 
19461 
     | 
    
         | 
| 
       19161 
19462 
     | 
    
         
             
                    // update vectors s and y:
         
     | 
| 
         @@ -19211,17 +19512,17 @@ static enum ggml_opt_result ggml_opt_lbfgs( 
     | 
|
| 
       19211 
19512 
     | 
    
         | 
| 
       19212 
19513 
     | 
    
         
             
                GGML_ASSERT(false && "lbfgs failed");
         
     | 
| 
       19213 
19514 
     | 
    
         | 
| 
       19214 
     | 
    
         
            -
                return  
     | 
| 
      
 19515 
     | 
    
         
            +
                return GGML_OPT_RESULT_DID_NOT_CONVERGE;
         
     | 
| 
       19215 
19516 
     | 
    
         
             
            }
         
     | 
| 
       19216 
19517 
     | 
    
         | 
| 
       19217 
19518 
     | 
    
         
             
            struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
         
     | 
| 
       19218 
19519 
     | 
    
         
             
                struct ggml_opt_params result;
         
     | 
| 
       19219 
19520 
     | 
    
         | 
| 
       19220 
19521 
     | 
    
         
             
                switch (type) {
         
     | 
| 
       19221 
     | 
    
         
            -
                    case  
     | 
| 
      
 19522 
     | 
    
         
            +
                    case GGML_OPT_TYPE_ADAM:
         
     | 
| 
       19222 
19523 
     | 
    
         
             
                        {
         
     | 
| 
       19223 
19524 
     | 
    
         
             
                            result = (struct ggml_opt_params) {
         
     | 
| 
       19224 
     | 
    
         
            -
                                .type       =  
     | 
| 
      
 19525 
     | 
    
         
            +
                                .type       = GGML_OPT_TYPE_ADAM,
         
     | 
| 
       19225 
19526 
     | 
    
         
             
                                .graph_size = GGML_DEFAULT_GRAPH_SIZE,
         
     | 
| 
       19226 
19527 
     | 
    
         
             
                                .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
         
     | 
| 
       19227 
19528 
     | 
    
         
             
                                .past       = 0,
         
     | 
| 
         @@ -19249,10 +19550,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) { 
     | 
|
| 
       19249 
19550 
     | 
    
         
             
                                },
         
     | 
| 
       19250 
19551 
     | 
    
         
             
                            };
         
     | 
| 
       19251 
19552 
     | 
    
         
             
                        } break;
         
     | 
| 
       19252 
     | 
    
         
            -
                    case  
     | 
| 
      
 19553 
     | 
    
         
            +
                    case GGML_OPT_TYPE_LBFGS:
         
     | 
| 
       19253 
19554 
     | 
    
         
             
                        {
         
     | 
| 
       19254 
19555 
     | 
    
         
             
                            result = (struct ggml_opt_params) {
         
     | 
| 
       19255 
     | 
    
         
            -
                                .type       =  
     | 
| 
      
 19556 
     | 
    
         
            +
                                .type       = GGML_OPT_TYPE_LBFGS,
         
     | 
| 
       19256 
19557 
     | 
    
         
             
                                .graph_size = GGML_DEFAULT_GRAPH_SIZE,
         
     | 
| 
       19257 
19558 
     | 
    
         
             
                                .n_threads  = 1,
         
     | 
| 
       19258 
19559 
     | 
    
         
             
                                .past       = 0,
         
     | 
| 
         @@ -19297,12 +19598,12 @@ GGML_API void ggml_opt_init( 
     | 
|
| 
       19297 
19598 
     | 
    
         
             
                opt->just_initialized = true;
         
     | 
| 
       19298 
19599 
     | 
    
         
             
                if (opt->ctx == NULL) {
         
     | 
| 
       19299 
19600 
     | 
    
         
             
                    struct ggml_init_params ctx_opt_params;
         
     | 
| 
       19300 
     | 
    
         
            -
                    if (opt->params.type ==  
     | 
| 
      
 19601 
     | 
    
         
            +
                    if (opt->params.type == GGML_OPT_TYPE_ADAM) {
         
     | 
| 
       19301 
19602 
     | 
    
         
             
                        ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
         
     | 
| 
       19302 
19603 
     | 
    
         
             
                        if (opt->params.past > 0) {
         
     | 
| 
       19303 
19604 
     | 
    
         
             
                            ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
         
     | 
| 
       19304 
19605 
     | 
    
         
             
                        }
         
     | 
| 
       19305 
     | 
    
         
            -
                    } else if (opt->params.type ==  
     | 
| 
      
 19606 
     | 
    
         
            +
                    } else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
         
     | 
| 
       19306 
19607 
     | 
    
         
             
                        ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
         
     | 
| 
       19307 
19608 
     | 
    
         
             
                        if (opt->params.past > 0) {
         
     | 
| 
       19308 
19609 
     | 
    
         
             
                            ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
         
     | 
| 
         @@ -19314,7 +19615,7 @@ GGML_API void ggml_opt_init( 
     | 
|
| 
       19314 
19615 
     | 
    
         
             
                    opt->ctx = ggml_init(ctx_opt_params);
         
     | 
| 
       19315 
19616 
     | 
    
         
             
                }
         
     | 
| 
       19316 
19617 
     | 
    
         
             
                switch (opt->params.type) {
         
     | 
| 
       19317 
     | 
    
         
            -
                    case  
     | 
| 
      
 19618 
     | 
    
         
            +
                    case GGML_OPT_TYPE_ADAM:
         
     | 
| 
       19318 
19619 
     | 
    
         
             
                        {
         
     | 
| 
       19319 
19620 
     | 
    
         
             
                            opt->adam.g  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
         
     | 
| 
       19320 
19621 
     | 
    
         
             
                            opt->adam.m  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
         
     | 
| 
         @@ -19328,7 +19629,7 @@ GGML_API void ggml_opt_init( 
     | 
|
| 
       19328 
19629 
     | 
    
         
             
                                ggml_set_zero(opt->adam.pf);
         
     | 
| 
       19329 
19630 
     | 
    
         
             
                            }
         
     | 
| 
       19330 
19631 
     | 
    
         
             
                        } break;
         
     | 
| 
       19331 
     | 
    
         
            -
                    case  
     | 
| 
      
 19632 
     | 
    
         
            +
                    case GGML_OPT_TYPE_LBFGS:
         
     | 
| 
       19332 
19633 
     | 
    
         
             
                        {
         
     | 
| 
       19333 
19634 
     | 
    
         
             
                            opt->lbfgs.x  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
         
     | 
| 
       19334 
19635 
     | 
    
         
             
                            opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
         
     | 
| 
         @@ -19372,13 +19673,13 @@ enum ggml_opt_result ggml_opt( 
     | 
|
| 
       19372 
19673 
     | 
    
         | 
| 
       19373 
19674 
     | 
    
         
             
                    ctx = ggml_init(params_ctx);
         
     | 
| 
       19374 
19675 
     | 
    
         
             
                    if (ctx == NULL) {
         
     | 
| 
       19375 
     | 
    
         
            -
                        return  
     | 
| 
      
 19676 
     | 
    
         
            +
                        return GGML_OPT_RESULT_NO_CONTEXT;
         
     | 
| 
       19376 
19677 
     | 
    
         
             
                    }
         
     | 
| 
       19377 
19678 
     | 
    
         | 
| 
       19378 
19679 
     | 
    
         
             
                    free_ctx = true;
         
     | 
| 
       19379 
19680 
     | 
    
         
             
                }
         
     | 
| 
       19380 
19681 
     | 
    
         | 
| 
       19381 
     | 
    
         
            -
                enum ggml_opt_result result =  
     | 
| 
      
 19682 
     | 
    
         
            +
                enum ggml_opt_result result = GGML_OPT_RESULT_OK;
         
     | 
| 
       19382 
19683 
     | 
    
         | 
| 
       19383 
19684 
     | 
    
         
             
                struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
         
     | 
| 
       19384 
19685 
     | 
    
         | 
| 
         @@ -19417,14 +19718,14 @@ enum ggml_opt_result ggml_opt_resume_g( 
     | 
|
| 
       19417 
19718 
     | 
    
         
             
                    void * callback_data) {
         
     | 
| 
       19418 
19719 
     | 
    
         | 
| 
       19419 
19720 
     | 
    
         
             
                // build forward + backward compute graphs
         
     | 
| 
       19420 
     | 
    
         
            -
                enum ggml_opt_result result =  
     | 
| 
      
 19721 
     | 
    
         
            +
                enum ggml_opt_result result = GGML_OPT_RESULT_OK;
         
     | 
| 
       19421 
19722 
     | 
    
         | 
| 
       19422 
19723 
     | 
    
         
             
                switch (opt->params.type) {
         
     | 
| 
       19423 
     | 
    
         
            -
                    case  
     | 
| 
      
 19724 
     | 
    
         
            +
                    case GGML_OPT_TYPE_ADAM:
         
     | 
| 
       19424 
19725 
     | 
    
         
             
                        {
         
     | 
| 
       19425 
19726 
     | 
    
         
             
                            result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
         
     | 
| 
       19426 
19727 
     | 
    
         
             
                        } break;
         
     | 
| 
       19427 
     | 
    
         
            -
                    case  
     | 
| 
      
 19728 
     | 
    
         
            +
                    case GGML_OPT_TYPE_LBFGS:
         
     | 
| 
       19428 
19729 
     | 
    
         
             
                        {
         
     | 
| 
       19429 
19730 
     | 
    
         
             
                            result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
         
     | 
| 
       19430 
19731 
     | 
    
         
             
                        } break;
         
     | 
| 
         @@ -19461,8 +19762,10 @@ void ggml_quantize_init(enum ggml_type type) { 
     | 
|
| 
       19461 
19762 
     | 
    
         
             
                switch (type) {
         
     | 
| 
       19462 
19763 
     | 
    
         
             
                    case GGML_TYPE_IQ2_XXS:
         
     | 
| 
       19463 
19764 
     | 
    
         
             
                    case GGML_TYPE_IQ2_XS:
         
     | 
| 
      
 19765 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
       19464 
19766 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:   iq2xs_init_impl(type); break;
         
     | 
| 
       19465 
19767 
     | 
    
         
             
                    case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
         
     | 
| 
      
 19768 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
         
     | 
| 
       19466 
19769 
     | 
    
         
             
                    default: // nothing
         
     | 
| 
       19467 
19770 
     | 
    
         
             
                        break;
         
     | 
| 
       19468 
19771 
     | 
    
         
             
                }
         
     | 
| 
         @@ -19737,6 +20040,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i 
     | 
|
| 
       19737 
20040 
     | 
    
         
             
                            result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
         
     | 
| 
       19738 
20041 
     | 
    
         
             
                            GGML_ASSERT(result == row_size * nrows);
         
     | 
| 
       19739 
20042 
     | 
    
         
             
                        } break;
         
     | 
| 
      
 20043 
     | 
    
         
            +
                    case GGML_TYPE_IQ3_S:
         
     | 
| 
      
 20044 
     | 
    
         
            +
                        {
         
     | 
| 
      
 20045 
     | 
    
         
            +
                            GGML_ASSERT(start % QK_K == 0);
         
     | 
| 
      
 20046 
     | 
    
         
            +
                            GGML_ASSERT(start % n_per_row == 0);
         
     | 
| 
      
 20047 
     | 
    
         
            +
                            size_t start_row = start / n_per_row;
         
     | 
| 
      
 20048 
     | 
    
         
            +
                            size_t row_size = ggml_row_size(type, n_per_row);
         
     | 
| 
      
 20049 
     | 
    
         
            +
                            result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
         
     | 
| 
      
 20050 
     | 
    
         
            +
                            GGML_ASSERT(result == row_size * nrows);
         
     | 
| 
      
 20051 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 20052 
     | 
    
         
            +
                    case GGML_TYPE_IQ2_S:
         
     | 
| 
      
 20053 
     | 
    
         
            +
                        {
         
     | 
| 
      
 20054 
     | 
    
         
            +
                            GGML_ASSERT(start % QK_K == 0);
         
     | 
| 
      
 20055 
     | 
    
         
            +
                            GGML_ASSERT(start % n_per_row == 0);
         
     | 
| 
      
 20056 
     | 
    
         
            +
                            size_t start_row = start / n_per_row;
         
     | 
| 
      
 20057 
     | 
    
         
            +
                            size_t row_size = ggml_row_size(type, n_per_row);
         
     | 
| 
      
 20058 
     | 
    
         
            +
                            result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
         
     | 
| 
      
 20059 
     | 
    
         
            +
                            GGML_ASSERT(result == row_size * nrows);
         
     | 
| 
      
 20060 
     | 
    
         
            +
                        } break;
         
     | 
| 
       19740 
20061 
     | 
    
         
             
                    case GGML_TYPE_IQ1_S:
         
     | 
| 
       19741 
20062 
     | 
    
         
             
                        {
         
     | 
| 
       19742 
20063 
     | 
    
         
             
                            GGML_ASSERT(start % QK_K == 0);
         
     | 
| 
         @@ -19747,6 +20068,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i 
     | 
|
| 
       19747 
20068 
     | 
    
         
             
                            GGML_ASSERT(result == row_size * nrows);
         
     | 
| 
       19748 
20069 
     | 
    
         
             
                        } break;
         
     | 
| 
       19749 
20070 
     | 
    
         
             
                    case GGML_TYPE_IQ4_NL:
         
     | 
| 
      
 20071 
     | 
    
         
            +
            #if QK_K == 64
         
     | 
| 
      
 20072 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 20073 
     | 
    
         
            +
            #endif
         
     | 
| 
       19750 
20074 
     | 
    
         
             
                        {
         
     | 
| 
       19751 
20075 
     | 
    
         
             
                            GGML_ASSERT(start % QK4_NL == 0);
         
     | 
| 
       19752 
20076 
     | 
    
         
             
                            GGML_ASSERT(start % n_per_row == 0);
         
     | 
| 
         @@ -19755,6 +20079,17 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i 
     | 
|
| 
       19755 
20079 
     | 
    
         
             
                            result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
         
     | 
| 
       19756 
20080 
     | 
    
         
             
                            GGML_ASSERT(result == row_size * nrows);
         
     | 
| 
       19757 
20081 
     | 
    
         
             
                        } break;
         
     | 
| 
      
 20082 
     | 
    
         
            +
            #if QK_K != 64
         
     | 
| 
      
 20083 
     | 
    
         
            +
                    case GGML_TYPE_IQ4_XS:
         
     | 
| 
      
 20084 
     | 
    
         
            +
                        {
         
     | 
| 
      
 20085 
     | 
    
         
            +
                            GGML_ASSERT(start % QK_K == 0);
         
     | 
| 
      
 20086 
     | 
    
         
            +
                            GGML_ASSERT(start % n_per_row == 0);
         
     | 
| 
      
 20087 
     | 
    
         
            +
                            size_t start_row = start / n_per_row;
         
     | 
| 
      
 20088 
     | 
    
         
            +
                            size_t row_size = ggml_row_size(type, n_per_row);
         
     | 
| 
      
 20089 
     | 
    
         
            +
                            result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
         
     | 
| 
      
 20090 
     | 
    
         
            +
                            GGML_ASSERT(result == row_size * nrows);
         
     | 
| 
      
 20091 
     | 
    
         
            +
                        } break;
         
     | 
| 
      
 20092 
     | 
    
         
            +
            #endif
         
     | 
| 
       19758 
20093 
     | 
    
         
             
                    case GGML_TYPE_F16:
         
     | 
| 
       19759 
20094 
     | 
    
         
             
                        {
         
     | 
| 
       19760 
20095 
     | 
    
         
             
                            size_t elemsize = sizeof(ggml_fp16_t);
         
     |