npm - whisper.rn - Versions diffs - 0.3.2 → 0.3.4 - Mend

whisper.rn 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/cpp/ggml.c +5349 -5349
package/cpp/ggml.h +810 -810
package/cpp/whisper.cpp +518 -518
package/cpp/whisper.h +2 -2
package/lib/commonjs/NativeRNWhisper.js.map +1 -1
package/lib/commonjs/index.js +3 -0
package/lib/commonjs/index.js.map +1 -1
package/lib/module/NativeRNWhisper.js +3 -0
package/lib/module/NativeRNWhisper.js.map +1 -1
package/lib/module/index.js +3 -0
package/lib/module/index.js.map +1 -1
package/lib/typescript/NativeRNWhisper.d.ts +1 -3
package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
package/lib/typescript/index.d.ts.map +1 -1
package/package.json +1 -1
package/src/NativeRNWhisper.ts +2 -3
package/src/index.ts +2 -1
package/whisper-rn.podspec +1 -1

package/cpp/ggml.h CHANGED

@@ -32,22 +32,22 @@
 // For example, here we define the function: f(x) = a*x^2 + b
 //
 //   {
-//       struct ggml_init_params params = {
+//       struct wsp_ggml_init_params params = {
 //           .mem_size   = 16*1024*1024,
 //           .mem_buffer = NULL,
 //       };
 //
 //       // memory allocation happens here
-//       struct ggml_context * ctx = ggml_init(params);
+//       struct wsp_ggml_context * ctx = wsp_ggml_init(params);
 //
-//       struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
+//       struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
 //
-//       ggml_set_param(ctx, x); // x is an input variable
+//       wsp_ggml_set_param(ctx, x); // x is an input variable
 //
-//       struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-//       struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-//       struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
-//       struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
+//       struct wsp_ggml_tensor * a  = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
+//       struct wsp_ggml_tensor * b  = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
+//       struct wsp_ggml_tensor * x2 = wsp_ggml_mul(ctx, x, x);
+//       struct wsp_ggml_tensor * f  = wsp_ggml_add(ctx, wsp_ggml_mul(ctx, a, x2), b);
 //
 //       ...
 //   }
@@ -58,33 +58,33 @@
 //   {
 //       ...
 //
-//       struct ggml_cgraph gf = ggml_build_forward(f);
+//       struct wsp_ggml_cgraph gf = wsp_ggml_build_forward(f);
 //
 //       // set the input variable and parameter values
-//       ggml_set_f32(x, 2.0f);
-//       ggml_set_f32(a, 3.0f);
-//       ggml_set_f32(b, 4.0f);
+//       wsp_ggml_set_f32(x, 2.0f);
+//       wsp_ggml_set_f32(a, 3.0f);
+//       wsp_ggml_set_f32(b, 4.0f);
 //
-//       ggml_graph_compute(ctx0, &gf);
+//       wsp_ggml_graph_compute(ctx0, &gf);
 //
-//       printf("f = %f\n", ggml_get_f32_1d(f, 0));
+//       printf("f = %f\n", wsp_ggml_get_f32_1d(f, 0));
 //
 //       ...
 //   }
 //
-// The actual computation is performed in the ggml_graph_compute() function.
+// The actual computation is performed in the wsp_ggml_graph_compute() function.
 //
-// The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
-// ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
+// The wsp_ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
+// wsp_ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
 // in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
-// and after defining the computation graph, call the ggml_used_mem() function to find out how much memory was
+// and after defining the computation graph, call the wsp_ggml_used_mem() function to find out how much memory was
 // actually needed.
 //
-// The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
+// The wsp_ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
 // differentiation and optimization algorithms.
 //
 // The described approach allows to define the function graph once and then compute its forward or backward graphs
-// multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way
+// multiple times. All computations will use the same memory buffer allocated in the wsp_ggml_init() function. This way
 // the user can avoid the memory allocation overhead at runtime.
 //
 // The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
@@ -95,9 +95,9 @@
 // clear that the library needs to support more complex operations. The way to support these operations is not clear
 // yet, but a few examples are demonstrated in the following operations:
 //
-//   - ggml_permute()
-//   - ggml_conv_1d_1s()
-//   - ggml_conv_1d_2s()
+//   - wsp_ggml_permute()
+//   - wsp_ggml_conv_1d_1s()
+//   - wsp_ggml_conv_1d_2s()
 //
 // For each tensor operator, the library implements a forward and backward computation function. The forward function
 // computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
@@ -108,20 +108,20 @@
 //   https://www.youtube.com/watch?v=wG_nF1awSSY
 //
 //
-// ## Tensor data (struct ggml_tensor)
+// ## Tensor data (struct wsp_ggml_tensor)
 //
-// The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
+// The tensors are stored in memory via the wsp_ggml_tensor struct. The structure provides information about the size of
 // the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
 // pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
 //
 //   {
-//       struct ggml_tensor * c = ggml_add(ctx, a, b);
+//       struct wsp_ggml_tensor * c = wsp_ggml_add(ctx, a, b);
 //
 //       assert(c->src[0] == a);
 //       assert(c->src[1] == b);
 //   }
 //
-// The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
+// The multi-dimensional tensors are stored in row-major order. The wsp_ggml_tensor struct contains fields for the
 // number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows
 // to store tensors that are not contiguous in memory, which is useful for operations such as transposition and
 // permutation. All tensor operations have to take the stride into account and not assume that the tensor is
@@ -130,7 +130,7 @@
 // The data of the tensor is accessed via the "data" pointer. For example:
 //
 //   {
-//       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
+//       struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, 2, 3);
 //
 //       // a[1, 2] = 1.0f;
 //       *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
@@ -141,9 +141,9 @@
 //       ...
 //   }
 //
-// Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used.
+// Alternatively, there are helper functions, such as wsp_ggml_get_f32_1d() and wsp_ggml_set_f32_1d() that can be used.
 //
-// ## The matrix multiplication operator (ggml_mul_mat)
+// ## The matrix multiplication operator (wsp_ggml_mul_mat)
 //
 // TODO
 //
@@ -169,44 +169,44 @@
 //
 //
-#ifdef GGML_SHARED
+#ifdef WSP_GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
-#        ifdef GGML_BUILD
-#            define GGML_API __declspec(dllexport)
+#        ifdef WSP_GGML_BUILD
+#            define WSP_GGML_API __declspec(dllexport)
 #        else
-#            define GGML_API __declspec(dllimport)
+#            define WSP_GGML_API __declspec(dllimport)
 #        endif
 #    else
-#        define GGML_API __attribute__ ((visibility ("default")))
+#        define WSP_GGML_API __attribute__ ((visibility ("default")))
 #    endif
 #else
-#    define GGML_API
+#    define WSP_GGML_API
 #endif
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
-#define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define WSP_GGML_FILE_MAGIC   0x67676d6c // "ggml"
+#define WSP_GGML_FILE_VERSION 1
-#define GGML_QNT_VERSION        2    // bump this on quantization format changes
-#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
+#define WSP_GGML_QNT_VERSION        2    // bump this on quantization format changes
+#define WSP_GGML_QNT_VERSION_FACTOR 1000 // do not change this
-#define GGML_MAX_DIMS          4
-#define GGML_MAX_NODES         4096
-#define GGML_MAX_PARAMS        256
-#define GGML_MAX_CONTEXTS      64
-#define GGML_MAX_OPT           4
-#define GGML_MAX_NAME          48
-#define GGML_DEFAULT_N_THREADS 4
+#define WSP_GGML_MAX_DIMS          4
+#define WSP_GGML_MAX_NODES         4096
+#define WSP_GGML_MAX_PARAMS        256
+#define WSP_GGML_MAX_CONTEXTS      64
+#define WSP_GGML_MAX_OPT           4
+#define WSP_GGML_MAX_NAME          48
+#define WSP_GGML_DEFAULT_N_THREADS 4
-#define GGML_UNUSED(x) (void)(x)
+#define WSP_GGML_UNUSED(x) (void)(x)
-#define GGML_ASSERT(x) \
+#define WSP_GGML_ASSERT(x) \
     do { \
         if (!(x)) { \
-            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
+            fprintf(stderr, "WSP_GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
             abort(); \
         } \
     } while (0)
@@ -216,24 +216,24 @@
 //
 // example:
 //
-//    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
-//    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb);
+//    WSP_GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+//    WSP_GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb);
 //
-#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
+#define WSP_GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
     const type prefix##0 = (pointer)->array[0]; \
-    GGML_UNUSED(prefix##0);
-#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
-    GGML_TENSOR_LOCALS_1    (type, prefix, pointer, array) \
+    WSP_GGML_UNUSED(prefix##0);
+#define WSP_GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
+    WSP_GGML_TENSOR_LOCALS_1    (type, prefix, pointer, array) \
     const type prefix##1 = (pointer)->array[1]; \
-    GGML_UNUSED(prefix##1);
-#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
-    GGML_TENSOR_LOCALS_2    (type, prefix, pointer, array) \
+    WSP_GGML_UNUSED(prefix##1);
+#define WSP_GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
+    WSP_GGML_TENSOR_LOCALS_2    (type, prefix, pointer, array) \
     const type prefix##2 = (pointer)->array[2]; \
-    GGML_UNUSED(prefix##2);
-#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
-    GGML_TENSOR_LOCALS_3  (type, prefix, pointer, array) \
+    WSP_GGML_UNUSED(prefix##2);
+#define WSP_GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
+    WSP_GGML_TENSOR_LOCALS_3  (type, prefix, pointer, array) \
     const type prefix##3 = (pointer)->array[3]; \
-    GGML_UNUSED(prefix##3);
+    WSP_GGML_UNUSED(prefix##3);
 #ifdef  __cplusplus
 extern "C" {
@@ -241,182 +241,182 @@ extern "C" {
 #ifdef __ARM_NEON
     // we use the built-in 16-bit float type
-    typedef __fp16 ggml_fp16_t;
+    typedef __fp16 wsp_ggml_fp16_t;
 #else
-    typedef uint16_t ggml_fp16_t;
+    typedef uint16_t wsp_ggml_fp16_t;
 #endif
     // convert FP16 <-> FP32
-    GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t x);
-    GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
-    GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
-    GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
-    struct ggml_object;
-    struct ggml_context;
-    enum ggml_type {
-        GGML_TYPE_F32  = 0,
-        GGML_TYPE_F16  = 1,
-        GGML_TYPE_Q4_0 = 2,
-        GGML_TYPE_Q4_1 = 3,
-        // GGML_TYPE_Q4_2 = 4, support has been removed
-        // GGML_TYPE_Q4_3 (5) support has been removed
-        GGML_TYPE_Q5_0 = 6,
-        GGML_TYPE_Q5_1 = 7,
-        GGML_TYPE_Q8_0 = 8,
-        GGML_TYPE_Q8_1 = 9,
+    WSP_GGML_API float       wsp_ggml_fp16_to_fp32(wsp_ggml_fp16_t x);
+    WSP_GGML_API wsp_ggml_fp16_t wsp_ggml_fp32_to_fp16(float x);
+    WSP_GGML_API void wsp_ggml_fp16_to_fp32_row(const wsp_ggml_fp16_t * x, float * y, size_t n);
+    WSP_GGML_API void wsp_ggml_fp32_to_fp16_row(const float * x, wsp_ggml_fp16_t * y, size_t n);
+    struct wsp_ggml_object;
+    struct wsp_ggml_context;
+    enum wsp_ggml_type {
+        WSP_GGML_TYPE_F32  = 0,
+        WSP_GGML_TYPE_F16  = 1,
+        WSP_GGML_TYPE_Q4_0 = 2,
+        WSP_GGML_TYPE_Q4_1 = 3,
+        // WSP_GGML_TYPE_Q4_2 = 4, support has been removed
+        // WSP_GGML_TYPE_Q4_3 (5) support has been removed
+        WSP_GGML_TYPE_Q5_0 = 6,
+        WSP_GGML_TYPE_Q5_1 = 7,
+        WSP_GGML_TYPE_Q8_0 = 8,
+        WSP_GGML_TYPE_Q8_1 = 9,
         // k-quantizations
-        GGML_TYPE_Q2_K = 10,
-        GGML_TYPE_Q3_K = 11,
-        GGML_TYPE_Q4_K = 12,
-        GGML_TYPE_Q5_K = 13,
-        GGML_TYPE_Q6_K = 14,
-        GGML_TYPE_Q8_K = 15,
-        GGML_TYPE_I8,
-        GGML_TYPE_I16,
-        GGML_TYPE_I32,
-        GGML_TYPE_COUNT,
+        WSP_GGML_TYPE_Q2_K = 10,
+        WSP_GGML_TYPE_Q3_K = 11,
+        WSP_GGML_TYPE_Q4_K = 12,
+        WSP_GGML_TYPE_Q5_K = 13,
+        WSP_GGML_TYPE_Q6_K = 14,
+        WSP_GGML_TYPE_Q8_K = 15,
+        WSP_GGML_TYPE_I8,
+        WSP_GGML_TYPE_I16,
+        WSP_GGML_TYPE_I32,
+        WSP_GGML_TYPE_COUNT,
     };
-    enum ggml_backend {
-        GGML_BACKEND_CPU = 0,
-        GGML_BACKEND_GPU = 10,
-        GGML_BACKEND_GPU_SPLIT = 20,
+    enum wsp_ggml_backend {
+        WSP_GGML_BACKEND_CPU = 0,
+        WSP_GGML_BACKEND_GPU = 10,
+        WSP_GGML_BACKEND_GPU_SPLIT = 20,
     };
     // model file types
-    enum ggml_ftype {
-        GGML_FTYPE_UNKNOWN     = -1,
-        GGML_FTYPE_ALL_F32     = 0,
-        GGML_FTYPE_MOSTLY_F16  = 1,  // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0 = 2,  // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_1 = 3,  // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-        GGML_FTYPE_MOSTLY_Q8_0 = 7,  // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q5_0 = 8,  // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q5_1 = 9,  // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
+    enum wsp_ggml_ftype {
+        WSP_GGML_FTYPE_UNKNOWN     = -1,
+        WSP_GGML_FTYPE_ALL_F32     = 0,
+        WSP_GGML_FTYPE_MOSTLY_F16  = 1,  // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q4_0 = 2,  // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q4_1 = 3,  // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
+        WSP_GGML_FTYPE_MOSTLY_Q8_0 = 7,  // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q5_0 = 8,  // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q5_1 = 9,  // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
     };
     // available tensor operations:
-    enum ggml_op {
-        GGML_OP_NONE = 0,
-        GGML_OP_DUP,
-        GGML_OP_ADD,
-        GGML_OP_ADD1,
-        GGML_OP_ACC,
-        GGML_OP_SUB,
-        GGML_OP_MUL,
-        GGML_OP_DIV,
-        GGML_OP_SQR,
-        GGML_OP_SQRT,
-        GGML_OP_LOG,
-        GGML_OP_SUM,
-        GGML_OP_SUM_ROWS,
-        GGML_OP_MEAN,
-        GGML_OP_ARGMAX,
-        GGML_OP_REPEAT,
-        GGML_OP_REPEAT_BACK,
-        GGML_OP_ABS,
-        GGML_OP_SGN,
-        GGML_OP_NEG,
-        GGML_OP_STEP,
-        GGML_OP_TANH,
-        GGML_OP_ELU,
-        GGML_OP_RELU,
-        GGML_OP_GELU,
-        GGML_OP_GELU_QUICK,
-        GGML_OP_SILU,
-        GGML_OP_SILU_BACK,
-        GGML_OP_NORM, // normalize
-        GGML_OP_RMS_NORM,
-        GGML_OP_RMS_NORM_BACK,
-        GGML_OP_MUL_MAT,
-        GGML_OP_OUT_PROD,
-        GGML_OP_SCALE,
-        GGML_OP_SET,
-        GGML_OP_CPY,
-        GGML_OP_CONT,
-        GGML_OP_RESHAPE,
-        GGML_OP_VIEW,
-        GGML_OP_PERMUTE,
-        GGML_OP_TRANSPOSE,
-        GGML_OP_GET_ROWS,
-        GGML_OP_GET_ROWS_BACK,
-        GGML_OP_DIAG,
-        GGML_OP_DIAG_MASK_INF,
-        GGML_OP_DIAG_MASK_ZERO,
-        GGML_OP_SOFT_MAX,
-        GGML_OP_SOFT_MAX_BACK,
-        GGML_OP_ROPE,
-        GGML_OP_ROPE_BACK,
-        GGML_OP_ALIBI,
-        GGML_OP_CLAMP,
-        GGML_OP_CONV_1D,
-        GGML_OP_CONV_2D,
-        GGML_OP_FLASH_ATTN,
-        GGML_OP_FLASH_FF,
-        GGML_OP_FLASH_ATTN_BACK,
-        GGML_OP_WIN_PART,
-        GGML_OP_WIN_UNPART,
-        GGML_OP_MAP_UNARY,
-        GGML_OP_MAP_BINARY,
-        GGML_OP_MAP_CUSTOM1,
-        GGML_OP_MAP_CUSTOM2,
-        GGML_OP_MAP_CUSTOM3,
-        GGML_OP_CROSS_ENTROPY_LOSS,
-        GGML_OP_CROSS_ENTROPY_LOSS_BACK,
-        GGML_OP_COUNT,
+    enum wsp_ggml_op {
+        WSP_GGML_OP_NONE = 0,
+        WSP_GGML_OP_DUP,
+        WSP_GGML_OP_ADD,
+        WSP_GGML_OP_ADD1,
+        WSP_GGML_OP_ACC,
+        WSP_GGML_OP_SUB,
+        WSP_GGML_OP_MUL,
+        WSP_GGML_OP_DIV,
+        WSP_GGML_OP_SQR,
+        WSP_GGML_OP_SQRT,
+        WSP_GGML_OP_LOG,
+        WSP_GGML_OP_SUM,
+        WSP_GGML_OP_SUM_ROWS,
+        WSP_GGML_OP_MEAN,
+        WSP_GGML_OP_ARGMAX,
+        WSP_GGML_OP_REPEAT,
+        WSP_GGML_OP_REPEAT_BACK,
+        WSP_GGML_OP_ABS,
+        WSP_GGML_OP_SGN,
+        WSP_GGML_OP_NEG,
+        WSP_GGML_OP_STEP,
+        WSP_GGML_OP_TANH,
+        WSP_GGML_OP_ELU,
+        WSP_GGML_OP_RELU,
+        WSP_GGML_OP_GELU,
+        WSP_GGML_OP_GELU_QUICK,
+        WSP_GGML_OP_SILU,
+        WSP_GGML_OP_SILU_BACK,
+        WSP_GGML_OP_NORM, // normalize
+        WSP_GGML_OP_RMS_NORM,
+        WSP_GGML_OP_RMS_NORM_BACK,
+        WSP_GGML_OP_MUL_MAT,
+        WSP_GGML_OP_OUT_PROD,
+        WSP_GGML_OP_SCALE,
+        WSP_GGML_OP_SET,
+        WSP_GGML_OP_CPY,
+        WSP_GGML_OP_CONT,
+        WSP_GGML_OP_RESHAPE,
+        WSP_GGML_OP_VIEW,
+        WSP_GGML_OP_PERMUTE,
+        WSP_GGML_OP_TRANSPOSE,
+        WSP_GGML_OP_GET_ROWS,
+        WSP_GGML_OP_GET_ROWS_BACK,
+        WSP_GGML_OP_DIAG,
+        WSP_GGML_OP_DIAG_MASK_INF,
+        WSP_GGML_OP_DIAG_MASK_ZERO,
+        WSP_GGML_OP_SOFT_MAX,
+        WSP_GGML_OP_SOFT_MAX_BACK,
+        WSP_GGML_OP_ROPE,
+        WSP_GGML_OP_ROPE_BACK,
+        WSP_GGML_OP_ALIBI,
+        WSP_GGML_OP_CLAMP,
+        WSP_GGML_OP_CONV_1D,
+        WSP_GGML_OP_CONV_2D,
+        WSP_GGML_OP_FLASH_ATTN,
+        WSP_GGML_OP_FLASH_FF,
+        WSP_GGML_OP_FLASH_ATTN_BACK,
+        WSP_GGML_OP_WIN_PART,
+        WSP_GGML_OP_WIN_UNPART,
+        WSP_GGML_OP_MAP_UNARY,
+        WSP_GGML_OP_MAP_BINARY,
+        WSP_GGML_OP_MAP_CUSTOM1,
+        WSP_GGML_OP_MAP_CUSTOM2,
+        WSP_GGML_OP_MAP_CUSTOM3,
+        WSP_GGML_OP_CROSS_ENTROPY_LOSS,
+        WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+        WSP_GGML_OP_COUNT,
     };
     // ggml object
-    struct ggml_object {
+    struct wsp_ggml_object {
         size_t offs;
         size_t size;
-        struct ggml_object * next;
+        struct wsp_ggml_object * next;
         char padding[8];
     };
-    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
+    static const size_t WSP_GGML_OBJECT_SIZE = sizeof(struct wsp_ggml_object);
     // n-dimensional tensor
-    struct ggml_tensor {
-        enum ggml_type    type;
-        enum ggml_backend backend;
+    struct wsp_ggml_tensor {
+        enum wsp_ggml_type    type;
+        enum wsp_ggml_backend backend;
         int     n_dims;
-        int64_t ne[GGML_MAX_DIMS]; // number of elements
-        size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+        int64_t ne[WSP_GGML_MAX_DIMS]; // number of elements
+        size_t  nb[WSP_GGML_MAX_DIMS]; // stride in bytes:
                                    // nb[0] = sizeof(type)
                                    // nb[1] = nb[0]   * ne[0] + padding
                                    // nb[i] = nb[i-1] * ne[i-1]
         // compute data
-        enum ggml_op op;
+        enum wsp_ggml_op op;
         bool is_param;
-        struct ggml_tensor * grad;
-        struct ggml_tensor * src0;
-        struct ggml_tensor * src1;
-        struct ggml_tensor * opt[GGML_MAX_OPT];
+        struct wsp_ggml_tensor * grad;
+        struct wsp_ggml_tensor * src0;
+        struct wsp_ggml_tensor * src1;
+        struct wsp_ggml_tensor * opt[WSP_GGML_MAX_OPT];
         // thread scheduling
         int n_tasks;
@@ -428,27 +428,27 @@ extern "C" {
         void * data;
-        char name[GGML_MAX_NAME];
+        char name[WSP_GGML_MAX_NAME];
         void * extra; // extra things e.g. for ggml-cuda.cu
         char padding[4];
     };
-    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
+    static const size_t WSP_GGML_TENSOR_SIZE = sizeof(struct wsp_ggml_tensor);
     // computation graph
-    struct ggml_cgraph {
+    struct wsp_ggml_cgraph {
         int n_nodes;
         int n_leafs;
         int n_threads;
         size_t work_size;
-        struct ggml_tensor * work;
+        struct wsp_ggml_tensor * work;
-        struct ggml_tensor * nodes[GGML_MAX_NODES];
-        struct ggml_tensor * grads[GGML_MAX_NODES];
-        struct ggml_tensor * leafs[GGML_MAX_NODES];
+        struct wsp_ggml_tensor * nodes[WSP_GGML_MAX_NODES];
+        struct wsp_ggml_tensor * grads[WSP_GGML_MAX_NODES];
+        struct wsp_ggml_tensor * leafs[WSP_GGML_MAX_NODES];
         // performance
         int     perf_runs;
@@ -457,13 +457,13 @@ extern "C" {
     };
     // scratch buffer
-    struct ggml_scratch {
+    struct wsp_ggml_scratch {
         size_t offs;
         size_t size;
         void * data;
     };
-    struct ggml_init_params {
+    struct wsp_ggml_init_params {
         // memory pool
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
@@ -475,14 +475,14 @@ extern "C" {
     // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
     // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
-    enum ggml_task_type {
-        GGML_TASK_INIT = 0,
-        GGML_TASK_COMPUTE,
-        GGML_TASK_FINALIZE,
+    enum wsp_ggml_task_type {
+        WSP_GGML_TASK_INIT = 0,
+        WSP_GGML_TASK_COMPUTE,
+        WSP_GGML_TASK_FINALIZE,
     };
-    struct ggml_compute_params {
-        enum ggml_task_type type;
+    struct wsp_ggml_compute_params {
+        enum wsp_ggml_task_type type;
         // ith = thread index, nth = number of threads
         int ith, nth;
@@ -494,506 +494,506 @@ extern "C" {
     // misc
-    GGML_API void    ggml_time_init(void); // call this once at the beginning of the program
-    GGML_API int64_t ggml_time_ms(void);
-    GGML_API int64_t ggml_time_us(void);
-    GGML_API int64_t ggml_cycles(void);
-    GGML_API int64_t ggml_cycles_per_ms(void);
+    WSP_GGML_API void    wsp_ggml_time_init(void); // call this once at the beginning of the program
+    WSP_GGML_API int64_t wsp_ggml_time_ms(void);
+    WSP_GGML_API int64_t wsp_ggml_time_us(void);
+    WSP_GGML_API int64_t wsp_ggml_cycles(void);
+    WSP_GGML_API int64_t wsp_ggml_cycles_per_ms(void);
-    GGML_API void    ggml_numa_init(void); // call once for better performance on NUMA systems
-    GGML_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node
+    WSP_GGML_API void    wsp_ggml_numa_init(void); // call once for better performance on NUMA systems
+    WSP_GGML_API bool    wsp_ggml_is_numa(void); // true if init detected that system has >1 NUMA node
-    GGML_API void    ggml_print_object (const struct ggml_object * obj);
-    GGML_API void    ggml_print_objects(const struct ggml_context * ctx);
+    WSP_GGML_API void    wsp_ggml_print_object (const struct wsp_ggml_object * obj);
+    WSP_GGML_API void    wsp_ggml_print_objects(const struct wsp_ggml_context * ctx);
-    GGML_API int64_t ggml_nelements   (const struct ggml_tensor * tensor);
-    GGML_API int64_t ggml_nrows       (const struct ggml_tensor * tensor);
-    GGML_API size_t  ggml_nbytes      (const struct ggml_tensor * tensor);
-    GGML_API size_t  ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
+    WSP_GGML_API int64_t wsp_ggml_nelements   (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API int64_t wsp_ggml_nrows       (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API size_t  wsp_ggml_nbytes      (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API size_t  wsp_ggml_nbytes_split(const struct wsp_ggml_tensor * tensor, int nrows_split);
-    GGML_API int     ggml_blck_size (enum ggml_type type);
-    GGML_API size_t  ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
-    GGML_API float   ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
+    WSP_GGML_API int     wsp_ggml_blck_size (enum wsp_ggml_type type);
+    WSP_GGML_API size_t  wsp_ggml_type_size (enum wsp_ggml_type type); // size in bytes for all elements in a block
+    WSP_GGML_API float   wsp_ggml_type_sizef(enum wsp_ggml_type type); // wsp_ggml_type_size()/wsp_ggml_blck_size() as float
-    GGML_API const char * ggml_type_name(enum ggml_type type);
-    GGML_API const char * ggml_op_name  (enum ggml_op   op);
+    WSP_GGML_API const char * wsp_ggml_type_name(enum wsp_ggml_type type);
+    WSP_GGML_API const char * wsp_ggml_op_name  (enum wsp_ggml_op   op);
-    GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor);
+    WSP_GGML_API size_t  wsp_ggml_element_size(const struct wsp_ggml_tensor * tensor);
-    GGML_API bool    ggml_is_quantized(enum ggml_type type);
+    WSP_GGML_API bool    wsp_ggml_is_quantized(enum wsp_ggml_type type);
     // TODO: temporary until model loading of ggml examples is refactored
-    GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
+    WSP_GGML_API enum wsp_ggml_type wsp_ggml_ftype_to_wsp_ggml_type(enum wsp_ggml_ftype ftype);
-    GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
-    GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
-    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_transposed(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_contiguous(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_permuted  (const struct wsp_ggml_tensor * tensor);
     // use this to compute the memory overhead of a tensor
-    GGML_API size_t ggml_tensor_overhead(void);
+    WSP_GGML_API size_t wsp_ggml_tensor_overhead(void);
     // main
-    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
-    GGML_API void                  ggml_free(struct ggml_context * ctx);
+    WSP_GGML_API struct wsp_ggml_context * wsp_ggml_init(struct wsp_ggml_init_params params);
+    WSP_GGML_API void                  wsp_ggml_free(struct wsp_ggml_context * ctx);
-    GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
+    WSP_GGML_API size_t  wsp_ggml_used_mem(const struct wsp_ggml_context * ctx);
-    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
-    GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
+    WSP_GGML_API size_t  wsp_ggml_set_scratch (struct wsp_ggml_context * ctx, struct wsp_ggml_scratch scratch);
+    WSP_GGML_API void    wsp_ggml_set_no_alloc(struct wsp_ggml_context * ctx, bool no_alloc);
-    GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx);
-    GGML_API size_t  ggml_get_mem_size       (const struct ggml_context * ctx);
-    GGML_API size_t  ggml_get_max_tensor_size(const struct ggml_context * ctx);
+    WSP_GGML_API void *  wsp_ggml_get_mem_buffer     (const struct wsp_ggml_context * ctx);
+    WSP_GGML_API size_t  wsp_ggml_get_mem_size       (const struct wsp_ggml_context * ctx);
+    WSP_GGML_API size_t  wsp_ggml_get_max_tensor_size(const struct wsp_ggml_context * ctx);
-    GGML_API struct ggml_tensor * ggml_new_tensor(
-            struct ggml_context * ctx,
-            enum   ggml_type type,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor(
+            struct wsp_ggml_context * ctx,
+            enum   wsp_ggml_type type,
             int    n_dims,
             const int64_t *ne);
-    GGML_API struct ggml_tensor * ggml_new_tensor_1d(
-            struct ggml_context * ctx,
-            enum   ggml_type type,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_1d(
+            struct wsp_ggml_context * ctx,
+            enum   wsp_ggml_type type,
             int64_t ne0);
-    GGML_API struct ggml_tensor * ggml_new_tensor_2d(
-            struct ggml_context * ctx,
-            enum   ggml_type type,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_2d(
+            struct wsp_ggml_context * ctx,
+            enum   wsp_ggml_type type,
             int64_t ne0,
             int64_t ne1);
-    GGML_API struct ggml_tensor * ggml_new_tensor_3d(
-            struct ggml_context * ctx,
-            enum   ggml_type type,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_3d(
+            struct wsp_ggml_context * ctx,
+            enum   wsp_ggml_type type,
             int64_t ne0,
             int64_t ne1,
             int64_t ne2);
-    GGML_API struct ggml_tensor * ggml_new_tensor_4d(
-            struct ggml_context * ctx,
-            enum   ggml_type type,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_4d(
+            struct wsp_ggml_context * ctx,
+            enum   wsp_ggml_type type,
             int64_t ne0,
             int64_t ne1,
             int64_t ne2,
             int64_t ne3);
-    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
-    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_i32(struct wsp_ggml_context * ctx, int32_t value);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_f32(struct wsp_ggml_context * ctx, float value);
-    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_dup_tensor (struct wsp_ggml_context * ctx, const struct wsp_ggml_tensor * src);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_tensor(struct wsp_ggml_context * ctx, const struct wsp_ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_tensor(struct wsp_ggml_context * ctx, const char * name);
-    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
-    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_zero(struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_i32 (struct wsp_ggml_tensor * tensor, int32_t value);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_f32 (struct wsp_ggml_tensor * tensor, float value);
-    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
+    WSP_GGML_API int32_t wsp_ggml_get_i32_1d(const struct wsp_ggml_tensor * tensor, int i);
+    WSP_GGML_API void    wsp_ggml_set_i32_1d(const struct wsp_ggml_tensor * tensor, int i, int32_t value);
-    GGML_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
+    WSP_GGML_API float   wsp_ggml_get_f32_1d(const struct wsp_ggml_tensor * tensor, int i);
+    WSP_GGML_API void    wsp_ggml_set_f32_1d(const struct wsp_ggml_tensor * tensor, int i, float value);
-    GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
-    GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
+    WSP_GGML_API void *  wsp_ggml_get_data    (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API float * wsp_ggml_get_data_f32(const struct wsp_ggml_tensor * tensor);
-    GGML_API const char *         ggml_get_name(const struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
-    GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
+    WSP_GGML_API const char *         wsp_ggml_get_name(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_name(struct wsp_ggml_tensor * tensor, const char * name);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_format_name(struct wsp_ggml_tensor * tensor, const char * fmt, ...);
     //
     // operations on tensors with backpropagation
     //
-    GGML_API struct ggml_tensor * ggml_dup(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_add(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_add_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_add1(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_add1_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_acc(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_dup(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_acc(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_acc_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_acc_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_sub(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sub(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_sub_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sub_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_mul(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_mul_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_div(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_div(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_div_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_div_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_sqr(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqr(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_sqr_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqr_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_sqrt(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqrt(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_sqrt_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqrt_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_log(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_log(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_log_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_log_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // return scalar
-    GGML_API struct ggml_tensor * ggml_sum(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sum(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
-    GGML_API struct ggml_tensor * ggml_sum_rows(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sum_rows(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // mean along rows
-    GGML_API struct ggml_tensor * ggml_mean(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // argmax along rows
-    GGML_API struct ggml_tensor * ggml_argmax(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_argmax(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // if a is the same shape as b, and a is not a parameter, return a
     // otherwise, return a new tensor: repeat(a) to fit in b
-    GGML_API struct ggml_tensor * ggml_repeat(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_repeat(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_repeat_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_repeat_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_abs(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_abs(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_abs_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_abs_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_sgn(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sgn(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_sgn_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sgn_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_neg(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_neg(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_neg_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_neg_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_step(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_step(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_step_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_step_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_tanh(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tanh(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_tanh_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tanh_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_elu(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_elu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_elu_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_elu_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_relu(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_relu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_relu_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_relu_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // TODO: double-check this computation is correct
-    GGML_API struct ggml_tensor * ggml_gelu(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_gelu_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_gelu_quick(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_quick(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_quick_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_silu(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_silu_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // a - x
     // b - dy
-    GGML_API struct ggml_tensor * ggml_silu_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // normalize along rows
     // TODO: eps is hardcoded to 1e-5 for now
-    GGML_API struct ggml_tensor * ggml_norm(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_norm_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_rms_norm(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // a - x
     // b - dy
-    GGML_API struct ggml_tensor * ggml_rms_norm_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // A: n columns, m rows
     // B: n columns, p rows  (i.e. we transpose it internally)
     // result is m columns, p rows
-    GGML_API struct ggml_tensor * ggml_mul_mat(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_mat(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // A: m columns, n rows,
     // B: p columns, n rows,
     // result is m columns, p rows
-    GGML_API struct ggml_tensor * ggml_out_prod(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_out_prod(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     //
     // operations on tensors without backpropagation
     //
-    GGML_API struct ggml_tensor * ggml_scale(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_scale_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // b -> view(a,offset,nb1,nb2,nb3), return modified a
-    GGML_API struct ggml_tensor * ggml_set(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
             size_t                offset);
     // b -> view(a,offset,nb1,nb2,nb3), return view(a)
-    GGML_API struct ggml_tensor * ggml_set_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_set_1d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_1d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_set_1d_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_1d_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                offset);
     // b -> view(a,offset,nb1,nb2,nb3), return modified a
-    GGML_API struct ggml_tensor * ggml_set_2d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_2d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                nb1,
             size_t                offset);
     // b -> view(a,offset,nb1,nb2,nb3), return view(a)
-    GGML_API struct ggml_tensor * ggml_set_2d_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_2d_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             size_t                nb1,
             size_t                offset);
     // a -> b, return view(b)
-    GGML_API struct ggml_tensor * ggml_cpy(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cpy(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // make contiguous
-    GGML_API struct ggml_tensor * ggml_cont(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cont(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // return view(a), b specifies the new shape
     // TODO: when we start computing gradient, make a copy instead of view
-    GGML_API struct ggml_tensor * ggml_reshape(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // return view(a)
     // TODO: when we start computing gradient, make a copy instead of view
-    GGML_API struct ggml_tensor * ggml_reshape_1d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_1d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0);
-    GGML_API struct ggml_tensor * ggml_reshape_2d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_2d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             int64_t               ne1);
     // return view(a)
     // TODO: when we start computing gradient, make a copy instead of view
-    GGML_API struct ggml_tensor * ggml_reshape_3d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_3d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             int64_t               ne1,
             int64_t               ne2);
-    GGML_API struct ggml_tensor * ggml_reshape_4d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_4d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             int64_t               ne1,
             int64_t               ne2,
             int64_t               ne3);
     // offset in bytes
-    GGML_API struct ggml_tensor * ggml_view_1d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_1d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_view_2d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_2d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             int64_t               ne1,
             size_t                nb1, // row stride in bytes
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_view_3d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_3d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             int64_t               ne1,
             int64_t               ne2,
@@ -1001,9 +1001,9 @@ extern "C" {
             size_t                nb2, // slice stride in bytes
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_view_4d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_4d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int64_t               ne0,
             int64_t               ne1,
             int64_t               ne2,
@@ -1013,95 +1013,95 @@ extern "C" {
             size_t                nb3,
             size_t                offset);
-    GGML_API struct ggml_tensor * ggml_permute(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_permute(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   axis0,
             int                   axis1,
             int                   axis2,
             int                   axis3);
-    // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
-    GGML_API struct ggml_tensor * ggml_transpose(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    // alias for wsp_ggml_permute(ctx, a, 1, 0, 2, 3)
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_transpose(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_get_rows(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
-    GGML_API struct ggml_tensor * ggml_get_rows_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
+            struct wsp_ggml_tensor  * c);
-    GGML_API struct ggml_tensor * ggml_diag(
-        struct ggml_context     * ctx,
-        struct ggml_tensor      * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag(
+        struct wsp_ggml_context     * ctx,
+        struct wsp_ggml_tensor      * a);
     // set elements above the diagonal to -INF
-    GGML_API struct ggml_tensor * ggml_diag_mask_inf(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_inf(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past);
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_inf_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past);
     // set elements above the diagonal to 0
-    GGML_API struct ggml_tensor * ggml_diag_mask_zero(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_zero(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past);
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_zero_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past);
-    GGML_API struct ggml_tensor * ggml_soft_max(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_soft_max_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
-    GGML_API struct ggml_tensor * ggml_soft_max_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
     // rotary position embedding
     // if (mode & 1) != 0, skip n_past elements
     // if (mode & 2) != 0, GPT-NeoX style
     // if (mode & 4) != 0, ChatGLM style
     // TODO: avoid creating a new tensor every time
-    GGML_API struct ggml_tensor * ggml_rope(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past,
             int                   n_dims,
             int                   mode,
             int                   n_ctx);
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_rope_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past,
             int                   n_dims,
             int                   mode,
@@ -1109,42 +1109,42 @@ extern "C" {
     // rotary position embedding backward, i.e. compute dx from dy
     // a - dy
-    GGML_API struct ggml_tensor * ggml_rope_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past,
             int                   n_dims,
             int                   mode);
     // alibi position embedding
     // in-place, returns view(a)
-    struct ggml_tensor * ggml_alibi(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    struct wsp_ggml_tensor * wsp_ggml_alibi(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   n_past,
             int                   n_head,
             float                 bias_max);
     // clamp
     // in-place, returns view(a)
-    struct ggml_tensor * ggml_clamp(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    struct wsp_ggml_tensor * wsp_ggml_clamp(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             float                 min,
             float                 max);
-    GGML_API struct ggml_tensor * ggml_conv_1d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             int                   s0,  // stride
             int                   p0,  // padding
             int                   d0); // dilation
-    GGML_API struct ggml_tensor * ggml_conv_2d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_2d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             int                   s0,
             int                   s1,
             int                   p0,
@@ -1153,36 +1153,36 @@ extern "C" {
             int                   d1);
     // conv_1d with padding = half
-    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+    // alias for wsp_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+    WSP_GGML_API struct wsp_ggml_tensor* wsp_ggml_conv_1d_ph(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
             int                   s,
             int                   d);
-    GGML_API struct ggml_tensor * ggml_flash_attn(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * q,
-            struct ggml_tensor  * k,
-            struct ggml_tensor  * v,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * q,
+            struct wsp_ggml_tensor  * k,
+            struct wsp_ggml_tensor  * v,
             bool                  masked);
-    GGML_API struct ggml_tensor * ggml_flash_attn_back(
-           struct ggml_context * ctx,
-           struct ggml_tensor  * q,
-           struct ggml_tensor  * k,
-           struct ggml_tensor  * v,
-           struct ggml_tensor  * d,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_back(
+           struct wsp_ggml_context * ctx,
+           struct wsp_ggml_tensor  * q,
+           struct wsp_ggml_tensor  * k,
+           struct wsp_ggml_tensor  * v,
+           struct wsp_ggml_tensor  * d,
            bool                  masked);
-    GGML_API struct ggml_tensor * ggml_flash_ff(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b0,
-            struct ggml_tensor  * b1,
-            struct ggml_tensor  * c0,
-            struct ggml_tensor  * c1);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_ff(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b0,
+            struct wsp_ggml_tensor  * b1,
+            struct wsp_ggml_tensor  * c0,
+            struct wsp_ggml_tensor  * c1);
     // partition into non-overlapping windows with padding if needed
     // example:
@@ -1190,167 +1190,167 @@ extern "C" {
     // w:    14
     // res: 768   14   14    25
     // used in sam
-    GGML_API struct ggml_tensor * ggml_win_part(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_win_part(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   w);
-    // reverse of ggml_win_part
+    // reverse of wsp_ggml_win_part
     // used in sam
-    GGML_API struct ggml_tensor * ggml_win_unpart(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_win_unpart(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
             int                   w0,
             int                   h0,
             int                   w);
     // custom operators
-    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
-    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-    GGML_API struct ggml_tensor * ggml_map_unary_f32(
-            struct ggml_context        * ctx,
-            struct ggml_tensor         * a,
-                   ggml_unary_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
-            struct ggml_context        * ctx,
-            struct ggml_tensor         * a,
-                   ggml_unary_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_binary_f32(
-            struct ggml_context         * ctx,
-            struct ggml_tensor          * a,
-            struct ggml_tensor          * b,
-                   ggml_binary_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
-            struct ggml_context         * ctx,
-            struct ggml_tensor          * a,
-            struct ggml_tensor          * b,
-                   ggml_binary_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_custom1_f32(
-            struct ggml_context          * ctx,
-            struct ggml_tensor           * a,
-                   ggml_custom1_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
-            struct ggml_context          * ctx,
-            struct ggml_tensor           * a,
-                   ggml_custom1_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_custom2_f32(
-            struct ggml_context          * ctx,
-            struct ggml_tensor           * a,
-            struct ggml_tensor           * b,
-                   ggml_custom2_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
-            struct ggml_context          * ctx,
-            struct ggml_tensor           * a,
-            struct ggml_tensor           * b,
-                   ggml_custom2_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_custom3_f32(
-            struct ggml_context          * ctx,
-            struct ggml_tensor           * a,
-            struct ggml_tensor           * b,
-            struct ggml_tensor           * c,
-                   ggml_custom3_op_f32_t   fun);
-    GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
-            struct ggml_context          * ctx,
-            struct ggml_tensor           * a,
-            struct ggml_tensor           * b,
-            struct ggml_tensor           * c,
-                   ggml_custom3_op_f32_t   fun);
+    typedef void (*wsp_ggml_unary_op_f32_t) (const int, float *, const float *);
+    typedef void (*wsp_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+    typedef void (*wsp_ggml_custom1_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
+    typedef void (*wsp_ggml_custom2_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
+    typedef void (*wsp_ggml_custom3_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_unary_f32(
+            struct wsp_ggml_context        * ctx,
+            struct wsp_ggml_tensor         * a,
+                   wsp_ggml_unary_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_unary_inplace_f32(
+            struct wsp_ggml_context        * ctx,
+            struct wsp_ggml_tensor         * a,
+                   wsp_ggml_unary_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_binary_f32(
+            struct wsp_ggml_context         * ctx,
+            struct wsp_ggml_tensor          * a,
+            struct wsp_ggml_tensor          * b,
+                   wsp_ggml_binary_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_binary_inplace_f32(
+            struct wsp_ggml_context         * ctx,
+            struct wsp_ggml_tensor          * a,
+            struct wsp_ggml_tensor          * b,
+                   wsp_ggml_binary_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom1_f32(
+            struct wsp_ggml_context          * ctx,
+            struct wsp_ggml_tensor           * a,
+                   wsp_ggml_custom1_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom1_inplace_f32(
+            struct wsp_ggml_context          * ctx,
+            struct wsp_ggml_tensor           * a,
+                   wsp_ggml_custom1_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom2_f32(
+            struct wsp_ggml_context          * ctx,
+            struct wsp_ggml_tensor           * a,
+            struct wsp_ggml_tensor           * b,
+                   wsp_ggml_custom2_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom2_inplace_f32(
+            struct wsp_ggml_context          * ctx,
+            struct wsp_ggml_tensor           * a,
+            struct wsp_ggml_tensor           * b,
+                   wsp_ggml_custom2_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom3_f32(
+            struct wsp_ggml_context          * ctx,
+            struct wsp_ggml_tensor           * a,
+            struct wsp_ggml_tensor           * b,
+            struct wsp_ggml_tensor           * c,
+                   wsp_ggml_custom3_op_f32_t   fun);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom3_inplace_f32(
+            struct wsp_ggml_context          * ctx,
+            struct wsp_ggml_tensor           * a,
+            struct wsp_ggml_tensor           * b,
+            struct wsp_ggml_tensor           * c,
+                   wsp_ggml_custom3_op_f32_t   fun);
     // loss function
-    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
-            struct ggml_context         * ctx,
-            struct ggml_tensor          * a,
-            struct ggml_tensor          * b);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cross_entropy_loss(
+            struct wsp_ggml_context         * ctx,
+            struct wsp_ggml_tensor          * a,
+            struct wsp_ggml_tensor          * b);
-    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
-            struct ggml_context         * ctx,
-            struct ggml_tensor          * a,
-            struct ggml_tensor          * b,
-            struct ggml_tensor          * c);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cross_entropy_loss_back(
+            struct wsp_ggml_context         * ctx,
+            struct wsp_ggml_tensor          * a,
+            struct wsp_ggml_tensor          * b,
+            struct wsp_ggml_tensor          * c);
     //
     // automatic differentiation
     //
-    GGML_API void ggml_set_param(
-            struct ggml_context * ctx,
-            struct ggml_tensor * tensor);
+    WSP_GGML_API void wsp_ggml_set_param(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * tensor);
-    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    WSP_GGML_API void wsp_ggml_build_forward_expand(struct wsp_ggml_cgraph * cgraph, struct wsp_ggml_tensor * tensor);
-    GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
-    GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
+    WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_build_forward (struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_build_backward(struct wsp_ggml_context * ctx, struct wsp_ggml_cgraph * gf, bool keep);
-    GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
+    WSP_GGML_API void wsp_ggml_graph_compute(struct wsp_ggml_context * ctx, struct wsp_ggml_cgraph * cgraph);
+    WSP_GGML_API void wsp_ggml_graph_reset  (struct wsp_ggml_cgraph * cgraph);
-    GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_graph_get_tensor(struct wsp_ggml_cgraph * cgraph, const char * name);
-    GGML_API void               ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
+    WSP_GGML_API void               wsp_ggml_graph_export(const struct wsp_ggml_cgraph * cgraph, const char * fname);
+    WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_graph_import(const char * fname, struct wsp_ggml_context ** ctx_data, struct wsp_ggml_context ** ctx_eval);
     // print info and performance information for the graph
-    GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
+    WSP_GGML_API void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph);
     // dump the graph into a file using the dot format
-    GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
+    WSP_GGML_API void wsp_ggml_graph_dump_dot(const struct wsp_ggml_cgraph * gb, const struct wsp_ggml_cgraph * gf, const char * filename);
     //
     // optimization
     //
     // optimization methods
-    enum ggml_opt_type {
-        GGML_OPT_ADAM,
-        GGML_OPT_LBFGS,
+    enum wsp_ggml_opt_type {
+        WSP_GGML_OPT_ADAM,
+        WSP_GGML_OPT_LBFGS,
     };
     // linesearch methods
-    enum ggml_linesearch {
-        GGML_LINESEARCH_DEFAULT = 1,
+    enum wsp_ggml_linesearch {
+        WSP_GGML_LINESEARCH_DEFAULT = 1,
-        GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
-        GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
-        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
+        WSP_GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
+        WSP_GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
+        WSP_GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
     };
     // optimization return values
-    enum ggml_opt_result {
-        GGML_OPT_OK = 0,
-        GGML_OPT_DID_NOT_CONVERGE,
-        GGML_OPT_NO_CONTEXT,
-        GGML_OPT_INVALID_WOLFE,
-        GGML_OPT_FAIL,
-        GGML_LINESEARCH_FAIL = -128,
-        GGML_LINESEARCH_MINIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
-        GGML_LINESEARCH_INVALID_PARAMETERS,
+    enum wsp_ggml_opt_result {
+        WSP_GGML_OPT_OK = 0,
+        WSP_GGML_OPT_DID_NOT_CONVERGE,
+        WSP_GGML_OPT_NO_CONTEXT,
+        WSP_GGML_OPT_INVALID_WOLFE,
+        WSP_GGML_OPT_FAIL,
+        WSP_GGML_LINESEARCH_FAIL = -128,
+        WSP_GGML_LINESEARCH_MINIMUM_STEP,
+        WSP_GGML_LINESEARCH_MAXIMUM_STEP,
+        WSP_GGML_LINESEARCH_MAXIMUM_ITERATIONS,
+        WSP_GGML_LINESEARCH_INVALID_PARAMETERS,
     };
     // optimization parameters
     //
-    //   see ggml.c (ggml_opt_default_params) for default values
+    //   see ggml.c (wsp_ggml_opt_default_params) for default values
     //
-    struct ggml_opt_params {
-        enum ggml_opt_type type;
+    struct wsp_ggml_opt_params {
+        enum wsp_ggml_opt_type type;
         int n_threads;
@@ -1400,13 +1400,13 @@ extern "C" {
             float min_step;
             float max_step;
-            enum ggml_linesearch linesearch;
+            enum wsp_ggml_linesearch linesearch;
         } lbfgs;
     };
-    struct ggml_opt_context {
-        struct ggml_context * ctx;
-        struct ggml_opt_params params;
+    struct wsp_ggml_opt_context {
+        struct wsp_ggml_context * ctx;
+        struct wsp_ggml_opt_params params;
         int iter;
         int64_t nx; // number of parameter elements
@@ -1414,30 +1414,30 @@ extern "C" {
         bool just_initialized;
         struct {
-            struct ggml_tensor * x;  // view of the parameters
-            struct ggml_tensor * g1; // gradient
-            struct ggml_tensor * g2; // gradient squared
-            struct ggml_tensor * m;  // first moment
-            struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * mh; // first moment hat
-            struct ggml_tensor * vh; // second moment hat
-            struct ggml_tensor * pf; // past function values
+            struct wsp_ggml_tensor * x;  // view of the parameters
+            struct wsp_ggml_tensor * g1; // gradient
+            struct wsp_ggml_tensor * g2; // gradient squared
+            struct wsp_ggml_tensor * m;  // first moment
+            struct wsp_ggml_tensor * v;  // second moment
+            struct wsp_ggml_tensor * mh; // first moment hat
+            struct wsp_ggml_tensor * vh; // second moment hat
+            struct wsp_ggml_tensor * pf; // past function values
             float fx_best;
             float fx_prev;
             int n_no_improvement;
         } adam;
         struct {
-            struct ggml_tensor * x;    // current parameters
-            struct ggml_tensor * xp;   // previous parameters
-            struct ggml_tensor * g;    // current gradient
-            struct ggml_tensor * gp;   // previous gradient
-            struct ggml_tensor * d;    // search direction
-            struct ggml_tensor * pf;   // past function values
-            struct ggml_tensor * lmal; // the L-BFGS memory alpha
-            struct ggml_tensor * lmys; // the L-BFGS memory ys
-            struct ggml_tensor * lms;  // the L-BFGS memory s
-            struct ggml_tensor * lmy;  // the L-BFGS memory y
+            struct wsp_ggml_tensor * x;    // current parameters
+            struct wsp_ggml_tensor * xp;   // previous parameters
+            struct wsp_ggml_tensor * g;    // current gradient
+            struct wsp_ggml_tensor * gp;   // previous gradient
+            struct wsp_ggml_tensor * d;    // search direction
+            struct wsp_ggml_tensor * pf;   // past function values
+            struct wsp_ggml_tensor * lmal; // the L-BFGS memory alpha
+            struct wsp_ggml_tensor * lmys; // the L-BFGS memory ys
+            struct wsp_ggml_tensor * lms;  // the L-BFGS memory s
+            struct wsp_ggml_tensor * lmy;  // the L-BFGS memory y
             float fx_best;
             float step;
             int j;
@@ -1447,68 +1447,68 @@ extern "C" {
         } lbfgs;
     };
-    GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
+    WSP_GGML_API struct wsp_ggml_opt_params wsp_ggml_opt_default_params(enum wsp_ggml_opt_type type);
     // optimize the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt(
-            struct ggml_context * ctx,
-            struct ggml_opt_params params,
-            struct ggml_tensor * f);
+    WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_opt_params params,
+            struct wsp_ggml_tensor * f);
     // initialize optimizer context
-    GGML_API void ggml_opt_init(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_opt_params params,
+    WSP_GGML_API void wsp_ggml_opt_init(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_opt_context * opt,
+            struct wsp_ggml_opt_params params,
             int64_t nx);
     // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor * f);
+    WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt_resume(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_opt_context * opt,
+            struct wsp_ggml_tensor * f);
     // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume_g(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor * f,
-            struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb);
+    WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt_resume_g(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_opt_context * opt,
+            struct wsp_ggml_tensor * f,
+            struct wsp_ggml_cgraph * gf,
+            struct wsp_ggml_cgraph * gb);
     //
     // quantization
     //
-    GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
-    GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
-    GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
-    GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
-    GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    WSP_GGML_API size_t wsp_ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    WSP_GGML_API size_t wsp_ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
+    WSP_GGML_API size_t wsp_ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    WSP_GGML_API size_t wsp_ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
+    WSP_GGML_API size_t wsp_ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
-    GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
+    WSP_GGML_API size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
     //
     // system info
     //
-    GGML_API int ggml_cpu_has_avx        (void);
-    GGML_API int ggml_cpu_has_avx2       (void);
-    GGML_API int ggml_cpu_has_avx512     (void);
-    GGML_API int ggml_cpu_has_avx512_vbmi(void);
-    GGML_API int ggml_cpu_has_avx512_vnni(void);
-    GGML_API int ggml_cpu_has_fma        (void);
-    GGML_API int ggml_cpu_has_neon       (void);
-    GGML_API int ggml_cpu_has_arm_fma    (void);
-    GGML_API int ggml_cpu_has_f16c       (void);
-    GGML_API int ggml_cpu_has_fp16_va    (void);
-    GGML_API int ggml_cpu_has_wasm_simd  (void);
-    GGML_API int ggml_cpu_has_blas       (void);
-    GGML_API int ggml_cpu_has_cublas     (void);
-    GGML_API int ggml_cpu_has_clblast    (void);
-    GGML_API int ggml_cpu_has_gpublas    (void);
-    GGML_API int ggml_cpu_has_sse3       (void);
-    GGML_API int ggml_cpu_has_vsx        (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_avx        (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_avx2       (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_avx512     (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_avx512_vbmi(void);
+    WSP_GGML_API int wsp_ggml_cpu_has_avx512_vnni(void);
+    WSP_GGML_API int wsp_ggml_cpu_has_fma        (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_neon       (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_arm_fma    (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_f16c       (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_fp16_va    (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_wasm_simd  (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_blas       (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_cublas     (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_clblast    (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_gpublas    (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_sse3       (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_vsx        (void);
     //
     // Internal types and functions exposed for tests and benchmarks
@@ -1516,13 +1516,13 @@ extern "C" {
 #ifdef  __cplusplus
     // restrict not standard in C++
-#define GGML_RESTRICT
+#define WSP_GGML_RESTRICT
 #else
-#define GGML_RESTRICT restrict
+#define WSP_GGML_RESTRICT restrict
 #endif
-    typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
-    typedef void (*quantize_row_q_t)  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
-    typedef void (*vec_dot_q_t)       (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+    typedef void (*dequantize_row_q_t)(const void * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int k);
+    typedef void (*quantize_row_q_t)  (const float * WSP_GGML_RESTRICT x, void * WSP_GGML_RESTRICT y, int k);
+    typedef void (*vec_dot_q_t)       (const int n, float * WSP_GGML_RESTRICT s, const void * WSP_GGML_RESTRICT x, const void * WSP_GGML_RESTRICT y);
     typedef struct {
         dequantize_row_q_t dequantize_row_q;
@@ -1530,10 +1530,10 @@ extern "C" {
         quantize_row_q_t   quantize_row_q_reference;
         quantize_row_q_t   quantize_row_q_dot;
         vec_dot_q_t        vec_dot_q;
-        enum ggml_type     vec_dot_type;
+        enum wsp_ggml_type     vec_dot_type;
     } quantize_fns_t;
-    quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
+    quantize_fns_t wsp_ggml_internal_get_quantize_fn(size_t i);
 #ifdef  __cplusplus
 }