llama_cpp 0.16.2 → 0.17.0

Files changed (177)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -43
  5. data/ext/llama_cpp/llama_cpp.cpp +8 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +3 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1124
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2225
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -236
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -314
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3273
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14994
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1178
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -6351
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -40
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -144508
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7183
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22506
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2458
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18985
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1147
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1032
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -7033
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -810
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
data/vendor/tmp/llama.cpp/ggml-cuda.h
@@ -1,44 +0,0 @@
- #pragma once
-
- #include "ggml.h"
- #include "ggml-backend.h"
-
- #ifdef GGML_USE_HIPBLAS
- #define GGML_CUDA_NAME "ROCm"
- #define GGML_CUBLAS_NAME "hipBLAS"
- #else
- #define GGML_CUDA_NAME "CUDA"
- #define GGML_CUBLAS_NAME "cuBLAS"
- #endif
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- #define GGML_CUDA_MAX_DEVICES 16
-
- // backend API
- GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
-
- GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
-
- // device buffer
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
-
- // split tensor buffer that splits matrices by rows across multiple devices
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
-
- // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
-
- GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
- GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
- GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
-
- GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
- GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
-
- GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
- #ifdef __cplusplus
- }
- #endif
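
The hunk above is the public surface of the CUDA backend. For orientation, a minimal sketch (hypothetical consumer code, not part of this diff or of the gem) of how a C program built against the previously vendored llama.cpp could use the device-query functions declared above:

#include <stdio.h>
#include "ggml-cuda.h"  // the vendored header removed above

int main(void) {
    const int n_devices = ggml_backend_cuda_get_device_count();
    for (int i = 0; i < n_devices; ++i) {
        char desc[128];
        size_t free_mem = 0, total_mem = 0;
        // both functions are declared in the removed header above
        ggml_backend_cuda_get_device_description(i, desc, sizeof(desc));
        ggml_backend_cuda_get_device_memory(i, &free_mem, &total_mem);
        printf("device %d: %s, %zu of %zu bytes free\n", i, desc, free_mem, total_mem);
    }
    return 0;
}

With the vendored sources removed in 0.17.0, code like this would have to build against a separately installed llama.cpp.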
data/vendor/tmp/llama.cpp/ggml-impl.h
@@ -1,651 +0,0 @@
- #pragma once
-
- #include "ggml.h"
-
- // GGML internal header
-
- #include <assert.h>
- #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
- #include <stddef.h>
- #include <stdbool.h>
- #include <string.h> // memcpy
- #include <math.h>   // fabsf
-
- #undef MIN
- #undef MAX
-
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
-
- #if defined(_MSC_VER)
-
- #define m512bh(p) p
- #define m512i(p) p
-
- #else
-
- #define m512bh(p) (__m512bh)(p)
- #define m512i(p) (__m512i)(p)
-
- #endif
-
- /**
-  * Converts brain16 to float32.
-  *
-  * The bfloat16 floating point format has the following structure:
-  *
-  *   ┌sign
-  *   │
-  *   │   ┌exponent
-  *   │   │
-  *   │   │      ┌mantissa
-  *   │   │      │
-  *   │┌──┴───┐┌─┴───┐
-  * 0b0000000000000000 brain16
-  *
-  * Since bf16 has the same number of exponent bits as a 32bit float,
-  * encoding and decoding numbers becomes relatively straightforward.
-  *
-  *   ┌sign
-  *   │
-  *   │   ┌exponent
-  *   │   │
-  *   │   │      ┌mantissa
-  *   │   │      │
-  *   │┌──┴───┐┌─┴───────────────────┐
-  * 0b00000000000000000000000000000000 IEEE binary32
-  *
-  * For comparison, the standard fp16 format has fewer exponent bits.
-  *
-  *   ┌sign
-  *   │
-  *   │  ┌exponent
-  *   │  │
-  *   │  │    ┌mantissa
-  *   │  │    │
-  *   │┌─┴─┐┌─┴──────┐
-  * 0b0000000000000000 IEEE binary16
-  *
-  * @see IEEE 754-2008
-  */
- static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
-     union {
-         float f;
-         uint32_t i;
-     } u;
-     u.i = (uint32_t)h.bits << 16;
-     return u.f;
- }
-
- /**
-  * Converts float32 to brain16.
-  *
-  * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
-  * Subnormals shall be flushed to zero, and NANs will be quiet.
-  * This code should vectorize nicely if using modern compilers.
-  */
- static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
-     ggml_bf16_t h;
-     union {
-         float f;
-         uint32_t i;
-     } u;
-     u.f = s;
-     if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
-         h.bits = (u.i >> 16) | 64; /* force to quiet */
-         return h;
-     }
-     if (!(u.i & 0x7f800000)) { /* subnormal */
-         h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
-         return h;
-     }
-     h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
-     return h;
- }
-
- #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
- #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- // static_assert should be a #define, but if it's not,
- // fall back to the _Static_assert C11 keyword.
- // if C99 - static_assert is noop
- // ref: https://stackoverflow.com/a/53923785/4039976
- #ifndef __cplusplus
- #ifndef static_assert
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
- #define static_assert(cond, msg) _Static_assert(cond, msg)
- #else
- #define static_assert(cond, msg) struct global_scope_noop_trick
- #endif
- #endif
- #endif
-
- // __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
- #if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
- #ifndef __FMA__
- #define __FMA__
- #endif
- #ifndef __F16C__
- #define __F16C__
- #endif
- #endif
-
- // __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
- #if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
- #ifndef __SSE3__
- #define __SSE3__
- #endif
- #ifndef __SSSE3__
- #define __SSSE3__
- #endif
- #endif
-
- #if defined(__ARM_FEATURE_SVE)
- #include <arm_sve.h>
- #endif
-
- // 16-bit float
- // on Arm, we use __fp16
- // on x86, we use uint16_t
- #if defined(__ARM_NEON)
-
- // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
- //
- // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
- //
- #include <arm_neon.h>
-
- #ifdef _MSC_VER
-
- typedef uint16_t ggml_fp16_internal_t;
-
- #define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
-
- #else
-
- typedef __fp16 ggml_fp16_internal_t;
-
- #define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
-
- #endif // _MSC_VER
-
- #if !defined(__aarch64__)
-
- // 32-bit ARM compatibility
-
- // vaddvq_s16
- // vpaddq_s16
- // vpaddq_s32
- // vaddvq_s32
- // vaddvq_f32
- // vmaxvq_f32
- // vcvtnq_s32_f32
- // vzip1_u8
- // vzip2_u8
-
- inline static int32_t vaddvq_s16(int16x8_t v) {
-     return
-         (int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
-         (int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
-         (int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
-         (int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
- }
-
- inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
-     int16x4_t a0 = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
-     int16x4_t b0 = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
-     return vcombine_s16(a0, b0);
- }
-
- inline static int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
-     int32x2_t a0 = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
-     int32x2_t b0 = vpadd_s32(vget_low_s32(b), vget_high_s32(b));
-     return vcombine_s32(a0, b0);
- }
-
- inline static int32_t vaddvq_s32(int32x4_t v) {
-     return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
- }
-
- inline static float vaddvq_f32(float32x4_t v) {
-     return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
- }
-
- inline static float vmaxvq_f32(float32x4_t v) {
-     return
-         MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
-             MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
- }
-
- inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
-     int32x4_t res;
-
-     res[0] = roundf(vgetq_lane_f32(v, 0));
-     res[1] = roundf(vgetq_lane_f32(v, 1));
-     res[2] = roundf(vgetq_lane_f32(v, 2));
-     res[3] = roundf(vgetq_lane_f32(v, 3));
-
-     return res;
- }
-
- inline static uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
-     uint8x8_t res;
-
-     res[0] = a[0]; res[1] = b[0];
-     res[2] = a[1]; res[3] = b[1];
-     res[4] = a[2]; res[5] = b[2];
-     res[6] = a[3]; res[7] = b[3];
-
-     return res;
- }
-
- inline static uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
-     uint8x8_t res;
-
-     res[0] = a[4]; res[1] = b[4];
-     res[2] = a[5]; res[3] = b[5];
-     res[4] = a[6]; res[5] = b[6];
-     res[6] = a[7]; res[7] = b[7];
-
-     return res;
- }
-
- // vld1q_s16_x2
- // vld1q_u8_x2
- // vld1q_u8_x4
- // vld1q_s8_x2
- // vld1q_s8_x4
- // TODO: double-check these work correctly
-
- typedef struct ggml_int16x8x2_t {
-     int16x8_t val[2];
- } ggml_int16x8x2_t;
-
- inline static ggml_int16x8x2_t ggml_vld1q_s16_x2(const int16_t * ptr) {
-     ggml_int16x8x2_t res;
-
-     res.val[0] = vld1q_s16(ptr + 0);
-     res.val[1] = vld1q_s16(ptr + 8);
-
-     return res;
- }
-
- typedef struct ggml_uint8x16x2_t {
-     uint8x16_t val[2];
- } ggml_uint8x16x2_t;
-
- inline static ggml_uint8x16x2_t ggml_vld1q_u8_x2(const uint8_t * ptr) {
-     ggml_uint8x16x2_t res;
-
-     res.val[0] = vld1q_u8(ptr + 0);
-     res.val[1] = vld1q_u8(ptr + 16);
-
-     return res;
- }
-
- typedef struct ggml_uint8x16x4_t {
-     uint8x16_t val[4];
- } ggml_uint8x16x4_t;
-
- inline static ggml_uint8x16x4_t ggml_vld1q_u8_x4(const uint8_t * ptr) {
-     ggml_uint8x16x4_t res;
-
-     res.val[0] = vld1q_u8(ptr + 0);
-     res.val[1] = vld1q_u8(ptr + 16);
-     res.val[2] = vld1q_u8(ptr + 32);
-     res.val[3] = vld1q_u8(ptr + 48);
-
-     return res;
- }
-
- typedef struct ggml_int8x16x2_t {
-     int8x16_t val[2];
- } ggml_int8x16x2_t;
-
- inline static ggml_int8x16x2_t ggml_vld1q_s8_x2(const int8_t * ptr) {
-     ggml_int8x16x2_t res;
-
-     res.val[0] = vld1q_s8(ptr + 0);
-     res.val[1] = vld1q_s8(ptr + 16);
-
-     return res;
- }
-
- typedef struct ggml_int8x16x4_t {
-     int8x16_t val[4];
- } ggml_int8x16x4_t;
-
- inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
-     ggml_int8x16x4_t res;
-
-     res.val[0] = vld1q_s8(ptr + 0);
-     res.val[1] = vld1q_s8(ptr + 16);
-     res.val[2] = vld1q_s8(ptr + 32);
-     res.val[3] = vld1q_s8(ptr + 48);
-
-     return res;
- }
-
- // NOTE: not tested
- inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
-     int8x16_t res;
-
-     res[ 0] = a[b[ 0]];
-     res[ 1] = a[b[ 1]];
-     res[ 2] = a[b[ 2]];
-     res[ 3] = a[b[ 3]];
-     res[ 4] = a[b[ 4]];
-     res[ 5] = a[b[ 5]];
-     res[ 6] = a[b[ 6]];
-     res[ 7] = a[b[ 7]];
-     res[ 8] = a[b[ 8]];
-     res[ 9] = a[b[ 9]];
-     res[10] = a[b[10]];
-     res[11] = a[b[11]];
-     res[12] = a[b[12]];
-     res[13] = a[b[13]];
-     res[14] = a[b[14]];
-     res[15] = a[b[15]];
-
-     return res;
- }
-
- // NOTE: not tested
- inline static uint8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
-     uint8x16_t res;
-
-     res[ 0] = a[b[ 0]];
-     res[ 1] = a[b[ 1]];
-     res[ 2] = a[b[ 2]];
-     res[ 3] = a[b[ 3]];
-     res[ 4] = a[b[ 4]];
-     res[ 5] = a[b[ 5]];
-     res[ 6] = a[b[ 6]];
-     res[ 7] = a[b[ 7]];
-     res[ 8] = a[b[ 8]];
-     res[ 9] = a[b[ 9]];
-     res[10] = a[b[10]];
-     res[11] = a[b[11]];
-     res[12] = a[b[12]];
-     res[13] = a[b[13]];
-     res[14] = a[b[14]];
-     res[15] = a[b[15]];
-
-     return res;
- }
-
- #else
-
- #define ggml_int16x8x2_t int16x8x2_t
- #define ggml_uint8x16x2_t uint8x16x2_t
- #define ggml_uint8x16x4_t uint8x16x4_t
- #define ggml_int8x16x2_t int8x16x2_t
- #define ggml_int8x16x4_t int8x16x4_t
-
- #define ggml_vld1q_s16_x2 vld1q_s16_x2
- #define ggml_vld1q_u8_x2 vld1q_u8_x2
- #define ggml_vld1q_u8_x4 vld1q_u8_x4
- #define ggml_vld1q_s8_x2 vld1q_s8_x2
- #define ggml_vld1q_s8_x4 vld1q_s8_x4
- #define ggml_vqtbl1q_s8 vqtbl1q_s8
- #define ggml_vqtbl1q_u8 vqtbl1q_u8
-
- #endif // !defined(__aarch64__)
-
- #if !defined(__ARM_FEATURE_DOTPROD)
-
- inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) {
-     const int16x8_t p0 = vmull_s8(vget_low_s8 (a), vget_low_s8 (b));
-     const int16x8_t p1 = vmull_s8(vget_high_s8(a), vget_high_s8(b));
-
-     return vaddq_s32(acc, vaddq_s32(vpaddlq_s16(p0), vpaddlq_s16(p1)));
- }
-
- #else
-
- #define ggml_vdotq_s32(a, b, c) vdotq_s32(a, b, c)
-
- #endif // !defined(__ARM_FEATURE_DOTPROD)
-
- #endif // defined(__ARM_NEON)
-
- #if defined(__ARM_NEON) && !defined(_MSC_VER)
-
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
- #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-     ggml_fp16_internal_t tmp;
-     memcpy(&tmp, &h, sizeof(ggml_fp16_t));
-     return (float)tmp;
- }
-
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-     ggml_fp16_t res;
-     ggml_fp16_internal_t tmp = f;
-     memcpy(&res, &tmp, sizeof(ggml_fp16_t));
-     return res;
- }
-
- #else
-
- #ifdef __wasm_simd128__
- #include <wasm_simd128.h>
- #else
- #ifdef __POWER9_VECTOR__
- #include <altivec.h>
- #undef bool
- #define bool _Bool
- #else
- #if defined(_MSC_VER) || defined(__MINGW32__)
- #include <intrin.h>
- #else
- #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
- #if !defined(__riscv)
- #include <immintrin.h>
- #endif
- #endif
- #endif
- #endif
- #endif
-
- #ifdef __riscv_v_intrinsic
- #include <riscv_vector.h>
- #endif
-
- #if defined(__loongarch64)
- #if defined(__loongarch_asx)
- #include <lasxintrin.h>
- #endif
- #if defined(__loongarch_sx)
- #include <lsxintrin.h>
- #endif
- #endif
-
- #if defined(__loongarch_asx)
-
- typedef union {
-     int32_t i;
-     float f;
- } ft_union;
-
- /* float type data load instructions */
- static __m128 __lsx_vreplfr2vr_s(float val) {
-     ft_union fi_tmpval = {.f = val};
-     return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
- }
-
- static __m256 __lasx_xvreplfr2vr_s(float val) {
-     ft_union fi_tmpval = {.f = val};
-     return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
- }
- #endif
-
- #ifdef __F16C__
-
- #ifdef _MSC_VER
- #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
- #define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
- #else
- #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
- #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
- #endif
-
- #elif defined(__POWER9_VECTOR__)
-
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
- /* the inline asm below is about 12% faster than the lookup method */
- #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-     register float f;
-     register double d;
-     __asm__(
-         "mtfprd %0,%2\n"
-         "xscvhpdp %0,%0\n"
-         "frsp %1,%0\n" :
-         /* temp */ "=d"(d),
-         /* out */ "=f"(f):
-         /* in */ "r"(h));
-     return f;
- }
-
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-     register double d;
-     register ggml_fp16_t r;
-     __asm__( /* xscvdphp can work on double or single precision */
-         "xscvdphp %0,%2\n"
-         "mffprd %1,%0\n" :
-         /* temp */ "=d"(d),
-         /* out */ "=r"(r):
-         /* in */ "f"(f));
-     return r;
- }
-
- #else
-
- // FP16 <-> FP32
- // ref: https://github.com/Maratyszcza/FP16
-
- static inline float fp32_from_bits(uint32_t w) {
-     union {
-         uint32_t as_bits;
-         float as_value;
-     } fp32;
-     fp32.as_bits = w;
-     return fp32.as_value;
- }
-
- static inline uint32_t fp32_to_bits(float f) {
-     union {
-         float as_value;
-         uint32_t as_bits;
-     } fp32;
-     fp32.as_value = f;
-     return fp32.as_bits;
- }
-
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-     const uint32_t w = (uint32_t) h << 16;
-     const uint32_t sign = w & UINT32_C(0x80000000);
-     const uint32_t two_w = w + w;
-
-     const uint32_t exp_offset = UINT32_C(0xE0) << 23;
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
-     const float exp_scale = 0x1.0p-112f;
- #else
-     const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
- #endif
-     const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
-
-     const uint32_t magic_mask = UINT32_C(126) << 23;
-     const float magic_bias = 0.5f;
-     const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
-
-     const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
-     const uint32_t result = sign |
-         (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
-     return fp32_from_bits(result);
- }
-
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
-     const float scale_to_inf = 0x1.0p+112f;
-     const float scale_to_zero = 0x1.0p-110f;
- #else
-     const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
-     const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
- #endif
-     float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
-
-     const uint32_t w = fp32_to_bits(f);
-     const uint32_t shl1_w = w + w;
-     const uint32_t sign = w & UINT32_C(0x80000000);
-     uint32_t bias = shl1_w & UINT32_C(0xFF000000);
-     if (bias < UINT32_C(0x71000000)) {
-         bias = UINT32_C(0x71000000);
-     }
-
-     base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
-     const uint32_t bits = fp32_to_bits(base);
-     const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
-     const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
-     const uint32_t nonsign = exp_bits + mantissa_bits;
-     return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
- }
-
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
- #endif // __F16C__
-
- #endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
-
- // precomputed f32 table for f16 (256 KB)
- // defined in ggml.c, initialized in ggml_init()
- extern float ggml_table_f32_f16[1 << 16];
-
- // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
- // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
- // This is also true for POWER9.
- #if !defined(GGML_FP16_TO_FP32)
- inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
-     uint16_t s;
-     memcpy(&s, &f, sizeof(uint16_t));
-     return ggml_table_f32_f16[s];
- }
-
- #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
- #endif
-
- #if !defined(GGML_FP32_TO_FP16)
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
- #endif
-
- #define GGML_HASHTABLE_FULL ((size_t)-1)
- #define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
-
- struct ggml_hash_set ggml_hash_set_new(size_t size);
-
- bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
- // returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
- size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
- // returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
- size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
- // return index, asserts if table is full
- size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
- #ifdef __cplusplus
- }
- #endif
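
The fp32 → bf16 routine in the hunk above rounds by adding a bias before truncating the low 16 bits. As a sanity check, here is a self-contained sketch of that rounding step (bf16_t is a local stand-in for ggml_bf16_t, introduced only for illustration):

#include <stdio.h>
#include <stdint.h>

typedef struct { uint16_t bits; } bf16_t; /* local stand-in for ggml_bf16_t */

static bf16_t fp32_to_bf16(float s) {
    union { float f; uint32_t i; } u = { .f = s };
    bf16_t h;
    if ((u.i & 0x7fffffff) > 0x7f800000) { /* NaN: force it quiet */
        h.bits = (u.i >> 16) | 64;
        return h;
    }
    if (!(u.i & 0x7f800000)) {             /* subnormal: flush to signed zero */
        h.bits = (u.i & 0x80000000) >> 16;
        return h;
    }
    /* add 0x7fff plus the low bit of the kept half, then truncate */
    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
    return h;
}

int main(void) {
    printf("%04x\n", fp32_to_bf16(1.0f).bits);         /* 3f80: exactly representable */
    printf("%04x\n", fp32_to_bf16(1.005859375f).bits); /* 3f81: rounds up to 1.0078125 */
    return 0;
}

Adding 0x7fff plus the low bit of the kept half rounds to nearest with ties to even, which is what allows the header's comment to claim binary-identical results to Zen4's VCVTNEPS2BF16.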