@nativescript/visionos 8.6.0-alpha.0 → 8.7.0-rc.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. package/framework/.build_env_vars.sh +147 -143
  2. package/framework/__PROJECT_NAME__/__PROJECT_NAME__.entitlements +3 -1
  3. package/framework/__PROJECT_NAME__.xcodeproj/project.pbxproj +18 -6
  4. package/framework/__PROJECT_NAME__.xcodeproj/project.xcworkspace/xcuserdata/nstudio.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  5. package/framework/internal/NativeScriptEmbedder.h +29 -0
  6. package/framework/internal/NativeScriptEmbedder.m +25 -0
  7. package/framework/internal/NativeScriptStart.m +7 -8
  8. package/framework/internal/Swift-ObjC-Bridging-Header.h +1 -0
  9. package/framework/internal/XCFrameworks.zip +0 -0
  10. package/framework/internal/macros.h +17 -0
  11. package/framework/internal/metadata-generator-arm64/bin/build-step-metadata-generator.py +14 -0
  12. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/__clang_cuda_complex_builtins.h +3 -3
  13. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_intrinsics.h +38 -6
  14. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/__clang_cuda_libdevice_declares.h +6 -0
  15. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_math.h +1 -1
  16. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_runtime_wrapper.h +61 -2
  17. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/__clang_cuda_texture_intrinsics.h +740 -0
  18. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_runtime_wrapper.h +17 -4
  19. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/__wmmintrin_aes.h +1 -1
  20. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/__wmmintrin_pclmul.h +10 -10
  21. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/altivec.h +677 -328
  22. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/ammintrin.h +4 -0
  23. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/amxintrin.h +17 -18
  24. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/arm_acle.h +6 -0
  25. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/arm_neon.h +8010 -8012
  26. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/arm_sve.h +65 -61
  27. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx2intrin.h +189 -189
  28. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512bf16intrin.h +3 -3
  29. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx512bwintrin.h +83 -83
  30. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512dqintrin.h +365 -365
  31. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512erintrin.h +102 -102
  32. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx512fintrin.h +1625 -1618
  33. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/avx512fp16intrin.h +3349 -0
  34. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx512vbmi2intrin.h +48 -48
  35. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlbf16intrin.h +52 -4
  36. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx512vlbwintrin.h +102 -102
  37. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx512vldqintrin.h +134 -134
  38. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/avx512vlfp16intrin.h +2068 -0
  39. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlintrin.h +596 -610
  40. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlvbmi2intrin.h +96 -96
  41. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avx512vlvnniintrin.h +24 -24
  42. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/avxintrin.h +254 -222
  43. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avxvnniintrin.h +16 -16
  44. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/bmiintrin.h +6 -2
  45. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/cetintrin.h +20 -4
  46. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cpuid.h +3 -1
  47. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/crc32intrin.h +100 -0
  48. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/emmintrin.h +563 -779
  49. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/f16cintrin.h +4 -4
  50. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/float.h +15 -6
  51. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/gfniintrin.h +47 -48
  52. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/hexagon_protos.h +0 -11
  53. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/hexagon_types.h +0 -32
  54. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/hlsl.h +15 -0
  55. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/hlsl_basic_types.h +64 -0
  56. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/hlsl_intrinsics.h +15 -0
  57. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/hresetintrin.h +2 -2
  58. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/hvx_hexagon_protos.h +1200 -409
  59. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/ia32intrin.h +17 -17
  60. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/immintrin.h +41 -23
  61. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/intrin.h +34 -35
  62. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/keylockerintrin.h +24 -30
  63. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/limits.h +20 -0
  64. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mm_malloc.h +3 -3
  65. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mmintrin.h +4 -0
  66. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/nmmintrin.h +4 -0
  67. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/opencl-c-base.h +79 -11
  68. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/opencl-c.h +7145 -6131
  69. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/openmp_wrappers/complex +10 -1
  70. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/openmp_wrappers/complex.h +9 -0
  71. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/pmmintrin.h +5 -1
  72. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/bmi2intrin.h +134 -0
  73. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/bmiintrin.h +165 -0
  74. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/emmintrin.h +2268 -0
  75. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/immintrin.h +27 -0
  76. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/ppc_wrappers/mm_malloc.h +12 -17
  77. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/ppc_wrappers/mmintrin.h +389 -386
  78. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/pmmintrin.h +145 -0
  79. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/smmintrin.h +663 -0
  80. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/tmmintrin.h +453 -0
  81. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1/include/stdnoreturn.h → metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/x86gprintrin.h} +7 -6
  82. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/x86intrin.h +28 -0
  83. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/ppc_wrappers/xmmintrin.h +1827 -0
  84. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/prfchwintrin.h +5 -2
  85. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/rdpruintrin.h +57 -0
  86. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/rdseedintrin.h +3 -3
  87. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/riscv_vector.h +202 -0
  88. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/rtmintrin.h +1 -1
  89. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/smmintrin.h +258 -360
  90. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stdatomic.h +11 -1
  91. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/stdbool.h +9 -6
  92. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stddef.h +1 -1
  93. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/stdint.h +168 -0
  94. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/stdnoreturn.h +29 -0
  95. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tmmintrin.h +10 -6
  96. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/uintrintrin.h +8 -8
  97. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/unwind.h +10 -6
  98. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vaesintrin.h +1 -1
  99. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/velintrin.h +71 -0
  100. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/velintrin_approx.h +120 -0
  101. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/velintrin_gen.h +1257 -0
  102. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vpclmulqdqintrin.h +6 -6
  103. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/wasm_simd128.h +169 -26
  104. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/wmmintrin.h +4 -0
  105. package/framework/internal/metadata-generator-arm64/bin/lib/clang/15.0.7/include/x86gprintrin.h +53 -0
  106. package/framework/internal/{metadata-generator-x86_64/bin/lib/clang/13.0.1 → metadata-generator-arm64/bin/lib/clang/15.0.7}/include/x86intrin.h +4 -0
  107. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xmmintrin.h +18 -8
  108. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xopintrin.h +31 -31
  109. package/framework/internal/metadata-generator-arm64/bin/objc-metadata-generator +0 -0
  110. package/framework/internal/metadata-generator-x86_64/bin/build-step-metadata-generator.py +14 -0
  111. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/__clang_cuda_complex_builtins.h +3 -3
  112. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_intrinsics.h +38 -6
  113. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/__clang_cuda_libdevice_declares.h +6 -0
  114. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_math.h +1 -1
  115. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_runtime_wrapper.h +61 -2
  116. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/__clang_cuda_texture_intrinsics.h +740 -0
  117. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_runtime_wrapper.h +17 -4
  118. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/__wmmintrin_aes.h +1 -1
  119. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/__wmmintrin_pclmul.h +10 -10
  120. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/altivec.h +677 -328
  121. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/ammintrin.h +4 -0
  122. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/amxintrin.h +17 -18
  123. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/arm_acle.h +6 -0
  124. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/arm_neon.h +8010 -8012
  125. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/arm_sve.h +65 -61
  126. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx2intrin.h +189 -189
  127. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512bf16intrin.h +3 -3
  128. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx512bwintrin.h +83 -83
  129. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512dqintrin.h +365 -365
  130. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512erintrin.h +102 -102
  131. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx512fintrin.h +1625 -1618
  132. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/avx512fp16intrin.h +3349 -0
  133. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx512vbmi2intrin.h +48 -48
  134. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlbf16intrin.h +52 -4
  135. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx512vlbwintrin.h +102 -102
  136. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx512vldqintrin.h +134 -134
  137. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/avx512vlfp16intrin.h +2068 -0
  138. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlintrin.h +596 -610
  139. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlvbmi2intrin.h +96 -96
  140. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avx512vlvnniintrin.h +24 -24
  141. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/avxintrin.h +254 -222
  142. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avxvnniintrin.h +16 -16
  143. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/bmiintrin.h +6 -2
  144. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/cetintrin.h +20 -4
  145. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cpuid.h +3 -1
  146. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/crc32intrin.h +100 -0
  147. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/emmintrin.h +563 -779
  148. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/f16cintrin.h +4 -4
  149. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/float.h +15 -6
  150. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/gfniintrin.h +47 -48
  151. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/hexagon_protos.h +0 -11
  152. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/hexagon_types.h +0 -32
  153. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/hlsl.h +15 -0
  154. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/hlsl_basic_types.h +64 -0
  155. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/hlsl_intrinsics.h +15 -0
  156. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/hresetintrin.h +2 -2
  157. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/hvx_hexagon_protos.h +1200 -409
  158. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/ia32intrin.h +17 -17
  159. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/immintrin.h +41 -23
  160. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/intrin.h +34 -35
  161. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/keylockerintrin.h +24 -30
  162. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/limits.h +20 -0
  163. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mm_malloc.h +3 -3
  164. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mmintrin.h +4 -0
  165. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/nmmintrin.h +4 -0
  166. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/opencl-c-base.h +79 -11
  167. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/opencl-c.h +7145 -6131
  168. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/openmp_wrappers/complex +10 -1
  169. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/openmp_wrappers/complex.h +9 -0
  170. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/pmmintrin.h +5 -1
  171. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/bmi2intrin.h +134 -0
  172. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/bmiintrin.h +165 -0
  173. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/emmintrin.h +2268 -0
  174. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/immintrin.h +27 -0
  175. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/ppc_wrappers/mm_malloc.h +12 -17
  176. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/ppc_wrappers/mmintrin.h +389 -386
  177. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/pmmintrin.h +145 -0
  178. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/smmintrin.h +663 -0
  179. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/tmmintrin.h +453 -0
  180. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1/include/stdnoreturn.h → metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/x86gprintrin.h} +7 -6
  181. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/x86intrin.h +28 -0
  182. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/ppc_wrappers/xmmintrin.h +1827 -0
  183. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/prfchwintrin.h +5 -2
  184. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/rdpruintrin.h +57 -0
  185. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/rdseedintrin.h +3 -3
  186. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/riscv_vector.h +202 -0
  187. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/rtmintrin.h +1 -1
  188. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/smmintrin.h +258 -360
  189. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stdatomic.h +11 -1
  190. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/stdbool.h +9 -6
  191. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stddef.h +1 -1
  192. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/stdint.h +168 -0
  193. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/stdnoreturn.h +29 -0
  194. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tmmintrin.h +10 -6
  195. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/uintrintrin.h +8 -8
  196. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/unwind.h +10 -6
  197. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vaesintrin.h +1 -1
  198. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/velintrin.h +71 -0
  199. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/velintrin_approx.h +120 -0
  200. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/velintrin_gen.h +1257 -0
  201. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vpclmulqdqintrin.h +6 -6
  202. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/wasm_simd128.h +169 -26
  203. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/wmmintrin.h +4 -0
  204. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/15.0.7/include/x86gprintrin.h +53 -0
  205. package/framework/internal/{metadata-generator-arm64/bin/lib/clang/13.0.1 → metadata-generator-x86_64/bin/lib/clang/15.0.7}/include/x86intrin.h +4 -0
  206. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xmmintrin.h +18 -8
  207. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xopintrin.h +31 -31
  208. package/framework/internal/metadata-generator-x86_64/bin/objc-metadata-generator +0 -0
  209. package/framework/internal/nsld.sh +5 -3
  210. package/package.json +4 -3
  211. package/framework/internal/main.m +0 -68
  212. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/ppc_wrappers/emmintrin.h +0 -2324
  213. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/ppc_wrappers/pmmintrin.h +0 -150
  214. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/ppc_wrappers/smmintrin.h +0 -109
  215. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/ppc_wrappers/tmmintrin.h +0 -495
  216. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/ppc_wrappers/xmmintrin.h +0 -1843
  217. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/riscv_vector.h +0 -123865
  218. package/framework/internal/metadata-generator-arm64/bin/lib/clang/13.0.1/include/x86gprintrin.h +0 -23
  219. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/ppc_wrappers/emmintrin.h +0 -2324
  220. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/ppc_wrappers/pmmintrin.h +0 -150
  221. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/ppc_wrappers/smmintrin.h +0 -109
  222. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/ppc_wrappers/tmmintrin.h +0 -495
  223. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/ppc_wrappers/xmmintrin.h +0 -1843
  224. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/riscv_vector.h +0 -123865
  225. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/13.0.1/include/x86gprintrin.h +0 -23
  226. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_builtin_vars.h +0 -0
  227. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_cmath.h +0 -0
  228. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_device_functions.h +0 -0
  229. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_math_forward_declares.h +0 -0
  230. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_cmath.h +0 -0
  231. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_libdevice_declares.h +0 -0
  232. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_math.h +0 -0
  233. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__stddef_max_align_t.h +0 -0
  234. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/adxintrin.h +0 -0
  235. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm64intr.h +0 -0
  236. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_bf16.h +0 -0
  237. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_cde.h +0 -0
  238. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_cmse.h +0 -0
  239. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_fp16.h +89 -89
  240. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_mve.h +0 -0
  241. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/armintr.h +0 -0
  242. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512bitalgintrin.h +0 -0
  243. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512cdintrin.h +0 -0
  244. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512ifmaintrin.h +0 -0
  245. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512ifmavlintrin.h +0 -0
  246. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512pfintrin.h +0 -0
  247. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vbmiintrin.h +0 -0
  248. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vbmivlintrin.h +0 -0
  249. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlbitalgintrin.h +0 -0
  250. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlcdintrin.h +0 -0
  251. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlvp2intersectintrin.h +0 -0
  252. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vnniintrin.h +0 -0
  253. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vp2intersectintrin.h +0 -0
  254. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vpopcntdqintrin.h +0 -0
  255. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vpopcntdqvlintrin.h +0 -0
  256. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/bmi2intrin.h +0 -0
  257. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/builtins.h +0 -0
  258. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cet.h +0 -0
  259. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cldemoteintrin.h +0 -0
  260. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/clflushoptintrin.h +0 -0
  261. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/clwbintrin.h +0 -0
  262. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/clzerointrin.h +0 -0
  263. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cuda_wrappers/algorithm +0 -0
  264. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cuda_wrappers/complex +0 -0
  265. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cuda_wrappers/new +0 -0
  266. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/enqcmdintrin.h +0 -0
  267. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/fma4intrin.h +0 -0
  268. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/fmaintrin.h +0 -0
  269. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/fxsrintrin.h +0 -0
  270. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/hexagon_circ_brev_intrinsics.h +0 -0
  271. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/htmintrin.h +0 -0
  272. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/htmxlintrin.h +0 -0
  273. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/inttypes.h +0 -0
  274. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/invpcidintrin.h +0 -0
  275. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/iso646.h +0 -0
  276. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/lwpintrin.h +0 -0
  277. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/lzcntintrin.h +0 -0
  278. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mm3dnow.h +0 -0
  279. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/module.modulemap +0 -0
  280. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/movdirintrin.h +0 -0
  281. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/msa.h +0 -0
  282. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mwaitxintrin.h +0 -0
  283. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/__clang_openmp_device_functions.h +0 -0
  284. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/cmath +0 -0
  285. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/complex_cmath.h +0 -0
  286. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/math.h +0 -0
  287. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/new +0 -0
  288. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/pconfigintrin.h +0 -0
  289. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/pkuintrin.h +0 -0
  290. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/popcntintrin.h +0 -0
  291. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/ptwriteintrin.h +0 -0
  292. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/s390intrin.h +0 -0
  293. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/serializeintrin.h +0 -0
  294. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/sgxintrin.h +0 -0
  295. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/shaintrin.h +0 -0
  296. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stdalign.h +0 -0
  297. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stdarg.h +0 -0
  298. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tbmintrin.h +0 -0
  299. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tgmath.h +0 -0
  300. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tsxldtrkintrin.h +0 -0
  301. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vadefs.h +0 -0
  302. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/varargs.h +0 -0
  303. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vecintrin.h +0 -0
  304. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/waitpkgintrin.h +0 -0
  305. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/wbnoinvdintrin.h +0 -0
  306. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsavecintrin.h +0 -0
  307. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsaveintrin.h +0 -0
  308. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsaveoptintrin.h +0 -0
  309. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsavesintrin.h +0 -0
  310. package/framework/internal/metadata-generator-arm64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xtestintrin.h +0 -0
  311. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_builtin_vars.h +0 -0
  312. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_cmath.h +0 -0
  313. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_device_functions.h +0 -0
  314. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_cuda_math_forward_declares.h +0 -0
  315. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_cmath.h +0 -0
  316. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_libdevice_declares.h +0 -0
  317. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__clang_hip_math.h +0 -0
  318. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/__stddef_max_align_t.h +0 -0
  319. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/adxintrin.h +0 -0
  320. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm64intr.h +0 -0
  321. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_bf16.h +0 -0
  322. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_cde.h +0 -0
  323. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_cmse.h +0 -0
  324. package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_fp16.h +89 -89
  325. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/arm_mve.h +0 -0
  326. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/armintr.h +0 -0
  327. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512bitalgintrin.h +0 -0
  328. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512cdintrin.h +0 -0
  329. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512ifmaintrin.h +0 -0
  330. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512ifmavlintrin.h +0 -0
  331. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512pfintrin.h +0 -0
  332. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vbmiintrin.h +0 -0
  333. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vbmivlintrin.h +0 -0
  334. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlbitalgintrin.h +0 -0
  335. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlcdintrin.h +0 -0
  336. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vlvp2intersectintrin.h +0 -0
  337. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vnniintrin.h +0 -0
  338. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vp2intersectintrin.h +0 -0
  339. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vpopcntdqintrin.h +0 -0
  340. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/avx512vpopcntdqvlintrin.h +0 -0
  341. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/bmi2intrin.h +0 -0
  342. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/builtins.h +0 -0
  343. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cet.h +0 -0
  344. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cldemoteintrin.h +0 -0
  345. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/clflushoptintrin.h +0 -0
  346. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/clwbintrin.h +0 -0
  347. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/clzerointrin.h +0 -0
  348. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cuda_wrappers/algorithm +0 -0
  349. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cuda_wrappers/complex +0 -0
  350. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/cuda_wrappers/new +0 -0
  351. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/enqcmdintrin.h +0 -0
  352. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/fma4intrin.h +0 -0
  353. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/fmaintrin.h +0 -0
  354. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/fxsrintrin.h +0 -0
  355. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/hexagon_circ_brev_intrinsics.h +0 -0
  356. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/htmintrin.h +0 -0
  357. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/htmxlintrin.h +0 -0
  358. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/inttypes.h +0 -0
  359. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/invpcidintrin.h +0 -0
  360. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/iso646.h +0 -0
  361. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/lwpintrin.h +0 -0
  362. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/lzcntintrin.h +0 -0
  363. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mm3dnow.h +0 -0
  364. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/module.modulemap +0 -0
  365. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/movdirintrin.h +0 -0
  366. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/msa.h +0 -0
  367. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/mwaitxintrin.h +0 -0
  368. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/__clang_openmp_device_functions.h +0 -0
  369. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/cmath +0 -0
  370. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/complex_cmath.h +0 -0
  371. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/math.h +0 -0
  372. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/openmp_wrappers/new +0 -0
  373. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/pconfigintrin.h +0 -0
  374. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/pkuintrin.h +0 -0
  375. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/popcntintrin.h +0 -0
  376. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/ptwriteintrin.h +0 -0
  377. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/s390intrin.h +0 -0
  378. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/serializeintrin.h +0 -0
  379. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/sgxintrin.h +0 -0
  380. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/shaintrin.h +0 -0
  381. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stdalign.h +0 -0
  382. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/stdarg.h +0 -0
  383. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tbmintrin.h +0 -0
  384. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tgmath.h +0 -0
  385. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/tsxldtrkintrin.h +0 -0
  386. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vadefs.h +0 -0
  387. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/varargs.h +0 -0
  388. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/vecintrin.h +0 -0
  389. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/waitpkgintrin.h +0 -0
  390. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/wbnoinvdintrin.h +0 -0
  391. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsavecintrin.h +0 -0
  392. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsaveintrin.h +0 -0
  393. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsaveoptintrin.h +0 -0
  394. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xsavesintrin.h +0 -0
  395. /package/framework/internal/metadata-generator-x86_64/bin/lib/clang/{13.0.1 → 15.0.7}/include/xtestintrin.h +0 -0
@@ -0,0 +1,3349 @@
1
+ /*===----------- avx512fp16intrin.h - AVX512-FP16 intrinsics ---------------===
2
+ *
3
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
+ * See https://llvm.org/LICENSE.txt for license information.
5
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
+ *
7
+ *===-----------------------------------------------------------------------===
8
+ */
9
+ #ifndef __IMMINTRIN_H
10
+ #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
11
+ #endif
12
+
13
+ #ifndef __AVX512FP16INTRIN_H
14
+ #define __AVX512FP16INTRIN_H
15
+
16
+ /* Define the half-precision vector types used by the functions in this file. */
17
+ typedef _Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64)));
18
+ typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64)));
19
+ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
20
+ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
21
+ typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
22
+ typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
23
+ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
24
+ typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
25
+ typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));
26
+
27
+ /* Define the default attributes for the functions in this file. */
28
+ #define __DEFAULT_FN_ATTRS512 \
29
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
30
+ __min_vector_width__(512)))
31
+ #define __DEFAULT_FN_ATTRS256 \
32
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
33
+ __min_vector_width__(256)))
34
+ #define __DEFAULT_FN_ATTRS128 \
35
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
36
+ __min_vector_width__(128)))
37
+
38
+ static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
39
+ return __a[0];
40
+ }
41
+
42
+ static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) {
43
+ return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
44
+ }
45
+
46
+ static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) {
47
+ return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
48
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
49
+ }
50
+
51
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void) {
52
+ return (__m256h)__builtin_ia32_undef256();
53
+ }
54
+
55
+ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) {
56
+ return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
57
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
58
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
59
+ }
60
+
61
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void) {
62
+ return (__m128h)__builtin_ia32_undef128();
63
+ }
64
+
65
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) {
66
+ return (__m512h)__builtin_ia32_undef512();
67
+ }
68
+
69
+ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) {
70
+ return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h,
71
+ __h, __h, __h, __h, __h, __h, __h, __h,
72
+ __h, __h, __h, __h, __h, __h, __h, __h,
73
+ __h, __h, __h, __h, __h, __h, __h, __h};
74
+ }
75
+
76
+ static __inline __m512h __DEFAULT_FN_ATTRS512
77
+ _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
78
+ _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
79
+ _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
80
+ _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16,
81
+ _Float16 __h17, _Float16 __h18, _Float16 __h19, _Float16 __h20,
82
+ _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
83
+ _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
84
+ _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
85
+ return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
86
+ __h25, __h24, __h23, __h22, __h21, __h20, __h19,
87
+ __h18, __h17, __h16, __h15, __h14, __h13, __h12,
88
+ __h11, __h10, __h9, __h8, __h7, __h6, __h5,
89
+ __h4, __h3, __h2, __h1};
90
+ }
91
+
92
+ #define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
93
+ h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24, \
94
+ h25, h26, h27, h28, h29, h30, h31, h32) \
95
+ _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \
96
+ (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \
97
+ (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6), \
98
+ (h5), (h4), (h3), (h2), (h1))
99
+
100
+ static __inline __m512h __DEFAULT_FN_ATTRS512
101
+ _mm512_set1_pch(_Float16 _Complex h) {
102
+ return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h));
103
+ }
104
+
105
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
106
+ return (__m128)__a;
107
+ }
108
+
109
+ static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) {
110
+ return (__m256)__a;
111
+ }
112
+
113
+ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) {
114
+ return (__m512)__a;
115
+ }
116
+
117
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) {
118
+ return (__m128d)__a;
119
+ }
120
+
121
+ static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a) {
122
+ return (__m256d)__a;
123
+ }
124
+
125
+ static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) {
126
+ return (__m512d)__a;
127
+ }
128
+
129
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) {
130
+ return (__m128i)__a;
131
+ }
132
+
133
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256
134
+ _mm256_castph_si256(__m256h __a) {
135
+ return (__m256i)__a;
136
+ }
137
+
138
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
139
+ _mm512_castph_si512(__m512h __a) {
140
+ return (__m512i)__a;
141
+ }
142
+
143
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) {
144
+ return (__m128h)__a;
145
+ }
146
+
147
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) {
148
+ return (__m256h)__a;
149
+ }
150
+
151
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) {
152
+ return (__m512h)__a;
153
+ }
154
+
155
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) {
156
+ return (__m128h)__a;
157
+ }
158
+
159
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) {
160
+ return (__m256h)__a;
161
+ }
162
+
163
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) {
164
+ return (__m512h)__a;
165
+ }
166
+
167
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) {
168
+ return (__m128h)__a;
169
+ }
170
+
171
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256
172
+ _mm256_castsi256_ph(__m256i __a) {
173
+ return (__m256h)__a;
174
+ }
175
+
176
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
177
+ _mm512_castsi512_ph(__m512i __a) {
178
+ return (__m512h)__a;
179
+ }
180
+
181
+ static __inline__ __m128h __DEFAULT_FN_ATTRS256
182
+ _mm256_castph256_ph128(__m256h __a) {
183
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
184
+ }
185
+
186
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
187
+ _mm512_castph512_ph128(__m512h __a) {
188
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
189
+ }
190
+
191
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
192
+ _mm512_castph512_ph256(__m512h __a) {
193
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
194
+ 12, 13, 14, 15);
195
+ }
196
+
197
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256
198
+ _mm256_castph128_ph256(__m128h __a) {
199
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
200
+ -1, -1, -1, -1, -1);
201
+ }
202
+
203
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
204
+ _mm512_castph128_ph512(__m128h __a) {
205
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
206
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207
+ -1, -1, -1, -1, -1, -1, -1, -1, -1);
208
+ }
209
+
210
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
211
+ _mm512_castph256_ph512(__m256h __a) {
212
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
213
+ 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
214
+ -1, -1, -1, -1, -1, -1, -1, -1);
215
+ }
216
+
217
+ /// Constructs a 256-bit floating-point vector of [16 x half] from a
218
+ /// 128-bit floating-point vector of [8 x half]. The lower 128 bits
219
+ /// contain the value of the source vector. The upper 128 bits are set
220
+ /// to zero.
221
+ ///
222
+ /// \headerfile <x86intrin.h>
223
+ ///
224
+ /// This intrinsic has no corresponding instruction.
225
+ ///
226
+ /// \param __a
227
+ /// A 128-bit vector of [8 x half].
228
+ /// \returns A 256-bit floating-point vector of [16 x half]. The lower 128 bits
228
+ /// contain the value of the parameter. The upper 128 bits are set to zero.
230
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256
231
+ _mm256_zextph128_ph256(__m128h __a) {
232
+ return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4,
233
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
234
+ }
235
+
236
+ /// Constructs a 512-bit floating-point vector of [32 x half] from a
237
+ /// 128-bit floating-point vector of [8 x half]. The lower 128 bits
238
+ /// contain the value of the source vector. The upper 384 bits are set
239
+ /// to zero.
240
+ ///
241
+ /// \headerfile <x86intrin.h>
242
+ ///
243
+ /// This intrinsic has no corresponding instruction.
244
+ ///
245
+ /// \param __a
246
+ /// A 128-bit vector of [8 x half].
247
+ /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits
248
+ /// contain the value of the parameter. The upper 384 bits are set to zero.
249
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
250
+ _mm512_zextph128_ph512(__m128h __a) {
251
+ return __builtin_shufflevector(
252
+ __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
253
+ 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
254
+ }
255
+
256
+ /// Constructs a 512-bit floating-point vector of [32 x half] from a
257
+ /// 256-bit floating-point vector of [16 x half]. The lower 256 bits
258
+ /// contain the value of the source vector. The upper 256 bits are set
259
+ /// to zero.
260
+ ///
261
+ /// \headerfile <x86intrin.h>
262
+ ///
263
+ /// This intrinsic has no corresponding instruction.
264
+ ///
265
+ /// \param __a
266
+ /// A 256-bit vector of [16 x half].
267
+ /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits
268
+ /// contain the value of the parameter. The upper 256 bits are set to zero.
269
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
270
+ _mm512_zextph256_ph512(__m256h __a) {
271
+ return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3,
272
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
273
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
274
+ 29, 30, 31);
275
+ }
276
+
277
+ #define _mm_comi_round_sh(A, B, P, R) \
278
+ __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, (int)(P), (int)(R))
279
+
280
+ #define _mm_comi_sh(A, B, pred) \
281
+ _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
282
+
283
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A,
284
+ __m128h B) {
285
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS,
286
+ _MM_FROUND_CUR_DIRECTION);
287
+ }
288
+
289
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A,
290
+ __m128h B) {
291
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS,
292
+ _MM_FROUND_CUR_DIRECTION);
293
+ }
294
+
295
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A,
296
+ __m128h B) {
297
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS,
298
+ _MM_FROUND_CUR_DIRECTION);
299
+ }
300
+
301
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A,
302
+ __m128h B) {
303
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS,
304
+ _MM_FROUND_CUR_DIRECTION);
305
+ }
306
+
307
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A,
308
+ __m128h B) {
309
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS,
310
+ _MM_FROUND_CUR_DIRECTION);
311
+ }
312
+
313
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A,
314
+ __m128h B) {
315
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
316
+ _MM_FROUND_CUR_DIRECTION);
317
+ }
318
+
319
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A,
320
+ __m128h B) {
321
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
322
+ _MM_FROUND_CUR_DIRECTION);
323
+ }
324
+
325
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A,
326
+ __m128h B) {
327
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ,
328
+ _MM_FROUND_CUR_DIRECTION);
329
+ }
330
+
331
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A,
332
+ __m128h B) {
333
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ,
334
+ _MM_FROUND_CUR_DIRECTION);
335
+ }
336
+
337
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A,
338
+ __m128h B) {
339
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ,
340
+ _MM_FROUND_CUR_DIRECTION);
341
+ }
342
+
343
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A,
344
+ __m128h B) {
345
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ,
346
+ _MM_FROUND_CUR_DIRECTION);
347
+ }
348
+
349
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A,
350
+ __m128h B) {
351
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ,
352
+ _MM_FROUND_CUR_DIRECTION);
353
+ }
354
+
355
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A,
356
+ __m512h __B) {
357
+ return (__m512h)((__v32hf)__A + (__v32hf)__B);
358
+ }
359
+
360
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
361
+ _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
362
+ return (__m512h)__builtin_ia32_selectph_512(
363
+ (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
364
+ }
365
+
366
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
367
+ _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) {
368
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
369
+ (__v32hf)_mm512_add_ph(__A, __B),
370
+ (__v32hf)_mm512_setzero_ph());
371
+ }
372
+
373
+ #define _mm512_add_round_ph(A, B, R) \
374
+ ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \
375
+ (__v32hf)(__m512h)(B), (int)(R)))
376
+
377
+ #define _mm512_mask_add_round_ph(W, U, A, B, R) \
378
+ ((__m512h)__builtin_ia32_selectph_512( \
379
+ (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
380
+ (__v32hf)(__m512h)(W)))
381
+
382
+ #define _mm512_maskz_add_round_ph(U, A, B, R) \
383
+ ((__m512h)__builtin_ia32_selectph_512( \
384
+ (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
385
+ (__v32hf)_mm512_setzero_ph()))
386
+
387
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A,
388
+ __m512h __B) {
389
+ return (__m512h)((__v32hf)__A - (__v32hf)__B);
390
+ }
391
+
392
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
393
+ _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
394
+ return (__m512h)__builtin_ia32_selectph_512(
395
+ (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
396
+ }
397
+
398
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
399
+ _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) {
400
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
401
+ (__v32hf)_mm512_sub_ph(__A, __B),
402
+ (__v32hf)_mm512_setzero_ph());
403
+ }
404
+
405
+ #define _mm512_sub_round_ph(A, B, R) \
406
+ ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \
407
+ (__v32hf)(__m512h)(B), (int)(R)))
408
+
409
+ #define _mm512_mask_sub_round_ph(W, U, A, B, R) \
410
+ ((__m512h)__builtin_ia32_selectph_512( \
411
+ (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
412
+ (__v32hf)(__m512h)(W)))
413
+
414
+ #define _mm512_maskz_sub_round_ph(U, A, B, R) \
415
+ ((__m512h)__builtin_ia32_selectph_512( \
416
+ (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
417
+ (__v32hf)_mm512_setzero_ph()))
418
+
419
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A,
420
+ __m512h __B) {
421
+ return (__m512h)((__v32hf)__A * (__v32hf)__B);
422
+ }
423
+
424
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
425
+ _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
426
+ return (__m512h)__builtin_ia32_selectph_512(
427
+ (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
428
+ }
429
+
430
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
431
+ _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) {
432
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
433
+ (__v32hf)_mm512_mul_ph(__A, __B),
434
+ (__v32hf)_mm512_setzero_ph());
435
+ }
436
+
437
+ #define _mm512_mul_round_ph(A, B, R) \
438
+ ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \
439
+ (__v32hf)(__m512h)(B), (int)(R)))
440
+
441
+ #define _mm512_mask_mul_round_ph(W, U, A, B, R) \
442
+ ((__m512h)__builtin_ia32_selectph_512( \
443
+ (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
444
+ (__v32hf)(__m512h)(W)))
445
+
446
+ #define _mm512_maskz_mul_round_ph(U, A, B, R) \
447
+ ((__m512h)__builtin_ia32_selectph_512( \
448
+ (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
449
+ (__v32hf)_mm512_setzero_ph()))
450
+
451
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A,
452
+ __m512h __B) {
453
+ return (__m512h)((__v32hf)__A / (__v32hf)__B);
454
+ }
455
+
456
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
457
+ _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
458
+ return (__m512h)__builtin_ia32_selectph_512(
459
+ (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
460
+ }
461
+
462
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
463
+ _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
464
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
465
+ (__v32hf)_mm512_div_ph(__A, __B),
466
+ (__v32hf)_mm512_setzero_ph());
467
+ }
468
+
469
+ #define _mm512_div_round_ph(A, B, R) \
470
+ ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \
471
+ (__v32hf)(__m512h)(B), (int)(R)))
472
+
473
+ #define _mm512_mask_div_round_ph(W, U, A, B, R) \
474
+ ((__m512h)__builtin_ia32_selectph_512( \
475
+ (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
476
+ (__v32hf)(__m512h)(W)))
477
+
478
+ #define _mm512_maskz_div_round_ph(U, A, B, R) \
479
+ ((__m512h)__builtin_ia32_selectph_512( \
480
+ (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
481
+ (__v32hf)_mm512_setzero_ph()))
482
+
483
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A,
484
+ __m512h __B) {
485
+ return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
486
+ _MM_FROUND_CUR_DIRECTION);
487
+ }
488
+
489
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
490
+ _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
491
+ return (__m512h)__builtin_ia32_selectph_512(
492
+ (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
493
+ }
494
+
495
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
496
+ _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
497
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
498
+ (__v32hf)_mm512_min_ph(__A, __B),
499
+ (__v32hf)_mm512_setzero_ph());
500
+ }
501
+
502
+ #define _mm512_min_round_ph(A, B, R) \
503
+ ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \
504
+ (__v32hf)(__m512h)(B), (int)(R)))
505
+
506
+ #define _mm512_mask_min_round_ph(W, U, A, B, R) \
507
+ ((__m512h)__builtin_ia32_selectph_512( \
508
+ (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
509
+ (__v32hf)(__m512h)(W)))
510
+
511
+ #define _mm512_maskz_min_round_ph(U, A, B, R) \
512
+ ((__m512h)__builtin_ia32_selectph_512( \
513
+ (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
514
+ (__v32hf)_mm512_setzero_ph()))
515
+
516
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A,
517
+ __m512h __B) {
518
+ return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
519
+ _MM_FROUND_CUR_DIRECTION);
520
+ }
521
+
522
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
523
+ _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
524
+ return (__m512h)__builtin_ia32_selectph_512(
525
+ (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
526
+ }
527
+
528
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
529
+ _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
530
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
531
+ (__v32hf)_mm512_max_ph(__A, __B),
532
+ (__v32hf)_mm512_setzero_ph());
533
+ }
534
+
535
+ #define _mm512_max_round_ph(A, B, R) \
536
+ ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \
537
+ (__v32hf)(__m512h)(B), (int)(R)))
538
+
539
+ #define _mm512_mask_max_round_ph(W, U, A, B, R) \
540
+ ((__m512h)__builtin_ia32_selectph_512( \
541
+ (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
542
+ (__v32hf)(__m512h)(W)))
543
+
544
+ #define _mm512_maskz_max_round_ph(U, A, B, R) \
545
+ ((__m512h)__builtin_ia32_selectph_512( \
546
+ (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
547
+ (__v32hf)_mm512_setzero_ph()))
548
+
549
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
550
+ return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A);
551
+ }
552
+
553
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
554
+ return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
555
+ }
556
+
557
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
558
+ _mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A) {
559
+ return (__m512h)__builtin_ia32_selectps_512(
560
+ (__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)__W);
561
+ }
562
+
563
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
564
+ _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
565
+ return (__m512h)__builtin_ia32_selectps_512((__mmask16)__U,
566
+ (__v16sf)_mm512_conj_pch(__A),
567
+ (__v16sf)_mm512_setzero_ps());
568
+ }
569
+
570
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
571
+ __m128h __B) {
572
+ __A[0] += __B[0];
573
+ return __A;
574
+ }
575
+
576
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W,
577
+ __mmask8 __U,
578
+ __m128h __A,
579
+ __m128h __B) {
580
+ __A = _mm_add_sh(__A, __B);
581
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
582
+ }
583
+
584
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
585
+ __m128h __A,
586
+ __m128h __B) {
587
+ __A = _mm_add_sh(__A, __B);
588
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
589
+ }
590
+
591
+ #define _mm_add_round_sh(A, B, R) \
592
+ ((__m128h)__builtin_ia32_addsh_round_mask( \
593
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
594
+ (__mmask8)-1, (int)(R)))
595
+
596
+ #define _mm_mask_add_round_sh(W, U, A, B, R) \
597
+ ((__m128h)__builtin_ia32_addsh_round_mask( \
598
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
599
+ (__mmask8)(U), (int)(R)))
600
+
601
+ #define _mm_maskz_add_round_sh(U, A, B, R) \
602
+ ((__m128h)__builtin_ia32_addsh_round_mask( \
603
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
604
+ (__mmask8)(U), (int)(R)))
605
+
606
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
607
+ __m128h __B) {
608
+ __A[0] -= __B[0];
609
+ return __A;
610
+ }
611
+
612
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W,
613
+ __mmask8 __U,
614
+ __m128h __A,
615
+ __m128h __B) {
616
+ __A = _mm_sub_sh(__A, __B);
617
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
618
+ }
619
+
620
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
621
+ __m128h __A,
622
+ __m128h __B) {
623
+ __A = _mm_sub_sh(__A, __B);
624
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
625
+ }
626
+
627
+ #define _mm_sub_round_sh(A, B, R) \
628
+ ((__m128h)__builtin_ia32_subsh_round_mask( \
629
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
630
+ (__mmask8)-1, (int)(R)))
631
+
632
+ #define _mm_mask_sub_round_sh(W, U, A, B, R) \
633
+ ((__m128h)__builtin_ia32_subsh_round_mask( \
634
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
635
+ (__mmask8)(U), (int)(R)))
636
+
637
+ #define _mm_maskz_sub_round_sh(U, A, B, R) \
638
+ ((__m128h)__builtin_ia32_subsh_round_mask( \
639
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
640
+ (__mmask8)(U), (int)(R)))
641
+
642
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
643
+ __m128h __B) {
644
+ __A[0] *= __B[0];
645
+ return __A;
646
+ }
647
+
648
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W,
649
+ __mmask8 __U,
650
+ __m128h __A,
651
+ __m128h __B) {
652
+ __A = _mm_mul_sh(__A, __B);
653
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
654
+ }
655
+
656
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
657
+ __m128h __A,
658
+ __m128h __B) {
659
+ __A = _mm_mul_sh(__A, __B);
660
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
661
+ }
662
+
663
+ #define _mm_mul_round_sh(A, B, R) \
664
+ ((__m128h)__builtin_ia32_mulsh_round_mask( \
665
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
666
+ (__mmask8)-1, (int)(R)))
667
+
668
+ #define _mm_mask_mul_round_sh(W, U, A, B, R) \
669
+ ((__m128h)__builtin_ia32_mulsh_round_mask( \
670
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
671
+ (__mmask8)(U), (int)(R)))
672
+
673
+ #define _mm_maskz_mul_round_sh(U, A, B, R) \
674
+ ((__m128h)__builtin_ia32_mulsh_round_mask( \
675
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
676
+ (__mmask8)(U), (int)(R)))
677
+
678
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
679
+ __m128h __B) {
680
+ __A[0] /= __B[0];
681
+ return __A;
682
+ }
683
+
684
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W,
685
+ __mmask8 __U,
686
+ __m128h __A,
687
+ __m128h __B) {
688
+ __A = _mm_div_sh(__A, __B);
689
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
690
+ }
691
+
692
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U,
693
+ __m128h __A,
694
+ __m128h __B) {
695
+ __A = _mm_div_sh(__A, __B);
696
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
697
+ }
698
+
699
+ #define _mm_div_round_sh(A, B, R) \
700
+ ((__m128h)__builtin_ia32_divsh_round_mask( \
701
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
702
+ (__mmask8)-1, (int)(R)))
703
+
704
+ #define _mm_mask_div_round_sh(W, U, A, B, R) \
705
+ ((__m128h)__builtin_ia32_divsh_round_mask( \
706
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
707
+ (__mmask8)(U), (int)(R)))
708
+
709
+ #define _mm_maskz_div_round_sh(U, A, B, R) \
710
+ ((__m128h)__builtin_ia32_divsh_round_mask( \
711
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
712
+ (__mmask8)(U), (int)(R)))
713
+
714
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
715
+ __m128h __B) {
716
+ return (__m128h)__builtin_ia32_minsh_round_mask(
717
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
718
+ _MM_FROUND_CUR_DIRECTION);
719
+ }
720
+
721
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W,
722
+ __mmask8 __U,
723
+ __m128h __A,
724
+ __m128h __B) {
725
+ return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
726
+ (__v8hf)__W, (__mmask8)__U,
727
+ _MM_FROUND_CUR_DIRECTION);
728
+ }
729
+
730
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
731
+ __m128h __A,
732
+ __m128h __B) {
733
+ return (__m128h)__builtin_ia32_minsh_round_mask(
734
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
735
+ _MM_FROUND_CUR_DIRECTION);
736
+ }
737
+
738
+ #define _mm_min_round_sh(A, B, R) \
739
+ ((__m128h)__builtin_ia32_minsh_round_mask( \
740
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
741
+ (__mmask8)-1, (int)(R)))
742
+
743
+ #define _mm_mask_min_round_sh(W, U, A, B, R) \
744
+ ((__m128h)__builtin_ia32_minsh_round_mask( \
745
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
746
+ (__mmask8)(U), (int)(R)))
747
+
748
+ #define _mm_maskz_min_round_sh(U, A, B, R) \
749
+ ((__m128h)__builtin_ia32_minsh_round_mask( \
750
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
751
+ (__mmask8)(U), (int)(R)))
752
+
753
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
754
+ __m128h __B) {
755
+ return (__m128h)__builtin_ia32_maxsh_round_mask(
756
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
757
+ _MM_FROUND_CUR_DIRECTION);
758
+ }
759
+
760
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W,
761
+ __mmask8 __U,
762
+ __m128h __A,
763
+ __m128h __B) {
764
+ return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
765
+ (__v8hf)__W, (__mmask8)__U,
766
+ _MM_FROUND_CUR_DIRECTION);
767
+ }
768
+
769
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U,
770
+ __m128h __A,
771
+ __m128h __B) {
772
+ return (__m128h)__builtin_ia32_maxsh_round_mask(
773
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
774
+ _MM_FROUND_CUR_DIRECTION);
775
+ }
776
+
777
+ #define _mm_max_round_sh(A, B, R) \
778
+ ((__m128h)__builtin_ia32_maxsh_round_mask( \
779
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
780
+ (__mmask8)-1, (int)(R)))
781
+
782
+ #define _mm_mask_max_round_sh(W, U, A, B, R) \
783
+ ((__m128h)__builtin_ia32_maxsh_round_mask( \
784
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
785
+ (__mmask8)(U), (int)(R)))
786
+
787
+ #define _mm_maskz_max_round_sh(U, A, B, R) \
788
+ ((__m128h)__builtin_ia32_maxsh_round_mask( \
789
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
790
+ (__mmask8)(U), (int)(R)))
791
+
792
+ #define _mm512_cmp_round_ph_mask(A, B, P, R) \
793
+ ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
794
+ (__v32hf)(__m512h)(B), (int)(P), \
795
+ (__mmask32)-1, (int)(R)))
796
+
797
+ #define _mm512_mask_cmp_round_ph_mask(U, A, B, P, R) \
798
+ ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
799
+ (__v32hf)(__m512h)(B), (int)(P), \
800
+ (__mmask32)(U), (int)(R)))
801
+
802
+ #define _mm512_cmp_ph_mask(A, B, P) \
803
+ _mm512_cmp_round_ph_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
804
+
805
+ #define _mm512_mask_cmp_ph_mask(U, A, B, P) \
806
+ _mm512_mask_cmp_round_ph_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
807
+
808
+ #define _mm_cmp_round_sh_mask(X, Y, P, R) \
809
+ ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \
810
+ (__v8hf)(__m128h)(Y), (int)(P), \
811
+ (__mmask8)-1, (int)(R)))
812
+
813
+ #define _mm_mask_cmp_round_sh_mask(M, X, Y, P, R) \
814
+ ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \
815
+ (__v8hf)(__m128h)(Y), (int)(P), \
816
+ (__mmask8)(M), (int)(R)))
817
+
818
+ #define _mm_cmp_sh_mask(X, Y, P) \
819
+ ((__mmask8)__builtin_ia32_cmpsh_mask( \
820
+ (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)-1, \
821
+ _MM_FROUND_CUR_DIRECTION))
822
+
823
+ #define _mm_mask_cmp_sh_mask(M, X, Y, P) \
824
+ ((__mmask8)__builtin_ia32_cmpsh_mask( \
825
+ (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)(M), \
826
+ _MM_FROUND_CUR_DIRECTION))
827
+ // loads with vmovsh:
828
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp) {
829
+ struct __mm_load_sh_struct {
830
+ _Float16 __u;
831
+ } __attribute__((__packed__, __may_alias__));
832
+ _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u;
833
+ return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
834
+ }
835
+
836
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
837
+ _mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A) {
838
+ __m128h src = (__v8hf)__builtin_shufflevector(
839
+ (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
840
+
841
+ return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1);
842
+ }
843
+
844
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
845
+ _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
846
+ return (__m128h)__builtin_ia32_loadsh128_mask(
847
+ (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
848
+ }
849
+
850
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
851
+ _mm512_load_ph(void const *__p) {
852
+ return *(const __m512h *)__p;
853
+ }
854
+
855
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256
856
+ _mm256_load_ph(void const *__p) {
857
+ return *(const __m256h *)__p;
858
+ }
859
+
860
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_ph(void const *__p) {
861
+ return *(const __m128h *)__p;
862
+ }
863
+
864
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
865
+ _mm512_loadu_ph(void const *__p) {
866
+ struct __loadu_ph {
867
+ __m512h_u __v;
868
+ } __attribute__((__packed__, __may_alias__));
869
+ return ((const struct __loadu_ph *)__p)->__v;
870
+ }
871
+
872
+ static __inline__ __m256h __DEFAULT_FN_ATTRS256
873
+ _mm256_loadu_ph(void const *__p) {
874
+ struct __loadu_ph {
875
+ __m256h_u __v;
876
+ } __attribute__((__packed__, __may_alias__));
877
+ return ((const struct __loadu_ph *)__p)->__v;
878
+ }
879
+
880
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_loadu_ph(void const *__p) {
881
+ struct __loadu_ph {
882
+ __m128h_u __v;
883
+ } __attribute__((__packed__, __may_alias__));
884
+ return ((const struct __loadu_ph *)__p)->__v;
885
+ }
886
+
887
+ // stores with vmovsh:
888
+ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sh(void *__dp,
889
+ __m128h __a) {
890
+ struct __mm_store_sh_struct {
891
+ _Float16 __u;
892
+ } __attribute__((__packed__, __may_alias__));
893
+ ((struct __mm_store_sh_struct *)__dp)->__u = __a[0];
894
+ }
895
+
896
+ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sh(void *__W,
897
+ __mmask8 __U,
898
+ __m128h __A) {
899
+ __builtin_ia32_storesh128_mask((__v8hf *)__W, __A, __U & 1);
900
+ }
901
+
902
+ static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_ph(void *__P,
903
+ __m512h __A) {
904
+ *(__m512h *)__P = __A;
905
+ }
906
+
907
+ static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_ph(void *__P,
908
+ __m256h __A) {
909
+ *(__m256h *)__P = __A;
910
+ }
911
+
912
+ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_ph(void *__P,
913
+ __m128h __A) {
914
+ *(__m128h *)__P = __A;
915
+ }
916
+
917
+ static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_ph(void *__P,
918
+ __m512h __A) {
919
+ struct __storeu_ph {
920
+ __m512h_u __v;
921
+ } __attribute__((__packed__, __may_alias__));
922
+ ((struct __storeu_ph *)__P)->__v = __A;
923
+ }
924
+
925
+ static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_ph(void *__P,
926
+ __m256h __A) {
927
+ struct __storeu_ph {
928
+ __m256h_u __v;
929
+ } __attribute__((__packed__, __may_alias__));
930
+ ((struct __storeu_ph *)__P)->__v = __A;
931
+ }
932
+
933
+ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P,
934
+ __m128h __A) {
935
+ struct __storeu_ph {
936
+ __m128h_u __v;
937
+ } __attribute__((__packed__, __may_alias__));
938
+ ((struct __storeu_ph *)__P)->__v = __A;
939
+ }
940
+
941
+ // moves with vmovsh:
942
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a,
943
+ __m128h __b) {
944
+ __a[0] = __b[0];
945
+ return __a;
946
+ }
947
+
948
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W,
949
+ __mmask8 __U,
950
+ __m128h __A,
951
+ __m128h __B) {
952
+ return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
953
+ }
954
+
955
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U,
956
+ __m128h __A,
957
+ __m128h __B) {
958
+ return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
959
+ _mm_setzero_ph());
960
+ }
961
+
962
+ // vmovw:
963
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsi16_si128(short __a) {
964
+ return (__m128i)(__v8hi){__a, 0, 0, 0, 0, 0, 0, 0};
965
+ }
966
+
967
+ static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
968
+ __v8hi __b = (__v8hi)__a;
969
+ return __b[0];
970
+ }
971
+
972
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) {
973
+ return (__m512h)__builtin_ia32_rcpph512_mask(
974
+ (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
975
+ }
976
+
977
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
978
+ _mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
979
+ return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W,
980
+ (__mmask32)__U);
981
+ }
982
+
983
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
984
+ _mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) {
985
+ return (__m512h)__builtin_ia32_rcpph512_mask(
986
+ (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
987
+ }
988
+
989
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) {
990
+ return (__m512h)__builtin_ia32_rsqrtph512_mask(
991
+ (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
992
+ }
993
+
994
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
995
+ _mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
996
+ return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W,
997
+ (__mmask32)__U);
998
+ }
999
+
1000
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1001
+ _mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) {
1002
+ return (__m512h)__builtin_ia32_rsqrtph512_mask(
1003
+ (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
1004
+ }
1005
+
1006
+ #define _mm512_getmant_ph(A, B, C) \
1007
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
1008
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1009
+ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \
1010
+ _MM_FROUND_CUR_DIRECTION))
1011
+
1012
+ #define _mm512_mask_getmant_ph(W, U, A, B, C) \
1013
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
1014
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
1015
+ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1016
+
1017
+ #define _mm512_maskz_getmant_ph(U, A, B, C) \
1018
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
1019
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1020
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1021
+
1022
+ #define _mm512_getmant_round_ph(A, B, C, R) \
1023
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
1024
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1025
+ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
1026
+
1027
+ #define _mm512_mask_getmant_round_ph(W, U, A, B, C, R) \
1028
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
1029
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
1030
+ (__mmask32)(U), (int)(R)))
1031
+
1032
+ #define _mm512_maskz_getmant_round_ph(U, A, B, C, R) \
1033
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
1034
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1035
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1036
+
1037
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) {
1038
+ return (__m512h)__builtin_ia32_getexpph512_mask(
1039
+ (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1040
+ _MM_FROUND_CUR_DIRECTION);
1041
+ }
1042
+
1043
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1044
+ _mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
1045
+ return (__m512h)__builtin_ia32_getexpph512_mask(
1046
+ (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1047
+ }
1048
+
1049
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1050
+ _mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) {
1051
+ return (__m512h)__builtin_ia32_getexpph512_mask(
1052
+ (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1053
+ _MM_FROUND_CUR_DIRECTION);
1054
+ }
1055
+
1056
+ #define _mm512_getexp_round_ph(A, R) \
1057
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
1058
+ (__v32hf)_mm512_undefined_ph(), \
1059
+ (__mmask32)-1, (int)(R)))
1060
+
1061
+ #define _mm512_mask_getexp_round_ph(W, U, A, R) \
1062
+ ((__m512h)__builtin_ia32_getexpph512_mask( \
1063
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
1064
+
1065
+ #define _mm512_maskz_getexp_round_ph(U, A, R) \
1066
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
1067
+ (__v32hf)_mm512_setzero_ph(), \
1068
+ (__mmask32)(U), (int)(R)))
1069
+
1070
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A,
1071
+ __m512h __B) {
1072
+ return (__m512h)__builtin_ia32_scalefph512_mask(
1073
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1074
+ _MM_FROUND_CUR_DIRECTION);
1075
+ }
1076
+
1077
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1078
+ _mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
1079
+ return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
1080
+ (__v32hf)__W, (__mmask32)__U,
1081
+ _MM_FROUND_CUR_DIRECTION);
1082
+ }
1083
+
1084
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1085
+ _mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) {
1086
+ return (__m512h)__builtin_ia32_scalefph512_mask(
1087
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1088
+ _MM_FROUND_CUR_DIRECTION);
1089
+ }
1090
+
1091
+ #define _mm512_scalef_round_ph(A, B, R) \
1092
+ ((__m512h)__builtin_ia32_scalefph512_mask( \
1093
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
1094
+ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
1095
+
1096
+ #define _mm512_mask_scalef_round_ph(W, U, A, B, R) \
1097
+ ((__m512h)__builtin_ia32_scalefph512_mask( \
1098
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W), \
1099
+ (__mmask32)(U), (int)(R)))
1100
+
1101
+ #define _mm512_maskz_scalef_round_ph(U, A, B, R) \
1102
+ ((__m512h)__builtin_ia32_scalefph512_mask( \
1103
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
1104
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1105
+
1106
+ #define _mm512_roundscale_ph(A, B) \
1107
+ ((__m512h)__builtin_ia32_rndscaleph_mask( \
1108
+ (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1, \
1109
+ _MM_FROUND_CUR_DIRECTION))
1110
+
1111
+ #define _mm512_mask_roundscale_ph(A, B, C, imm) \
1112
+ ((__m512h)__builtin_ia32_rndscaleph_mask( \
1113
+ (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A), \
1114
+ (__mmask32)(B), _MM_FROUND_CUR_DIRECTION))
1115
+
1116
+ #define _mm512_maskz_roundscale_ph(A, B, imm) \
1117
+ ((__m512h)__builtin_ia32_rndscaleph_mask( \
1118
+ (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
1119
+ (__mmask32)(A), _MM_FROUND_CUR_DIRECTION))
1120
+
1121
+ #define _mm512_mask_roundscale_round_ph(A, B, C, imm, R) \
1122
+ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm), \
1123
+ (__v32hf)(__m512h)(A), \
1124
+ (__mmask32)(B), (int)(R)))
1125
+
1126
+ #define _mm512_maskz_roundscale_round_ph(A, B, imm, R) \
1127
+ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm), \
1128
+ (__v32hf)_mm512_setzero_ph(), \
1129
+ (__mmask32)(A), (int)(R)))
1130
+
1131
+ #define _mm512_roundscale_round_ph(A, imm, R) \
1132
+ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm), \
1133
+ (__v32hf)_mm512_undefined_ph(), \
1134
+ (__mmask32)-1, (int)(R)))
1135
+
1136
+ #define _mm512_reduce_ph(A, imm) \
1137
+ ((__m512h)__builtin_ia32_reduceph512_mask( \
1138
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(), \
1139
+ (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))
1140
+
1141
+ #define _mm512_mask_reduce_ph(W, U, A, imm) \
1142
+ ((__m512h)__builtin_ia32_reduceph512_mask( \
1143
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W), \
1144
+ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1145
+
1146
+ #define _mm512_maskz_reduce_ph(U, A, imm) \
1147
+ ((__m512h)__builtin_ia32_reduceph512_mask( \
1148
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
1149
+ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1150
+
1151
+ #define _mm512_mask_reduce_round_ph(W, U, A, imm, R) \
1152
+ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1153
+ (__v32hf)(__m512h)(W), \
1154
+ (__mmask32)(U), (int)(R)))
1155
+
1156
+ #define _mm512_maskz_reduce_round_ph(U, A, imm, R) \
1157
+ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1158
+ (__v32hf)_mm512_setzero_ph(), \
1159
+ (__mmask32)(U), (int)(R)))
1160
+
1161
+ #define _mm512_reduce_round_ph(A, imm, R) \
1162
+ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1163
+ (__v32hf)_mm512_undefined_ph(), \
1164
+ (__mmask32)-1, (int)(R)))
1165
+
1166
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A,
1167
+ __m128h __B) {
1168
+ return (__m128h)__builtin_ia32_rcpsh_mask(
1169
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1170
+ }
1171
+
1172
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W,
1173
+ __mmask8 __U,
1174
+ __m128h __A,
1175
+ __m128h __B) {
1176
+ return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B,
1177
+ (__v8hf)__W, (__mmask8)__U);
1178
+ }
1179
+
1180
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U,
1181
+ __m128h __A,
1182
+ __m128h __B) {
1183
+ return (__m128h)__builtin_ia32_rcpsh_mask(
1184
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1185
+ }
1186
+
1187
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A,
1188
+ __m128h __B) {
1189
+ return (__m128h)__builtin_ia32_rsqrtsh_mask(
1190
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1191
+ }
1192
+
1193
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h __W,
1194
+ __mmask8 __U,
1195
+ __m128h __A,
1196
+ __m128h __B) {
1197
+ return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B,
1198
+ (__v8hf)__W, (__mmask8)__U);
1199
+ }
1200
+
1201
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
1202
+ _mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1203
+ return (__m128h)__builtin_ia32_rsqrtsh_mask(
1204
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1205
+ }
1206
+
1207
+ #define _mm_getmant_round_sh(A, B, C, D, R) \
1208
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1209
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1210
+ (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R)))
1211
+
1212
+ #define _mm_getmant_sh(A, B, C, D) \
1213
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1214
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1215
+ (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
1216
+
1217
+ #define _mm_mask_getmant_sh(W, U, A, B, C, D) \
1218
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1219
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1220
+ (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
1221
+
1222
+ #define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R) \
1223
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1224
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1225
+ (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
1226
+
1227
+ #define _mm_maskz_getmant_sh(U, A, B, C, D) \
1228
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1229
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1230
+ (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
1231
+
1232
+ #define _mm_maskz_getmant_round_sh(U, A, B, C, D, R) \
1233
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1234
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1235
+ (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
1236
+
1237
+ #define _mm_getexp_round_sh(A, B, R) \
1238
+ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
1239
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1240
+ (__mmask8)-1, (int)(R)))
1241
+
1242
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A,
1243
+ __m128h __B) {
1244
+ return (__m128h)__builtin_ia32_getexpsh128_round_mask(
1245
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1246
+ _MM_FROUND_CUR_DIRECTION);
1247
+ }
1248
+
1249
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
1250
+ _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1251
+ return (__m128h)__builtin_ia32_getexpsh128_round_mask(
1252
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
1253
+ _MM_FROUND_CUR_DIRECTION);
1254
+ }
1255
+
1256
+ #define _mm_mask_getexp_round_sh(W, U, A, B, R) \
1257
+ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
1258
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1259
+ (__mmask8)(U), (int)(R)))
1260
+
1261
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
1262
+ _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1263
+ return (__m128h)__builtin_ia32_getexpsh128_round_mask(
1264
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1265
+ _MM_FROUND_CUR_DIRECTION);
1266
+ }
1267
+
1268
+ #define _mm_maskz_getexp_round_sh(U, A, B, R) \
1269
+ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
1270
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1271
+ (__mmask8)(U), (int)(R)))
1272
+
1273
+ #define _mm_scalef_round_sh(A, B, R) \
1274
+ ((__m128h)__builtin_ia32_scalefsh_round_mask( \
1275
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1276
+ (__mmask8)-1, (int)(R)))
1277
+
1278
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A,
1279
+ __m128h __B) {
1280
+ return (__m128h)__builtin_ia32_scalefsh_round_mask(
1281
+ (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1282
+ _MM_FROUND_CUR_DIRECTION);
1283
+ }
1284
+
1285
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
1286
+ _mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1287
+ return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
1288
+ (__v8hf)__W, (__mmask8)__U,
1289
+ _MM_FROUND_CUR_DIRECTION);
1290
+ }
1291
+
1292
+ #define _mm_mask_scalef_round_sh(W, U, A, B, R) \
1293
+ ((__m128h)__builtin_ia32_scalefsh_round_mask( \
1294
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1295
+ (__mmask8)(U), (int)(R)))
1296
+
1297
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
1298
+ _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1299
+ return (__m128h)__builtin_ia32_scalefsh_round_mask(
1300
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1301
+ _MM_FROUND_CUR_DIRECTION);
1302
+ }
1303
+
1304
+ #define _mm_maskz_scalef_round_sh(U, A, B, R) \
1305
+ ((__m128h)__builtin_ia32_scalefsh_round_mask( \
1306
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1307
+ (__mmask8)(U), (int)(R)))
1308
+
1309
+ #define _mm_roundscale_round_sh(A, B, imm, R) \
1310
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1311
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1312
+ (__mmask8)-1, (int)(imm), (int)(R)))
1313
+
1314
+ #define _mm_roundscale_sh(A, B, imm) \
1315
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1316
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1317
+ (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
1318
+
1319
+ #define _mm_mask_roundscale_sh(W, U, A, B, I) \
1320
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1321
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1322
+ (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
1323
+
1324
+ #define _mm_mask_roundscale_round_sh(W, U, A, B, I, R) \
1325
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1326
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1327
+ (__mmask8)(U), (int)(I), (int)(R)))
1328
+
1329
+ #define _mm_maskz_roundscale_sh(U, A, B, I) \
1330
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1331
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1332
+ (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
1333
+
1334
+ #define _mm_maskz_roundscale_round_sh(U, A, B, I, R) \
1335
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1336
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1337
+ (__mmask8)(U), (int)(I), (int)(R)))
1338
+
1339
+ #define _mm_reduce_sh(A, B, C) \
1340
+ ((__m128h)__builtin_ia32_reducesh_mask( \
1341
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1342
+ (__mmask8)-1, (int)(C), _MM_FROUND_CUR_DIRECTION))
1343
+
1344
+ #define _mm_mask_reduce_sh(W, U, A, B, C) \
1345
+ ((__m128h)__builtin_ia32_reducesh_mask( \
1346
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1347
+ (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))
1348
+
1349
+ #define _mm_maskz_reduce_sh(U, A, B, C) \
1350
+ ((__m128h)__builtin_ia32_reducesh_mask( \
1351
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1352
+ (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))
1353
+
1354
+ #define _mm_reduce_round_sh(A, B, C, R) \
1355
+ ((__m128h)__builtin_ia32_reducesh_mask( \
1356
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1357
+ (__mmask8)-1, (int)(C), (int)(R)))
1358
+
1359
+ #define _mm_mask_reduce_round_sh(W, U, A, B, C, R) \
1360
+ ((__m128h)__builtin_ia32_reducesh_mask( \
1361
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1362
+ (__mmask8)(U), (int)(C), (int)(R)))
1363
+
1364
+ #define _mm_maskz_reduce_round_sh(U, A, B, C, R) \
1365
+ ((__m128h)__builtin_ia32_reducesh_mask( \
1366
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1367
+ (__mmask8)(U), (int)(C), (int)(R)))
1368
+
1369
+ #define _mm512_sqrt_round_ph(A, R) \
1370
+ ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R)))
1371
+
1372
+ #define _mm512_mask_sqrt_round_ph(W, U, A, R) \
1373
+ ((__m512h)__builtin_ia32_selectph_512( \
1374
+ (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
1375
+ (__v32hf)(__m512h)(W)))
1376
+
1377
+ #define _mm512_maskz_sqrt_round_ph(U, A, R) \
1378
+ ((__m512h)__builtin_ia32_selectph_512( \
1379
+ (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
1380
+ (__v32hf)_mm512_setzero_ph()))
1381
+
1382
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
1383
+ return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
1384
+ _MM_FROUND_CUR_DIRECTION);
1385
+ }
1386
+
1387
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1388
+ _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
1389
+ return (__m512h)__builtin_ia32_selectph_512(
1390
+ (__mmask32)(__U),
1391
+ (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1392
+ (__v32hf)(__m512h)(__W));
1393
+ }
1394
+
1395
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1396
+ _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
1397
+ return (__m512h)__builtin_ia32_selectph_512(
1398
+ (__mmask32)(__U),
1399
+ (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1400
+ (__v32hf)_mm512_setzero_ph());
1401
+ }
1402
+
1403
+ #define _mm_sqrt_round_sh(A, B, R) \
1404
+ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
1405
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1406
+ (__mmask8)-1, (int)(R)))
1407
+
1408
+ #define _mm_mask_sqrt_round_sh(W, U, A, B, R) \
1409
+ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
1410
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1411
+ (__mmask8)(U), (int)(R)))
1412
+
1413
+ #define _mm_maskz_sqrt_round_sh(U, A, B, R) \
1414
+ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
1415
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1416
+ (__mmask8)(U), (int)(R)))
1417
+
1418
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A,
1419
+ __m128h __B) {
1420
+ return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1421
+ (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1422
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
1423
+ }
1424
+
1425
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W,
1426
+ __mmask32 __U,
1427
+ __m128h __A,
1428
+ __m128h __B) {
1429
+ return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1430
+ (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
1431
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
1432
+ }
1433
+
1434
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U,
1435
+ __m128h __A,
1436
+ __m128h __B) {
1437
+ return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1438
+ (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1439
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
1440
+ }
1441
+
1442
+ #define _mm512_mask_fpclass_ph_mask(U, A, imm) \
1443
+ ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
1444
+ (int)(imm), (__mmask32)(U)))
1445
+
1446
+ #define _mm512_fpclass_ph_mask(A, imm) \
1447
+ ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
1448
+ (int)(imm), (__mmask32)-1))
1449
+
1450
+ #define _mm_fpclass_sh_mask(A, imm) \
1451
+ ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \
1452
+ (__mmask8)-1))
1453
+
1454
+ #define _mm_mask_fpclass_sh_mask(U, A, imm) \
1455
+ ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \
1456
+ (__mmask8)(U)))
1457
+
1458
+ #define _mm512_cvt_roundpd_ph(A, R) \
1459
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
1460
+ (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
1461
+
1462
+ #define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \
1463
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \
1464
+ (__mmask8)(U), (int)(R)))
1465
+
1466
+ #define _mm512_maskz_cvt_roundpd_ph(U, A, R) \
1467
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
1468
+ (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
1469
+
1470
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
1471
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
1472
+ (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1473
+ _MM_FROUND_CUR_DIRECTION);
1474
+ }
1475
+
1476
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
1477
+ _mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
1478
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
1479
+ (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
1480
+ }
1481
+
1482
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
1483
+ _mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
1484
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
1485
+ (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1486
+ _MM_FROUND_CUR_DIRECTION);
1487
+ }
1488
+
1489
+ #define _mm512_cvt_roundph_pd(A, R) \
1490
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
1491
+ (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))
1492
+
1493
+ #define _mm512_mask_cvt_roundph_pd(W, U, A, R) \
1494
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \
1495
+ (__mmask8)(U), (int)(R)))
1496
+
1497
+ #define _mm512_maskz_cvt_roundph_pd(U, A, R) \
1498
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
1499
+ (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
1500
+
1501
+ static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
1502
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
1503
+ (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
1504
+ _MM_FROUND_CUR_DIRECTION);
1505
+ }
1506
+
1507
+ static __inline__ __m512d __DEFAULT_FN_ATTRS512
1508
+ _mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
1509
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
1510
+ (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
1511
+ }
1512
+
1513
+ static __inline__ __m512d __DEFAULT_FN_ATTRS512
1514
+ _mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
1515
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
1516
+ (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
1517
+ _MM_FROUND_CUR_DIRECTION);
1518
+ }
1519
+
1520
+ #define _mm_cvt_roundsh_ss(A, B, R) \
1521
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
1522
+ (__v4sf)_mm_undefined_ps(), \
1523
+ (__mmask8)(-1), (int)(R)))
1524
+
1525
+ #define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \
1526
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \
1527
+ (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))
1528
+
1529
+ #define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \
1530
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
1531
+ (__v4sf)_mm_setzero_ps(), \
1532
+ (__mmask8)(U), (int)(R)))
1533
+
1534
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
1535
+ __m128h __B) {
1536
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
1537
+ (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
1538
+ _MM_FROUND_CUR_DIRECTION);
1539
+ }
1540
+
1541
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
1542
+ __mmask8 __U,
1543
+ __m128 __A,
1544
+ __m128h __B) {
1545
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
1546
+ (__v4sf)__W, (__mmask8)__U,
1547
+ _MM_FROUND_CUR_DIRECTION);
1548
+ }
1549
+
1550
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
1551
+ __m128 __A,
1552
+ __m128h __B) {
1553
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
1554
+ (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
1555
+ _MM_FROUND_CUR_DIRECTION);
1556
+ }
1557
+
1558
+ #define _mm_cvt_roundss_sh(A, B, R) \
1559
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
1560
+ (__v8hf)_mm_undefined_ph(), \
1561
+ (__mmask8)(-1), (int)(R)))
1562
+
1563
+ #define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \
1564
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \
1565
+ (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
1566
+
1567
+ #define _mm_maskz_cvt_roundss_sh(U, A, B, R) \
1568
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
1569
+ (__v8hf)_mm_setzero_ph(), \
1570
+ (__mmask8)(U), (int)(R)))
1571
+
1572
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
1573
+ __m128 __B) {
1574
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
1575
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1576
+ _MM_FROUND_CUR_DIRECTION);
1577
+ }
1578
+
1579
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
1580
+ __mmask8 __U,
1581
+ __m128h __A,
1582
+ __m128 __B) {
1583
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
1584
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
1585
+ _MM_FROUND_CUR_DIRECTION);
1586
+ }
1587
+
1588
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
1589
+ __m128h __A,
1590
+ __m128 __B) {
1591
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
1592
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1593
+ _MM_FROUND_CUR_DIRECTION);
1594
+ }
1595
+
1596
+ #define _mm_cvt_roundsd_sh(A, B, R) \
1597
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
1598
+ (__v8hf)_mm_undefined_ph(), \
1599
+ (__mmask8)(-1), (int)(R)))
1600
+
1601
+ #define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \
1602
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \
1603
+ (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
1604
+
1605
+ #define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \
1606
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
1607
+ (__v8hf)_mm_setzero_ph(), \
1608
+ (__mmask8)(U), (int)(R)))
1609
+
1610
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
1611
+ __m128d __B) {
1612
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
1613
+ (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1614
+ _MM_FROUND_CUR_DIRECTION);
1615
+ }
1616
+
1617
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
1618
+ __mmask8 __U,
1619
+ __m128h __A,
1620
+ __m128d __B) {
1621
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
1622
+ (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
1623
+ _MM_FROUND_CUR_DIRECTION);
1624
+ }
1625
+
1626
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
1627
+ _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
1628
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
1629
+ (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1630
+ _MM_FROUND_CUR_DIRECTION);
1631
+ }
1632
+
1633
+ #define _mm_cvt_roundsh_sd(A, B, R) \
1634
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
1635
+ (__v2df)_mm_undefined_pd(), \
1636
+ (__mmask8)(-1), (int)(R)))
1637
+
1638
+ #define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \
1639
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \
1640
+ (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))
1641
+
1642
+ #define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \
1643
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
1644
+ (__v2df)_mm_setzero_pd(), \
1645
+ (__mmask8)(U), (int)(R)))
1646
+
1647
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
1648
+ __m128h __B) {
1649
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
1650
+ (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
1651
+ _MM_FROUND_CUR_DIRECTION);
1652
+ }
1653
+
1654
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
1655
+ __mmask8 __U,
1656
+ __m128d __A,
1657
+ __m128h __B) {
1658
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
1659
+ (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
1660
+ _MM_FROUND_CUR_DIRECTION);
1661
+ }
1662
+
1663
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128
1664
+ _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
1665
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
1666
+ (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
1667
+ _MM_FROUND_CUR_DIRECTION);
1668
+ }
1669
+
1670
+ #define _mm512_cvt_roundph_epi16(A, R) \
1671
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
1672
+ (__v32hi)_mm512_undefined_epi32(), \
1673
+ (__mmask32)(-1), (int)(R)))
1674
+
1675
+ #define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \
1676
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \
1677
+ (__mmask32)(U), (int)(R)))
1678
+
1679
+ #define _mm512_maskz_cvt_roundph_epi16(U, A, R) \
1680
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
1681
+ (__v32hi)_mm512_setzero_epi32(), \
1682
+ (__mmask32)(U), (int)(R)))
1683
+
1684
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1685
+ _mm512_cvtph_epi16(__m512h __A) {
1686
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
1687
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1688
+ _MM_FROUND_CUR_DIRECTION);
1689
+ }
1690
+
1691
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1692
+ _mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
1693
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
1694
+ (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1695
+ }
1696
+
1697
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1698
+ _mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
1699
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
1700
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1701
+ _MM_FROUND_CUR_DIRECTION);
1702
+ }
1703
+
1704
+ #define _mm512_cvtt_roundph_epi16(A, R) \
1705
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask( \
1706
+ (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \
1707
+ (int)(R)))
1708
+
1709
+ #define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \
1710
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \
1711
+ (__mmask32)(U), (int)(R)))
1712
+
1713
+ #define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \
1714
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \
1715
+ (__v32hi)_mm512_setzero_epi32(), \
1716
+ (__mmask32)(U), (int)(R)))
1717
+
1718
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1719
+ _mm512_cvttph_epi16(__m512h __A) {
1720
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
1721
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1722
+ _MM_FROUND_CUR_DIRECTION);
1723
+ }
1724
+
1725
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1726
+ _mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
1727
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
1728
+ (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1729
+ }
1730
+
1731
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1732
+ _mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
1733
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
1734
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1735
+ _MM_FROUND_CUR_DIRECTION);
1736
+ }
1737
+
1738
+ #define _mm512_cvt_roundepi16_ph(A, R) \
1739
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \
1740
+ (__v32hf)_mm512_undefined_ph(), \
1741
+ (__mmask32)(-1), (int)(R)))
1742
+
1743
+ #define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \
1744
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
1745
+ (__mmask32)(U), (int)(R)))
1746
+
1747
+ #define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \
1748
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \
1749
+ (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1750
+
1751
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1752
+ _mm512_cvtepi16_ph(__m512i __A) {
1753
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1754
+ (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1755
+ _MM_FROUND_CUR_DIRECTION);
1756
+ }
1757
+
1758
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1759
+ _mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1760
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1761
+ (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1762
+ }
1763
+
1764
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1765
+ _mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
1766
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1767
+ (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1768
+ _MM_FROUND_CUR_DIRECTION);
1769
+ }
1770
+
1771
+ #define _mm512_cvt_roundph_epu16(A, R) \
1772
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \
1773
+ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1774
+ (int)(R)))
1775
+
1776
+ #define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \
1777
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1778
+ (__mmask32)(U), (int)(R)))
1779
+
1780
+ #define _mm512_maskz_cvt_roundph_epu16(U, A, R) \
1781
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
1782
+ (__v32hu)_mm512_setzero_epi32(), \
1783
+ (__mmask32)(U), (int)(R)))
1784
+
1785
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1786
+ _mm512_cvtph_epu16(__m512h __A) {
1787
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1788
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1789
+ _MM_FROUND_CUR_DIRECTION);
1790
+ }
1791
+
1792
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1793
+ _mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1794
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1795
+ (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1796
+ }
1797
+
1798
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1799
+ _mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
1800
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1801
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1802
+ _MM_FROUND_CUR_DIRECTION);
1803
+ }
1804
+
1805
+ #define _mm512_cvtt_roundph_epu16(A, R) \
1806
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \
1807
+ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1808
+ (int)(R)))
1809
+
1810
+ #define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \
1811
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1812
+ (__mmask32)(U), (int)(R)))
1813
+
1814
+ #define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \
1815
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
1816
+ (__v32hu)_mm512_setzero_epi32(), \
1817
+ (__mmask32)(U), (int)(R)))
1818
+
1819
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1820
+ _mm512_cvttph_epu16(__m512h __A) {
1821
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1822
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1823
+ _MM_FROUND_CUR_DIRECTION);
1824
+ }
1825
+
1826
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1827
+ _mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1828
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1829
+ (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1830
+ }
1831
+
1832
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1833
+ _mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
1834
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1835
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1836
+ _MM_FROUND_CUR_DIRECTION);
1837
+ }
1838
+
1839
+ #define _mm512_cvt_roundepu16_ph(A, R) \
1840
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \
1841
+ (__v32hf)_mm512_undefined_ph(), \
1842
+ (__mmask32)(-1), (int)(R)))
1843
+
1844
+ #define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \
1845
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
1846
+ (__mmask32)(U), (int)(R)))
1847
+
1848
+ #define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \
1849
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \
1850
+ (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1851
+
1852
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1853
+ _mm512_cvtepu16_ph(__m512i __A) {
1854
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1855
+ (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1856
+ _MM_FROUND_CUR_DIRECTION);
1857
+ }
1858
+
1859
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1860
+ _mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1861
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1862
+ (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1863
+ }
1864
+
1865
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
1866
+ _mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
1867
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1868
+ (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1869
+ _MM_FROUND_CUR_DIRECTION);
1870
+ }
1871
+
1872
+ #define _mm512_cvt_roundph_epi32(A, R) \
1873
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \
1874
+ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
1875
+ (int)(R)))
1876
+
1877
+ #define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \
1878
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \
1879
+ (__mmask16)(U), (int)(R)))
1880
+
1881
+ #define _mm512_maskz_cvt_roundph_epi32(U, A, R) \
1882
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \
1883
+ (__v16si)_mm512_setzero_epi32(), \
1884
+ (__mmask16)(U), (int)(R)))
1885
+
1886
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1887
+ _mm512_cvtph_epi32(__m256h __A) {
1888
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1889
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
1890
+ _MM_FROUND_CUR_DIRECTION);
1891
+ }
1892
+
1893
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1894
+ _mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
1895
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1896
+ (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1897
+ }
1898
+
1899
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1900
+ _mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
1901
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1902
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
1903
+ _MM_FROUND_CUR_DIRECTION);
1904
+ }
1905
+
1906
+ #define _mm512_cvt_roundph_epu32(A, R) \
1907
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \
1908
+ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
1909
+ (int)(R)))
1910
+
1911
+ #define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \
1912
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \
1913
+ (__mmask16)(U), (int)(R)))
1914
+
1915
+ #define _mm512_maskz_cvt_roundph_epu32(U, A, R) \
1916
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \
1917
+ (__v16su)_mm512_setzero_epi32(), \
1918
+ (__mmask16)(U), (int)(R)))
1919
+
1920
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1921
+ _mm512_cvtph_epu32(__m256h __A) {
1922
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1923
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
1924
+ _MM_FROUND_CUR_DIRECTION);
1925
+ }
1926
+
1927
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1928
+ _mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
1929
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1930
+ (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1931
+ }
1932
+
1933
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
1934
+ _mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
1935
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1936
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
1937
+ _MM_FROUND_CUR_DIRECTION);
1938
+ }
1939
+
1940
+ #define _mm512_cvt_roundepi32_ph(A, R) \
1941
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \
1942
+ (__v16hf)_mm256_undefined_ph(), \
1943
+ (__mmask16)(-1), (int)(R)))
1944
+
1945
+ #define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \
1946
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \
1947
+ (__mmask16)(U), (int)(R)))
1948
+
1949
+ #define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \
1950
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \
1951
+ (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
1952
+
1953
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
1954
+ _mm512_cvtepi32_ph(__m512i __A) {
1955
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1956
+ (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1957
+ _MM_FROUND_CUR_DIRECTION);
1958
+ }
1959
+
1960
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
1961
+ _mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1962
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1963
+ (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1964
+ }
1965
+
1966
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
1967
+ _mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
1968
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1969
+ (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
1970
+ _MM_FROUND_CUR_DIRECTION);
1971
+ }
1972
+
1973
+ #define _mm512_cvt_roundepu32_ph(A, R) \
1974
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \
1975
+ (__v16hf)_mm256_undefined_ph(), \
1976
+ (__mmask16)(-1), (int)(R)))
1977
+
1978
+ #define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \
1979
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \
1980
+ (__mmask16)(U), (int)(R)))
1981
+
1982
+ #define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \
1983
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \
1984
+ (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
1985
+
1986
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
1987
+ _mm512_cvtepu32_ph(__m512i __A) {
1988
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
1989
+ (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1990
+ _MM_FROUND_CUR_DIRECTION);
1991
+ }
1992
+
1993
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
1994
+ _mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1995
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
1996
+ (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1997
+ }
1998
+
1999
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
2000
+ _mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
2001
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
2002
+ (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2003
+ _MM_FROUND_CUR_DIRECTION);
2004
+ }
2005
+
2006
+ #define _mm512_cvtt_roundph_epi32(A, R) \
2007
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \
2008
+ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
2009
+ (int)(R)))
2010
+
2011
+ #define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \
2012
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \
2013
+ (__mmask16)(U), (int)(R)))
2014
+
2015
+ #define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \
2016
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \
2017
+ (__v16si)_mm512_setzero_epi32(), \
2018
+ (__mmask16)(U), (int)(R)))
2019
+
2020
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2021
+ _mm512_cvttph_epi32(__m256h __A) {
2022
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2023
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
2024
+ _MM_FROUND_CUR_DIRECTION);
2025
+ }
2026
+
2027
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2028
+ _mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
2029
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2030
+ (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2031
+ }
2032
+
2033
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2034
+ _mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
2035
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2036
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
2037
+ _MM_FROUND_CUR_DIRECTION);
2038
+ }
2039
+
2040
+ #define _mm512_cvtt_roundph_epu32(A, R) \
2041
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
2042
+ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
2043
+ (int)(R)))
2044
+
2045
+ #define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \
2046
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \
2047
+ (__mmask16)(U), (int)(R)))
2048
+
2049
+ #define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \
2050
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
2051
+ (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \
2052
+ (int)(R)))
2053
+
2054
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2055
+ _mm512_cvttph_epu32(__m256h __A) {
2056
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2057
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
2058
+ _MM_FROUND_CUR_DIRECTION);
2059
+ }
2060
+
2061
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2062
+ _mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
2063
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2064
+ (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2065
+ }
2066
+
2067
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2068
+ _mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
2069
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2070
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
2071
+ _MM_FROUND_CUR_DIRECTION);
2072
+ }
2073
+
2074
+ #define _mm512_cvt_roundepi64_ph(A, R) \
2075
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
2076
+ (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
2077
+
2078
+ #define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \
2079
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \
2080
+ (__mmask8)(U), (int)(R)))
2081
+
2082
+ #define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \
2083
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
2084
+ (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
2085
+
2086
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
2087
+ _mm512_cvtepi64_ph(__m512i __A) {
2088
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2089
+ (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2090
+ _MM_FROUND_CUR_DIRECTION);
2091
+ }
2092
+
2093
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
2094
+ _mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2095
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2096
+ (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2097
+ }
2098
+
2099
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
2100
+ _mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
2101
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2102
+ (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2103
+ _MM_FROUND_CUR_DIRECTION);
2104
+ }
2105
+
2106
+ #define _mm512_cvt_roundph_epi64(A, R) \
2107
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \
2108
+ (__v8di)_mm512_undefined_epi32(), \
2109
+ (__mmask8)(-1), (int)(R)))
2110
+
2111
+ #define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \
2112
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \
2113
+ (__mmask8)(U), (int)(R)))
2114
+
2115
+ #define _mm512_maskz_cvt_roundph_epi64(U, A, R) \
2116
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \
2117
+ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2118
+
2119
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2120
+ _mm512_cvtph_epi64(__m128h __A) {
2121
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2122
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2123
+ _MM_FROUND_CUR_DIRECTION);
2124
+ }
2125
+
2126
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2127
+ _mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2128
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2129
+ (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2130
+ }
2131
+
2132
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2133
+ _mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
2134
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2135
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2136
+ _MM_FROUND_CUR_DIRECTION);
2137
+ }
2138
+
2139
+ #define _mm512_cvt_roundepu64_ph(A, R) \
2140
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
2141
+ (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
2142
+
2143
+ #define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \
2144
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \
2145
+ (__mmask8)(U), (int)(R)))
2146
+
2147
+ #define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \
2148
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
2149
+ (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
2150
+
2151
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
2152
+ _mm512_cvtepu64_ph(__m512i __A) {
2153
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2154
+ (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2155
+ _MM_FROUND_CUR_DIRECTION);
2156
+ }
2157
+
2158
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
2159
+ _mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2160
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2161
+ (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2162
+ }
2163
+
2164
+ static __inline__ __m128h __DEFAULT_FN_ATTRS512
2165
+ _mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
2166
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2167
+ (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2168
+ _MM_FROUND_CUR_DIRECTION);
2169
+ }
2170
+
2171
+ #define _mm512_cvt_roundph_epu64(A, R) \
2172
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
2173
+ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
2174
+ (int)(R)))
2175
+
2176
+ #define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \
2177
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
2178
+ (__mmask8)(U), (int)(R)))
2179
+
2180
+ #define _mm512_maskz_cvt_roundph_epu64(U, A, R) \
2181
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
2182
+ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2183
+
2184
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2185
+ _mm512_cvtph_epu64(__m128h __A) {
2186
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2187
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2188
+ _MM_FROUND_CUR_DIRECTION);
2189
+ }
2190
+
2191
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2192
+ _mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2193
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2194
+ (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2195
+ }
2196
+
2197
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2198
+ _mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
2199
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2200
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2201
+ _MM_FROUND_CUR_DIRECTION);
2202
+ }
2203
+
2204
+ #define _mm512_cvtt_roundph_epi64(A, R) \
2205
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
2206
+ (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \
2207
+ (int)(R)))
2208
+
2209
+ #define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \
2210
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \
2211
+ (__mmask8)(U), (int)(R)))
2212
+
2213
+ #define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \
2214
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
2215
+ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2216
+
2217
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2218
+ _mm512_cvttph_epi64(__m128h __A) {
2219
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2220
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2221
+ _MM_FROUND_CUR_DIRECTION);
2222
+ }
2223
+
2224
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2225
+ _mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2226
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2227
+ (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2228
+ }
2229
+
2230
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2231
+ _mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
2232
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2233
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2234
+ _MM_FROUND_CUR_DIRECTION);
2235
+ }
2236
+
2237
+ #define _mm512_cvtt_roundph_epu64(A, R) \
2238
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
2239
+ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
2240
+ (int)(R)))
2241
+
2242
+ #define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \
2243
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
2244
+ (__mmask8)(U), (int)(R)))
2245
+
2246
+ #define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \
2247
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
2248
+ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2249
+
2250
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2251
+ _mm512_cvttph_epu64(__m128h __A) {
2252
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2253
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2254
+ _MM_FROUND_CUR_DIRECTION);
2255
+ }
2256
+
2257
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2258
+ _mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2259
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2260
+ (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2261
+ }
2262
+
2263
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512
2264
+ _mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
2265
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2266
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2267
+ _MM_FROUND_CUR_DIRECTION);
2268
+ }
2269
+
2270
+ #define _mm_cvt_roundsh_i32(A, R) \
2271
+ ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))
2272
+
2273
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
2274
+ return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2275
+ }
2276
+
2277
+ #define _mm_cvt_roundsh_u32(A, R) \
2278
+ ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))
2279
+
2280
+ static __inline__ unsigned int __DEFAULT_FN_ATTRS128
2281
+ _mm_cvtsh_u32(__m128h __A) {
2282
+ return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
2283
+ _MM_FROUND_CUR_DIRECTION);
2284
+ }
2285
+
2286
+ #ifdef __x86_64__
2287
+ #define _mm_cvt_roundsh_i64(A, R) \
2288
+ ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))
2289
+
2290
+ static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
2291
+ return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
2292
+ _MM_FROUND_CUR_DIRECTION);
2293
+ }
2294
+
2295
+ #define _mm_cvt_roundsh_u64(A, R) \
2296
+ ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))
2297
+
2298
+ static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
2299
+ _mm_cvtsh_u64(__m128h __A) {
2300
+ return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
2301
+ (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2302
+ }
2303
+ #endif // __x86_64__
2304
+
2305
+ #define _mm_cvt_roundu32_sh(A, B, R) \
2306
+ ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))
2307
+
2308
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2309
+ _mm_cvtu32_sh(__m128h __A, unsigned int __B) {
2310
+ __A[0] = __B;
2311
+ return __A;
2312
+ }
2313
+
2314
+ #ifdef __x86_64__
2315
+ #define _mm_cvt_roundu64_sh(A, B, R) \
2316
+ ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \
2317
+ (int)(R)))
2318
+
2319
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2320
+ _mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
2321
+ __A[0] = __B;
2322
+ return __A;
2323
+ }
2324
+ #endif
2325
+
2326
+ #define _mm_cvt_roundi32_sh(A, B, R) \
2327
+ ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))
2328
+
2329
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
2330
+ int __B) {
2331
+ __A[0] = __B;
2332
+ return __A;
2333
+ }
2334
+
2335
+ #ifdef __x86_64__
2336
+ #define _mm_cvt_roundi64_sh(A, B, R) \
2337
+ ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))
2338
+
2339
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
2340
+ long long __B) {
2341
+ __A[0] = __B;
2342
+ return __A;
2343
+ }
2344
+ #endif
2345
+
2346
+ #define _mm_cvtt_roundsh_i32(A, R) \
2347
+ ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))
2348
+
2349
+ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
2350
+ return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
2351
+ _MM_FROUND_CUR_DIRECTION);
2352
+ }
2353
+
2354
+ #ifdef __x86_64__
2355
+ #define _mm_cvtt_roundsh_i64(A, R) \
2356
+ ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))
2357
+
2358
+ static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
2359
+ return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
2360
+ _MM_FROUND_CUR_DIRECTION);
2361
+ }
2362
+ #endif
2363
+
2364
+ #define _mm_cvtt_roundsh_u32(A, R) \
2365
+ ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))
2366
+
2367
+ static __inline__ unsigned int __DEFAULT_FN_ATTRS128
2368
+ _mm_cvttsh_u32(__m128h __A) {
2369
+ return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
2370
+ _MM_FROUND_CUR_DIRECTION);
2371
+ }
2372
+
2373
+ #ifdef __x86_64__
2374
+ #define _mm_cvtt_roundsh_u64(A, R) \
2375
+ ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))
2376
+
2377
+ static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
2378
+ _mm_cvttsh_u64(__m128h __A) {
2379
+ return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
2380
+ (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2381
+ }
2382
+ #endif
2383
+
2384
+ #define _mm512_cvtx_roundph_ps(A, R) \
2385
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \
2386
+ (__v16sf)_mm512_undefined_ps(), \
2387
+ (__mmask16)(-1), (int)(R)))
2388
+
2389
+ #define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \
2390
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \
2391
+ (__mmask16)(U), (int)(R)))
2392
+
2393
+ #define _mm512_maskz_cvtx_roundph_ps(U, A, R) \
2394
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask( \
2395
+ (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
2396
+
2397
+ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
2398
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2399
+ (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
2400
+ _MM_FROUND_CUR_DIRECTION);
2401
+ }
2402
+
2403
+ static __inline__ __m512 __DEFAULT_FN_ATTRS512
2404
+ _mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
2405
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2406
+ (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2407
+ }
2408
+
2409
+ static __inline__ __m512 __DEFAULT_FN_ATTRS512
2410
+ _mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
2411
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2412
+ (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
2413
+ _MM_FROUND_CUR_DIRECTION);
2414
+ }
2415
+
2416
+ #define _mm512_cvtx_roundps_ph(A, R) \
2417
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \
2418
+ (__v16hf)_mm256_undefined_ph(), \
2419
+ (__mmask16)(-1), (int)(R)))
2420
+
2421
+ #define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \
2422
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \
2423
+ (__mmask16)(U), (int)(R)))
2424
+
2425
+ #define _mm512_maskz_cvtx_roundps_ph(U, A, R) \
2426
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \
2427
+ (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
2428
+
2429
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
2430
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2431
+ (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
2432
+ _MM_FROUND_CUR_DIRECTION);
2433
+ }
2434
+
2435
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
2436
+ _mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
2437
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2438
+ (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2439
+ }
2440
+
2441
+ static __inline__ __m256h __DEFAULT_FN_ATTRS512
2442
+ _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
2443
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2444
+ (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2445
+ _MM_FROUND_CUR_DIRECTION);
2446
+ }
2447
+
2448
+ #define _mm512_fmadd_round_ph(A, B, C, R) \
2449
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2450
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2451
+ (__mmask32)-1, (int)(R)))
2452
+
2453
+ #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
2454
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2455
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2456
+ (__mmask32)(U), (int)(R)))
2457
+
2458
+ #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
2459
+ ((__m512h)__builtin_ia32_vfmaddph512_mask3( \
2460
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2461
+ (__mmask32)(U), (int)(R)))
2462
+
2463
+ #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
2464
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2465
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2466
+ (__mmask32)(U), (int)(R)))
2467
+
2468
+ #define _mm512_fmsub_round_ph(A, B, C, R) \
2469
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2470
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2471
+ (__mmask32)-1, (int)(R)))
2472
+
2473
+ #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
2474
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2475
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2476
+ (__mmask32)(U), (int)(R)))
2477
+
2478
+ #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
2479
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2480
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2481
+ (__mmask32)(U), (int)(R)))
2482
+
2483
+ #define _mm512_fnmadd_round_ph(A, B, C, R) \
2484
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2485
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2486
+ (__mmask32)-1, (int)(R)))
2487
+
2488
+ #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
2489
+ ((__m512h)__builtin_ia32_vfmaddph512_mask3( \
2490
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2491
+ (__mmask32)(U), (int)(R)))
2492
+
2493
+ #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
2494
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2495
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2496
+ (__mmask32)(U), (int)(R)))
2497
+
2498
+ #define _mm512_fnmsub_round_ph(A, B, C, R) \
2499
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2500
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2501
+ (__mmask32)-1, (int)(R)))
2502
+
2503
+ #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
2504
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2505
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2506
+ (__mmask32)(U), (int)(R)))
2507
+
2508
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A,
2509
+ __m512h __B,
2510
+ __m512h __C) {
2511
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2512
+ (__v32hf)__C, (__mmask32)-1,
2513
+ _MM_FROUND_CUR_DIRECTION);
2514
+ }
2515
+
2516
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2517
+ _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2518
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2519
+ (__v32hf)__C, (__mmask32)__U,
2520
+ _MM_FROUND_CUR_DIRECTION);
2521
+ }
2522
+
2523
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2524
+ _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2525
+ return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
2526
+ (__v32hf)__C, (__mmask32)__U,
2527
+ _MM_FROUND_CUR_DIRECTION);
2528
+ }
2529
+
2530
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2531
+ _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2532
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
2533
+ (__v32hf)__C, (__mmask32)__U,
2534
+ _MM_FROUND_CUR_DIRECTION);
2535
+ }
2536
+
2537
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A,
2538
+ __m512h __B,
2539
+ __m512h __C) {
2540
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2541
+ -(__v32hf)__C, (__mmask32)-1,
2542
+ _MM_FROUND_CUR_DIRECTION);
2543
+ }
2544
+
2545
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2546
+ _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2547
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2548
+ -(__v32hf)__C, (__mmask32)__U,
2549
+ _MM_FROUND_CUR_DIRECTION);
2550
+ }
2551
+
2552
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2553
+ _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2554
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz(
2555
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2556
+ _MM_FROUND_CUR_DIRECTION);
2557
+ }
2558
+
2559
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A,
2560
+ __m512h __B,
2561
+ __m512h __C) {
2562
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2563
+ (__v32hf)__C, (__mmask32)-1,
2564
+ _MM_FROUND_CUR_DIRECTION);
2565
+ }
2566
+
2567
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2568
+ _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2569
+ return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2570
+ (__v32hf)__C, (__mmask32)__U,
2571
+ _MM_FROUND_CUR_DIRECTION);
2572
+ }
2573
+
2574
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2575
+ _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2576
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
2577
+ (__v32hf)__C, (__mmask32)__U,
2578
+ _MM_FROUND_CUR_DIRECTION);
2579
+ }
2580
+
2581
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A,
2582
+ __m512h __B,
2583
+ __m512h __C) {
2584
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2585
+ -(__v32hf)__C, (__mmask32)-1,
2586
+ _MM_FROUND_CUR_DIRECTION);
2587
+ }
2588
+
2589
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2590
+ _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2591
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz(
2592
+ -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2593
+ _MM_FROUND_CUR_DIRECTION);
2594
+ }
2595
+
2596
+ #define _mm512_fmaddsub_round_ph(A, B, C, R) \
2597
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2598
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2599
+ (__mmask32)-1, (int)(R)))
2600
+
2601
+ #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
2602
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2603
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2604
+ (__mmask32)(U), (int)(R)))
2605
+
2606
+ #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
2607
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask3( \
2608
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2609
+ (__mmask32)(U), (int)(R)))
2610
+
2611
+ #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
2612
+ ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
2613
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2614
+ (__mmask32)(U), (int)(R)))
2615
+
2616
+ #define _mm512_fmsubadd_round_ph(A, B, C, R) \
2617
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2618
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2619
+ (__mmask32)-1, (int)(R)))
2620
+
2621
+ #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
2622
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2623
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2624
+ (__mmask32)(U), (int)(R)))
2625
+
2626
+ #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
2627
+ ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
2628
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2629
+ (__mmask32)(U), (int)(R)))
2630
+
2631
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2632
+ _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
2633
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2634
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
2635
+ _MM_FROUND_CUR_DIRECTION);
2636
+ }
2637
+
2638
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2639
+ _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2640
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2641
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2642
+ _MM_FROUND_CUR_DIRECTION);
2643
+ }
2644
+
2645
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2646
+ _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2647
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask3(
2648
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2649
+ _MM_FROUND_CUR_DIRECTION);
2650
+ }
2651
+
2652
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2653
+ _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2654
+ return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
2655
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2656
+ _MM_FROUND_CUR_DIRECTION);
2657
+ }
2658
+
2659
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2660
+ _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
2661
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2662
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
2663
+ _MM_FROUND_CUR_DIRECTION);
2664
+ }
2665
+
2666
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2667
+ _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2668
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2669
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2670
+ _MM_FROUND_CUR_DIRECTION);
2671
+ }
2672
+
2673
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2674
+ _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2675
+ return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
2676
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2677
+ _MM_FROUND_CUR_DIRECTION);
2678
+ }
2679
+
2680
+ #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
2681
+ ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
2682
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2683
+ (__mmask32)(U), (int)(R)))
2684
+
2685
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2686
+ _mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2687
+ return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
2688
+ (__v32hf)__C, (__mmask32)__U,
2689
+ _MM_FROUND_CUR_DIRECTION);
2690
+ }
2691
+
2692
+ #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
2693
+ ((__m512h)__builtin_ia32_vfmsubaddph512_mask3( \
2694
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2695
+ (__mmask32)(U), (int)(R)))
2696
+
2697
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2698
+ _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2699
+ return (__m512h)__builtin_ia32_vfmsubaddph512_mask3(
2700
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2701
+ _MM_FROUND_CUR_DIRECTION);
2702
+ }
2703
+
2704
+ #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
2705
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2706
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2707
+ (__mmask32)(U), (int)(R)))
2708
+
2709
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2710
+ _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2711
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2712
+ (__v32hf)__C, (__mmask32)__U,
2713
+ _MM_FROUND_CUR_DIRECTION);
2714
+ }
2715
+
2716
+ #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
2717
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2718
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2719
+ (__mmask32)(U), (int)(R)))
2720
+
2721
+ #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
2722
+ ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
2723
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2724
+ (__mmask32)(U), (int)(R)))
2725
+
2726
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2727
+ _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2728
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2729
+ -(__v32hf)__C, (__mmask32)__U,
2730
+ _MM_FROUND_CUR_DIRECTION);
2731
+ }
2732
+
2733
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
2734
+ _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2735
+ return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2736
+ (__v32hf)__C, (__mmask32)__U,
2737
+ _MM_FROUND_CUR_DIRECTION);
2738
+ }
2739
+
2740
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W,
2741
+ __m128h __A,
2742
+ __m128h __B) {
2743
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2744
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2745
+ }
2746
+
2747
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W,
2748
+ __mmask8 __U,
2749
+ __m128h __A,
2750
+ __m128h __B) {
2751
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2752
+ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2753
+ }
2754
+
2755
+ #define _mm_fmadd_round_sh(A, B, C, R) \
2756
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2757
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2758
+ (__mmask8)-1, (int)(R)))
2759
+
2760
+ #define _mm_mask_fmadd_round_sh(W, U, A, B, R) \
2761
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2762
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
2763
+ (__mmask8)(U), (int)(R)))
2764
+
2765
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2766
+ _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2767
+ return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
2768
+ (__mmask8)__U,
2769
+ _MM_FROUND_CUR_DIRECTION);
2770
+ }
2771
+
2772
+ #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
2773
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2774
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2775
+ (__mmask8)(U), (int)(R)))
2776
+
2777
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2778
+ _mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2779
+ return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
2780
+ (__mmask8)__U,
2781
+ _MM_FROUND_CUR_DIRECTION);
2782
+ }
2783
+
2784
+ #define _mm_mask3_fmadd_round_sh(W, X, Y, U, R) \
2785
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
2786
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2787
+ (__mmask8)(U), (int)(R)))
2788
+
2789
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W,
2790
+ __m128h __A,
2791
+ __m128h __B) {
2792
+ return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2793
+ -(__v8hf)__B, (__mmask8)-1,
2794
+ _MM_FROUND_CUR_DIRECTION);
2795
+ }
2796
+
2797
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W,
2798
+ __mmask8 __U,
2799
+ __m128h __A,
2800
+ __m128h __B) {
2801
+ return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2802
+ -(__v8hf)__B, (__mmask8)__U,
2803
+ _MM_FROUND_CUR_DIRECTION);
2804
+ }
2805
+
2806
+ #define _mm_fmsub_round_sh(A, B, C, R) \
2807
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2808
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2809
+ (__mmask8)-1, (int)(R)))
2810
+
2811
+ #define _mm_mask_fmsub_round_sh(W, U, A, B, R) \
2812
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2813
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
2814
+ (__mmask8)(U), (int)(R)))
2815
+
2816
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2817
+ _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2818
+ return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
2819
+ -(__v8hf)__C, (__mmask8)__U,
2820
+ _MM_FROUND_CUR_DIRECTION);
2821
+ }
2822
+
2823
+ #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) /* zero-masked scalar fmsub with explicit rounding: forwards A, B and negated C to the maskz fmadd builtin */ \
2824
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2825
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2826
+ (__mmask8)(U), (int)(R))) /* R is parenthesized like every other *_round_* macro so an expression argument is cast as a whole */
2827
+
2828
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2829
+ _mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2830
+ return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
2831
+ (__mmask8)__U,
2832
+ _MM_FROUND_CUR_DIRECTION);
2833
+ }
2834
+
2835
+ #define _mm_mask3_fmsub_round_sh(W, X, Y, U, R) \
2836
+ ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
2837
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2838
+ (__mmask8)(U), (int)(R)))
2839
+
2840
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W,
2841
+ __m128h __A,
2842
+ __m128h __B) {
2843
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2844
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2845
+ }
2846
+
2847
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2848
+ _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2849
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2850
+ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2851
+ }
2852
+
2853
+ #define _mm_fnmadd_round_sh(A, B, C, R) \
2854
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2855
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2856
+ (__mmask8)-1, (int)(R)))
2857
+
2858
+ #define _mm_mask_fnmadd_round_sh(W, U, A, B, R) \
2859
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2860
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
2861
+ (__mmask8)(U), (int)(R)))
2862
+
2863
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2864
+ _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2865
+ return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
2866
+ (__mmask8)__U,
2867
+ _MM_FROUND_CUR_DIRECTION);
2868
+ }
2869
+
2870
+ #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
2871
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2872
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2873
+ (__mmask8)(U), (int)(R)))
2874
+
2875
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2876
+ _mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2877
+ return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
2878
+ (__mmask8)__U,
2879
+ _MM_FROUND_CUR_DIRECTION);
2880
+ }
2881
+
2882
+ #define _mm_mask3_fnmadd_round_sh(W, X, Y, U, R) \
2883
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
2884
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2885
+ (__mmask8)(U), (int)(R)))
2886
+
2887
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W,
2888
+ __m128h __A,
2889
+ __m128h __B) {
2890
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2891
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2892
+ }
2893
+
2894
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2895
+ _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2896
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2897
+ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2898
+ }
2899
+
2900
+ #define _mm_fnmsub_round_sh(A, B, C, R) \
2901
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2902
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2903
+ (__mmask8)-1, (int)(R)))
2904
+
2905
+ #define _mm_mask_fnmsub_round_sh(W, U, A, B, R) \
2906
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2907
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
2908
+ (__mmask8)(U), (int)(R)))
2909
+
2910
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2911
+ _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2912
+ return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
2913
+ (__mmask8)__U,
2914
+ _MM_FROUND_CUR_DIRECTION);
2915
+ }
2916
+
2917
+ #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
2918
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2919
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2920
+ (__mmask8)(U), (int)(R)))
2921
+
2922
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2923
+ _mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2924
+ return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
2925
+ (__mmask8)__U,
2926
+ _MM_FROUND_CUR_DIRECTION);
2927
+ }
2928
+
2929
+ #define _mm_mask3_fnmsub_round_sh(W, X, Y, U, R) \
2930
+ ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
2931
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2932
+ (__mmask8)(U), (int)(R)))
2933
+
2934
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A,
2935
+ __m128h __B,
2936
+ __m128h __C) {
2937
+ return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2938
+ (__v4sf)__C, (__mmask8)-1,
2939
+ _MM_FROUND_CUR_DIRECTION);
2940
+ }
2941
+
2942
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2943
+ _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2944
+ return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
2945
+ (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2946
+ }
2947
+
2948
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2949
+ _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2950
+ return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2951
+ (__v4sf)__C, (__mmask8)__U,
2952
+ _MM_FROUND_CUR_DIRECTION);
2953
+ }
2954
+
2955
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2956
+ _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
2957
+ return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
2958
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
2959
+ }
2960
+
2961
+ #define _mm_fcmadd_round_sch(A, B, C, R) \
2962
+ ((__m128h)__builtin_ia32_vfcmaddcsh_mask( \
2963
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2964
+ (__mmask8)-1, (int)(R)))
2965
+
2966
+ #define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
2967
+ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
2968
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2969
+ (__mmask8)(U), (int)(R)))
2970
+
2971
+ #define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \
2972
+ ((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \
2973
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2974
+ (__mmask8)(U), (int)(R)))
2975
+
2976
+ #define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
2977
+ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
2978
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2979
+ (__mmask8)(U), (int)(R)))
2980
+
2981
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
2982
+ __m128h __B,
2983
+ __m128h __C) {
2984
+ return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2985
+ (__v4sf)__C, (__mmask8)-1,
2986
+ _MM_FROUND_CUR_DIRECTION);
2987
+ }
2988
+
2989
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2990
+ _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2991
+ return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
2992
+ (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2993
+ }
2994
+
2995
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
2996
+ _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2997
+ return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2998
+ (__v4sf)__C, (__mmask8)__U,
2999
+ _MM_FROUND_CUR_DIRECTION);
3000
+ }
3001
+
3002
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
3003
+ _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
3004
+ return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
3005
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
3006
+ }
3007
+
3008
+ #define _mm_fmadd_round_sch(A, B, C, R) \
3009
+ ((__m128h)__builtin_ia32_vfmaddcsh_mask( \
3010
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3011
+ (__mmask8)-1, (int)(R)))
3012
+
3013
+ #define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
3014
+ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
3015
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3016
+ (__mmask8)(U), (int)(R)))
3017
+
3018
+ #define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \
3019
+ ((__m128h)__builtin_ia32_vfmaddcsh_maskz( \
3020
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3021
+ (__mmask8)(U), (int)(R)))
3022
+
3023
+ #define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
3024
+ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
3025
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3026
+ (__mmask8)(U), (int)(R)))
3027
+
3028
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
3029
+ __m128h __B) {
3030
+ return (__m128h)__builtin_ia32_vfcmulcsh_mask(
3031
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3032
+ _MM_FROUND_CUR_DIRECTION);
3033
+ }
3034
+
3035
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
3036
+ _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
3037
+ return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3038
+ (__v4sf)__W, (__mmask8)__U,
3039
+ _MM_FROUND_CUR_DIRECTION);
3040
+ }
3041
+
3042
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
3043
+ _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3044
+ return (__m128h)__builtin_ia32_vfcmulcsh_mask(
3045
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3046
+ _MM_FROUND_CUR_DIRECTION);
3047
+ }
3048
+
3049
+ #define _mm_fcmul_round_sch(A, B, R) \
3050
+ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3051
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3052
+ (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
3053
+
3054
+ #define _mm_mask_fcmul_round_sch(W, U, A, B, R) \
3055
+ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3056
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
3057
+ (__mmask8)(U), (int)(R)))
3058
+
3059
+ #define _mm_maskz_fcmul_round_sch(U, A, B, R) \
3060
+ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3061
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3062
+ (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
3063
+
3064
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A,
3065
+ __m128h __B) {
3066
+ return (__m128h)__builtin_ia32_vfmulcsh_mask(
3067
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3068
+ _MM_FROUND_CUR_DIRECTION);
3069
+ }
3070
+
3071
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_sch(__m128h __W,
3072
+ __mmask8 __U,
3073
+ __m128h __A,
3074
+ __m128h __B) {
3075
+ return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3076
+ (__v4sf)__W, (__mmask8)__U,
3077
+ _MM_FROUND_CUR_DIRECTION);
3078
+ }
3079
+
3080
+ static __inline__ __m128h __DEFAULT_FN_ATTRS128
3081
+ _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3082
+ return (__m128h)__builtin_ia32_vfmulcsh_mask(
3083
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3084
+ _MM_FROUND_CUR_DIRECTION);
3085
+ }
3086
+
3087
+ #define _mm_fmul_round_sch(A, B, R) \
3088
+ ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3089
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3090
+ (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
3091
+
3092
+ #define _mm_mask_fmul_round_sch(W, U, A, B, R) \
3093
+ ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3094
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
3095
+ (__mmask8)(U), (int)(R)))
3096
+
3097
+ #define _mm_maskz_fmul_round_sch(U, A, B, R) \
3098
+ ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3099
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3100
+ (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
3101
+
3102
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A,
3103
+ __m512h __B) {
3104
+ return (__m512h)__builtin_ia32_vfcmulcph512_mask(
3105
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3106
+ _MM_FROUND_CUR_DIRECTION);
3107
+ }
3108
+
3109
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3110
+ _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3111
+ return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3112
+ (__v16sf)__W, (__mmask16)__U,
3113
+ _MM_FROUND_CUR_DIRECTION);
3114
+ }
3115
+
3116
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3117
+ _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3118
+ return (__m512h)__builtin_ia32_vfcmulcph512_mask(
3119
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3120
+ _MM_FROUND_CUR_DIRECTION);
3121
+ }
3122
+
3123
+ #define _mm512_fcmul_round_pch(A, B, R) \
3124
+ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3125
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3126
+ (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
3127
+
3128
+ #define _mm512_mask_fcmul_round_pch(W, U, A, B, R) \
3129
+ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3130
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
3131
+ (__mmask16)(U), (int)(R)))
3132
+
3133
+ #define _mm512_maskz_fcmul_round_pch(U, A, B, R) \
3134
+ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3135
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3136
+ (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
3137
+
3138
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A,
3139
+ __m512h __B) {
3140
+ return (__m512h)__builtin_ia32_vfmulcph512_mask(
3141
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3142
+ _MM_FROUND_CUR_DIRECTION);
3143
+ }
3144
+
3145
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3146
+ _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3147
+ return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3148
+ (__v16sf)__W, (__mmask16)__U,
3149
+ _MM_FROUND_CUR_DIRECTION);
3150
+ }
3151
+
3152
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3153
+ _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3154
+ return (__m512h)__builtin_ia32_vfmulcph512_mask(
3155
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3156
+ _MM_FROUND_CUR_DIRECTION);
3157
+ }
3158
+
3159
+ #define _mm512_fmul_round_pch(A, B, R) \
3160
+ ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3161
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3162
+ (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
3163
+
3164
+ #define _mm512_mask_fmul_round_pch(W, U, A, B, R) \
3165
+ ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3166
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
3167
+ (__mmask16)(U), (int)(R)))
3168
+
3169
+ #define _mm512_maskz_fmul_round_pch(U, A, B, R) \
3170
+ ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3171
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3172
+ (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
3173
+
3174
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
3175
+ __m512h __B,
3176
+ __m512h __C) {
3177
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
3178
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
3179
+ _MM_FROUND_CUR_DIRECTION);
3180
+ }
3181
+
3182
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3183
+ _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3184
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
3185
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3186
+ _MM_FROUND_CUR_DIRECTION);
3187
+ }
3188
+
3189
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3190
+ _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3191
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
3192
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3193
+ _MM_FROUND_CUR_DIRECTION);
3194
+ }
3195
+
3196
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3197
+ _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3198
+ return (__m512h)__builtin_ia32_vfcmaddcph512_maskz(
3199
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3200
+ _MM_FROUND_CUR_DIRECTION);
3201
+ }
3202
+
3203
+ #define _mm512_fcmadd_round_pch(A, B, C, R) \
3204
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
3205
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3206
+ (__mmask16)-1, (int)(R)))
3207
+
3208
+ #define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \
3209
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
3210
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3211
+ (__mmask16)(U), (int)(R)))
3212
+
3213
+ #define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \
3214
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
3215
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3216
+ (__mmask16)(U), (int)(R)))
3217
+
3218
+ #define _mm512_maskz_fcmadd_round_pch(U, A, B, C, R) \
3219
+ ((__m512h)__builtin_ia32_vfcmaddcph512_maskz( \
3220
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3221
+ (__mmask16)(U), (int)(R)))
3222
+
3223
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
3224
+ __m512h __B,
3225
+ __m512h __C) {
3226
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
3227
+ (__v16sf)__C, (__mmask16)-1,
3228
+ _MM_FROUND_CUR_DIRECTION);
3229
+ }
3230
+
3231
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3232
+ _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3233
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
3234
+ (__v16sf)__C, (__mmask16)__U,
3235
+ _MM_FROUND_CUR_DIRECTION);
3236
+ }
3237
+
3238
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3239
+ _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3240
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask3(
3241
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3242
+ _MM_FROUND_CUR_DIRECTION);
3243
+ }
3244
+
3245
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3246
+ _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3247
+ return (__m512h)__builtin_ia32_vfmaddcph512_maskz(
3248
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3249
+ _MM_FROUND_CUR_DIRECTION);
3250
+ }
3251
+
3252
+ #define _mm512_fmadd_round_pch(A, B, C, R) \
3253
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
3254
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3255
+ (__mmask16)-1, (int)(R)))
3256
+
3257
+ #define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \
3258
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
3259
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3260
+ (__mmask16)(U), (int)(R)))
3261
+
3262
+ #define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \
3263
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
3264
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3265
+ (__mmask16)(U), (int)(R)))
3266
+
3267
+ #define _mm512_maskz_fmadd_round_pch(U, A, B, C, R) \
3268
+ ((__m512h)__builtin_ia32_vfmaddcph512_maskz( \
3269
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3270
+ (__mmask16)(U), (int)(R)))
3271
+
3272
+ static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3273
+ _mm512_reduce_add_ph(__m512h __W) { // Collapses the vector __W into a single _Float16 via the fadd reduction builtin.
3274
+ return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W); // -0.0f16 is the builtin's first operand; presumably the start value of the sum -- confirm against the builtin's definition.
3275
+ }
3276
+
3277
+ static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3278
+ _mm512_reduce_mul_ph(__m512h __W) {
3279
+ return __builtin_ia32_reduce_fmul_ph512(1.0f16, __W);
3280
+ }
3281
+
3282
+ static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3283
+ _mm512_reduce_max_ph(__m512h __V) {
3284
+ return __builtin_ia32_reduce_fmax_ph512(__V);
3285
+ }
3286
+
3287
+ static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3288
+ _mm512_reduce_min_ph(__m512h __V) {
3289
+ return __builtin_ia32_reduce_fmin_ph512(__V);
3290
+ }
3291
+
3292
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3293
+ _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { // Combines __A and __W under mask __U through the select builtin.
3294
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W, // Note the operand order: __W is handed to the builtin before __A.
3295
+ (__v32hf)__A);
3296
+ }
3297
+
3298
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3299
+ _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
3300
+ return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
3301
+ (__v32hi)__B);
3302
+ }
3303
+
3304
+ static __inline__ __m512h __DEFAULT_FN_ATTRS512
3305
+ _mm512_permutexvar_ph(__m512i __A, __m512h __B) { // Forwards to the 16-bit-element permute builtin and casts the result back to __m512h.
3306
+ return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); // Arguments are swapped for the builtin (__B first, then __A); both are reinterpreted as __v32hi.
3307
+ }
3308
+
3309
+ // The intrinsics below are aliases for f*mul_*ch.
3310
+ #define _mm512_mul_pch(A, B) _mm512_fmul_pch(A, B)
3311
+ #define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch(W, U, A, B)
3312
+ #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch(U, A, B)
3313
+ #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch(A, B, R)
3314
+ #define _mm512_mask_mul_round_pch(W, U, A, B, R) \
3315
+ _mm512_mask_fmul_round_pch(W, U, A, B, R)
3316
+ #define _mm512_maskz_mul_round_pch(U, A, B, R) \
3317
+ _mm512_maskz_fmul_round_pch(U, A, B, R)
3318
+
3319
+ #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch(A, B)
3320
+ #define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch(W, U, A, B)
3321
+ #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch(U, A, B)
3322
+ #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch(A, B, R)
3323
+ #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
3324
+ _mm512_mask_fcmul_round_pch(W, U, A, B, R)
3325
+ #define _mm512_maskz_cmul_round_pch(U, A, B, R) \
3326
+ _mm512_maskz_fcmul_round_pch(U, A, B, R)
3327
+
3328
+ #define _mm_mul_sch(A, B) _mm_fmul_sch(A, B)
3329
+ #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch(W, U, A, B)
3330
+ #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch(U, A, B)
3331
+ #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch(A, B, R)
3332
+ #define _mm_mask_mul_round_sch(W, U, A, B, R) \
3333
+ _mm_mask_fmul_round_sch(W, U, A, B, R)
3334
+ #define _mm_maskz_mul_round_sch(U, A, B, R) _mm_maskz_fmul_round_sch(U, A, B, R)
3335
+
3336
+ #define _mm_cmul_sch(A, B) _mm_fcmul_sch(A, B)
3337
+ #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch(W, U, A, B)
3338
+ #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch(U, A, B)
3339
+ #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch(A, B, R)
3340
+ #define _mm_mask_cmul_round_sch(W, U, A, B, R) \
3341
+ _mm_mask_fcmul_round_sch(W, U, A, B, R)
3342
+ #define _mm_maskz_cmul_round_sch(U, A, B, R) \
3343
+ _mm_maskz_fcmul_round_sch(U, A, B, R)
3344
+
3345
+ #undef __DEFAULT_FN_ATTRS128
3346
+ #undef __DEFAULT_FN_ATTRS256
3347
+ #undef __DEFAULT_FN_ATTRS512
3348
+
3349
+ #endif