@zigc/lib 0.17.0-dev.135 → 0.17.0-dev.215

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (673) hide show
  1. package/c/stdlib.zig +32 -22
  2. package/compiler/aro/aro/Compilation.zig +0 -3
  3. package/compiler/translate-c/ast.zig +5 -2
  4. package/compiler_rt/arm.zig +3 -3
  5. package/compiler_rt/trunctfhf2.zig +3 -0
  6. package/compiler_rt.zig +1 -22
  7. package/docs/wasm/markdown/Parser.zig +5 -9
  8. package/include/__clang_spirv_builtins.h +12 -12
  9. package/include/__float_float.h +176 -0
  10. package/include/__float_header_macro.h +12 -0
  11. package/include/__float_infinity_nan.h +20 -0
  12. package/include/amo.h +131 -0
  13. package/include/amxavx512intrin.h +215 -1
  14. package/include/amxintrin.h +0 -2
  15. package/include/arm_acle.h +37 -27
  16. package/include/arm_neon.h +218 -82
  17. package/include/arm_sme.h +8 -8
  18. package/include/arm_sve.h +4162 -3782
  19. package/include/avx10_2_512bf16intrin.h +19 -12
  20. package/include/avx10_2_512convertintrin.h +1 -1
  21. package/include/avx10_2_512niintrin.h +31 -31
  22. package/include/avx10_2_512satcvtdsintrin.h +1 -1
  23. package/include/avx10_2bf16intrin.h +54 -45
  24. package/include/avx10_2convertintrin.h +2 -2
  25. package/include/avx10_2copyintrin.h +1 -1
  26. package/include/avx10_2niintrin.h +14 -14
  27. package/include/avx10_2satcvtdsintrin.h +2 -2
  28. package/include/avx2intrin.h +275 -377
  29. package/include/avx512bf16intrin.h +25 -16
  30. package/include/avx512bitalgintrin.h +19 -30
  31. package/include/avx512bwintrin.h +386 -505
  32. package/include/avx512cdintrin.h +42 -55
  33. package/include/avx512dqintrin.h +132 -161
  34. package/include/avx512fintrin.h +1015 -1424
  35. package/include/avx512fp16intrin.h +112 -110
  36. package/include/avx512ifmaintrin.h +32 -34
  37. package/include/avx512ifmavlintrin.h +73 -46
  38. package/include/avx512vbmi2intrin.h +43 -32
  39. package/include/avx512vbmiintrin.h +19 -27
  40. package/include/avx512vbmivlintrin.h +35 -49
  41. package/include/avx512vlbf16intrin.h +32 -22
  42. package/include/avx512vlbitalgintrin.h +37 -53
  43. package/include/avx512vlbwintrin.h +470 -573
  44. package/include/avx512vlcdintrin.h +74 -102
  45. package/include/avx512vldqintrin.h +110 -127
  46. package/include/avx512vlfp16intrin.h +130 -111
  47. package/include/avx512vlintrin.h +945 -1299
  48. package/include/avx512vlvbmi2intrin.h +78 -63
  49. package/include/avx512vlvnniintrin.h +21 -18
  50. package/include/avx512vlvp2intersectintrin.h +2 -2
  51. package/include/avx512vnniintrin.h +10 -10
  52. package/include/avx512vp2intersectintrin.h +1 -2
  53. package/include/avx512vpopcntdqintrin.h +8 -10
  54. package/include/avx512vpopcntdqvlintrin.h +17 -15
  55. package/include/avxifmaintrin.h +16 -0
  56. package/include/avxintrin.h +165 -241
  57. package/include/avxvnniint16intrin.h +118 -99
  58. package/include/avxvnniint8intrin.h +56 -32
  59. package/include/avxvnniintrin.h +16 -8
  60. package/include/cpuid.h +101 -4
  61. package/include/emmintrin.h +168 -168
  62. package/include/f16cintrin.h +23 -9
  63. package/include/float.h +16 -155
  64. package/include/fma4intrin.h +98 -96
  65. package/include/fmaintrin.h +96 -66
  66. package/include/gfniintrin.h +21 -16
  67. package/include/hexagon_types.h +23 -20
  68. package/include/hvx_hexagon_protos.h +649 -860
  69. package/include/immintrin.h +0 -12
  70. package/include/intrin.h +4 -0
  71. package/include/lasxintrin.h +113 -0
  72. package/include/llvm_libc_wrappers/assert.h +3 -5
  73. package/include/llvm_libc_wrappers/ctype.h +3 -115
  74. package/include/llvm_libc_wrappers/inttypes.h +3 -5
  75. package/include/llvm_libc_wrappers/stdio.h +10 -38
  76. package/include/llvm_libc_wrappers/stdlib.h +3 -24
  77. package/include/llvm_libc_wrappers/string.h +2 -70
  78. package/include/llvm_libc_wrappers/time.h +4 -10
  79. package/include/mmintrin.h +188 -257
  80. package/include/module.modulemap +23 -4
  81. package/include/movrs_avx10_2_512intrin.h +2 -2
  82. package/include/movrs_avx10_2intrin.h +4 -4
  83. package/include/pmmintrin.h +12 -24
  84. package/include/ptrauth.h +16 -2
  85. package/include/riscv_mips.h +34 -0
  86. package/include/riscv_nds.h +89 -0
  87. package/include/sifive_vector.h +58 -2
  88. package/include/sm4evexintrin.h +2 -2
  89. package/include/smmintrin.h +77 -59
  90. package/include/spirvintrin.h +194 -0
  91. package/include/stddefer.h +19 -0
  92. package/include/tmmintrin.h +116 -147
  93. package/include/vaesintrin.h +1 -2
  94. package/include/xmmintrin.h +44 -70
  95. package/include/xopintrin.h +20 -10
  96. package/libcxx/include/__algorithm/all_of.h +11 -5
  97. package/libcxx/include/__algorithm/comp.h +4 -0
  98. package/libcxx/include/__algorithm/copy.h +28 -147
  99. package/libcxx/include/__algorithm/copy_backward.h +9 -24
  100. package/libcxx/include/__algorithm/copy_n.h +50 -16
  101. package/libcxx/include/__algorithm/count.h +2 -2
  102. package/libcxx/include/__algorithm/equal.h +43 -55
  103. package/libcxx/include/__algorithm/fill.h +26 -8
  104. package/libcxx/include/__algorithm/fill_n.h +32 -46
  105. package/libcxx/include/__algorithm/find.h +96 -39
  106. package/libcxx/include/__algorithm/find_end.h +105 -0
  107. package/libcxx/include/__algorithm/for_each.h +18 -24
  108. package/libcxx/include/__algorithm/for_each_n.h +20 -47
  109. package/libcxx/include/__algorithm/for_each_n_segment.h +1 -1
  110. package/libcxx/include/__algorithm/for_each_segment.h +26 -0
  111. package/libcxx/include/__algorithm/generate.h +4 -2
  112. package/libcxx/include/__algorithm/generate_n.h +19 -6
  113. package/libcxx/include/__algorithm/is_permutation.h +4 -4
  114. package/libcxx/include/__algorithm/iterator_operations.h +3 -0
  115. package/libcxx/include/__algorithm/lexicographical_compare.h +2 -2
  116. package/libcxx/include/__algorithm/lexicographical_compare_three_way.h +6 -6
  117. package/libcxx/include/__algorithm/make_heap.h +16 -4
  118. package/libcxx/include/__algorithm/mismatch.h +2 -2
  119. package/libcxx/include/__algorithm/move.h +8 -19
  120. package/libcxx/include/__algorithm/move_backward.h +9 -24
  121. package/libcxx/include/__algorithm/none_of.h +4 -4
  122. package/libcxx/include/__algorithm/partial_sort.h +1 -1
  123. package/libcxx/include/__algorithm/partial_sort_copy.h +1 -1
  124. package/libcxx/include/__algorithm/pstl.h +9 -9
  125. package/libcxx/include/__algorithm/radix_sort.h +27 -25
  126. package/libcxx/include/__algorithm/ranges_copy_n.h +3 -26
  127. package/libcxx/include/__algorithm/ranges_equal.h +17 -26
  128. package/libcxx/include/__algorithm/ranges_fill.h +7 -6
  129. package/libcxx/include/__algorithm/ranges_for_each.h +9 -1
  130. package/libcxx/include/__algorithm/ranges_generate_n.h +2 -6
  131. package/libcxx/include/__algorithm/ranges_search_n.h +2 -2
  132. package/libcxx/include/__algorithm/rotate.h +27 -44
  133. package/libcxx/include/__algorithm/search_n.h +49 -37
  134. package/libcxx/include/__algorithm/sift_down.h +19 -18
  135. package/libcxx/include/__algorithm/simd_utils.h +33 -4
  136. package/libcxx/include/__algorithm/specialized_algorithms.h +54 -0
  137. package/libcxx/include/__algorithm/stable_sort.h +1 -1
  138. package/libcxx/include/__assertion_handler +31 -4
  139. package/libcxx/include/__atomic/atomic.h +36 -60
  140. package/libcxx/include/__atomic/atomic_flag.h +19 -37
  141. package/libcxx/include/__atomic/atomic_ref.h +29 -12
  142. package/libcxx/include/__atomic/atomic_sync.h +127 -55
  143. package/libcxx/include/__atomic/atomic_sync_timed.h +144 -0
  144. package/libcxx/include/__atomic/atomic_waitable_traits.h +103 -0
  145. package/libcxx/include/__atomic/contention_t.h +27 -3
  146. package/libcxx/include/__atomic/floating_point_helper.h +55 -0
  147. package/libcxx/include/__bit/countl.h +1 -2
  148. package/libcxx/include/__bit/countr.h +1 -2
  149. package/libcxx/include/__bit/has_single_bit.h +1 -1
  150. package/libcxx/include/__bit/popcount.h +0 -1
  151. package/libcxx/include/__bit/rotate.h +15 -26
  152. package/libcxx/include/__bit_reference +207 -18
  153. package/libcxx/include/__charconv/from_chars_integral.h +1 -1
  154. package/libcxx/include/__charconv/from_chars_result.h +1 -1
  155. package/libcxx/include/__charconv/to_chars_integral.h +1 -0
  156. package/libcxx/include/__charconv/to_chars_result.h +1 -1
  157. package/libcxx/include/__charconv/traits.h +3 -24
  158. package/libcxx/include/__chrono/day.h +11 -0
  159. package/libcxx/include/__chrono/duration.h +58 -33
  160. package/libcxx/include/__chrono/file_clock.h +4 -2
  161. package/libcxx/include/__chrono/is_clock.h +72 -0
  162. package/libcxx/include/__chrono/leap_second.h +13 -0
  163. package/libcxx/include/__chrono/month.h +13 -0
  164. package/libcxx/include/__chrono/month_weekday.h +22 -0
  165. package/libcxx/include/__chrono/monthday.h +20 -0
  166. package/libcxx/include/__chrono/steady_clock.h +1 -1
  167. package/libcxx/include/__chrono/system_clock.h +3 -3
  168. package/libcxx/include/__chrono/time_point.h +37 -13
  169. package/libcxx/include/__chrono/weekday.h +25 -0
  170. package/libcxx/include/__chrono/year.h +11 -0
  171. package/libcxx/include/__chrono/year_month.h +13 -0
  172. package/libcxx/include/__chrono/year_month_day.h +23 -0
  173. package/libcxx/include/__chrono/year_month_weekday.h +26 -0
  174. package/libcxx/include/__chrono/zoned_time.h +16 -0
  175. package/libcxx/include/__compare/is_eq.h +6 -6
  176. package/libcxx/include/__compare/strong_order.h +12 -30
  177. package/libcxx/include/__compare/three_way_comparable.h +2 -2
  178. package/libcxx/include/__concepts/comparison_common_type.h +40 -0
  179. package/libcxx/include/__concepts/equality_comparable.h +2 -1
  180. package/libcxx/include/__condition_variable/condition_variable.h +1 -1
  181. package/libcxx/include/__config +63 -280
  182. package/libcxx/include/__configuration/abi.h +14 -24
  183. package/libcxx/include/__configuration/availability.h +65 -118
  184. package/libcxx/include/__configuration/compiler.h +6 -6
  185. package/libcxx/include/__configuration/experimental.h +38 -0
  186. package/libcxx/include/__configuration/hardening.h +215 -0
  187. package/libcxx/include/__configuration/language.h +3 -0
  188. package/libcxx/include/__configuration/platform.h +9 -16
  189. package/libcxx/include/__coroutine/coroutine_handle.h +9 -9
  190. package/libcxx/include/__coroutine/noop_coroutine_handle.h +11 -13
  191. package/libcxx/include/__debug_utils/strict_weak_ordering_check.h +1 -1
  192. package/libcxx/include/__exception/exception.h +6 -4
  193. package/libcxx/include/__exception/exception_ptr.h +27 -5
  194. package/libcxx/include/__exception/nested_exception.h +2 -2
  195. package/libcxx/include/__exception/operations.h +5 -5
  196. package/libcxx/include/__expected/bad_expected_access.h +8 -6
  197. package/libcxx/include/__expected/expected.h +62 -64
  198. package/libcxx/include/__expected/unexpected.h +4 -4
  199. package/libcxx/include/__filesystem/copy_options.h +4 -4
  200. package/libcxx/include/__filesystem/directory_entry.h +37 -33
  201. package/libcxx/include/__filesystem/directory_iterator.h +9 -11
  202. package/libcxx/include/__filesystem/directory_options.h +7 -4
  203. package/libcxx/include/__filesystem/file_status.h +3 -3
  204. package/libcxx/include/__filesystem/filesystem_error.h +9 -10
  205. package/libcxx/include/__filesystem/operations.h +97 -66
  206. package/libcxx/include/__filesystem/path.h +68 -64
  207. package/libcxx/include/__filesystem/path_iterator.h +1 -3
  208. package/libcxx/include/__filesystem/perm_options.h +4 -4
  209. package/libcxx/include/__filesystem/perms.h +4 -4
  210. package/libcxx/include/__filesystem/recursive_directory_iterator.h +9 -14
  211. package/libcxx/include/__filesystem/space_info.h +1 -1
  212. package/libcxx/include/__filesystem/u8path.h +12 -14
  213. package/libcxx/include/__flat_map/flat_map.h +88 -71
  214. package/libcxx/include/__flat_map/flat_multimap.h +251 -172
  215. package/libcxx/include/__flat_map/key_value_iterator.h +0 -1
  216. package/libcxx/include/__flat_map/utils.h +1 -0
  217. package/libcxx/include/__flat_set/flat_multiset.h +211 -143
  218. package/libcxx/include/__flat_set/flat_set.h +86 -68
  219. package/libcxx/include/__format/concepts.h +0 -14
  220. package/libcxx/include/__format/extended_grapheme_cluster_table.h +3 -2
  221. package/libcxx/include/__format/fmt_pair_like.h +42 -0
  222. package/libcxx/include/__format/format_arg.h +7 -10
  223. package/libcxx/include/__format/format_args.h +1 -1
  224. package/libcxx/include/__format/format_context.h +5 -5
  225. package/libcxx/include/__format/format_parse_context.h +2 -2
  226. package/libcxx/include/__format/formatter_output.h +30 -34
  227. package/libcxx/include/__format/indic_conjunct_break_table.h +3 -2
  228. package/libcxx/include/__format/range_default_formatter.h +2 -41
  229. package/libcxx/include/__format/range_format.h +71 -0
  230. package/libcxx/include/__format/range_formatter.h +1 -0
  231. package/libcxx/include/__format/width_estimation_table.h +4 -2
  232. package/libcxx/include/__functional/bind.h +10 -15
  233. package/libcxx/include/__functional/bind_back.h +1 -1
  234. package/libcxx/include/__functional/bind_front.h +1 -1
  235. package/libcxx/include/__functional/function.h +57 -75
  236. package/libcxx/include/__functional/hash.h +1 -10
  237. package/libcxx/include/__functional/identity.h +1 -1
  238. package/libcxx/include/__functional/is_transparent.h +8 -0
  239. package/libcxx/include/__functional/mem_fn.h +2 -1
  240. package/libcxx/include/__functional/operations.h +18 -0
  241. package/libcxx/include/__functional/ranges_operations.h +7 -0
  242. package/libcxx/include/__functional/reference_wrapper.h +7 -5
  243. package/libcxx/include/__functional/weak_result_type.h +14 -28
  244. package/libcxx/include/__fwd/ios.h +1 -1
  245. package/libcxx/include/__fwd/tuple.h +14 -0
  246. package/libcxx/include/__hash_table +371 -357
  247. package/libcxx/include/__ios/fpos.h +4 -4
  248. package/libcxx/include/__iterator/back_insert_iterator.h +1 -7
  249. package/libcxx/include/__iterator/bounded_iter.h +7 -8
  250. package/libcxx/include/__iterator/concepts.h +6 -9
  251. package/libcxx/include/__iterator/cpp17_iterator_concepts.h +13 -12
  252. package/libcxx/include/__iterator/distance.h +40 -18
  253. package/libcxx/include/__iterator/front_insert_iterator.h +1 -7
  254. package/libcxx/include/__iterator/insert_iterator.h +1 -7
  255. package/libcxx/include/__iterator/istream_iterator.h +6 -7
  256. package/libcxx/include/__iterator/istreambuf_iterator.h +6 -7
  257. package/libcxx/include/__iterator/iter_move.h +1 -1
  258. package/libcxx/include/__iterator/iterator.h +13 -0
  259. package/libcxx/include/__iterator/iterator_traits.h +13 -14
  260. package/libcxx/include/__iterator/ostream_iterator.h +1 -7
  261. package/libcxx/include/__iterator/ostreambuf_iterator.h +1 -7
  262. package/libcxx/include/__iterator/reverse_iterator.h +8 -13
  263. package/libcxx/include/__iterator/segmented_iterator.h +3 -8
  264. package/libcxx/include/__iterator/static_bounded_iter.h +3 -3
  265. package/libcxx/include/__iterator/wrap_iter.h +8 -6
  266. package/libcxx/include/__locale +3 -10
  267. package/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +0 -10
  268. package/libcxx/include/__locale_dir/locale_base_api.h +5 -28
  269. package/libcxx/include/__locale_dir/messages.h +1 -1
  270. package/libcxx/include/__locale_dir/money.h +2 -2
  271. package/libcxx/include/__locale_dir/num.h +190 -243
  272. package/libcxx/include/__locale_dir/pad_and_output.h +5 -6
  273. package/libcxx/include/__locale_dir/support/bsd_like.h +0 -20
  274. package/libcxx/include/__locale_dir/support/fuchsia.h +0 -7
  275. package/libcxx/include/__locale_dir/support/linux.h +0 -37
  276. package/libcxx/include/__locale_dir/support/netbsd.h +0 -2
  277. package/libcxx/include/__locale_dir/support/newlib.h +243 -0
  278. package/libcxx/include/__locale_dir/support/no_locale/characters.h +0 -4
  279. package/libcxx/include/__locale_dir/support/no_locale/strtonum.h +0 -9
  280. package/libcxx/include/__locale_dir/support/windows.h +0 -29
  281. package/libcxx/include/__locale_dir/time.h +3 -7
  282. package/libcxx/include/__math/hypot.h +1 -1
  283. package/libcxx/include/__math/logarithms.h +1 -1
  284. package/libcxx/include/__math/traits.h +80 -11
  285. package/libcxx/include/__mdspan/extents.h +7 -4
  286. package/libcxx/include/__mdspan/layout_stride.h +4 -5
  287. package/libcxx/include/__mdspan/mdspan.h +29 -23
  288. package/libcxx/include/__memory/addressof.h +7 -5
  289. package/libcxx/include/__memory/align.h +18 -1
  290. package/libcxx/include/__memory/allocate_at_least.h +15 -10
  291. package/libcxx/include/__memory/allocator.h +14 -26
  292. package/libcxx/include/__memory/allocator_traits.h +6 -4
  293. package/libcxx/include/__memory/compressed_pair.h +15 -9
  294. package/libcxx/include/__memory/construct_at.h +12 -23
  295. package/libcxx/include/__memory/inout_ptr.h +1 -1
  296. package/libcxx/include/__memory/is_sufficiently_aligned.h +1 -1
  297. package/libcxx/include/__memory/out_ptr.h +1 -1
  298. package/libcxx/include/__memory/pointer_traits.h +1 -1
  299. package/libcxx/include/__memory/raw_storage_iterator.h +3 -9
  300. package/libcxx/include/__memory/shared_count.h +9 -30
  301. package/libcxx/include/__memory/shared_ptr.h +100 -145
  302. package/libcxx/include/__memory/temp_value.h +1 -2
  303. package/libcxx/include/__memory/uninitialized_algorithms.h +44 -115
  304. package/libcxx/include/__memory/unique_ptr.h +14 -16
  305. package/libcxx/include/__memory/uses_allocator_construction.h +1 -0
  306. package/libcxx/include/__memory_resource/memory_resource.h +4 -2
  307. package/libcxx/include/__memory_resource/monotonic_buffer_resource.h +1 -1
  308. package/libcxx/include/__memory_resource/polymorphic_allocator.h +13 -8
  309. package/libcxx/include/__memory_resource/pool_options.h +1 -1
  310. package/libcxx/include/__memory_resource/synchronized_pool_resource.h +4 -2
  311. package/libcxx/include/__memory_resource/unsynchronized_pool_resource.h +1 -1
  312. package/libcxx/include/__mutex/mutex.h +2 -2
  313. package/libcxx/include/__mutex/once_flag.h +14 -11
  314. package/libcxx/include/__mutex/tag_types.h +3 -3
  315. package/libcxx/include/__mutex/unique_lock.h +8 -7
  316. package/libcxx/include/__new/align_val_t.h +6 -0
  317. package/libcxx/include/__new/allocate.h +1 -2
  318. package/libcxx/include/__new/exceptions.h +8 -2
  319. package/libcxx/include/__new/global_new_delete.h +4 -11
  320. package/libcxx/include/__new/interference_size.h +0 -4
  321. package/libcxx/include/__new/launder.h +3 -5
  322. package/libcxx/include/__new/nothrow_t.h +1 -1
  323. package/libcxx/include/__numeric/gcd_lcm.h +24 -34
  324. package/libcxx/include/__numeric/midpoint.h +9 -14
  325. package/libcxx/include/__numeric/pstl.h +2 -2
  326. package/libcxx/include/__numeric/saturation_arithmetic.h +13 -5
  327. package/libcxx/include/__ostream/basic_ostream.h +8 -8
  328. package/libcxx/include/__pstl/backends/default.h +14 -14
  329. package/libcxx/include/__pstl/backends/libdispatch.h +2 -2
  330. package/libcxx/include/__pstl/cpu_algos/find_if.h +1 -1
  331. package/libcxx/include/__pstl/cpu_algos/transform.h +5 -6
  332. package/libcxx/include/__pstl/cpu_algos/transform_reduce.h +5 -4
  333. package/libcxx/include/__random/binomial_distribution.h +10 -4
  334. package/libcxx/include/__random/mersenne_twister_engine.h +50 -154
  335. package/libcxx/include/__random/piecewise_constant_distribution.h +3 -2
  336. package/libcxx/include/__random/piecewise_linear_distribution.h +3 -2
  337. package/libcxx/include/__ranges/adjacent_transform_view.h +406 -0
  338. package/libcxx/include/__ranges/adjacent_view.h +419 -0
  339. package/libcxx/include/__ranges/as_rvalue_view.h +9 -9
  340. package/libcxx/include/__ranges/chunk_by_view.h +6 -6
  341. package/libcxx/include/__ranges/common_view.h +7 -7
  342. package/libcxx/include/__ranges/drop_view.h +8 -8
  343. package/libcxx/include/__ranges/drop_while_view.h +5 -5
  344. package/libcxx/include/__ranges/elements_of.h +49 -0
  345. package/libcxx/include/__ranges/empty_view.h +5 -5
  346. package/libcxx/include/__ranges/filter_view.h +10 -10
  347. package/libcxx/include/__ranges/iota_view.h +41 -22
  348. package/libcxx/include/__ranges/owning_view.h +15 -15
  349. package/libcxx/include/__ranges/ref_view.h +6 -6
  350. package/libcxx/include/__ranges/repeat_view.h +17 -10
  351. package/libcxx/include/__ranges/single_view.h +8 -8
  352. package/libcxx/include/__ranges/take_view.h +9 -9
  353. package/libcxx/include/__ranges/transform_view.h +1 -2
  354. package/libcxx/include/__ranges/view_interface.h +10 -10
  355. package/libcxx/include/__ranges/zip_transform_view.h +357 -0
  356. package/libcxx/include/__ranges/zip_view.h +20 -20
  357. package/libcxx/include/__split_buffer +612 -240
  358. package/libcxx/include/__stop_token/atomic_unique_lock.h +1 -1
  359. package/libcxx/include/__stop_token/stop_callback.h +2 -2
  360. package/libcxx/include/__stop_token/stop_source.h +1 -1
  361. package/libcxx/include/__stop_token/stop_state.h +4 -4
  362. package/libcxx/include/__stop_token/stop_token.h +1 -1
  363. package/libcxx/include/__string/char_traits.h +51 -31
  364. package/libcxx/include/__string/constexpr_c_functions.h +5 -5
  365. package/libcxx/include/__support/xlocale/__strtonum_fallback.h +0 -8
  366. package/libcxx/include/__system_error/error_category.h +8 -8
  367. package/libcxx/include/__system_error/error_code.h +5 -5
  368. package/libcxx/include/__system_error/error_condition.h +4 -4
  369. package/libcxx/include/__system_error/system_error.h +1 -1
  370. package/libcxx/include/__thread/id.h +1 -1
  371. package/libcxx/include/__thread/jthread.h +1 -1
  372. package/libcxx/include/__thread/poll_with_backoff.h +27 -8
  373. package/libcxx/include/__thread/support/c11.h +8 -8
  374. package/libcxx/include/__thread/support/pthread.h +8 -8
  375. package/libcxx/include/__thread/support/windows.h +8 -8
  376. package/libcxx/include/__thread/thread.h +13 -8
  377. package/libcxx/include/__thread/timed_backoff_policy.h +3 -2
  378. package/libcxx/include/__tree +849 -701
  379. package/libcxx/include/__tuple/sfinae_helpers.h +1 -44
  380. package/libcxx/include/__tuple/tuple_element.h +0 -12
  381. package/libcxx/include/__tuple/tuple_size.h +0 -4
  382. package/libcxx/include/__tuple/tuple_transform.h +45 -0
  383. package/libcxx/include/__type_traits/aligned_storage.h +13 -40
  384. package/libcxx/include/__type_traits/desugars_to.h +4 -0
  385. package/libcxx/include/__type_traits/invoke.h +8 -0
  386. package/libcxx/include/__type_traits/is_allocator.h +6 -7
  387. package/libcxx/include/__type_traits/is_array.h +26 -0
  388. package/libcxx/include/__type_traits/is_equality_comparable.h +16 -21
  389. package/libcxx/include/__type_traits/is_final.h +1 -1
  390. package/libcxx/include/__type_traits/is_floating_point.h +7 -6
  391. package/libcxx/include/__type_traits/is_generic_transparent_comparator.h +30 -0
  392. package/libcxx/include/__type_traits/is_specialization.h +2 -6
  393. package/libcxx/include/__type_traits/is_within_lifetime.h +29 -0
  394. package/libcxx/include/__type_traits/make_transparent.h +52 -0
  395. package/libcxx/include/__type_traits/reference_constructs_from_temporary.h +1 -7
  396. package/libcxx/include/__type_traits/reference_converts_from_temporary.h +1 -1
  397. package/libcxx/include/__utility/cmp.h +19 -7
  398. package/libcxx/include/__utility/default_three_way_comparator.h +70 -0
  399. package/libcxx/include/__utility/in_place.h +1 -1
  400. package/libcxx/include/__utility/integer_sequence.h +56 -41
  401. package/libcxx/include/__utility/lazy_synth_three_way_comparator.h +120 -0
  402. package/libcxx/include/__utility/pair.h +22 -25
  403. package/libcxx/include/__utility/scope_guard.h +2 -0
  404. package/libcxx/include/__utility/try_key_extraction.h +114 -0
  405. package/libcxx/include/__vector/vector.h +187 -160
  406. package/libcxx/include/__vector/vector_bool.h +76 -83
  407. package/libcxx/include/any +118 -155
  408. package/libcxx/include/array +88 -56
  409. package/libcxx/include/atomic +2 -0
  410. package/libcxx/include/barrier +20 -24
  411. package/libcxx/include/bitset +49 -30
  412. package/libcxx/include/ccomplex +3 -11
  413. package/libcxx/include/chrono +47 -0
  414. package/libcxx/include/ciso646 +3 -6
  415. package/libcxx/include/complex +77 -65
  416. package/libcxx/include/complex.h +10 -10
  417. package/libcxx/include/condition_variable +3 -3
  418. package/libcxx/include/cstdalign +3 -10
  419. package/libcxx/include/cstdbool +3 -10
  420. package/libcxx/include/ctgmath +2 -11
  421. package/libcxx/include/ctype.h +24 -24
  422. package/libcxx/include/cwchar +2 -2
  423. package/libcxx/include/deque +109 -225
  424. package/libcxx/include/errno.h +269 -269
  425. package/libcxx/include/exception +4 -1
  426. package/libcxx/include/ext/hash_map +7 -48
  427. package/libcxx/include/ext/hash_set +2 -8
  428. package/libcxx/include/fenv.h +43 -43
  429. package/libcxx/include/flat_map +663 -11
  430. package/libcxx/include/flat_set +543 -8
  431. package/libcxx/include/float.h +16 -16
  432. package/libcxx/include/forward_list +33 -53
  433. package/libcxx/include/fstream +57 -42
  434. package/libcxx/include/future +41 -51
  435. package/libcxx/include/initializer_list +9 -3
  436. package/libcxx/include/inttypes.h +16 -16
  437. package/libcxx/include/ios +28 -28
  438. package/libcxx/include/istream +19 -13
  439. package/libcxx/include/iterator +10 -0
  440. package/libcxx/include/latch +7 -5
  441. package/libcxx/include/limits +4 -4
  442. package/libcxx/include/list +61 -79
  443. package/libcxx/include/map +386 -274
  444. package/libcxx/include/math.h +19 -0
  445. package/libcxx/include/mdspan +1 -5
  446. package/libcxx/include/mutex +29 -19
  447. package/libcxx/include/optional +644 -149
  448. package/libcxx/include/print +9 -5
  449. package/libcxx/include/queue +39 -37
  450. package/libcxx/include/ranges +48 -0
  451. package/libcxx/include/regex +33 -31
  452. package/libcxx/include/scoped_allocator +16 -11
  453. package/libcxx/include/semaphore +15 -18
  454. package/libcxx/include/set +220 -192
  455. package/libcxx/include/shared_mutex +3 -8
  456. package/libcxx/include/span +53 -37
  457. package/libcxx/include/sstream +34 -28
  458. package/libcxx/include/stack +13 -15
  459. package/libcxx/include/stddef.h +10 -10
  460. package/libcxx/include/stdexcept +2 -2
  461. package/libcxx/include/stdio.h +20 -21
  462. package/libcxx/include/streambuf +59 -19
  463. package/libcxx/include/string +574 -573
  464. package/libcxx/include/string_view +113 -89
  465. package/libcxx/include/strstream +10 -10
  466. package/libcxx/include/syncstream +4 -4
  467. package/libcxx/include/tgmath.h +12 -12
  468. package/libcxx/include/tuple +242 -212
  469. package/libcxx/include/type_traits +8 -2
  470. package/libcxx/include/typeindex +5 -3
  471. package/libcxx/include/typeinfo +92 -89
  472. package/libcxx/include/unordered_map +179 -304
  473. package/libcxx/include/unordered_set +168 -195
  474. package/libcxx/include/utility +12 -0
  475. package/libcxx/include/valarray +106 -161
  476. package/libcxx/include/variant +25 -33
  477. package/libcxx/include/version +41 -24
  478. package/libcxx/include/wctype.h +29 -29
  479. package/libcxx/src/any.cpp +4 -0
  480. package/libcxx/src/atomic.cpp +365 -80
  481. package/libcxx/src/barrier.cpp +4 -3
  482. package/libcxx/src/charconv.cpp +6 -3
  483. package/libcxx/src/condition_variable_destructor.cpp +1 -1
  484. package/libcxx/src/error_category.cpp +3 -1
  485. package/libcxx/src/exception.cpp +2 -10
  486. package/libcxx/src/experimental/time_zone.cpp +1 -1
  487. package/libcxx/src/experimental/tzdb.cpp +4 -1
  488. package/libcxx/src/filesystem/error.h +4 -22
  489. package/libcxx/src/filesystem/format_string.h +9 -18
  490. package/libcxx/src/filesystem/int128_builtins.cpp +2 -0
  491. package/libcxx/src/filesystem/operations.cpp +2 -9
  492. package/libcxx/src/filesystem/path.cpp +3 -1
  493. package/libcxx/src/include/aligned_alloc.h +65 -0
  494. package/libcxx/src/include/config_elast.h +1 -1
  495. package/libcxx/src/include/from_chars_floating_point.h +11 -7
  496. package/libcxx/src/include/overridable_function.h +8 -10
  497. package/libcxx/src/iostream.cpp +56 -37
  498. package/libcxx/src/locale.cpp +213 -196
  499. package/libcxx/src/memory.cpp +12 -14
  500. package/libcxx/src/mutex_destructor.cpp +1 -1
  501. package/libcxx/src/new.cpp +5 -5
  502. package/libcxx/src/optional.cpp +4 -0
  503. package/libcxx/src/print.cpp +9 -1
  504. package/libcxx/src/random.cpp +0 -26
  505. package/libcxx/src/string.cpp +10 -25
  506. package/libcxx/src/support/runtime/exception_fallback.ipp +2 -0
  507. package/libcxx/src/support/runtime/exception_glibcxx.ipp +3 -0
  508. package/libcxx/src/support/runtime/exception_libcxxabi.ipp +6 -2
  509. package/libcxx/src/support/runtime/exception_libcxxrt.ipp +2 -0
  510. package/libcxx/src/support/runtime/exception_msvc.ipp +2 -0
  511. package/libcxx/src/support/runtime/exception_pointer_cxxabi.ipp +9 -10
  512. package/libcxx/src/support/runtime/exception_pointer_glibcxx.ipp +2 -0
  513. package/libcxx/src/support/runtime/exception_pointer_msvc.ipp +1 -0
  514. package/libcxx/src/support/runtime/exception_pointer_unimplemented.ipp +1 -0
  515. package/libcxx/src/support/win32/locale_win32.cpp +1 -1
  516. package/libcxx/src/system_error.cpp +2 -0
  517. package/libcxx/src/thread.cpp +1 -3
  518. package/libcxx/src/valarray.cpp +1 -2
  519. package/libcxx/src/vector.cpp +2 -2
  520. package/libcxxabi/include/__cxxabi_config.h +42 -23
  521. package/libcxxabi/src/cxa_exception.cpp +4 -2
  522. package/libcxxabi/src/cxa_exception.h +16 -14
  523. package/libcxxabi/src/cxa_personality.cpp +126 -9
  524. package/libcxxabi/src/cxa_thread_atexit.cpp +2 -1
  525. package/libcxxabi/src/demangle/DemangleConfig.h +4 -0
  526. package/libcxxabi/src/demangle/ItaniumDemangle.h +9 -6
  527. package/libcxxabi/src/demangle/Utility.h +21 -7
  528. package/libcxxabi/src/fallback_malloc.cpp +1 -1
  529. package/libcxxabi/src/private_typeinfo.cpp +6 -0
  530. package/libcxxabi/src/stdlib_new_delete.cpp +5 -5
  531. package/libtsan/LICENSE.TXT +311 -0
  532. package/libtsan/builtins/assembly.h +41 -8
  533. package/libtsan/interception/interception_win.cpp +4 -0
  534. package/libtsan/sanitizer_common/sanitizer_allocator_primary32.h +1 -0
  535. package/libtsan/sanitizer_common/sanitizer_allocator_primary64.h +18 -0
  536. package/libtsan/sanitizer_common/sanitizer_common.h +15 -2
  537. package/libtsan/sanitizer_common/sanitizer_common_interceptors.inc +30 -4
  538. package/libtsan/sanitizer_common/sanitizer_common_interceptors_ioctl.inc +4 -0
  539. package/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S +2 -1
  540. package/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S +2 -0
  541. package/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S +2 -0
  542. package/libtsan/sanitizer_common/sanitizer_common_syscalls.inc +18 -0
  543. package/libtsan/sanitizer_common/sanitizer_file.cpp +40 -12
  544. package/libtsan/sanitizer_common/sanitizer_file.h +3 -0
  545. package/libtsan/sanitizer_common/sanitizer_flags.inc +7 -0
  546. package/libtsan/sanitizer_common/sanitizer_fuchsia.cpp +30 -3
  547. package/libtsan/sanitizer_common/sanitizer_haiku.cpp +2 -2
  548. package/libtsan/sanitizer_common/sanitizer_internal_defs.h +1 -1
  549. package/libtsan/sanitizer_common/sanitizer_libc.cpp +8 -0
  550. package/libtsan/sanitizer_common/sanitizer_libc.h +1 -0
  551. package/libtsan/sanitizer_common/sanitizer_linux.cpp +15 -7
  552. package/libtsan/sanitizer_common/sanitizer_linux.h +3 -3
  553. package/libtsan/sanitizer_common/sanitizer_linux_libcdep.cpp +1 -0
  554. package/libtsan/sanitizer_common/sanitizer_mac.cpp +255 -104
  555. package/libtsan/sanitizer_common/sanitizer_mac.h +5 -0
  556. package/libtsan/sanitizer_common/sanitizer_netbsd.cpp +2 -2
  557. package/libtsan/sanitizer_common/sanitizer_platform.h +27 -1
  558. package/libtsan/sanitizer_common/sanitizer_platform_interceptors.h +5 -4
  559. package/libtsan/sanitizer_common/sanitizer_platform_limits_posix.cpp +15 -17
  560. package/libtsan/sanitizer_common/sanitizer_platform_limits_posix.h +32 -6
  561. package/libtsan/sanitizer_common/sanitizer_posix.cpp +3 -12
  562. package/libtsan/sanitizer_common/sanitizer_posix.h +2 -1
  563. package/libtsan/sanitizer_common/sanitizer_posix_libcdep.cpp +19 -0
  564. package/libtsan/sanitizer_common/sanitizer_procmaps_mac.cpp +102 -37
  565. package/libtsan/sanitizer_common/sanitizer_redefine_builtins.h +1 -1
  566. package/libtsan/sanitizer_common/sanitizer_signal_interceptors.inc +40 -2
  567. package/libtsan/sanitizer_common/sanitizer_stoptheworld.h +1 -1
  568. package/libtsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +83 -12
  569. package/libtsan/sanitizer_common/sanitizer_stoptheworld_mac.cpp +3 -3
  570. package/libtsan/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp +7 -7
  571. package/libtsan/sanitizer_common/sanitizer_stoptheworld_win.cpp +2 -2
  572. package/libtsan/sanitizer_common/sanitizer_symbolizer_internal.h +5 -1
  573. package/libtsan/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +11 -1
  574. package/libtsan/sanitizer_common/sanitizer_symbolizer_mac.cpp +86 -29
  575. package/libtsan/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +27 -16
  576. package/libtsan/sanitizer_common/sanitizer_thread_registry.cpp +5 -4
  577. package/libtsan/sanitizer_common/sanitizer_thread_registry.h +4 -4
  578. package/libtsan/sanitizer_common/sanitizer_win.cpp +1 -3
  579. package/libtsan/tsan_debugging.cpp +2 -2
  580. package/libtsan/tsan_flags.cpp +37 -0
  581. package/libtsan/tsan_flags.h +8 -0
  582. package/libtsan/tsan_flags.inc +12 -0
  583. package/libtsan/tsan_interceptors.h +9 -1
  584. package/libtsan/tsan_interceptors_mac.cpp +19 -0
  585. package/libtsan/tsan_interceptors_posix.cpp +78 -32
  586. package/libtsan/tsan_interface.h +3 -3
  587. package/libtsan/tsan_interface_ann.cpp +23 -9
  588. package/libtsan/tsan_mman.cpp +18 -4
  589. package/libtsan/tsan_platform.h +44 -7
  590. package/libtsan/tsan_platform_linux.cpp +42 -14
  591. package/libtsan/tsan_platform_mac.cpp +16 -3
  592. package/libtsan/tsan_report.h +14 -1
  593. package/libtsan/tsan_rtl.cpp +14 -0
  594. package/libtsan/tsan_rtl.h +7 -2
  595. package/libtsan/tsan_rtl_aarch64.S +3 -5
  596. package/libtsan/tsan_rtl_access.cpp +8 -3
  597. package/libtsan/tsan_rtl_amd64.S +2 -0
  598. package/libtsan/tsan_rtl_mutex.cpp +94 -49
  599. package/libtsan/tsan_rtl_report.cpp +132 -65
  600. package/libtsan/tsan_rtl_thread.cpp +31 -9
  601. package/libtsan/tsan_symbolize.cpp +1 -1
  602. package/libtsan/tsan_symbolize.h +1 -1
  603. package/libtsan/tsan_trace.h +1 -1
  604. package/libunwind/include/__libunwind_config.h +11 -2
  605. package/libunwind/include/libunwind.h +117 -11
  606. package/libunwind/include/unwind_arm_ehabi.h +4 -1
  607. package/libunwind/src/AddressSpace.hpp +40 -19
  608. package/libunwind/src/CompactUnwinder.hpp +16 -5
  609. package/libunwind/src/DwarfInstructions.hpp +24 -13
  610. package/libunwind/src/DwarfParser.hpp +60 -22
  611. package/libunwind/src/EHHeaderParser.hpp +7 -4
  612. package/libunwind/src/Registers.hpp +226 -22
  613. package/libunwind/src/Unwind-seh.cpp +6 -7
  614. package/libunwind/src/Unwind-wasm.c +7 -7
  615. package/libunwind/src/UnwindCursor.hpp +167 -49
  616. package/libunwind/src/UnwindLevel1.c +46 -17
  617. package/libunwind/src/UnwindRegistersRestore.S +46 -5
  618. package/libunwind/src/UnwindRegistersSave.S +86 -2
  619. package/libunwind/src/assembly.h +5 -1
  620. package/libunwind/src/config.h +9 -0
  621. package/libunwind/src/gcc_personality_v0.c +79 -6
  622. package/libunwind/src/libunwind.cpp +104 -4
  623. package/libunwind/src/libunwind_ext.h +7 -1
  624. package/libunwind/src/shadow_stack_unwind.h +2 -2
  625. package/lldb/pretty_printers.py +948 -0
  626. package/package.json +1 -1
  627. package/std/Build/Step/Compile.zig +18 -19
  628. package/std/Build/Step/Run.zig +13 -6
  629. package/std/Build/Step.zig +0 -3
  630. package/std/Io/Threaded.zig +3 -0
  631. package/std/Target/aarch64.zig +620 -77
  632. package/std/Target/amdgcn.zig +421 -21
  633. package/std/Target/arm.zig +40 -6
  634. package/std/Target/bpf.zig +6 -0
  635. package/std/Target/hexagon.zig +41 -6
  636. package/std/Target/loongarch.zig +18 -0
  637. package/std/Target/mips.zig +6 -0
  638. package/std/Target/nvptx.zig +58 -35
  639. package/std/Target/powerpc.zig +27 -19
  640. package/std/Target/riscv.zig +415 -177
  641. package/std/Target/sparc.zig +17 -0
  642. package/std/Target/wasm.zig +7 -0
  643. package/std/Target/x86.zig +200 -31
  644. package/std/Target/xtensa.zig +65 -0
  645. package/std/Target.zig +11 -2
  646. package/std/c.zig +7 -0
  647. package/std/debug/Dwarf.zig +14 -11
  648. package/std/debug/Pdb.zig +24 -16
  649. package/std/hash/xxhash.zig +0 -6
  650. package/std/math/log10.zig +0 -2
  651. package/std/math/modf.zig +0 -1
  652. package/std/mem.zig +1 -2
  653. package/std/os/linux/x86.zig +2 -2
  654. package/std/os/windows.zig +130 -1
  655. package/std/simd.zig +4 -21
  656. package/std/start.zig +4 -3
  657. package/std/zig/Ast.zig +5 -7
  658. package/std/zig/AstGen.zig +20 -14
  659. package/std/zig/ErrorBundle.zig +6 -2
  660. package/std/zig/ZonGen.zig +13 -21
  661. package/std/zig/llvm/Builder.zig +2 -2
  662. package/std/zig/system/arm.zig +56 -2
  663. package/std/zig/system/windows.zig +34 -1
  664. package/std/zig/system/x86.zig +60 -16
  665. package/std/zig/system.zig +0 -10
  666. package/include/amxbf16transposeintrin.h +0 -94
  667. package/include/amxcomplextransposeintrin.h +0 -303
  668. package/include/amxfp16transposeintrin.h +0 -94
  669. package/include/amxmovrstransposeintrin.h +0 -200
  670. package/include/amxtf32transposeintrin.h +0 -105
  671. package/include/amxtransposeintrin.h +0 -248
  672. package/libtsan/sanitizer_common/sanitizer_coverage_interface.inc +0 -43
  673. package/std/Build/Step/CheckObject.zig +0 -2764
@@ -67,38 +67,30 @@ pub fn generate(gpa: Allocator, tree: Ast, options: Options) Allocator.Error!Zoi
67
67
  }
68
68
 
69
69
  if (zg.compile_errors.items.len > 0) {
70
- const string_bytes = try zg.string_bytes.toOwnedSlice(gpa);
71
- errdefer gpa.free(string_bytes);
72
- const compile_errors = try zg.compile_errors.toOwnedSlice(gpa);
73
- errdefer gpa.free(compile_errors);
74
- const error_notes = try zg.error_notes.toOwnedSlice(gpa);
75
- errdefer gpa.free(error_notes);
70
+ try zg.string_bytes.shrinkToLen(gpa);
71
+ try zg.compile_errors.shrinkToLen(gpa);
72
+ try zg.error_notes.shrinkToLen(gpa);
76
73
 
77
74
  return .{
78
75
  .nodes = .empty,
79
76
  .extra = &.{},
80
77
  .limbs = &.{},
81
- .string_bytes = string_bytes,
82
- .compile_errors = compile_errors,
83
- .error_notes = error_notes,
78
+ .string_bytes = zg.string_bytes.toOwnedSliceAssert(),
79
+ .compile_errors = zg.compile_errors.toOwnedSliceAssert(),
80
+ .error_notes = zg.error_notes.toOwnedSliceAssert(),
84
81
  };
85
82
  } else {
86
83
  assert(zg.error_notes.items.len == 0);
87
84
 
88
- var nodes = zg.nodes.toOwnedSlice();
89
- errdefer nodes.deinit(gpa);
90
- const extra = try zg.extra.toOwnedSlice(gpa);
91
- errdefer gpa.free(extra);
92
- const limbs = try zg.limbs.toOwnedSlice(gpa);
93
- errdefer gpa.free(limbs);
94
- const string_bytes = try zg.string_bytes.toOwnedSlice(gpa);
95
- errdefer gpa.free(string_bytes);
85
+ try zg.extra.shrinkToLen(gpa);
86
+ try zg.limbs.shrinkToLen(gpa);
87
+ try zg.string_bytes.shrinkToLen(gpa);
96
88
 
97
89
  return .{
98
- .nodes = nodes,
99
- .extra = extra,
100
- .limbs = limbs,
101
- .string_bytes = string_bytes,
90
+ .nodes = zg.nodes.toOwnedSlice(),
91
+ .extra = zg.extra.toOwnedSliceAssert(),
92
+ .limbs = zg.limbs.toOwnedSliceAssert(),
93
+ .string_bytes = zg.string_bytes.toOwnedSliceAssert(),
102
94
  .compile_errors = &.{},
103
95
  .error_notes = &.{},
104
96
  };
@@ -13947,8 +13947,8 @@ pub fn toBitcode(self: *Builder, allocator: Allocator, producer: Producer) bitco
13947
13947
  const bit_count = extra.type.scalarBits(self);
13948
13948
  const val: i64 = if (bit_count <= 64)
13949
13949
  bigint.toInt(i64) catch unreachable
13950
- else if (bigint.toInt(u64)) |val|
13951
- @bitCast(val)
13950
+ else if (bigint.toInt(u63)) |val|
13951
+ @bitCast(@as(u64, val))
13952
13952
  else |_| {
13953
13953
  const limbs = try record.addManyAsSlice(
13954
13954
  self.gpa,
@@ -21,7 +21,7 @@ pub const cpu_models = struct {
21
21
  };
22
22
 
23
23
  // implementer = 0x41
24
- const ARM = [_]E{
24
+ const Arm = [_]E{
25
25
  E{ .part = 0x926, .m32 = &A32.arm926ej_s },
26
26
  E{ .part = 0xb02, .m32 = &A32.mpcore },
27
27
  E{ .part = 0xb36, .m32 = &A32.arm1136j_s },
@@ -88,8 +88,12 @@ pub const cpu_models = struct {
88
88
  E{ .part = 0xd87, .m64 = &A64.cortex_a725 },
89
89
  E{ .part = 0xd88, .m64 = &A64.cortex_a520ae },
90
90
  E{ .part = 0xd89, .m64 = &A64.cortex_a720ae },
91
+ E{ .part = 0xd8a, .m64 = &A64.c1_nano },
92
+ E{ .part = 0xd8b, .m64 = &A64.c1_pro },
93
+ E{ .part = 0xd8c, .m64 = &A64.c1_ultra },
91
94
  E{ .part = 0xd8e, .m64 = &A64.neoverse_n3 },
92
95
  E{ .part = 0xd8f, .m64 = &A64.cortex_a320 },
96
+ E{ .part = 0xd90, .m64 = &A64.c1_premium },
93
97
  };
94
98
  // implementer = 0x42
95
99
  const Broadcom = [_]E{
@@ -102,10 +106,17 @@ pub const cpu_models = struct {
102
106
  E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
103
107
  E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
104
108
  E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
109
+ E{ .part = 0x0b0, .m64 = &A64.cortex_a57 },
110
+ E{ .part = 0x0b1, .m64 = &A64.cortex_a57 },
111
+ E{ .part = 0x0b2, .m64 = &A64.cortex_a57 },
112
+ E{ .part = 0x0b3, .m64 = &A64.cortex_a57 },
113
+ E{ .part = 0x0b4, .m64 = &A64.cortex_a57 },
114
+ E{ .part = 0x0b5, .m64 = &A64.cortex_a57 },
105
115
  };
106
116
  // implementer = 0x46
107
117
  const Fujitsu = [_]E{
108
118
  E{ .part = 0x001, .m64 = &A64.a64fx },
119
+ E{ .part = 0x003, .m64 = &A64.fujitsu_monaka },
109
120
  };
110
121
  // implementer = 0x48
111
122
  const HiSilicon = [_]E{
@@ -137,8 +148,14 @@ pub const cpu_models = struct {
137
148
  E{ .part = 0xc00, .m64 = &A64.falkor },
138
149
  E{ .part = 0xc01, .m64 = &A64.saphira },
139
150
  };
151
+ // implementer = 0x53
152
+ const Samsung = [_]E{
153
+ E{ .part = 0x000, .m64 = &A64.exynos_m1 },
154
+ };
140
155
  // implementer = 0x61
141
156
  const Apple = [_]E{
157
+ E{ .part = 0x020, .m64 = &A64.apple_m1 },
158
+ E{ .part = 0x021, .m64 = &A64.apple_m1 },
142
159
  E{ .part = 0x022, .m64 = &A64.apple_m1 },
143
160
  E{ .part = 0x023, .m64 = &A64.apple_m1 },
144
161
  E{ .part = 0x024, .m64 = &A64.apple_m1 },
@@ -151,11 +168,43 @@ pub const cpu_models = struct {
151
168
  E{ .part = 0x035, .m64 = &A64.apple_m2 },
152
169
  E{ .part = 0x038, .m64 = &A64.apple_m2 },
153
170
  E{ .part = 0x039, .m64 = &A64.apple_m2 },
171
+ E{ .part = 0x042, .m64 = &A64.apple_m3 },
172
+ E{ .part = 0x043, .m64 = &A64.apple_m3 },
173
+ E{ .part = 0x044, .m64 = &A64.apple_m3 },
174
+ E{ .part = 0x045, .m64 = &A64.apple_m3 },
175
+ E{ .part = 0x048, .m64 = &A64.apple_m3 },
176
+ E{ .part = 0x049, .m64 = &A64.apple_m3 },
177
+ E{ .part = 0x052, .m64 = &A64.apple_m4 },
178
+ E{ .part = 0x053, .m64 = &A64.apple_m4 },
179
+ E{ .part = 0x054, .m64 = &A64.apple_m4 },
180
+ E{ .part = 0x055, .m64 = &A64.apple_m4 },
181
+ E{ .part = 0x058, .m64 = &A64.apple_m4 },
182
+ E{ .part = 0x059, .m64 = &A64.apple_m4 },
183
+ };
184
+ // implementer = 0x63
185
+ const ArmChina = [_]E{
186
+ E{ .part = 0x132, .m32 = &A32.star_mc1 },
187
+ E{ .part = 0xd25, .m32 = &A32.star_mc3 },
188
+ };
189
+ // implementer = 0x68
190
+ const Hxt = [_]E{
191
+ E{ .part = 0x000, .m64 = &A64.cortex_a57 },
192
+ };
193
+ // implementer = 0x6d
194
+ const Microsoft = [_]E{
195
+ E{ .part = 0xd49, .m64 = &A64.neoverse_n2 },
196
+ };
197
+ // implementer = 0xC0
198
+ const AmpereOne = [_]E{
199
+ E{ .part = 0xac3, .m64 = &A64.ampere1 },
200
+ E{ .part = 0xac4, .m64 = &A64.ampere1a },
201
+ E{ .part = 0xac5, .m64 = &A64.ampere1b },
202
+ E{ .part = 0xac7, .m64 = &A64.ampere1c },
154
203
  };
155
204
 
156
205
  pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
157
206
  const models = switch (core.implementer) {
158
- 0x41 => &ARM,
207
+ 0x41 => &Arm,
159
208
  0x42 => &Broadcom,
160
209
  0x43 => &Cavium,
161
210
  0x46 => &Fujitsu,
@@ -163,7 +212,12 @@ pub const cpu_models = struct {
163
212
  0x4e => &Nvidia,
164
213
  0x50 => &Ampere,
165
214
  0x51 => &Qualcomm,
215
+ 0x53 => &Samsung,
166
216
  0x61 => &Apple,
217
+ 0x63 => &ArmChina,
218
+ 0x68 => &Hxt,
219
+ 0x6d => &Microsoft,
220
+ 0xC0 => &AmpereOne,
167
221
  else => return null,
168
222
  };
169
223
 
@@ -215,10 +215,43 @@ fn genericCpuAndNativeFeatures(arch: Target.Cpu.Arch) Target.Cpu {
215
215
  // Override any features that are either present or absent
216
216
  setFeature(Feature, &cpu, .neon, IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE));
217
217
  setFeature(Feature, &cpu, .crc, IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE));
218
- setFeature(Feature, &cpu, .crypto, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
218
+ setFeature(Feature, &cpu, .aes, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
219
+ setFeature(Feature, &cpu, .sha2, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
219
220
  setFeature(Feature, &cpu, .lse, IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE));
220
221
  setFeature(Feature, &cpu, .dotprod, IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE));
221
222
  setFeature(Feature, &cpu, .jsconv, IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE));
223
+ setFeature(Feature, &cpu, .rcpc, IsProcessorFeaturePresent(PF.ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE));
224
+ setFeature(Feature, &cpu, .sve, IsProcessorFeaturePresent(PF.ARM_SVE_INSTRUCTIONS_AVAILABLE));
225
+ setFeature(Feature, &cpu, .sve2, IsProcessorFeaturePresent(PF.ARM_SVE2_INSTRUCTIONS_AVAILABLE));
226
+ setFeature(Feature, &cpu, .sve2p1, IsProcessorFeaturePresent(PF.ARM_SVE2_1_INSTRUCTIONS_AVAILABLE));
227
+ setFeature(Feature, &cpu, .sve_aes, IsProcessorFeaturePresent(PF.ARM_SVE_AES_INSTRUCTIONS_AVAILABLE) or IsProcessorFeaturePresent(PF.ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE));
228
+ setFeature(Feature, &cpu, .sve_bitperm, IsProcessorFeaturePresent(PF.ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE));
229
+ setFeature(Feature, &cpu, .bf16, IsProcessorFeaturePresent(PF.ARM_V86_BF16_INSTRUCTIONS_AVAILABLE));
230
+ setFeature(Feature, &cpu, .sve_b16b16, IsProcessorFeaturePresent(PF.ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE));
231
+ setFeature(Feature, &cpu, .sve_sha3, IsProcessorFeaturePresent(PF.ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE));
232
+ setFeature(Feature, &cpu, .sve_sm4, IsProcessorFeaturePresent(PF.ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE));
233
+ setFeature(Feature, &cpu, .i8mm, IsProcessorFeaturePresent(PF.ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE));
234
+ setFeature(Feature, &cpu, .f32mm, IsProcessorFeaturePresent(PF.ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE));
235
+ setFeature(Feature, &cpu, .f64mm, IsProcessorFeaturePresent(PF.ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE));
236
+ setFeature(Feature, &cpu, .sme, IsProcessorFeaturePresent(PF.ARM_SME_INSTRUCTIONS_AVAILABLE));
237
+ setFeature(Feature, &cpu, .sme2, IsProcessorFeaturePresent(PF.ARM_SME2_INSTRUCTIONS_AVAILABLE));
238
+ setFeature(Feature, &cpu, .lse2, IsProcessorFeaturePresent(PF.ARM_LSE2_AVAILABLE));
239
+ setFeature(Feature, &cpu, .sha3, IsProcessorFeaturePresent(PF.ARM_SHA3_INSTRUCTIONS_AVAILABLE) and IsProcessorFeaturePresent(PF.ARM_SHA512_INSTRUCTIONS_AVAILABLE));
240
+ setFeature(Feature, &cpu, .fullfp16, IsProcessorFeaturePresent(PF.ARM_V82_FP16_INSTRUCTIONS_AVAILABLE));
241
+ setFeature(Feature, &cpu, .sme2p1, IsProcessorFeaturePresent(PF.ARM_SME2_1_INSTRUCTIONS_AVAILABLE));
242
+ setFeature(Feature, &cpu, .sme2p2, IsProcessorFeaturePresent(PF.ARM_SME2_2_INSTRUCTIONS_AVAILABLE));
243
+ setFeature(Feature, &cpu, .ssve_aes, IsProcessorFeaturePresent(PF.ARM_SME_AES_INSTRUCTIONS_AVAILABLE));
244
+ setFeature(Feature, &cpu, .ssve_bitperm, IsProcessorFeaturePresent(PF.ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE));
245
+ setFeature(Feature, &cpu, .ssve_fp8dot2, IsProcessorFeaturePresent(PF.ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE));
246
+ setFeature(Feature, &cpu, .ssve_fp8dot4, IsProcessorFeaturePresent(PF.ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE));
247
+ setFeature(Feature, &cpu, .ssve_fp8fma, IsProcessorFeaturePresent(PF.ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE));
248
+ setFeature(Feature, &cpu, .sme_f8f32, IsProcessorFeaturePresent(PF.ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE));
249
+ setFeature(Feature, &cpu, .sme_f8f16, IsProcessorFeaturePresent(PF.ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE));
250
+ setFeature(Feature, &cpu, .sme_b16b16, IsProcessorFeaturePresent(PF.ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE));
251
+ setFeature(Feature, &cpu, .sme_f64f64, IsProcessorFeaturePresent(PF.ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE));
252
+ setFeature(Feature, &cpu, .sme_i16i64, IsProcessorFeaturePresent(PF.ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE));
253
+ setFeature(Feature, &cpu, .sme_lutv2, IsProcessorFeaturePresent(PF.ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE));
254
+ setFeature(Feature, &cpu, .sme_fa64, IsProcessorFeaturePresent(PF.ARM_SME_FA64_INSTRUCTIONS_AVAILABLE));
222
255
  },
223
256
  else => {},
224
257
  }
@@ -232,7 +232,7 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
232
232
  cpu.model = &Target.x86.cpu.lunarlake;
233
233
  return;
234
234
  },
235
- 0xcc => {
235
+ 0xcc, 0xd5 => {
236
236
  cpu.model = &Target.x86.cpu.pantherlake;
237
237
  return;
238
238
  },
@@ -307,6 +307,20 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
307
307
  cpu.model = &Target.x86.cpu.pentium4;
308
308
  return;
309
309
  },
310
+ 18 => switch (model) {
311
+ 0x01, 0x03 => {
312
+ cpu.model = &Target.x86.cpu.novalake;
313
+ return;
314
+ },
315
+ else => return, // Unknown CPU Model
316
+ },
317
+ 19 => switch (model) {
318
+ 0x01 => {
319
+ cpu.model = &Target.x86.cpu.diamondrapids;
320
+ return;
321
+ },
322
+ else => return, // Unknown CPU Model
323
+ },
310
324
  else => return, // Unknown CPU Model
311
325
  }
312
326
  }
@@ -412,6 +426,8 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
412
426
  // AMX requires additional context to be saved by the OS.
413
427
  const has_amx_save = xcr0.xtilecfg and xcr0.xtiledata;
414
428
 
429
+ const has_apx_save = xcr0.apx;
430
+
415
431
  setFeature(cpu, .avx, has_avx_save);
416
432
  setFeature(cpu, .fma, bit(leaf.ecx, 12) and has_avx_save);
417
433
  // Only enable XSAVE if OS has enabled support for saving YMM state.
@@ -470,7 +486,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
470
486
  }
471
487
  }
472
488
 
473
- if (max_level >= 0x7) {
489
+ if (max_ext_level >= 0x80000021) {
490
+ leaf = cpuid(0x80000021, 0);
491
+
492
+ // AMD uses a different bit for prefetchi.
493
+ setFeature(cpu, .prefetchi, bit(leaf.eax, 20));
494
+ } else {
495
+ for ([_]Target.x86.Feature{
496
+ .prefetchi,
497
+ }) |feat| {
498
+ setFeature(cpu, feat, false);
499
+ }
500
+ }
501
+
502
+ const has_avx10 = if (max_level >= 0x7) has_avx10: {
474
503
  leaf = cpuid(0x7, 0);
475
504
 
476
505
  setFeature(cpu, .fsgsbase, bit(leaf.ebx, 0));
@@ -484,7 +513,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
484
513
  setFeature(cpu, .rtm, bit(leaf.ebx, 11));
485
514
  // AVX512 is only supported if the OS supports the context save for it.
486
515
  setFeature(cpu, .avx512f, bit(leaf.ebx, 16) and has_avx512_save);
487
- setFeature(cpu, .evex512, bit(leaf.ebx, 16) and has_avx512_save);
488
516
  setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
489
517
  setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
490
518
  setFeature(cpu, .adx, bit(leaf.ebx, 19));
@@ -556,15 +584,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
556
584
  setFeature(cpu, .avxneconvert, bit(leaf.edx, 5) and has_avx_save);
557
585
  setFeature(cpu, .amx_complex, bit(leaf.edx, 8) and has_amx_save);
558
586
  setFeature(cpu, .avxvnniint16, bit(leaf.edx, 10) and has_avx_save);
559
- setFeature(cpu, .prefetchi, bit(leaf.edx, 14));
587
+ // This needs to account for prefetchi already being detected above on AMD.
588
+ setFeature(cpu, .prefetchi, cpu.has(.x86, .prefetchi) or bit(leaf.edx, 14));
560
589
  setFeature(cpu, .usermsr, bit(leaf.edx, 15));
561
590
  // APX
562
- setFeature(cpu, .egpr, bit(leaf.edx, 21));
563
- setFeature(cpu, .push2pop2, bit(leaf.edx, 21));
564
- setFeature(cpu, .ppx, bit(leaf.edx, 21));
565
- setFeature(cpu, .ndd, bit(leaf.edx, 21));
566
- setFeature(cpu, .ccmp, bit(leaf.edx, 21));
567
- setFeature(cpu, .cf, bit(leaf.edx, 21));
591
+ setFeature(cpu, .egpr, bit(leaf.edx, 21) and has_apx_save);
592
+ setFeature(cpu, .push2pop2, bit(leaf.edx, 21) and has_apx_save);
593
+ setFeature(cpu, .ppx, bit(leaf.edx, 21) and has_apx_save);
594
+ setFeature(cpu, .ndd, bit(leaf.edx, 21) and has_apx_save);
595
+ setFeature(cpu, .ccmp, bit(leaf.edx, 21) and has_apx_save);
596
+ setFeature(cpu, .nf, bit(leaf.edx, 21) and has_apx_save);
597
+ setFeature(cpu, .cf, bit(leaf.edx, 21) and has_apx_save);
598
+ setFeature(cpu, .zu, bit(leaf.edx, 21) and has_apx_save);
599
+
600
+ break :has_avx10 bit(leaf.edx, 19);
568
601
  } else {
569
602
  for ([_]Target.x86.Feature{
570
603
  .sha512,
@@ -582,19 +615,23 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
582
615
  .avxneconvert,
583
616
  .amx_complex,
584
617
  .avxvnniint16,
585
- .prefetchi,
618
+ // prefetchi already handled earlier.
586
619
  .usermsr,
587
620
  .egpr,
588
621
  .push2pop2,
589
622
  .ppx,
590
623
  .ndd,
591
624
  .ccmp,
625
+ .nf,
592
626
  .cf,
627
+ .zu,
593
628
  }) |feat| {
594
629
  setFeature(cpu, feat, false);
595
630
  }
596
631
  }
597
- } else {
632
+
633
+ break :has_avx10 false;
634
+ } else has_avx10: {
598
635
  for ([_]Target.x86.Feature{
599
636
  .fsgsbase,
600
637
  .sgx,
@@ -605,7 +642,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
605
642
  .invpcid,
606
643
  .rtm,
607
644
  .avx512f,
608
- .evex512,
609
645
  .avx512dq,
610
646
  .rdseed,
611
647
  .adx,
@@ -664,18 +700,22 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
664
700
  .avxneconvert,
665
701
  .amx_complex,
666
702
  .avxvnniint16,
667
- .prefetchi,
703
+ // prefetchi already handled earlier.
668
704
  .usermsr,
669
705
  .egpr,
670
706
  .push2pop2,
671
707
  .ppx,
672
708
  .ndd,
673
709
  .ccmp,
710
+ .nf,
674
711
  .cf,
712
+ .zu,
675
713
  }) |feat| {
676
714
  setFeature(cpu, feat, false);
677
715
  }
678
- }
716
+
717
+ break :has_avx10 false;
718
+ };
679
719
 
680
720
  if (max_level >= 0xD and has_avx_save) {
681
721
  leaf = cpuid(0xD, 0x1);
@@ -721,10 +761,14 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
721
761
  if (max_level >= 0x24) {
722
762
  leaf = cpuid(0x24, 0);
723
763
 
724
- setFeature(cpu, .avx10_1, bit(leaf.ebx, 18));
764
+ const avx_ver = leaf.ebx & 0xff;
765
+
766
+ setFeature(cpu, .avx10_1, has_avx10 and avx_ver >= 1);
767
+ setFeature(cpu, .avx10_2, has_avx10 and avx_ver >= 2);
725
768
  } else {
726
769
  for ([_]Target.x86.Feature{
727
770
  .avx10_1,
771
+ .avx10_2,
728
772
  }) |feat| {
729
773
  setFeature(cpu, feat, false);
730
774
  }
@@ -460,16 +460,6 @@ pub fn resolveTargetQuery(io: Io, query: Target.Query) DetectError!Target {
460
460
  if (result.cpu.arch.isArm() and result.abi.float() == .soft) {
461
461
  result.cpu.features.removeFeature(@intFromEnum(Target.arm.Feature.vfp2));
462
462
  }
463
-
464
- // https://github.com/llvm/llvm-project/issues/135283
465
- if (result.cpu.arch.isMIPS() and result.abi.float() == .soft) {
466
- result.cpu.features.addFeature(@intFromEnum(Target.mips.Feature.soft_float));
467
- }
468
-
469
- // https://github.com/llvm/llvm-project/issues/168992
470
- if (result.cpu.arch == .s390x) {
471
- result.cpu.features.removeFeature(@intFromEnum(Target.s390x.Feature.vector));
472
- }
473
463
  }
474
464
 
475
465
  // It's possible that we detect the native ABI, but fail to detect the OS version or were told
@@ -1,94 +0,0 @@
1
- /*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
2
- *
3
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
- * See https://llvm.org/LICENSE.txt for license information.
5
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
- *
7
- *===------------------------------------------------------------------------===
8
- */
9
-
10
- #ifndef __IMMINTRIN_H
11
- #error \
12
- "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
13
- #endif /* __IMMINTRIN_H */
14
-
15
- #ifndef __AMX_BF16TRANSPOSEINTRIN_H
16
- #define __AMX_BF16TRANSPOSEINTRIN_H
17
- #ifdef __x86_64__
18
-
19
- /* Define the default attributes for the functions in this file. */
20
- #define __DEFAULT_FN_ATTRS \
21
- __attribute__((__always_inline__, __nodebug__, \
22
- __target__("amx-bf16,amx-transpose")))
23
-
24
- /// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
25
- /// tiles \a a and \a b, accumulating the intermediate single-precision
26
- /// (32-bit) floating-point elements with elements in \a dst, and store the
27
- /// 32-bit result back to tile \a dst.
28
- ///
29
- /// \headerfile <immintrin.h>
30
- ///
31
- /// \code
32
- /// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
33
- /// \endcode
34
- ///
35
- /// \code{.operation}
36
- /// FOR m := 0 TO dst.rows - 1
37
- /// tmp := dst.row[m]
38
- /// FOR k := 0 TO (a.colsb / 4) - 1
39
- /// FOR n := 0 TO (dst.colsb / 4) - 1
40
- /// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
41
- /// FP32(b.row[k].bf16[2*n+0])
42
- /// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
43
- /// FP32(b.row[k].bf16[2*n+1])
44
- /// ENDFOR
45
- /// ENDFOR
46
- /// write_row_and_zero(dst, m, tmp, dst.colsb)
47
- /// ENDFOR
48
- /// zero_upper_rows(dst, dst.rows)
49
- /// zero_tileconfig_start()
50
- /// \endcode
51
- ///
52
- /// This intrinsic corresponds to the \c TTDPBF16PS instruction.
53
- ///
54
- /// \param dst
55
- /// The destination tile. Max size is 1024 Bytes.
56
- /// \param a
57
- /// The 1st source tile. Max size is 1024 Bytes.
58
- /// \param b
59
- /// The 2nd source tile. Max size is 1024 Bytes.
60
- #define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
61
-
62
- /// This is internal intrinsic. C/C++ user should avoid calling it directly.
63
- static __inline__ _tile1024i __DEFAULT_FN_ATTRS
64
- _tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
65
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
66
- return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
67
- }
68
-
69
- /// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
70
- /// tiles src0 and src1, accumulating the intermediate single-precision
71
- /// (32-bit) floating-point elements with elements in "dst", and store the
72
- /// 32-bit result back to tile "dst".
73
- ///
74
- /// \headerfile <immintrin.h>
75
- ///
76
- /// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
77
- ///
78
- /// \param dst
79
- /// The destination tile. Max size is 1024 Bytes.
80
- /// \param src0
81
- /// The 1st source tile. Max size is 1024 Bytes.
82
- /// \param src1
83
- /// The 2nd source tile. Max size is 1024 Bytes.
84
- __DEFAULT_FN_ATTRS
85
- static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
86
- __tile1024i src1) {
87
- dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
88
- src0.tile, src1.tile);
89
- }
90
-
91
- #undef __DEFAULT_FN_ATTRS
92
-
93
- #endif /* __x86_64__ */
94
- #endif /* __AMX_BF16TRANSPOSEINTRIN_H */