minimap2 0.2.25.0 → 0.2.25.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,1545 @@
1
+ /* Permission is hereby granted, free of charge, to any person
2
+ * obtaining a copy of this software and associated documentation
3
+ * files (the "Software"), to deal in the Software without
4
+ * restriction, including without limitation the rights to use, copy,
5
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
6
+ * of the Software, and to permit persons to whom the Software is
7
+ * furnished to do so, subject to the following conditions:
8
+ *
9
+ * The above copyright notice and this permission notice shall be
10
+ * included in all copies or substantial portions of the Software.
11
+ *
12
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
16
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
17
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ * SOFTWARE.
20
+ *
21
+ * Copyright:
22
+ * 2020 Evan Nemerson <evan@nemerson.com>
23
+ */
24
+
25
+ #define SIMDE_TESTS_CURRENT_ISAX svml
26
+ #include <test/x86/test-x86-internal.h>
27
+ #include <simde/x86/svml.h>
28
+
29
+ #if defined(SIMDE_SVML_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
30
+
31
+ static MunitResult
32
+ test_simde_mm_div_epi8(const MunitParameter params[], void* data) {
33
+ (void) params;
34
+ (void) data;
35
+
36
+ const struct {
37
+ simde__m128i a;
38
+ simde__m128i b;
39
+ simde__m128i r;
40
+ } test_vec[8] = {
41
+ { simde_mm_set_epi8(INT8_C( 80), INT8_C( 26), INT8_C( -96), INT8_C( 63),
42
+ INT8_C( 84), INT8_C( 0), INT8_C( 86), INT8_C( -92),
43
+ INT8_C( 19), INT8_C( 73), INT8_C( 49), INT8_C( 84),
44
+ INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( -85)),
45
+ simde_mm_set_epi8(INT8_C( 4), INT8_C( 4), INT8_C( 3), INT8_C( 27),
46
+ INT8_C( 44), INT8_C( 48), INT8_C( 3), INT8_C( 53),
47
+ INT8_C( 11), INT8_C( 6), INT8_C( 2), INT8_C( 14),
48
+ INT8_C( 89), INT8_C( 10), INT8_C( 3), INT8_C( 1)),
49
+ simde_mm_set_epi8(INT8_C( 20), INT8_C( 6), INT8_C( -32), INT8_C( 2),
50
+ INT8_C( 1), INT8_C( 0), INT8_C( 28), INT8_C( -1),
51
+ INT8_C( 1), INT8_C( 12), INT8_C( 24), INT8_C( 6),
52
+ INT8_C( 1), INT8_C( -2), INT8_C( 16), INT8_C( -85)) },
53
+ { simde_mm_set_epi8(INT8_C( -53), INT8_C(-123), INT8_C( 83), INT8_C( 82),
54
+ INT8_C( -17), INT8_C( 32), INT8_C( -32), INT8_C( 68),
55
+ INT8_C( -20), INT8_C( 5), INT8_C( -1), INT8_C( -23),
56
+ INT8_C( 118), INT8_C(-101), INT8_C( 53), INT8_C( 4)),
57
+ simde_mm_set_epi8(INT8_C( 9), INT8_C( 1), INT8_C( -68), INT8_C( 1),
58
+ INT8_C( 1), INT8_C( 1), INT8_C( 22), INT8_C( 17),
59
+ INT8_C( 4), INT8_C( 8), INT8_C( 6), INT8_C( 10),
60
+ INT8_C( 55), INT8_C( 3), INT8_C( 14), INT8_C( 14)),
61
+ simde_mm_set_epi8(INT8_C( -5), INT8_C(-123), INT8_C( -1), INT8_C( 82),
62
+ INT8_C( -17), INT8_C( 32), INT8_C( -1), INT8_C( 4),
63
+ INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( -2),
64
+ INT8_C( 2), INT8_C( -33), INT8_C( 3), INT8_C( 0)) },
65
+ { simde_mm_set_epi8(INT8_C( 122), INT8_C( 103), INT8_C( 28), INT8_C(-102),
66
+ INT8_C( -41), INT8_C(-105), INT8_C( -14), INT8_C(-120),
67
+ INT8_C( -71), INT8_C( 84), INT8_C( 90), INT8_C( 8),
68
+ INT8_C( 84), INT8_C( 120), INT8_C( -59), INT8_C( 9)),
69
+ simde_mm_set_epi8(INT8_C( 59), INT8_C( -21), INT8_C( 22), INT8_C( 53),
70
+ INT8_C( 22), INT8_C( 3), INT8_C( 5), INT8_C( 6),
71
+ INT8_C( 2), INT8_C( 21), INT8_C( 3), INT8_C( 3),
72
+ INT8_C( 2), INT8_C( 10), INT8_C( 10), INT8_C( 3)),
73
+ simde_mm_set_epi8(INT8_C( 2), INT8_C( -4), INT8_C( 1), INT8_C( -1),
74
+ INT8_C( -1), INT8_C( -35), INT8_C( -2), INT8_C( -20),
75
+ INT8_C( -35), INT8_C( 4), INT8_C( 30), INT8_C( 2),
76
+ INT8_C( 42), INT8_C( 12), INT8_C( -5), INT8_C( 3)) },
77
+ { simde_mm_set_epi8(INT8_C( 121), INT8_C( -15), INT8_C(-123), INT8_C( 80),
78
+ INT8_C( 43), INT8_C( 58), INT8_C( 119), INT8_C( -49),
79
+ INT8_C( 107), INT8_C( -94), INT8_C( 51), INT8_C(-118),
80
+ INT8_C( 68), INT8_C( 112), INT8_C( -56), INT8_C(-103)),
81
+ simde_mm_set_epi8(INT8_C( 44), INT8_C( 13), INT8_C( 14), INT8_C( 8),
82
+ INT8_C( -24), INT8_C( 77), INT8_C( 118), INT8_C( 21),
83
+ INT8_C( 1), INT8_C( -34), INT8_C( 2), INT8_C( 29),
84
+ INT8_C( 14), INT8_C( 53), INT8_C( 1), INT8_C( 54)),
85
+ simde_mm_set_epi8(INT8_C( 2), INT8_C( -1), INT8_C( -8), INT8_C( 10),
86
+ INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( -2),
87
+ INT8_C( 107), INT8_C( 2), INT8_C( 25), INT8_C( -4),
88
+ INT8_C( 4), INT8_C( 2), INT8_C( -56), INT8_C( -1)) },
89
+ { simde_mm_set_epi8(INT8_C( -42), INT8_C( 14), INT8_C(-113), INT8_C( 62),
90
+ INT8_C( -34), INT8_C( -16), INT8_C(-103), INT8_C(-122),
91
+ INT8_C(-128), INT8_C( -77), INT8_C( -15), INT8_C( -38),
92
+ INT8_C( 87), INT8_C( -72), INT8_C( 57), INT8_C( -40)),
93
+ simde_mm_set_epi8(INT8_C( 30), INT8_C( 124), INT8_C( -94), INT8_C( 4),
94
+ INT8_C( 46), INT8_C( 11), INT8_C( 3), INT8_C( -54),
95
+ INT8_C( 11), INT8_C( 8), INT8_C(-114), INT8_C( 3),
96
+ INT8_C( 6), INT8_C( 1), INT8_C(-121), INT8_C( 4)),
97
+ simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( 15),
98
+ INT8_C( 0), INT8_C( -1), INT8_C( -34), INT8_C( 2),
99
+ INT8_C( -11), INT8_C( -9), INT8_C( 0), INT8_C( -12),
100
+ INT8_C( 14), INT8_C( -72), INT8_C( 0), INT8_C( -10)) },
101
+ { simde_mm_set_epi8(INT8_C( -13), INT8_C( -82), INT8_C( 64), INT8_C( -67),
102
+ INT8_C(-120), INT8_C( 26), INT8_C(-105), INT8_C( 40),
103
+ INT8_C( 59), INT8_C( -83), INT8_C( 64), INT8_C( -39),
104
+ INT8_C( 99), INT8_C( -73), INT8_C( -97), INT8_C( -1)),
105
+ simde_mm_set_epi8(INT8_C( -27), INT8_C( 114), INT8_C(-109), INT8_C( 8),
106
+ INT8_C( 12), INT8_C( 4), INT8_C( 2), INT8_C( 2),
107
+ INT8_C( 3), INT8_C( 11), INT8_C( 3), INT8_C( 11),
108
+ INT8_C( 82), INT8_C( 14), INT8_C( 120), INT8_C(-107)),
109
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -8),
110
+ INT8_C( -10), INT8_C( 6), INT8_C( -52), INT8_C( 20),
111
+ INT8_C( 19), INT8_C( -7), INT8_C( 21), INT8_C( -3),
112
+ INT8_C( 1), INT8_C( -5), INT8_C( 0), INT8_C( 0)) },
113
+ { simde_mm_set_epi8(INT8_C( -57), INT8_C( 53), INT8_C( 114), INT8_C( -35),
114
+ INT8_C( -22), INT8_C( -59), INT8_C( 52), INT8_C( 113),
115
+ INT8_C( 25), INT8_C( 16), INT8_C( -8), INT8_C( -67),
116
+ INT8_C( 7), INT8_C( -33), INT8_C( 51), INT8_C( 118)),
117
+ simde_mm_set_epi8(INT8_C( 14), INT8_C( 15), INT8_C( 24), INT8_C( 83),
118
+ INT8_C( 4), INT8_C( 45), INT8_C( 4), INT8_C( 34),
119
+ INT8_C( 9), INT8_C( 19), INT8_C( 4), INT8_C( 11),
120
+ INT8_C( 8), INT8_C( 14), INT8_C( 102), INT8_C( -88)),
121
+ simde_mm_set_epi8(INT8_C( -4), INT8_C( 3), INT8_C( 4), INT8_C( 0),
122
+ INT8_C( -5), INT8_C( -1), INT8_C( 13), INT8_C( 3),
123
+ INT8_C( 2), INT8_C( 0), INT8_C( -2), INT8_C( -6),
124
+ INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( -1)) },
125
+ { simde_mm_set_epi8(INT8_C( -69), INT8_C( 57), INT8_C( 3), INT8_C( 127),
126
+ INT8_C( -28), INT8_C( -47), INT8_C(-127), INT8_C( -14),
127
+ INT8_C( -28), INT8_C( 68), INT8_C( -27), INT8_C( -44),
128
+ INT8_C( -16), INT8_C( 1), INT8_C( -44), INT8_C( 112)),
129
+ simde_mm_set_epi8(INT8_C( 57), INT8_C( 1), INT8_C( -43), INT8_C( 103),
130
+ INT8_C( 4), INT8_C( 1), INT8_C( 2), INT8_C( 96),
131
+ INT8_C( 9), INT8_C( 57), INT8_C( 54), INT8_C( 105),
132
+ INT8_C( 1), INT8_C( 31), INT8_C( -85), INT8_C( 104)),
133
+ simde_mm_set_epi8(INT8_C( -1), INT8_C( 57), INT8_C( 0), INT8_C( 1),
134
+ INT8_C( -7), INT8_C( -47), INT8_C( -63), INT8_C( 0),
135
+ INT8_C( -3), INT8_C( 1), INT8_C( 0), INT8_C( 0),
136
+ INT8_C( -16), INT8_C( 0), INT8_C( 0), INT8_C( 1)) }
137
+ };
138
+
139
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
140
+ simde__m128i r = simde_mm_div_epi8(test_vec[i].a, test_vec[i].b);
141
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
142
+ }
143
+
144
+ return MUNIT_OK;
145
+ }
146
+
147
+ static MunitResult
148
+ test_simde_mm_div_epi16(const MunitParameter params[], void* data) {
149
+ (void) params;
150
+ (void) data;
151
+
152
+ const struct {
153
+ simde__m128i a;
154
+ simde__m128i b;
155
+ simde__m128i r;
156
+ } test_vec[8] = {
157
+ { simde_mm_set_epi16(INT16_C( 7569), INT16_C(-21774), INT16_C( 5125), INT16_C( 21356),
158
+ INT16_C( 9222), INT16_C( 7511), INT16_C(-21561), INT16_C( 29102)),
159
+ simde_mm_set_epi16(INT16_C( 6450), INT16_C( -2), INT16_C( 190), INT16_C( -44),
160
+ INT16_C( -3), INT16_C( -9), INT16_C( -911), INT16_C( 3)),
161
+ simde_mm_set_epi16(INT16_C( 1), INT16_C( 10887), INT16_C( 26), INT16_C( -485),
162
+ INT16_C( -3074), INT16_C( -834), INT16_C( 23), INT16_C( 9700)) },
163
+ { simde_mm_set_epi16(INT16_C( 14790), INT16_C(-17845), INT16_C( 12471), INT16_C( 16666),
164
+ INT16_C( -4541), INT16_C( 18926), INT16_C( 4112), INT16_C( 26905)),
165
+ simde_mm_set_epi16(INT16_C( -1), INT16_C( -8), INT16_C( 15), INT16_C( -16),
166
+ INT16_C( -1), INT16_C( -28), INT16_C( -3387), INT16_C( -5)),
167
+ simde_mm_set_epi16(INT16_C(-14790), INT16_C( 2230), INT16_C( 831), INT16_C( -1041),
168
+ INT16_C( 4541), INT16_C( -675), INT16_C( -1), INT16_C( -5381)) },
169
+ { simde_mm_set_epi16(INT16_C( 24700), INT16_C( 18820), INT16_C( -6493), INT16_C(-11852),
170
+ INT16_C( 7293), INT16_C( 18330), INT16_C(-13423), INT16_C( 30834)),
171
+ simde_mm_set_epi16(INT16_C( 9411), INT16_C( -2), INT16_C( -2), INT16_C( -10),
172
+ INT16_C( 942), INT16_C( 5062), INT16_C( 3712), INT16_C(-24297)),
173
+ simde_mm_set_epi16(INT16_C( 2), INT16_C( -9410), INT16_C( 3246), INT16_C( 1185),
174
+ INT16_C( 7), INT16_C( 3), INT16_C( -3), INT16_C( -1)) },
175
+ { simde_mm_set_epi16(INT16_C( -8188), INT16_C( -5752), INT16_C( -6400), INT16_C(-18754),
176
+ INT16_C( 26203), INT16_C( 11990), INT16_C( 27655), INT16_C( 30479)),
177
+ simde_mm_set_epi16(INT16_C( -2891), INT16_C( -9), INT16_C( 1), INT16_C( 24),
178
+ INT16_C( 1410), INT16_C( -7348), INT16_C( 56), INT16_C( -8)),
179
+ simde_mm_set_epi16(INT16_C( 2), INT16_C( 639), INT16_C( -6400), INT16_C( -781),
180
+ INT16_C( 18), INT16_C( -1), INT16_C( 493), INT16_C( -3809)) },
181
+ { simde_mm_set_epi16(INT16_C( 27464), INT16_C( 30742), INT16_C(-17463), INT16_C( 5584),
182
+ INT16_C( 16882), INT16_C(-13221), INT16_C(-30009), INT16_C( 27529)),
183
+ simde_mm_set_epi16(INT16_C( 92), INT16_C( -245), INT16_C( 87), INT16_C( 2027),
184
+ INT16_C( -218), INT16_C( 181), INT16_C( 1), INT16_C( -448)),
185
+ simde_mm_set_epi16(INT16_C( 298), INT16_C( -125), INT16_C( -200), INT16_C( 2),
186
+ INT16_C( -77), INT16_C( -73), INT16_C(-30009), INT16_C( -61)) },
187
+ { simde_mm_set_epi16(INT16_C(-28312), INT16_C( -6464), INT16_C( 7438), INT16_C(-24771),
188
+ INT16_C( 27969), INT16_C( 18884), INT16_C( 17235), INT16_C( 31019)),
189
+ simde_mm_set_epi16(INT16_C( -3989), INT16_C( 8), INT16_C( -1), INT16_C( -27),
190
+ INT16_C( 53), INT16_C( -58), INT16_C( 2274), INT16_C( -9)),
191
+ simde_mm_set_epi16(INT16_C( 7), INT16_C( -808), INT16_C( -7438), INT16_C( 917),
192
+ INT16_C( 527), INT16_C( -325), INT16_C( 7), INT16_C( -3446)) },
193
+ { simde_mm_set_epi16(INT16_C(-31090), INT16_C( 20346), INT16_C( 14276), INT16_C(-27653),
194
+ INT16_C( 19203), INT16_C(-24798), INT16_C(-17826), INT16_C( 16379)),
195
+ simde_mm_set_epi16(INT16_C( 3), INT16_C( 8), INT16_C( -60), INT16_C( 14),
196
+ INT16_C( -435), INT16_C( -1), INT16_C( -395), INT16_C( -1532)),
197
+ simde_mm_set_epi16(INT16_C(-10363), INT16_C( 2543), INT16_C( -237), INT16_C( -1975),
198
+ INT16_C( -44), INT16_C( 24798), INT16_C( 45), INT16_C( -10)) },
199
+ { simde_mm_set_epi16(INT16_C( -4012), INT16_C( 17981), INT16_C( 26341), INT16_C(-11451),
200
+ INT16_C(-22746), INT16_C(-13246), INT16_C( -6273), INT16_C( 15936)),
201
+ simde_mm_set_epi16(INT16_C( -5), INT16_C( 325), INT16_C( 10), INT16_C( -2018),
202
+ INT16_C(-26192), INT16_C( -15), INT16_C( -29), INT16_C( 2009)),
203
+ simde_mm_set_epi16(INT16_C( 802), INT16_C( 55), INT16_C( 2634), INT16_C( 5),
204
+ INT16_C( 0), INT16_C( 883), INT16_C( 216), INT16_C( 7)) }
205
+ };
206
+
207
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
208
+ simde__m128i r = simde_mm_div_epi16(test_vec[i].a, test_vec[i].b);
209
+ simde_assert_m128i_i16(r, ==, test_vec[i].r);
210
+ }
211
+
212
+ return MUNIT_OK;
213
+ }
214
+
215
+ static MunitResult
216
+ test_simde_mm_div_epi32(const MunitParameter params[], void* data) {
217
+ (void) params;
218
+ (void) data;
219
+
220
+ const struct {
221
+ simde__m128i a;
222
+ simde__m128i b;
223
+ simde__m128i r;
224
+ } test_vec[8] = {
225
+ { simde_mm_set_epi32(INT32_C(-2101284579), INT32_C( 1788896628), INT32_C( 742774378), INT32_C( -512831871)),
226
+ simde_mm_set_epi32(INT32_C( -173), INT32_C( -20613654), INT32_C( 28772), INT32_C( 118)),
227
+ simde_mm_set_epi32(INT32_C( 12146153), INT32_C( -86), INT32_C( 25815), INT32_C( -4346032)) },
228
+ { simde_mm_set_epi32(INT32_C( 505370509), INT32_C( -307733024), INT32_C( -192358019), INT32_C( -299231491)),
229
+ simde_mm_set_epi32(INT32_C( 34268), INT32_C( -6), INT32_C( 6850), INT32_C( 1214711)),
230
+ simde_mm_set_epi32(INT32_C( 14747), INT32_C( 51288837), INT32_C( -28081), INT32_C( -246)) },
231
+ { simde_mm_set_epi32(INT32_C(-1154189768), INT32_C( 94538029), INT32_C( 423884488), INT32_C( 1619435962)),
232
+ simde_mm_set_epi32(INT32_C( -565), INT32_C( -128659), INT32_C( -59), INT32_C( -208397178)),
233
+ simde_mm_set_epi32(INT32_C( 2042813), INT32_C( -734), INT32_C( -7184482), INT32_C( -7)) },
234
+ { simde_mm_set_epi32(INT32_C(-1938127942), INT32_C( -553846699), INT32_C( 685427224), INT32_C( -86375451)),
235
+ simde_mm_set_epi32(INT32_C( 1223981911), INT32_C( -108113), INT32_C( 3), INT32_C( -3698)),
236
+ simde_mm_set_epi32(INT32_C( -1), INT32_C( 5122), INT32_C( 228475741), INT32_C( 23357)) },
237
+ { simde_mm_set_epi32(INT32_C(-1690889220), INT32_C( -667367235), INT32_C( 1220206139), INT32_C(-1217543723)),
238
+ simde_mm_set_epi32(INT32_C( 299), INT32_C( 7724), INT32_C( -1), INT32_C( 173051558)),
239
+ simde_mm_set_epi32(INT32_C( -5655147), INT32_C( -86401), INT32_C(-1220206139), INT32_C( -7)) },
240
+ { simde_mm_set_epi32(INT32_C( 93323521), INT32_C( 1996592708), INT32_C( 2087305602), INT32_C( 27568495)),
241
+ simde_mm_set_epi32(INT32_C( -2), INT32_C( 15626723), INT32_C( 1507), INT32_C( 5412)),
242
+ simde_mm_set_epi32(INT32_C( -46661760), INT32_C( 127), INT32_C( 1385073), INT32_C( 5093)) },
243
+ { simde_mm_set_epi32(INT32_C( 1825211631), INT32_C( 1750705004), INT32_C( 1935103134), INT32_C(-1042289581)),
244
+ simde_mm_set_epi32(INT32_C( -20153), INT32_C( -109992928), INT32_C( -4), INT32_C( 3)),
245
+ simde_mm_set_epi32(INT32_C( -90567), INT32_C( -15), INT32_C( -483775783), INT32_C( -347429860)) },
246
+ { simde_mm_set_epi32(INT32_C( -836927167), INT32_C(-2031963629), INT32_C( 1244477192), INT32_C( 662038781)),
247
+ simde_mm_set_epi32(INT32_C( -226), INT32_C( 320), INT32_C( 17085036), INT32_C( -883)),
248
+ simde_mm_set_epi32(INT32_C( 3703217), INT32_C( -6349886), INT32_C( 72), INT32_C( -749760)) }
249
+ };
250
+
251
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
252
+ simde__m128i r = simde_mm_div_epi32(test_vec[i].a, test_vec[i].b);
253
+ simde_assert_m128i_i32(r, ==, test_vec[i].r);
254
+ }
255
+
256
+ return MUNIT_OK;
257
+ }
258
+
259
+ static MunitResult
260
+ test_simde_mm_div_epi64(const MunitParameter params[], void* data) {
261
+ (void) params;
262
+ (void) data;
263
+
264
+ const struct {
265
+ simde__m128i a;
266
+ simde__m128i b;
267
+ simde__m128i r;
268
+ } test_vec[8] = {
269
+ { simde_mm_set_epi64x(INT64_C(-8762915026342605517), INT64_C( 6327019035084041530)),
270
+ simde_mm_set_epi64x(INT64_C( 1040172869250133860), INT64_C( -3393154419)),
271
+ simde_mm_set_epi64x(INT64_C( -8), INT64_C( -1864642233)) },
272
+ { simde_mm_set_epi64x(INT64_C( 7086115847005357544), INT64_C( 7169462887889416879)),
273
+ simde_mm_set_epi64x(INT64_C( -402272), INT64_C( -6362438)),
274
+ simde_mm_set_epi64x(INT64_C( -17615235082246), INT64_C( -1126842082844)) },
275
+ { simde_mm_set_epi64x(INT64_C( 3227829673356714047), INT64_C( 5122063021698718134)),
276
+ simde_mm_set_epi64x(INT64_C( 290796), INT64_C( -647054)),
277
+ simde_mm_set_epi64x(INT64_C( 11099979619240), INT64_C( -7915974588981)) },
278
+ { simde_mm_set_epi64x(INT64_C( -712959233727550094), INT64_C( 8175697730423622547)),
279
+ simde_mm_set_epi64x(INT64_C( -114108996), INT64_C( 727492806)),
280
+ simde_mm_set_epi64x(INT64_C( 6248054568), INT64_C( 11238183612)) },
281
+ { simde_mm_set_epi64x(INT64_C( 7475816922473172733), INT64_C(-1631503293395556188)),
282
+ simde_mm_set_epi64x(INT64_C( 5), INT64_C( -24770378177)),
283
+ simde_mm_set_epi64x(INT64_C( 1495163384494634546), INT64_C( 65865094)) },
284
+ { simde_mm_set_epi64x(INT64_C(-7220293124938945390), INT64_C( 5345879758546587877)),
285
+ simde_mm_set_epi64x(INT64_C( -716), INT64_C( 1692902)),
286
+ simde_mm_set_epi64x(INT64_C( 10084208275054393), INT64_C( 3157819979270)) },
287
+ { simde_mm_set_epi64x(INT64_C(-2100788141468237692), INT64_C( 1869244361192362281)),
288
+ simde_mm_set_epi64x(INT64_C( -1), INT64_C( 27867346395)),
289
+ simde_mm_set_epi64x(INT64_C( 2100788141468237692), INT64_C( 67076510)) },
290
+ { simde_mm_set_epi64x(INT64_C(-4218200756000910912), INT64_C( 8429274423139369867)),
291
+ simde_mm_set_epi64x(INT64_C( 25), INT64_C( -63869567732)),
292
+ simde_mm_set_epi64x(INT64_C( -168728030240036436), INT64_C( -131976381)) }
293
+ };
294
+
295
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
296
+ simde__m128i r = simde_mm_div_epi64(test_vec[i].a, test_vec[i].b);
297
+ simde_assert_m128i_i64(r, ==, test_vec[i].r);
298
+ }
299
+
300
+ return MUNIT_OK;
301
+ }
302
+
303
+ static MunitResult
304
+ test_simde_mm_div_epu8(const MunitParameter params[], void* data) {
305
+ (void) params;
306
+ (void) data;
307
+
308
+ const struct {
309
+ simde__m128i a;
310
+ simde__m128i b;
311
+ simde__m128i r;
312
+ } test_vec[8] = {
313
+ { simde_x_mm_set_epu8(UINT8_C( 15), UINT8_C( 75), UINT8_C(224), UINT8_C(156),
314
+ UINT8_C( 1), UINT8_C( 34), UINT8_C( 35), UINT8_C(127),
315
+ UINT8_C(127), UINT8_C(120), UINT8_C(177), UINT8_C( 31),
316
+ UINT8_C(136), UINT8_C(180), UINT8_C(141), UINT8_C(206)),
317
+ simde_x_mm_set_epu8(UINT8_C( 45), UINT8_C( 8), UINT8_C( 9), UINT8_C( 13),
318
+ UINT8_C(246), UINT8_C( 1), UINT8_C( 15), UINT8_C( 2),
319
+ UINT8_C(152), UINT8_C( 45), UINT8_C( 56), UINT8_C( 26),
320
+ UINT8_C( 1), UINT8_C( 1), UINT8_C( 16), UINT8_C( 15)),
321
+ simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 9), UINT8_C( 24), UINT8_C( 12),
322
+ UINT8_C( 0), UINT8_C( 34), UINT8_C( 2), UINT8_C( 63),
323
+ UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 1),
324
+ UINT8_C(136), UINT8_C(180), UINT8_C( 8), UINT8_C( 13)) },
325
+ { simde_x_mm_set_epu8(UINT8_C( 75), UINT8_C(233), UINT8_C(186), UINT8_C(216),
326
+ UINT8_C(224), UINT8_C( 45), UINT8_C( 40), UINT8_C(134),
327
+ UINT8_C( 1), UINT8_C( 47), UINT8_C( 23), UINT8_C(119),
328
+ UINT8_C(229), UINT8_C(107), UINT8_C(175), UINT8_C( 79)),
329
+ simde_x_mm_set_epu8(UINT8_C( 9), UINT8_C( 12), UINT8_C( 46), UINT8_C( 39),
330
+ UINT8_C( 11), UINT8_C( 15), UINT8_C( 32), UINT8_C( 13),
331
+ UINT8_C( 21), UINT8_C(239), UINT8_C( 5), UINT8_C( 2),
332
+ UINT8_C( 1), UINT8_C( 26), UINT8_C(182), UINT8_C( 29)),
333
+ simde_x_mm_set_epu8(UINT8_C( 8), UINT8_C( 19), UINT8_C( 4), UINT8_C( 5),
334
+ UINT8_C( 20), UINT8_C( 3), UINT8_C( 1), UINT8_C( 10),
335
+ UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 59),
336
+ UINT8_C(229), UINT8_C( 4), UINT8_C( 0), UINT8_C( 2)) },
337
+ { simde_x_mm_set_epu8(UINT8_C( 75), UINT8_C(109), UINT8_C( 28), UINT8_C(204),
338
+ UINT8_C( 53), UINT8_C(255), UINT8_C(143), UINT8_C(254),
339
+ UINT8_C( 82), UINT8_C(109), UINT8_C(205), UINT8_C( 21),
340
+ UINT8_C( 16), UINT8_C( 18), UINT8_C(221), UINT8_C(119)),
341
+ simde_x_mm_set_epu8(UINT8_C(210), UINT8_C(108), UINT8_C( 89), UINT8_C( 21),
342
+ UINT8_C(154), UINT8_C( 52), UINT8_C( 17), UINT8_C( 8),
343
+ UINT8_C( 90), UINT8_C( 6), UINT8_C( 1), UINT8_C( 5),
344
+ UINT8_C( 1), UINT8_C(201), UINT8_C( 23), UINT8_C( 2)),
345
+ simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 9),
346
+ UINT8_C( 0), UINT8_C( 4), UINT8_C( 8), UINT8_C( 31),
347
+ UINT8_C( 0), UINT8_C( 18), UINT8_C(205), UINT8_C( 4),
348
+ UINT8_C( 16), UINT8_C( 0), UINT8_C( 9), UINT8_C( 59)) },
349
+ { simde_x_mm_set_epu8(UINT8_C( 23), UINT8_C(229), UINT8_C(200), UINT8_C( 62),
350
+ UINT8_C(169), UINT8_C(116), UINT8_C(131), UINT8_C(205),
351
+ UINT8_C(117), UINT8_C( 49), UINT8_C(130), UINT8_C( 21),
352
+ UINT8_C( 91), UINT8_C(138), UINT8_C(101), UINT8_C(205)),
353
+ simde_x_mm_set_epu8(UINT8_C( 43), UINT8_C( 65), UINT8_C( 28), UINT8_C( 61),
354
+ UINT8_C( 12), UINT8_C( 4), UINT8_C( 37), UINT8_C( 4),
355
+ UINT8_C(237), UINT8_C( 25), UINT8_C( 38), UINT8_C( 15),
356
+ UINT8_C( 9), UINT8_C( 6), UINT8_C(140), UINT8_C( 10)),
357
+ simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 3), UINT8_C( 7), UINT8_C( 1),
358
+ UINT8_C( 14), UINT8_C( 29), UINT8_C( 3), UINT8_C( 51),
359
+ UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1),
360
+ UINT8_C( 10), UINT8_C( 23), UINT8_C( 0), UINT8_C( 20)) },
361
+ { simde_x_mm_set_epu8(UINT8_C(140), UINT8_C(170), UINT8_C(150), UINT8_C(208),
362
+ UINT8_C( 64), UINT8_C( 6), UINT8_C(116), UINT8_C(102),
363
+ UINT8_C(200), UINT8_C(110), UINT8_C(136), UINT8_C(125),
364
+ UINT8_C(201), UINT8_C( 22), UINT8_C(166), UINT8_C(235)),
365
+ simde_x_mm_set_epu8(UINT8_C( 1), UINT8_C( 7), UINT8_C( 23), UINT8_C( 2),
366
+ UINT8_C( 12), UINT8_C(103), UINT8_C( 24), UINT8_C( 18),
367
+ UINT8_C(234), UINT8_C( 11), UINT8_C( 6), UINT8_C( 2),
368
+ UINT8_C( 5), UINT8_C( 34), UINT8_C( 60), UINT8_C( 13)),
369
+ simde_x_mm_set_epu8(UINT8_C(140), UINT8_C( 24), UINT8_C( 6), UINT8_C(104),
370
+ UINT8_C( 5), UINT8_C( 0), UINT8_C( 4), UINT8_C( 5),
371
+ UINT8_C( 0), UINT8_C( 10), UINT8_C( 22), UINT8_C( 62),
372
+ UINT8_C( 40), UINT8_C( 0), UINT8_C( 2), UINT8_C( 18)) },
373
+ { simde_x_mm_set_epu8(UINT8_C(143), UINT8_C( 77), UINT8_C(114), UINT8_C( 66),
374
+ UINT8_C( 82), UINT8_C(133), UINT8_C( 93), UINT8_C(122),
375
+ UINT8_C(225), UINT8_C(230), UINT8_C(202), UINT8_C(147),
376
+ UINT8_C(170), UINT8_C(252), UINT8_C(163), UINT8_C(161)),
377
+ simde_x_mm_set_epu8(UINT8_C( 5), UINT8_C( 8), UINT8_C( 15), UINT8_C( 99),
378
+ UINT8_C( 10), UINT8_C( 4), UINT8_C( 1), UINT8_C( 1),
379
+ UINT8_C( 15), UINT8_C( 21), UINT8_C( 3), UINT8_C( 1),
380
+ UINT8_C( 2), UINT8_C( 18), UINT8_C( 18), UINT8_C( 2)),
381
+ simde_x_mm_set_epu8(UINT8_C( 28), UINT8_C( 9), UINT8_C( 7), UINT8_C( 0),
382
+ UINT8_C( 8), UINT8_C( 33), UINT8_C( 93), UINT8_C(122),
383
+ UINT8_C( 15), UINT8_C( 10), UINT8_C( 67), UINT8_C(147),
384
+ UINT8_C( 85), UINT8_C( 14), UINT8_C( 9), UINT8_C( 80)) },
385
+ { simde_x_mm_set_epu8(UINT8_C(125), UINT8_C(134), UINT8_C(114), UINT8_C( 16),
386
+ UINT8_C(101), UINT8_C( 75), UINT8_C( 71), UINT8_C(136),
387
+ UINT8_C(137), UINT8_C(104), UINT8_C(249), UINT8_C(115),
388
+ UINT8_C(110), UINT8_C(132), UINT8_C(229), UINT8_C( 48)),
389
+ simde_x_mm_set_epu8(UINT8_C( 69), UINT8_C( 11), UINT8_C( 3), UINT8_C( 2),
390
+ UINT8_C( 2), UINT8_C( 21), UINT8_C( 3), UINT8_C( 1),
391
+ UINT8_C( 5), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2),
392
+ UINT8_C( 1), UINT8_C(163), UINT8_C( 1), UINT8_C( 2)),
393
+ simde_x_mm_set_epu8(UINT8_C( 1), UINT8_C( 12), UINT8_C( 38), UINT8_C( 8),
394
+ UINT8_C( 50), UINT8_C( 3), UINT8_C( 23), UINT8_C(136),
395
+ UINT8_C( 27), UINT8_C(104), UINT8_C( 83), UINT8_C( 57),
396
+ UINT8_C(110), UINT8_C( 0), UINT8_C(229), UINT8_C( 24)) },
397
+ { simde_x_mm_set_epu8(UINT8_C( 72), UINT8_C(139), UINT8_C(120), UINT8_C(127),
398
+ UINT8_C(102), UINT8_C(165), UINT8_C( 82), UINT8_C( 63),
399
+ UINT8_C(192), UINT8_C( 18), UINT8_C(103), UINT8_C(151),
400
+ UINT8_C( 81), UINT8_C(222), UINT8_C(212), UINT8_C( 1)),
401
+ simde_x_mm_set_epu8(UINT8_C( 7), UINT8_C( 26), UINT8_C( 32), UINT8_C( 1),
402
+ UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2),
403
+ UINT8_C( 65), UINT8_C( 24), UINT8_C( 1), UINT8_C( 97),
404
+ UINT8_C( 14), UINT8_C( 8), UINT8_C( 89), UINT8_C( 11)),
405
+ simde_x_mm_set_epu8(UINT8_C( 10), UINT8_C( 5), UINT8_C( 3), UINT8_C(127),
406
+ UINT8_C(102), UINT8_C(165), UINT8_C( 27), UINT8_C( 31),
407
+ UINT8_C( 2), UINT8_C( 0), UINT8_C(103), UINT8_C( 1),
408
+ UINT8_C( 5), UINT8_C( 27), UINT8_C( 2), UINT8_C( 0)) }
409
+ };
410
+
411
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
412
+ simde__m128i r = simde_mm_div_epu8(test_vec[i].a, test_vec[i].b);
413
+ simde_assert_m128i_u8(r, ==, test_vec[i].r);
414
+ }
415
+
416
+ return MUNIT_OK;
417
+ }
418
+
419
+ static MunitResult
420
+ test_simde_mm_div_epu16(const MunitParameter params[], void* data) {
421
+ (void) params;
422
+ (void) data;
423
+
424
+ const struct {
425
+ simde__m128i a;
426
+ simde__m128i b;
427
+ simde__m128i r;
428
+ } test_vec[8] = {
429
+ { simde_x_mm_set_epu16(UINT16_C(27566), UINT16_C(40504), UINT16_C( 4629), UINT16_C(53715),
430
+ UINT16_C( 9716), UINT16_C( 9411), UINT16_C(47476), UINT16_C(41385)),
431
+ simde_x_mm_set_epu16(UINT16_C( 13), UINT16_C( 6506), UINT16_C( 2031), UINT16_C( 2041),
432
+ UINT16_C( 41), UINT16_C( 3089), UINT16_C( 4707), UINT16_C( 3)),
433
+ simde_x_mm_set_epu16(UINT16_C( 2120), UINT16_C( 6), UINT16_C( 2), UINT16_C( 26),
434
+ UINT16_C( 236), UINT16_C( 3), UINT16_C( 10), UINT16_C(13795)) },
435
+ { simde_x_mm_set_epu16(UINT16_C( 9353), UINT16_C( 761), UINT16_C( 3256), UINT16_C(15648),
436
+ UINT16_C(54529), UINT16_C(37909), UINT16_C( 6524), UINT16_C(24806)),
437
+ simde_x_mm_set_epu16(UINT16_C(17088), UINT16_C( 3660), UINT16_C( 3), UINT16_C( 9),
438
+ UINT16_C( 186), UINT16_C( 2), UINT16_C( 7), UINT16_C( 1856)),
439
+ simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 1085), UINT16_C( 1738),
440
+ UINT16_C( 293), UINT16_C(18954), UINT16_C( 932), UINT16_C( 13)) },
441
+ { simde_x_mm_set_epu16(UINT16_C(19795), UINT16_C(45332), UINT16_C(60579), UINT16_C(32327),
442
+ UINT16_C(25905), UINT16_C(63671), UINT16_C( 930), UINT16_C(32017)),
443
+ simde_x_mm_set_epu16(UINT16_C( 8), UINT16_C(30488), UINT16_C( 26), UINT16_C( 3397),
444
+ UINT16_C( 1518), UINT16_C( 2), UINT16_C( 20), UINT16_C( 6)),
445
+ simde_x_mm_set_epu16(UINT16_C( 2474), UINT16_C( 1), UINT16_C( 2329), UINT16_C( 9),
446
+ UINT16_C( 17), UINT16_C(31835), UINT16_C( 46), UINT16_C( 5336)) },
447
+ { simde_x_mm_set_epu16(UINT16_C(29801), UINT16_C(62435), UINT16_C(31106), UINT16_C(58247),
448
+ UINT16_C(47275), UINT16_C(34875), UINT16_C(63847), UINT16_C( 8602)),
449
+ simde_x_mm_set_epu16(UINT16_C( 5), UINT16_C( 1), UINT16_C( 842), UINT16_C( 1634),
450
+ UINT16_C( 11), UINT16_C( 25), UINT16_C( 3640), UINT16_C( 932)),
451
+ simde_x_mm_set_epu16(UINT16_C( 5960), UINT16_C(62435), UINT16_C( 36), UINT16_C( 35),
452
+ UINT16_C( 4297), UINT16_C( 1395), UINT16_C( 17), UINT16_C( 9)) },
453
+ { simde_x_mm_set_epu16(UINT16_C(41564), UINT16_C(16940), UINT16_C(39647), UINT16_C(59460),
454
+ UINT16_C(17425), UINT16_C(59711), UINT16_C(30880), UINT16_C(42139)),
455
+ simde_x_mm_set_epu16(UINT16_C(25139), UINT16_C( 3416), UINT16_C( 43), UINT16_C( 6),
456
+ UINT16_C( 4), UINT16_C( 1256), UINT16_C( 60), UINT16_C( 129)),
457
+ simde_x_mm_set_epu16(UINT16_C( 1), UINT16_C( 4), UINT16_C( 922), UINT16_C( 9910),
458
+ UINT16_C( 4356), UINT16_C( 47), UINT16_C( 514), UINT16_C( 326)) },
459
+ { simde_x_mm_set_epu16(UINT16_C(39593), UINT16_C(41522), UINT16_C(58894), UINT16_C( 6383),
460
+ UINT16_C(39956), UINT16_C( 2820), UINT16_C(20260), UINT16_C(57360)),
461
+ simde_x_mm_set_epu16(UINT16_C( 1), UINT16_C(10468), UINT16_C( 2), UINT16_C( 79),
462
+ UINT16_C( 5), UINT16_C( 1166), UINT16_C( 2), UINT16_C( 3)),
463
+ simde_x_mm_set_epu16(UINT16_C(39593), UINT16_C( 3), UINT16_C(29447), UINT16_C( 80),
464
+ UINT16_C( 7991), UINT16_C( 2), UINT16_C(10130), UINT16_C(19120)) },
465
+ { simde_x_mm_set_epu16(UINT16_C(58633), UINT16_C(30014), UINT16_C(57061), UINT16_C(60439),
466
+ UINT16_C(22536), UINT16_C(20868), UINT16_C(20870), UINT16_C(13916)),
467
+ simde_x_mm_set_epu16(UINT16_C( 15), UINT16_C( 490), UINT16_C( 2338), UINT16_C( 64),
468
+ UINT16_C( 876), UINT16_C( 706), UINT16_C( 65), UINT16_C( 320)),
469
+ simde_x_mm_set_epu16(UINT16_C( 3908), UINT16_C( 61), UINT16_C( 24), UINT16_C( 944),
470
+ UINT16_C( 25), UINT16_C( 29), UINT16_C( 321), UINT16_C( 43)) },
471
+ { simde_x_mm_set_epu16(UINT16_C( 6697), UINT16_C(21906), UINT16_C(59582), UINT16_C(44845),
472
+ UINT16_C(35883), UINT16_C(64682), UINT16_C(55100), UINT16_C(57711)),
473
+ simde_x_mm_set_epu16(UINT16_C( 7058), UINT16_C( 10), UINT16_C(60566), UINT16_C( 1),
474
+ UINT16_C( 1), UINT16_C( 872), UINT16_C( 109), UINT16_C( 1)),
475
+ simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 2190), UINT16_C( 0), UINT16_C(44845),
476
+ UINT16_C(35883), UINT16_C( 74), UINT16_C( 505), UINT16_C(57711)) }
477
+ };
478
+
479
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
480
+ simde__m128i r = simde_mm_div_epu16(test_vec[i].a, test_vec[i].b);
481
+ simde_assert_m128i_u16(r, ==, test_vec[i].r);
482
+ }
483
+
484
+ return MUNIT_OK;
485
+ }
486
+
487
+ static MunitResult
488
+ test_simde_mm_div_epu32(const MunitParameter params[], void* data) {
489
+ (void) params;
490
+ (void) data;
491
+
492
+ const struct {
493
+ simde__m128i a;
494
+ simde__m128i b;
495
+ simde__m128i r;
496
+ } test_vec[8] = {
497
+ { simde_x_mm_set_epu32(UINT32_C(3152261024), UINT32_C(2598586578), UINT32_C(1610828679), UINT32_C(3536337768)),
498
+ simde_x_mm_set_epu32(UINT32_C( 14157), UINT32_C( 947), UINT32_C(1043337665), UINT32_C( 97937)),
499
+ simde_x_mm_set_epu32(UINT32_C( 222664), UINT32_C( 2744019), UINT32_C( 1), UINT32_C( 36108)) },
500
+ { simde_x_mm_set_epu32(UINT32_C( 75140339), UINT32_C(1941562012), UINT32_C( 857740081), UINT32_C(1336535286)),
501
+ simde_x_mm_set_epu32(UINT32_C( 22), UINT32_C( 1682), UINT32_C( 11), UINT32_C( 2)),
502
+ simde_x_mm_set_epu32(UINT32_C( 3415469), UINT32_C( 1154317), UINT32_C( 77976371), UINT32_C( 668267643)) },
503
+ { simde_x_mm_set_epu32(UINT32_C( 948661264), UINT32_C(1195769225), UINT32_C( 694120276), UINT32_C(3517239447)),
504
+ simde_x_mm_set_epu32(UINT32_C( 3949), UINT32_C( 275), UINT32_C( 12430067), UINT32_C( 15794)),
505
+ simde_x_mm_set_epu32(UINT32_C( 240228), UINT32_C( 4348251), UINT32_C( 55), UINT32_C( 222694)) },
506
+ { simde_x_mm_set_epu32(UINT32_C(3023938951), UINT32_C(4109050401), UINT32_C( 287757059), UINT32_C(2648669825)),
507
+ simde_x_mm_set_epu32(UINT32_C( 57756), UINT32_C( 40), UINT32_C(1080216164), UINT32_C( 173312)),
508
+ simde_x_mm_set_epu32(UINT32_C( 52357), UINT32_C( 102726260), UINT32_C( 0), UINT32_C( 15282)) },
509
+ { simde_x_mm_set_epu32(UINT32_C( 864299658), UINT32_C(2427378437), UINT32_C( 823539242), UINT32_C(1758563044)),
510
+ simde_x_mm_set_epu32(UINT32_C( 225), UINT32_C( 75), UINT32_C( 11529), UINT32_C( 119418298)),
511
+ simde_x_mm_set_epu32(UINT32_C( 3841331), UINT32_C( 32365045), UINT32_C( 71431), UINT32_C( 14)) },
512
+ { simde_x_mm_set_epu32(UINT32_C(2662820398), UINT32_C(1208068616), UINT32_C(2158211537), UINT32_C(3417661837)),
513
+ simde_x_mm_set_epu32(UINT32_C( 2367), UINT32_C( 126619), UINT32_C( 55203), UINT32_C( 155)),
514
+ simde_x_mm_set_epu32(UINT32_C( 1124976), UINT32_C( 9540), UINT32_C( 39095), UINT32_C( 22049431)) },
515
+ { simde_x_mm_set_epu32(UINT32_C(1097247740), UINT32_C(3448507951), UINT32_C(4106436665), UINT32_C(3017338787)),
516
+ simde_x_mm_set_epu32(UINT32_C( 61963115), UINT32_C( 238397327), UINT32_C( 245318), UINT32_C( 3312135)),
517
+ simde_x_mm_set_epu32(UINT32_C( 17), UINT32_C( 14), UINT32_C( 16739), UINT32_C( 910)) },
518
+ { simde_x_mm_set_epu32(UINT32_C(3006363325), UINT32_C(2983927188), UINT32_C(2177891039), UINT32_C(1117727917)),
519
+ simde_x_mm_set_epu32(UINT32_C( 24), UINT32_C( 12), UINT32_C(1067413818), UINT32_C( 206)),
520
+ simde_x_mm_set_epu32(UINT32_C( 125265138), UINT32_C( 248660599), UINT32_C( 2), UINT32_C( 5425863)) }
521
+ };
522
+
523
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
524
+ simde__m128i r = simde_mm_div_epu32(test_vec[i].a, test_vec[i].b);
525
+ simde_assert_m128i_u32(r, ==, test_vec[i].r);
526
+ }
527
+
528
+ return MUNIT_OK;
529
+ }
530
+
531
+ static MunitResult
532
+ test_simde_mm_div_epu64(const MunitParameter params[], void* data) {
533
+ (void) params;
534
+ (void) data;
535
+
536
+ const struct {
537
+ simde__m128i a;
538
+ simde__m128i b;
539
+ simde__m128i r;
540
+ } test_vec[8] = {
541
+ { simde_x_mm_set_epu64x(UINT64_C(14823946846053138543), UINT64_C( 2773213006356142856)),
542
+ simde_x_mm_set_epu64x(UINT64_C( 22806630538915743), UINT64_C( 1295)),
543
+ simde_x_mm_set_epu64x(UINT64_C( 649), UINT64_C( 2141477224985438)) },
544
+ { simde_x_mm_set_epu64x(UINT64_C(16338394746286416599), UINT64_C( 4395568244008230294)),
545
+ simde_x_mm_set_epu64x(UINT64_C( 1610), UINT64_C( 68247035008)),
546
+ simde_x_mm_set_epu64x(UINT64_C( 10148071270985351), UINT64_C( 64406728)) },
547
+ { simde_x_mm_set_epu64x(UINT64_C( 6431957656146818365), UINT64_C(14710883493083458909)),
548
+ simde_x_mm_set_epu64x(UINT64_C( 2399266305377), UINT64_C( 16092627197291141)),
549
+ simde_x_mm_set_epu64x(UINT64_C( 2680801), UINT64_C( 914)) },
550
+ { simde_x_mm_set_epu64x(UINT64_C( 7920700281052633117), UINT64_C(15482760419196872328)),
551
+ simde_x_mm_set_epu64x(UINT64_C( 45928957131), UINT64_C( 837231)),
552
+ simde_x_mm_set_epu64x(UINT64_C( 172455478), UINT64_C( 18492817895176)) },
553
+ { simde_x_mm_set_epu64x(UINT64_C( 230158309193392347), UINT64_C(18390356791266391163)),
554
+ simde_x_mm_set_epu64x(UINT64_C( 2253), UINT64_C( 1691141090999)),
555
+ simde_x_mm_set_epu64x(UINT64_C( 102156373365908), UINT64_C( 10874525)) },
556
+ { simde_x_mm_set_epu64x(UINT64_C(12307531484633875995), UINT64_C(16695234188854570094)),
557
+ simde_x_mm_set_epu64x(UINT64_C( 131150029), UINT64_C( 516657134296053652)),
558
+ simde_x_mm_set_epu64x(UINT64_C( 93843147260), UINT64_C( 32)) },
559
+ { simde_x_mm_set_epu64x(UINT64_C(11764896934406933200), UINT64_C(18439918542668248477)),
560
+ simde_x_mm_set_epu64x(UINT64_C( 306481550847), UINT64_C( 776223621938168297)),
561
+ simde_x_mm_set_epu64x(UINT64_C( 38386966), UINT64_C( 23)) },
562
+ { simde_x_mm_set_epu64x(UINT64_C(15338454595408931369), UINT64_C(14530768559531423502)),
563
+ simde_x_mm_set_epu64x(UINT64_C( 3408), UINT64_C( 2)),
564
+ simde_x_mm_set_epu64x(UINT64_C( 4500720245131728), UINT64_C( 7265384279765711751)) }
565
+ };
566
+
567
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
568
+ simde__m128i r = simde_mm_div_epu64(test_vec[i].a, test_vec[i].b);
569
+ simde_assert_m128i_u64(r, ==, test_vec[i].r);
570
+ }
571
+
572
+ return MUNIT_OK;
573
+ }
574
+
575
+ static MunitResult
576
+ test_simde_mm256_div_epi8(const MunitParameter params[], void* data) {
577
+ (void) params;
578
+ (void) data;
579
+
580
+ const struct {
581
+ simde__m256i a;
582
+ simde__m256i b;
583
+ simde__m256i r;
584
+ } test_vec[8] = {
585
+ { simde_mm256_set_epi8(INT8_C( -27), INT8_C( 46), INT8_C(-122), INT8_C( 87),
586
+ INT8_C( 34), INT8_C( -53), INT8_C( 64), INT8_C( -70),
587
+ INT8_C( 25), INT8_C( -17), INT8_C( 56), INT8_C( 3),
588
+ INT8_C( -75), INT8_C( -17), INT8_C( -12), INT8_C( 60),
589
+ INT8_C( 100), INT8_C( -7), INT8_C(-102), INT8_C( -6),
590
+ INT8_C( -10), INT8_C(-111), INT8_C( 106), INT8_C( -43),
591
+ INT8_C( -28), INT8_C( -46), INT8_C( 42), INT8_C( -58),
592
+ INT8_C( 85), INT8_C( -33), INT8_C(-106), INT8_C(-106)),
593
+ simde_mm256_set_epi8(INT8_C( 1), INT8_C( 4), INT8_C( -31), INT8_C( 6),
594
+ INT8_C( 13), INT8_C( 15), INT8_C( 20), INT8_C( 3),
595
+ INT8_C( -77), INT8_C( 32), INT8_C( 5), INT8_C( 55),
596
+ INT8_C( 5), INT8_C( 1), INT8_C( 16), INT8_C( 49),
597
+ INT8_C( 43), INT8_C( 83), INT8_C( 5), INT8_C( 16),
598
+ INT8_C( 34), INT8_C( 20), INT8_C( 2), INT8_C( 13),
599
+ INT8_C( 8), INT8_C( 2), INT8_C( 90), INT8_C( 2),
600
+ INT8_C( 23), INT8_C( 12), INT8_C( 2), INT8_C( 5)),
601
+ simde_mm256_set_epi8(INT8_C( -27), INT8_C( 11), INT8_C( 3), INT8_C( 14),
602
+ INT8_C( 2), INT8_C( -3), INT8_C( 3), INT8_C( -23),
603
+ INT8_C( 0), INT8_C( 0), INT8_C( 11), INT8_C( 0),
604
+ INT8_C( -15), INT8_C( -17), INT8_C( 0), INT8_C( 1),
605
+ INT8_C( 2), INT8_C( 0), INT8_C( -20), INT8_C( 0),
606
+ INT8_C( 0), INT8_C( -5), INT8_C( 53), INT8_C( -3),
607
+ INT8_C( -3), INT8_C( -23), INT8_C( 0), INT8_C( -29),
608
+ INT8_C( 3), INT8_C( -2), INT8_C( -53), INT8_C( -21)) },
609
+ { simde_mm256_set_epi8(INT8_C( 64), INT8_C(-114), INT8_C( 66), INT8_C( -73),
610
+ INT8_C( -80), INT8_C( 97), INT8_C( 103), INT8_C( -46),
611
+ INT8_C( -83), INT8_C( 104), INT8_C( 22), INT8_C( -39),
612
+ INT8_C( 114), INT8_C( -82), INT8_C( 83), INT8_C( 122),
613
+ INT8_C( 1), INT8_C( 51), INT8_C( 75), INT8_C(-100),
614
+ INT8_C( 17), INT8_C( 37), INT8_C( 53), INT8_C( -57),
615
+ INT8_C( 121), INT8_C( -35), INT8_C( 108), INT8_C( -68),
616
+ INT8_C( 25), INT8_C( -78), INT8_C( -54), INT8_C(-104)),
617
+ simde_mm256_set_epi8(INT8_C( 91), INT8_C( 10), INT8_C( -96), INT8_C( 14),
618
+ INT8_C( 21), INT8_C( 23), INT8_C( 1), INT8_C( 8),
619
+ INT8_C( 9), INT8_C( 2), INT8_C( 8), INT8_C( 30),
620
+ INT8_C( 1), INT8_C( -75), INT8_C( 15), INT8_C( 1),
621
+ INT8_C( 27), INT8_C( 5), INT8_C( 104), INT8_C( 48),
622
+ INT8_C( 11), INT8_C( 4), INT8_C( 31), INT8_C( 3),
623
+ INT8_C( 20), INT8_C( 118), INT8_C( 1), INT8_C( 18),
624
+ INT8_C( 1), INT8_C( 22), INT8_C( 20), INT8_C( 33)),
625
+ simde_mm256_set_epi8(INT8_C( 0), INT8_C( -11), INT8_C( 0), INT8_C( -5),
626
+ INT8_C( -3), INT8_C( 4), INT8_C( 103), INT8_C( -5),
627
+ INT8_C( -9), INT8_C( 52), INT8_C( 2), INT8_C( -1),
628
+ INT8_C( 114), INT8_C( 1), INT8_C( 5), INT8_C( 122),
629
+ INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( -2),
630
+ INT8_C( 1), INT8_C( 9), INT8_C( 1), INT8_C( -19),
631
+ INT8_C( 6), INT8_C( 0), INT8_C( 108), INT8_C( -3),
632
+ INT8_C( 25), INT8_C( -3), INT8_C( -2), INT8_C( -3)) },
633
+ { simde_mm256_set_epi8(INT8_C( 123), INT8_C( 92), INT8_C( -58), INT8_C( 47),
634
+ INT8_C( 51), INT8_C( 47), INT8_C( 69), INT8_C( 12),
635
+ INT8_C( 68), INT8_C( -99), INT8_C( 76), INT8_C( 32),
636
+ INT8_C( 85), INT8_C( -81), INT8_C( -3), INT8_C( -4),
637
+ INT8_C( -35), INT8_C( -48), INT8_C( 17), INT8_C( -73),
638
+ INT8_C( 109), INT8_C( 88), INT8_C( -56), INT8_C( -99),
639
+ INT8_C(-114), INT8_C( 127), INT8_C( 26), INT8_C( -29),
640
+ INT8_C( -48), INT8_C( -28), INT8_C( 93), INT8_C( -85)),
641
+ simde_mm256_set_epi8(INT8_C( 86), INT8_C( 12), INT8_C( 90), INT8_C( 46),
642
+ INT8_C( 10), INT8_C( 18), INT8_C( 1), INT8_C( 58),
643
+ INT8_C( -94), INT8_C( 4), INT8_C( 2), INT8_C( 1),
644
+ INT8_C( 20), INT8_C( 20), INT8_C( 1), INT8_C( 10),
645
+ INT8_C( 4), INT8_C( 13), INT8_C( 1), INT8_C( 1),
646
+ INT8_C( 1), INT8_C( 3), INT8_C( 16), INT8_C( 4),
647
+ INT8_C( 4), INT8_C( 2), INT8_C( 8), INT8_C( -96),
648
+ INT8_C( 1), INT8_C( 5), INT8_C( -98), INT8_C( 11)),
649
+ simde_mm256_set_epi8(INT8_C( 1), INT8_C( 7), INT8_C( 0), INT8_C( 1),
650
+ INT8_C( 5), INT8_C( 2), INT8_C( 69), INT8_C( 0),
651
+ INT8_C( 0), INT8_C( -24), INT8_C( 38), INT8_C( 32),
652
+ INT8_C( 4), INT8_C( -4), INT8_C( -3), INT8_C( 0),
653
+ INT8_C( -8), INT8_C( -3), INT8_C( 17), INT8_C( -73),
654
+ INT8_C( 109), INT8_C( 29), INT8_C( -3), INT8_C( -24),
655
+ INT8_C( -28), INT8_C( 63), INT8_C( 3), INT8_C( 0),
656
+ INT8_C( -48), INT8_C( -5), INT8_C( 0), INT8_C( -7)) },
657
+ { simde_mm256_set_epi8(INT8_C( -83), INT8_C( 8), INT8_C( 39), INT8_C( 32),
658
+ INT8_C( -68), INT8_C( 0), INT8_C( 93), INT8_C( 7),
659
+ INT8_C( -26), INT8_C( -37), INT8_C( 3), INT8_C( -23),
660
+ INT8_C( 38), INT8_C( -61), INT8_C( 87), INT8_C( 32),
661
+ INT8_C( 65), INT8_C( 24), INT8_C( -17), INT8_C( -19),
662
+ INT8_C( 113), INT8_C( -25), INT8_C( 58), INT8_C( 4),
663
+ INT8_C(-127), INT8_C( 41), INT8_C( -74), INT8_C( 113),
664
+ INT8_C( 49), INT8_C( -39), INT8_C( -48), INT8_C( 114)),
665
+ simde_mm256_set_epi8(INT8_C(-102), INT8_C( 1), INT8_C( 22), INT8_C( 1),
666
+ INT8_C( 15), INT8_C( 2), INT8_C( 19), INT8_C( 69),
667
+ INT8_C( 1), INT8_C( 49), INT8_C( 66), INT8_C( 2),
668
+ INT8_C( 1), INT8_C( 2), INT8_C( 10), INT8_C( 8),
669
+ INT8_C( 1), INT8_C( 1), INT8_C( 4), INT8_C( 66),
670
+ INT8_C( 11), INT8_C( 22), INT8_C(-126), INT8_C( 49),
671
+ INT8_C( 1), INT8_C( 38), INT8_C( 1), INT8_C( 3),
672
+ INT8_C( 7), INT8_C( 3), INT8_C( 21), INT8_C( 21)),
673
+ simde_mm256_set_epi8(INT8_C( 0), INT8_C( 8), INT8_C( 1), INT8_C( 32),
674
+ INT8_C( -4), INT8_C( 0), INT8_C( 4), INT8_C( 0),
675
+ INT8_C( -26), INT8_C( 0), INT8_C( 0), INT8_C( -11),
676
+ INT8_C( 38), INT8_C( -30), INT8_C( 8), INT8_C( 4),
677
+ INT8_C( 65), INT8_C( 24), INT8_C( -4), INT8_C( 0),
678
+ INT8_C( 10), INT8_C( -1), INT8_C( 0), INT8_C( 0),
679
+ INT8_C(-127), INT8_C( 1), INT8_C( -74), INT8_C( 37),
680
+ INT8_C( 7), INT8_C( -13), INT8_C( -2), INT8_C( 5)) },
681
+ { simde_mm256_set_epi8(INT8_C( 66), INT8_C( 127), INT8_C( 41), INT8_C(-124),
682
+ INT8_C( -90), INT8_C( 28), INT8_C(-118), INT8_C( 18),
683
+ INT8_C( 79), INT8_C( 17), INT8_C( 126), INT8_C( -43),
684
+ INT8_C( -78), INT8_C( 78), INT8_C( 76), INT8_C( 46),
685
+ INT8_C( 60), INT8_C(-126), INT8_C( -41), INT8_C( -77),
686
+ INT8_C( -62), INT8_C(-116), INT8_C(-115), INT8_C( 55),
687
+ INT8_C( 19), INT8_C( 104), INT8_C(-104), INT8_C( -29),
688
+ INT8_C( 54), INT8_C(-118), INT8_C( -40), INT8_C( -58)),
689
+ simde_mm256_set_epi8(INT8_C( 3), INT8_C( 53), INT8_C( 28), INT8_C( -96),
690
+ INT8_C( 1), INT8_C( 91), INT8_C( 7), INT8_C( 1),
691
+ INT8_C( 29), INT8_C( 30), INT8_C( 1), INT8_C( 10),
692
+ INT8_C( 1), INT8_C( 36), INT8_C( 7), INT8_C( 1),
693
+ INT8_C(-101), INT8_C( 5), INT8_C( 13), INT8_C( 5),
694
+ INT8_C( 85), INT8_C( 11), INT8_C( 34), INT8_C( 48),
695
+ INT8_C( 17), INT8_C( 42), INT8_C( 3), INT8_C( 87),
696
+ INT8_C( 1), INT8_C( 2), INT8_C( 74), INT8_C( 8)),
697
+ simde_mm256_set_epi8(INT8_C( 22), INT8_C( 2), INT8_C( 1), INT8_C( 1),
698
+ INT8_C( -90), INT8_C( 0), INT8_C( -16), INT8_C( 18),
699
+ INT8_C( 2), INT8_C( 0), INT8_C( 126), INT8_C( -4),
700
+ INT8_C( -78), INT8_C( 2), INT8_C( 10), INT8_C( 46),
701
+ INT8_C( 0), INT8_C( -25), INT8_C( -3), INT8_C( -15),
702
+ INT8_C( 0), INT8_C( -10), INT8_C( -3), INT8_C( 1),
703
+ INT8_C( 1), INT8_C( 2), INT8_C( -34), INT8_C( 0),
704
+ INT8_C( 54), INT8_C( -59), INT8_C( 0), INT8_C( -7)) },
705
+ { simde_mm256_set_epi8(INT8_C( 79), INT8_C( -60), INT8_C( 106), INT8_C( -93),
706
+ INT8_C(-111), INT8_C( 118), INT8_C( -87), INT8_C( -78),
707
+ INT8_C( -28), INT8_C( 107), INT8_C( -12), INT8_C( -54),
708
+ INT8_C( 101), INT8_C( -62), INT8_C( 4), INT8_C( -51),
709
+ INT8_C( -90), INT8_C(-114), INT8_C( 14), INT8_C( 124),
710
+ INT8_C( -67), INT8_C( 47), INT8_C( 41), INT8_C( 37),
711
+ INT8_C( 126), INT8_C( -20), INT8_C( 119), INT8_C( 105),
712
+ INT8_C( -17), INT8_C( 95), INT8_C( -41), INT8_C( 19)),
713
+ simde_mm256_set_epi8(INT8_C( -34), INT8_C( 4), INT8_C( 32), INT8_C( 1),
714
+ INT8_C( 4), INT8_C( 10), INT8_C( 7), INT8_C( 5),
715
+ INT8_C( 120), INT8_C( 1), INT8_C( 1), INT8_C( 1),
716
+ INT8_C( 26), INT8_C( 6), INT8_C( 44), INT8_C( 2),
717
+ INT8_C( 55), INT8_C( 14), INT8_C( 4), INT8_C( 41),
718
+ INT8_C( 41), INT8_C( 6), INT8_C( 10), INT8_C( 7),
719
+ INT8_C( 7), INT8_C( 21), INT8_C( 126), INT8_C( 59),
720
+ INT8_C( 13), INT8_C( 8), INT8_C( 2), INT8_C( 6)),
721
+ simde_mm256_set_epi8(INT8_C( -2), INT8_C( -15), INT8_C( 3), INT8_C( -93),
722
+ INT8_C( -27), INT8_C( 11), INT8_C( -12), INT8_C( -15),
723
+ INT8_C( 0), INT8_C( 107), INT8_C( -12), INT8_C( -54),
724
+ INT8_C( 3), INT8_C( -10), INT8_C( 0), INT8_C( -25),
725
+ INT8_C( -1), INT8_C( -8), INT8_C( 3), INT8_C( 3),
726
+ INT8_C( -1), INT8_C( 7), INT8_C( 4), INT8_C( 5),
727
+ INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 1),
728
+ INT8_C( -1), INT8_C( 11), INT8_C( -20), INT8_C( 3)) },
729
+ { simde_mm256_set_epi8(INT8_C( -48), INT8_C( -29), INT8_C( 23), INT8_C( 39),
730
+ INT8_C( 106), INT8_C( -37), INT8_C( 1), INT8_C( 62),
731
+ INT8_C( -21), INT8_C( -4), INT8_C( -92), INT8_C( -12),
732
+ INT8_C( 78), INT8_C( -93), INT8_C( 36), INT8_C( -10),
733
+ INT8_C( -84), INT8_C( 102), INT8_C( 9), INT8_C( 70),
734
+ INT8_C( -16), INT8_C( -90), INT8_C( 82), INT8_C(-124),
735
+ INT8_C( -78), INT8_C( 58), INT8_C( 35), INT8_C( 108),
736
+ INT8_C(-105), INT8_C( -72), INT8_C( -16), INT8_C(-103)),
737
+ simde_mm256_set_epi8(INT8_C( 2), INT8_C( 4), INT8_C( 28), INT8_C( 120),
738
+ INT8_C( 1), INT8_C( 5), INT8_C( 2), INT8_C( 61),
739
+ INT8_C( 1), INT8_C( 33), INT8_C( 110), INT8_C( 1),
740
+ INT8_C( 102), INT8_C( 3), INT8_C( 3), INT8_C( 1),
741
+ INT8_C( 1), INT8_C( 26), INT8_C( 11), INT8_C( 7),
742
+ INT8_C( 75), INT8_C( 3), INT8_C( 5), INT8_C( 19),
743
+ INT8_C( 3), INT8_C( -26), INT8_C( 56), INT8_C( 5),
744
+ INT8_C( 7), INT8_C( 6), INT8_C( 2), INT8_C( 5)),
745
+ simde_mm256_set_epi8(INT8_C( -24), INT8_C( -7), INT8_C( 0), INT8_C( 0),
746
+ INT8_C( 106), INT8_C( -7), INT8_C( 0), INT8_C( 1),
747
+ INT8_C( -21), INT8_C( 0), INT8_C( 0), INT8_C( -12),
748
+ INT8_C( 0), INT8_C( -31), INT8_C( 12), INT8_C( -10),
749
+ INT8_C( -84), INT8_C( 3), INT8_C( 0), INT8_C( 10),
750
+ INT8_C( 0), INT8_C( -30), INT8_C( 16), INT8_C( -6),
751
+ INT8_C( -26), INT8_C( -2), INT8_C( 0), INT8_C( 21),
752
+ INT8_C( -15), INT8_C( -12), INT8_C( -8), INT8_C( -20)) },
753
+ { simde_mm256_set_epi8(INT8_C( 110), INT8_C( 56), INT8_C(-120), INT8_C( -32),
754
+ INT8_C( -22), INT8_C( 97), INT8_C( -56), INT8_C( 55),
755
+ INT8_C( -90), INT8_C( 33), INT8_C( 92), INT8_C( 89),
756
+ INT8_C(-107), INT8_C( 55), INT8_C( -50), INT8_C( -88),
757
+ INT8_C( 35), INT8_C( 21), INT8_C( 54), INT8_C( 26),
758
+ INT8_C(-122), INT8_C( 103), INT8_C( 76), INT8_C( 38),
759
+ INT8_C(-110), INT8_C( 11), INT8_C( 26), INT8_C( -11),
760
+ INT8_C( 0), INT8_C( 3), INT8_C( 30), INT8_C( 59)),
761
+ simde_mm256_set_epi8(INT8_C( -31), INT8_C( -83), INT8_C( 101), INT8_C( 17),
762
+ INT8_C( 8), INT8_C( 15), INT8_C( 2), INT8_C( 7),
763
+ INT8_C( 37), INT8_C( 84), INT8_C( -52), INT8_C( 25),
764
+ INT8_C( 42), INT8_C( -27), INT8_C( 1), INT8_C( 10),
765
+ INT8_C( 7), INT8_C( 37), INT8_C( 54), INT8_C( 31),
766
+ INT8_C( 54), INT8_C( 62), INT8_C( 11), INT8_C( 54),
767
+ INT8_C( 43), INT8_C( 1), INT8_C( 4), INT8_C( 5),
768
+ INT8_C( 93), INT8_C( 124), INT8_C( 2), INT8_C( 3)),
769
+ simde_mm256_set_epi8(INT8_C( -3), INT8_C( 0), INT8_C( -1), INT8_C( -1),
770
+ INT8_C( -2), INT8_C( 6), INT8_C( -28), INT8_C( 7),
771
+ INT8_C( -2), INT8_C( 0), INT8_C( -1), INT8_C( 3),
772
+ INT8_C( -2), INT8_C( -2), INT8_C( -50), INT8_C( -8),
773
+ INT8_C( 5), INT8_C( 0), INT8_C( 1), INT8_C( 0),
774
+ INT8_C( -2), INT8_C( 1), INT8_C( 6), INT8_C( 0),
775
+ INT8_C( -2), INT8_C( 11), INT8_C( 6), INT8_C( -2),
776
+ INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 19)) }
777
+ };
778
+
779
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
780
+ simde__m256i r = simde_mm256_div_epi8(test_vec[i].a, test_vec[i].b);
781
+ simde_assert_m256i_i8(r, ==, test_vec[i].r);
782
+ }
783
+
784
+ return MUNIT_OK;
785
+ }
786
+
787
+ static MunitResult
788
+ test_simde_mm256_div_epi16(const MunitParameter params[], void* data) {
789
+ (void) params;
790
+ (void) data;
791
+
792
+ const struct {
793
+ simde__m256i a;
794
+ simde__m256i b;
795
+ simde__m256i r;
796
+ } test_vec[8] = {
797
+ { simde_mm256_set_epi16(INT16_C(-29867), INT16_C( 9314), INT16_C( 7980), INT16_C( 8102),
798
+ INT16_C(-24663), INT16_C( 4367), INT16_C(-15443), INT16_C( -5657),
799
+ INT16_C(-20080), INT16_C(-10092), INT16_C(-31734), INT16_C( 6262),
800
+ INT16_C( 3510), INT16_C(-31811), INT16_C( -4053), INT16_C( -6124)),
801
+ simde_mm256_set_epi16(INT16_C( 1), INT16_C( 1438), INT16_C( -9), INT16_C( 435),
802
+ INT16_C( -11), INT16_C( 2), INT16_C( -496), INT16_C( 10321),
803
+ INT16_C( -1000), INT16_C( -27), INT16_C( -4), INT16_C( 453),
804
+ INT16_C( -2), INT16_C( 19741), INT16_C( -615), INT16_C( -3265)),
805
+ simde_mm256_set_epi16(INT16_C(-29867), INT16_C( 6), INT16_C( -886), INT16_C( 18),
806
+ INT16_C( 2242), INT16_C( 2183), INT16_C( 31), INT16_C( 0),
807
+ INT16_C( 20), INT16_C( 373), INT16_C( 7933), INT16_C( 13),
808
+ INT16_C( -1755), INT16_C( -1), INT16_C( 6), INT16_C( 1)) },
809
+ { simde_mm256_set_epi16(INT16_C( -6800), INT16_C( 13259), INT16_C( -2233), INT16_C( 1354),
810
+ INT16_C( -8106), INT16_C(-17039), INT16_C( 9504), INT16_C( 22255),
811
+ INT16_C( 12402), INT16_C( -2677), INT16_C( 4463), INT16_C( 28303),
812
+ INT16_C(-12322), INT16_C(-19201), INT16_C( 30668), INT16_C( 15284)),
813
+ simde_mm256_set_epi16(INT16_C( 16270), INT16_C(-26534), INT16_C( -13), INT16_C( -20),
814
+ INT16_C( -12), INT16_C( -182), INT16_C( -13), INT16_C( -2),
815
+ INT16_C( 399), INT16_C( -245), INT16_C( -1), INT16_C( -1),
816
+ INT16_C( -3), INT16_C( 59), INT16_C( 11), INT16_C( -9799)),
817
+ simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 171), INT16_C( -67),
818
+ INT16_C( 675), INT16_C( 93), INT16_C( -731), INT16_C(-11127),
819
+ INT16_C( 31), INT16_C( 10), INT16_C( -4463), INT16_C(-28303),
820
+ INT16_C( 4107), INT16_C( -325), INT16_C( 2788), INT16_C( -1)) },
821
+ { simde_mm256_set_epi16(INT16_C( 23535), INT16_C( 10930), INT16_C( 30193), INT16_C( -8194),
822
+ INT16_C( -8688), INT16_C( 2183), INT16_C(-14596), INT16_C(-28144),
823
+ INT16_C(-10670), INT16_C( 1107), INT16_C( 31427), INT16_C( -7322),
824
+ INT16_C( 17038), INT16_C(-32679), INT16_C( 23368), INT16_C(-24524)),
825
+ simde_mm256_set_epi16(INT16_C( 19), INT16_C( -388), INT16_C( -1), INT16_C( -2261),
826
+ INT16_C( -7651), INT16_C( 1639), INT16_C( -50), INT16_C( -2059),
827
+ INT16_C( -25), INT16_C( -57), INT16_C( -952), INT16_C( 17),
828
+ INT16_C( -4528), INT16_C( -764), INT16_C( -925), INT16_C( -20)),
829
+ simde_mm256_set_epi16(INT16_C( 1238), INT16_C( -28), INT16_C(-30193), INT16_C( 3),
830
+ INT16_C( 1), INT16_C( 1), INT16_C( 291), INT16_C( 13),
831
+ INT16_C( 426), INT16_C( -19), INT16_C( -33), INT16_C( -430),
832
+ INT16_C( -3), INT16_C( 42), INT16_C( -25), INT16_C( 1226)) },
833
+ { simde_mm256_set_epi16(INT16_C( 22767), INT16_C( 28543), INT16_C(-30401), INT16_C( 25623),
834
+ INT16_C( 2206), INT16_C(-16640), INT16_C(-13607), INT16_C(-30899),
835
+ INT16_C( -2384), INT16_C( -1714), INT16_C( 12691), INT16_C( 9427),
836
+ INT16_C( 11864), INT16_C( 29526), INT16_C( 8259), INT16_C( 6808)),
837
+ simde_mm256_set_epi16(INT16_C( 15244), INT16_C( 1), INT16_C( -1), INT16_C( -3),
838
+ INT16_C( -18), INT16_C( -10), INT16_C(-15299), INT16_C( -824),
839
+ INT16_C( 2005), INT16_C( 471), INT16_C( 2069), INT16_C( 204),
840
+ INT16_C( 25), INT16_C( -13), INT16_C( -3), INT16_C( 11)),
841
+ simde_mm256_set_epi16(INT16_C( 1), INT16_C( 28543), INT16_C( 30401), INT16_C( -8541),
842
+ INT16_C( -122), INT16_C( 1664), INT16_C( 0), INT16_C( 37),
843
+ INT16_C( -1), INT16_C( -3), INT16_C( 6), INT16_C( 46),
844
+ INT16_C( 474), INT16_C( -2271), INT16_C( -2753), INT16_C( 618)) },
845
+ { simde_mm256_set_epi16(INT16_C(-16585), INT16_C(-25277), INT16_C( -4139), INT16_C(-27065),
846
+ INT16_C(-28777), INT16_C( -9487), INT16_C(-18713), INT16_C(-30387),
847
+ INT16_C(-14811), INT16_C( 24102), INT16_C(-10162), INT16_C( 7921),
848
+ INT16_C( 29417), INT16_C( 15464), INT16_C( 24785), INT16_C( -1285)),
849
+ simde_mm256_set_epi16(INT16_C( -121), INT16_C( 328), INT16_C( 10), INT16_C( -385),
850
+ INT16_C( -1), INT16_C( 4), INT16_C( 388), INT16_C( -1),
851
+ INT16_C( 1), INT16_C( 4863), INT16_C( -499), INT16_C( 3),
852
+ INT16_C( -226), INT16_C(-15244), INT16_C( 5), INT16_C( -5)),
853
+ simde_mm256_set_epi16(INT16_C( 137), INT16_C( -77), INT16_C( -413), INT16_C( 70),
854
+ INT16_C( 28777), INT16_C( -2371), INT16_C( -48), INT16_C( 30387),
855
+ INT16_C(-14811), INT16_C( 4), INT16_C( 20), INT16_C( 2640),
856
+ INT16_C( -130), INT16_C( -1), INT16_C( 4957), INT16_C( 257)) },
857
+ { simde_mm256_set_epi16(INT16_C( -8831), INT16_C(-12421), INT16_C( 28092), INT16_C(-15215),
858
+ INT16_C( 5495), INT16_C( 15560), INT16_C( 8747), INT16_C( 22186),
859
+ INT16_C(-22634), INT16_C(-23262), INT16_C( 360), INT16_C(-18340),
860
+ INT16_C(-15939), INT16_C(-18429), INT16_C(-10641), INT16_C(-25953)),
861
+ simde_mm256_set_epi16(INT16_C( 6646), INT16_C( -440), INT16_C( 5), INT16_C( 9),
862
+ INT16_C( 5230), INT16_C( 14027), INT16_C( -115), INT16_C( -1),
863
+ INT16_C( -118), INT16_C( -466), INT16_C( -288), INT16_C( -9),
864
+ INT16_C( 114), INT16_C( -2656), INT16_C( -2539), INT16_C( 1803)),
865
+ simde_mm256_set_epi16(INT16_C( -1), INT16_C( 28), INT16_C( 5618), INT16_C( -1690),
866
+ INT16_C( 1), INT16_C( 1), INT16_C( -76), INT16_C(-22186),
867
+ INT16_C( 191), INT16_C( 49), INT16_C( -1), INT16_C( 2037),
868
+ INT16_C( -139), INT16_C( 6), INT16_C( 4), INT16_C( -14)) },
869
+ { simde_mm256_set_epi16(INT16_C( 2118), INT16_C( 26269), INT16_C( 31059), INT16_C( 17912),
870
+ INT16_C(-28141), INT16_C( 5202), INT16_C( 30957), INT16_C(-32121),
871
+ INT16_C( -2609), INT16_C(-12316), INT16_C(-10959), INT16_C( 17018),
872
+ INT16_C( 4376), INT16_C( 1963), INT16_C( 14912), INT16_C( 8031)),
873
+ simde_mm256_set_epi16(INT16_C( -2197), INT16_C( 11), INT16_C( -18), INT16_C( -3745),
874
+ INT16_C( -1), INT16_C( -3), INT16_C( 4), INT16_C( 3362),
875
+ INT16_C( -1965), INT16_C( 2), INT16_C( 574), INT16_C( 1347),
876
+ INT16_C( -888), INT16_C( -15), INT16_C( 1260), INT16_C( -640)),
877
+ simde_mm256_set_epi16(INT16_C( 0), INT16_C( 2388), INT16_C( -1725), INT16_C( -4),
878
+ INT16_C( 28141), INT16_C( -1734), INT16_C( 7739), INT16_C( -9),
879
+ INT16_C( 1), INT16_C( -6158), INT16_C( -19), INT16_C( 12),
880
+ INT16_C( -4), INT16_C( -130), INT16_C( 11), INT16_C( -12)) },
881
+ { simde_mm256_set_epi16(INT16_C(-28159), INT16_C( 7162), INT16_C(-24830), INT16_C( 4589),
882
+ INT16_C( 7038), INT16_C( 3178), INT16_C( 4246), INT16_C( -8357),
883
+ INT16_C( -4695), INT16_C( -9928), INT16_C( -5517), INT16_C(-27023),
884
+ INT16_C( 18843), INT16_C( 726), INT16_C( 30135), INT16_C( -4871)),
885
+ simde_mm256_set_epi16(INT16_C( -48), INT16_C( 767), INT16_C( 10), INT16_C( 14),
886
+ INT16_C( -2039), INT16_C( -2), INT16_C( -53), INT16_C( -1),
887
+ INT16_C( -1865), INT16_C( -5344), INT16_C( 63), INT16_C( -505),
888
+ INT16_C( 2993), INT16_C(-14674), INT16_C( 3), INT16_C( -2)),
889
+ simde_mm256_set_epi16(INT16_C( 586), INT16_C( 9), INT16_C( -2483), INT16_C( 327),
890
+ INT16_C( -3), INT16_C( -1589), INT16_C( -80), INT16_C( 8357),
891
+ INT16_C( 2), INT16_C( 1), INT16_C( -87), INT16_C( 53),
892
+ INT16_C( 6), INT16_C( 0), INT16_C( 10045), INT16_C( 2435)) }
893
+ };
894
+
895
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
896
+ simde__m256i r = simde_mm256_div_epi16(test_vec[i].a, test_vec[i].b);
897
+ simde_assert_m256i_i16(r, ==, test_vec[i].r);
898
+ }
899
+
900
+ return MUNIT_OK;
901
+ }
902
+
903
+ static MunitResult
904
+ test_simde_mm256_div_epi32(const MunitParameter params[], void* data) {
905
+ (void) params;
906
+ (void) data;
907
+
908
+ const struct {
909
+ simde__m256i a;
910
+ simde__m256i b;
911
+ simde__m256i r;
912
+ } test_vec[8] = {
913
+ { simde_mm256_set_epi32(INT32_C( 1220357195), INT32_C( 1053623553), INT32_C( 1487300768), INT32_C(-1113593972),
914
+ INT32_C( -270466921), INT32_C( 1339961381), INT32_C( 586340423), INT32_C( 1641199948)),
915
+ simde_mm256_set_epi32(INT32_C( 119685834), INT32_C( 18), INT32_C( 13175516), INT32_C( 2634495),
916
+ INT32_C( 17), INT32_C( 43789), INT32_C( -89), INT32_C( 14)),
917
+ simde_mm256_set_epi32(INT32_C( 10), INT32_C( 58534641), INT32_C( 112), INT32_C( -422),
918
+ INT32_C( -15909818), INT32_C( 30600), INT32_C( -6588094), INT32_C( 117228567)) },
919
+ { simde_mm256_set_epi32(INT32_C( 1446174898), INT32_C( 1812297946), INT32_C(-2020316623), INT32_C( 843765864),
920
+ INT32_C(-1892632155), INT32_C( -473868741), INT32_C( -150363910), INT32_C(-1673359813)),
921
+ simde_mm256_set_epi32(INT32_C( 2569135), INT32_C( 8168), INT32_C( -4111977), INT32_C( -322),
922
+ INT32_C( -34091386), INT32_C( 6306), INT32_C( 363174), INT32_C( -37460)),
923
+ simde_mm256_set_epi32(INT32_C( 562), INT32_C( 221877), INT32_C( 491), INT32_C( -2620390),
924
+ INT32_C( 55), INT32_C( -75145), INT32_C( -414), INT32_C( 44670)) },
925
+ { simde_mm256_set_epi32(INT32_C( 1015973964), INT32_C( -637033789), INT32_C(-1269659180), INT32_C(-1847076164),
926
+ INT32_C( 841308417), INT32_C(-1365136816), INT32_C( -621262370), INT32_C( -734285761)),
927
+ simde_mm256_set_epi32(INT32_C( -1597720), INT32_C( 192391), INT32_C( 2145556), INT32_C( -4054),
928
+ INT32_C( -1), INT32_C( 63753), INT32_C( 24015328), INT32_C( 267)),
929
+ simde_mm256_set_epi32(INT32_C( -635), INT32_C( -3311), INT32_C( -591), INT32_C( 455618),
930
+ INT32_C( -841308417), INT32_C( -21412), INT32_C( -25), INT32_C( -2750133)) },
931
+ { simde_mm256_set_epi32(INT32_C( 55709148), INT32_C( 1036348942), INT32_C( 1622954205), INT32_C( 1464937075),
932
+ INT32_C( 309602207), INT32_C( 765487752), INT32_C(-1883826060), INT32_C( 396580110)),
933
+ simde_mm256_set_epi32(INT32_C( 81348), INT32_C( 130432), INT32_C( -2896201), INT32_C( 130033),
934
+ INT32_C( 2659), INT32_C( 12656), INT32_C( -49), INT32_C( -3976)),
935
+ simde_mm256_set_epi32(INT32_C( 684), INT32_C( 7945), INT32_C( -560), INT32_C( 11265),
936
+ INT32_C( 116435), INT32_C( 60484), INT32_C( 38445429), INT32_C( -99743)) },
937
+ { simde_mm256_set_epi32(INT32_C( -679308904), INT32_C( 1402916027), INT32_C( -568259373), INT32_C( -151984025),
938
+ INT32_C(-1276596492), INT32_C( 897258790), INT32_C( 1125465930), INT32_C(-1843912592)),
939
+ simde_mm256_set_epi32(INT32_C( -32), INT32_C( -3810), INT32_C( -77), INT32_C( -56604),
940
+ INT32_C( 2670), INT32_C( -7949), INT32_C( 3200), INT32_C( 22045)),
941
+ simde_mm256_set_epi32(INT32_C( 21228403), INT32_C( -368219), INT32_C( 7379991), INT32_C( 2685),
942
+ INT32_C( -478126), INT32_C( -112876), INT32_C( 351708), INT32_C( -83643)) },
943
+ { simde_mm256_set_epi32(INT32_C(-2128829075), INT32_C( -944286219), INT32_C(-1801390937), INT32_C( 1597729863),
944
+ INT32_C( -919883082), INT32_C( 243529930), INT32_C(-1346833089), INT32_C( -703593878)),
945
+ simde_mm256_set_epi32(INT32_C( -702474), INT32_C( -505), INT32_C( -33538370), INT32_C( 98),
946
+ INT32_C( -989384), INT32_C( -3405840), INT32_C( 1441037), INT32_C( 13)),
947
+ simde_mm256_set_epi32(INT32_C( 3030), INT32_C( 1869873), INT32_C( 53), INT32_C( 16303365),
948
+ INT32_C( 929), INT32_C( -71), INT32_C( -934), INT32_C( -54122606)) },
949
+ { simde_mm256_set_epi32(INT32_C( 2104898600), INT32_C( 1858378377), INT32_C( 427610695), INT32_C( 1702051599),
950
+ INT32_C( 1832473397), INT32_C( 333005662), INT32_C( 2145787203), INT32_C(-1223503753)),
951
+ simde_mm256_set_epi32(INT32_C( -558822192), INT32_C( -1119473), INT32_C( 71), INT32_C( -1),
952
+ INT32_C( 83208), INT32_C( -24), INT32_C( 490), INT32_C( 1423105)),
953
+ simde_mm256_set_epi32(INT32_C( -3), INT32_C( -1660), INT32_C( 6022685), INT32_C(-1702051599),
954
+ INT32_C( 22022), INT32_C( -13875235), INT32_C( 4379157), INT32_C( -859)) },
955
+ { simde_mm256_set_epi32(INT32_C( 1485879823), INT32_C( -139198096), INT32_C( 325243915), INT32_C( 1406493107),
956
+ INT32_C( 631640676), INT32_C( -221831503), INT32_C(-1100348538), INT32_C(-1615759789)),
957
+ simde_mm256_set_epi32(INT32_C( -5), INT32_C( 6019751), INT32_C( 240957918), INT32_C( -11512),
958
+ INT32_C( 598), INT32_C( -2086), INT32_C( -398), INT32_C( 57524929)),
959
+ simde_mm256_set_epi32(INT32_C( -297175964), INT32_C( -23), INT32_C( 1), INT32_C( -122176),
960
+ INT32_C( 1056255), INT32_C( 106343), INT32_C( 2764694), INT32_C( -28)) }
961
+ };
962
+
963
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
964
+ simde__m256i r = simde_mm256_div_epi32(test_vec[i].a, test_vec[i].b);
965
+ simde_assert_m256i_i32(r, ==, test_vec[i].r);
966
+ }
967
+
968
+ return MUNIT_OK;
969
+ }
970
+
971
+ static MunitResult
972
+ test_simde_mm256_div_epi64(const MunitParameter params[], void* data) {
973
+ (void) params;
974
+ (void) data;
975
+
976
+ const struct {
977
+ simde__m256i a;
978
+ simde__m256i b;
979
+ simde__m256i r;
980
+ } test_vec[8] = {
981
+ { simde_mm256_set_epi64x(INT64_C(-3334573923423752375), INT64_C( 5523377417165557950),
982
+ INT64_C( 8907494989684855351), INT64_C(-7237415305059575746)),
983
+ simde_mm256_set_epi64x(INT64_C( -9171626596647), INT64_C( -528646059918),
984
+ INT64_C( -547414), INT64_C( -408)),
985
+ simde_mm256_set_epi64x(INT64_C( 363574), INT64_C( -10448157),
986
+ INT64_C( -16271953201205), INT64_C( 17738763002596999)) },
987
+ { simde_mm256_set_epi64x(INT64_C( 1061533355853207499), INT64_C(-6945701440990101118),
988
+ INT64_C( 2574461366811200995), INT64_C( 5644549884645175906)),
989
+ simde_mm256_set_epi64x(INT64_C( -7767261), INT64_C( 10),
990
+ INT64_C( 703320391), INT64_C( 12482)),
991
+ simde_mm256_set_epi64x(INT64_C( -136667656185), INT64_C( -694570144099010111),
992
+ INT64_C( 3660438968), INT64_C( 452215180631723)) },
993
+ { simde_mm256_set_epi64x(INT64_C( 6574854431853233270), INT64_C(-4435882974713226150),
994
+ INT64_C(-7281891715377237835), INT64_C( 5757222003030846963)),
995
+ simde_mm256_set_epi64x(INT64_C( -6789037658203169), INT64_C( -17570),
996
+ INT64_C( 13607885161437703), INT64_C( -3435095)),
997
+ simde_mm256_set_epi64x(INT64_C( -968), INT64_C( 252469150524372),
998
+ INT64_C( -535), INT64_C( -1676000810175)) },
999
+ { simde_mm256_set_epi64x(INT64_C( 8744553519166698091), INT64_C( 1287292031192317940),
1000
+ INT64_C( 3174243940922689145), INT64_C( 1491394686146555130)),
1001
+ simde_mm256_set_epi64x(INT64_C( 4922490686897444762), INT64_C( 39224412374),
1002
+ INT64_C( 408105256075342), INT64_C( -123591096713)),
1003
+ simde_mm256_set_epi64x(INT64_C( 1), INT64_C( 32818644),
1004
+ INT64_C( 7778), INT64_C( -12067169)) },
1005
+ { simde_mm256_set_epi64x(INT64_C( 7799483112595335323), INT64_C(-7884857912053188380),
1006
+ INT64_C( 7107489308993436793), INT64_C( 8695475100908985079)),
1007
+ simde_mm256_set_epi64x(INT64_C( 87), INT64_C( 9826793),
1008
+ INT64_C( -161255109), INT64_C( -1858599442623445)),
1009
+ simde_mm256_set_epi64x(INT64_C( 89649231179256727), INT64_C( -802383637474),
1010
+ INT64_C( -44076056585), INT64_C( -4678)) },
1011
+ { simde_mm256_set_epi64x(INT64_C(-7825910496387937639), INT64_C( -900763466419687908),
1012
+ INT64_C(-4456690812175475739), INT64_C(-5053240277275181299)),
1013
+ simde_mm256_set_epi64x(INT64_C( -6606649764768), INT64_C( -57398),
1014
+ INT64_C( -568604113828926107), INT64_C( 4737239)),
1015
+ simde_mm256_set_epi64x(INT64_C( 1184550), INT64_C( 15693290121950),
1016
+ INT64_C( 7), INT64_C( -1066705791553)) },
1017
+ { simde_mm256_set_epi64x(INT64_C(-3221953081539923764), INT64_C(-1956032791701614517),
1018
+ INT64_C( 7374977017813000944), INT64_C( 1124803906659433418)),
1019
+ simde_mm256_set_epi64x(INT64_C( -339969907608416876), INT64_C( -15370),
1020
+ INT64_C( -1321351535), INT64_C( -7)),
1021
+ simde_mm256_set_epi64x(INT64_C( 9), INT64_C( 127263031340378),
1022
+ INT64_C( -5581389072), INT64_C( -160686272379919059)) },
1023
+ { simde_mm256_set_epi64x(INT64_C( 2535418176622027197), INT64_C(-1425521063377864898),
1024
+ INT64_C( 5027060343823160394), INT64_C(-2416798548878703366)),
1025
+ simde_mm256_set_epi64x(INT64_C( -250), INT64_C( 51),
1026
+ INT64_C( 3355), INT64_C( 22043462023905)),
1027
+ simde_mm256_set_epi64x(INT64_C( -10141672706488108), INT64_C( -27951393399565978),
1028
+ INT64_C( 1498378641974116), INT64_C( -109637)) }
1029
+ };
1030
+
1031
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1032
+ simde__m256i r = simde_mm256_div_epi64(test_vec[i].a, test_vec[i].b);
1033
+ simde_assert_m256i_i64(r, ==, test_vec[i].r);
1034
+ }
1035
+
1036
+ return MUNIT_OK;
1037
+ }
1038
+
1039
+ static MunitResult
1040
+ test_simde_mm256_div_epu8(const MunitParameter params[], void* data) {
1041
+ (void) params;
1042
+ (void) data;
1043
+
1044
+ const struct {
1045
+ simde__m256i a;
1046
+ simde__m256i b;
1047
+ simde__m256i r;
1048
+ } test_vec[8] = {
1049
+ { simde_x_mm256_set_epu8(UINT8_C(236), UINT8_C(194), UINT8_C(120), UINT8_C( 0),
1050
+ UINT8_C(238), UINT8_C(197), UINT8_C(223), UINT8_C( 50),
1051
+ UINT8_C(177), UINT8_C( 51), UINT8_C( 14), UINT8_C(208),
1052
+ UINT8_C(118), UINT8_C(136), UINT8_C(234), UINT8_C(162),
1053
+ UINT8_C( 34), UINT8_C(152), UINT8_C( 32), UINT8_C( 62),
1054
+ UINT8_C( 35), UINT8_C(101), UINT8_C( 72), UINT8_C( 2),
1055
+ UINT8_C(139), UINT8_C(150), UINT8_C(255), UINT8_C( 2),
1056
+ UINT8_C( 37), UINT8_C(232), UINT8_C( 3), UINT8_C(210)),
1057
+ simde_x_mm256_set_epu8(UINT8_C(218), UINT8_C( 43), UINT8_C( 2), UINT8_C( 2),
1058
+ UINT8_C( 29), UINT8_C( 90), UINT8_C( 30), UINT8_C( 31),
1059
+ UINT8_C( 20), UINT8_C( 1), UINT8_C( 24), UINT8_C( 92),
1060
+ UINT8_C( 3), UINT8_C( 1), UINT8_C( 33), UINT8_C( 6),
1061
+ UINT8_C( 14), UINT8_C( 38), UINT8_C( 5), UINT8_C( 4),
1062
+ UINT8_C( 13), UINT8_C( 2), UINT8_C( 11), UINT8_C( 1),
1063
+ UINT8_C( 1), UINT8_C( 25), UINT8_C(242), UINT8_C( 3),
1064
+ UINT8_C( 12), UINT8_C( 59), UINT8_C( 75), UINT8_C(192)),
1065
+ simde_x_mm256_set_epu8(UINT8_C( 1), UINT8_C( 4), UINT8_C( 60), UINT8_C( 0),
1066
+ UINT8_C( 8), UINT8_C( 2), UINT8_C( 7), UINT8_C( 1),
1067
+ UINT8_C( 8), UINT8_C( 51), UINT8_C( 0), UINT8_C( 2),
1068
+ UINT8_C( 39), UINT8_C(136), UINT8_C( 7), UINT8_C( 27),
1069
+ UINT8_C( 2), UINT8_C( 4), UINT8_C( 6), UINT8_C( 15),
1070
+ UINT8_C( 2), UINT8_C( 50), UINT8_C( 6), UINT8_C( 2),
1071
+ UINT8_C(139), UINT8_C( 6), UINT8_C( 1), UINT8_C( 0),
1072
+ UINT8_C( 3), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1)) },
1073
+ { simde_x_mm256_set_epu8(UINT8_C(223), UINT8_C(136), UINT8_C(181), UINT8_C(189),
1074
+ UINT8_C(144), UINT8_C(162), UINT8_C( 60), UINT8_C(122),
1075
+ UINT8_C(180), UINT8_C(157), UINT8_C(255), UINT8_C( 4),
1076
+ UINT8_C(248), UINT8_C( 71), UINT8_C( 45), UINT8_C(231),
1077
+ UINT8_C(108), UINT8_C(100), UINT8_C( 13), UINT8_C(181),
1078
+ UINT8_C(158), UINT8_C(251), UINT8_C(141), UINT8_C( 49),
1079
+ UINT8_C(175), UINT8_C( 90), UINT8_C(251), UINT8_C( 13),
1080
+ UINT8_C(151), UINT8_C(233), UINT8_C(181), UINT8_C(181)),
1081
+ simde_x_mm256_set_epu8(UINT8_C( 2), UINT8_C( 7), UINT8_C( 2), UINT8_C( 7),
1082
+ UINT8_C( 6), UINT8_C( 23), UINT8_C( 1), UINT8_C( 22),
1083
+ UINT8_C( 9), UINT8_C( 21), UINT8_C( 6), UINT8_C( 1),
1084
+ UINT8_C( 1), UINT8_C( 27), UINT8_C( 1), UINT8_C(254),
1085
+ UINT8_C( 30), UINT8_C( 92), UINT8_C( 8), UINT8_C( 13),
1086
+ UINT8_C( 7), UINT8_C( 4), UINT8_C( 29), UINT8_C( 24),
1087
+ UINT8_C( 1), UINT8_C( 15), UINT8_C( 31), UINT8_C( 1),
1088
+ UINT8_C(190), UINT8_C( 1), UINT8_C( 20), UINT8_C( 8)),
1089
+ simde_x_mm256_set_epu8(UINT8_C(111), UINT8_C( 19), UINT8_C( 90), UINT8_C( 27),
1090
+ UINT8_C( 24), UINT8_C( 7), UINT8_C( 60), UINT8_C( 5),
1091
+ UINT8_C( 20), UINT8_C( 7), UINT8_C( 42), UINT8_C( 4),
1092
+ UINT8_C(248), UINT8_C( 2), UINT8_C( 45), UINT8_C( 0),
1093
+ UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 13),
1094
+ UINT8_C( 22), UINT8_C( 62), UINT8_C( 4), UINT8_C( 2),
1095
+ UINT8_C(175), UINT8_C( 6), UINT8_C( 8), UINT8_C( 13),
1096
+ UINT8_C( 0), UINT8_C(233), UINT8_C( 9), UINT8_C( 22)) },
1097
+ { simde_x_mm256_set_epu8(UINT8_C(162), UINT8_C( 7), UINT8_C(145), UINT8_C(154),
1098
+ UINT8_C(168), UINT8_C(175), UINT8_C( 61), UINT8_C( 3),
1099
+ UINT8_C( 93), UINT8_C( 6), UINT8_C(114), UINT8_C( 59),
1100
+ UINT8_C( 17), UINT8_C(165), UINT8_C(240), UINT8_C(189),
1101
+ UINT8_C(201), UINT8_C( 90), UINT8_C( 72), UINT8_C( 56),
1102
+ UINT8_C( 98), UINT8_C(155), UINT8_C( 93), UINT8_C(190),
1103
+ UINT8_C( 59), UINT8_C(174), UINT8_C(136), UINT8_C( 6),
1104
+ UINT8_C(153), UINT8_C(172), UINT8_C(102), UINT8_C(120)),
1105
+ simde_x_mm256_set_epu8(UINT8_C(110), UINT8_C( 41), UINT8_C( 3), UINT8_C( 12),
1106
+ UINT8_C(210), UINT8_C( 1), UINT8_C( 5), UINT8_C( 6),
1107
+ UINT8_C( 47), UINT8_C( 58), UINT8_C( 48), UINT8_C( 20),
1108
+ UINT8_C(109), UINT8_C( 3), UINT8_C( 34), UINT8_C( 3),
1109
+ UINT8_C( 8), UINT8_C( 5), UINT8_C( 3), UINT8_C( 1),
1110
+ UINT8_C( 20), UINT8_C( 14), UINT8_C( 1), UINT8_C( 6),
1111
+ UINT8_C( 15), UINT8_C( 3), UINT8_C( 95), UINT8_C( 1),
1112
+ UINT8_C( 4), UINT8_C( 1), UINT8_C( 7), UINT8_C( 1)),
1113
+ simde_x_mm256_set_epu8(UINT8_C( 1), UINT8_C( 0), UINT8_C( 48), UINT8_C( 12),
1114
+ UINT8_C( 0), UINT8_C(175), UINT8_C( 12), UINT8_C( 0),
1115
+ UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2),
1116
+ UINT8_C( 0), UINT8_C( 55), UINT8_C( 7), UINT8_C( 63),
1117
+ UINT8_C( 25), UINT8_C( 18), UINT8_C( 24), UINT8_C( 56),
1118
+ UINT8_C( 4), UINT8_C( 11), UINT8_C( 93), UINT8_C( 31),
1119
+ UINT8_C( 3), UINT8_C( 58), UINT8_C( 1), UINT8_C( 6),
1120
+ UINT8_C( 38), UINT8_C(172), UINT8_C( 14), UINT8_C(120)) },
1121
+ { simde_x_mm256_set_epu8(UINT8_C( 3), UINT8_C( 62), UINT8_C(201), UINT8_C( 91),
1122
+ UINT8_C( 81), UINT8_C(108), UINT8_C(219), UINT8_C(124),
1123
+ UINT8_C(107), UINT8_C(229), UINT8_C(194), UINT8_C( 6),
1124
+ UINT8_C(247), UINT8_C(122), UINT8_C( 69), UINT8_C(216),
1125
+ UINT8_C(192), UINT8_C(132), UINT8_C( 14), UINT8_C(210),
1126
+ UINT8_C(242), UINT8_C(228), UINT8_C( 76), UINT8_C(247),
1127
+ UINT8_C(164), UINT8_C(249), UINT8_C(124), UINT8_C(200),
1128
+ UINT8_C(141), UINT8_C(206), UINT8_C(142), UINT8_C(235)),
1129
+ simde_x_mm256_set_epu8(UINT8_C(182), UINT8_C( 3), UINT8_C( 13), UINT8_C( 91),
1130
+ UINT8_C( 12), UINT8_C( 10), UINT8_C( 1), UINT8_C( 3),
1131
+ UINT8_C( 4), UINT8_C( 8), UINT8_C( 93), UINT8_C( 1),
1132
+ UINT8_C( 2), UINT8_C( 38), UINT8_C( 3), UINT8_C(172),
1133
+ UINT8_C( 38), UINT8_C( 15), UINT8_C( 55), UINT8_C( 26),
1134
+ UINT8_C( 4), UINT8_C( 16), UINT8_C( 28), UINT8_C( 54),
1135
+ UINT8_C( 21), UINT8_C( 30), UINT8_C( 3), UINT8_C( 39),
1136
+ UINT8_C( 14), UINT8_C(171), UINT8_C( 2), UINT8_C( 4)),
1137
+ simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 20), UINT8_C( 15), UINT8_C( 1),
1138
+ UINT8_C( 6), UINT8_C( 10), UINT8_C(219), UINT8_C( 41),
1139
+ UINT8_C( 26), UINT8_C( 28), UINT8_C( 2), UINT8_C( 6),
1140
+ UINT8_C(123), UINT8_C( 3), UINT8_C( 23), UINT8_C( 1),
1141
+ UINT8_C( 5), UINT8_C( 8), UINT8_C( 0), UINT8_C( 8),
1142
+ UINT8_C( 60), UINT8_C( 14), UINT8_C( 2), UINT8_C( 4),
1143
+ UINT8_C( 7), UINT8_C( 8), UINT8_C( 41), UINT8_C( 5),
1144
+ UINT8_C( 10), UINT8_C( 1), UINT8_C( 71), UINT8_C( 58)) },
1145
+ { simde_x_mm256_set_epu8(UINT8_C(168), UINT8_C( 0), UINT8_C(141), UINT8_C(215),
1146
+ UINT8_C( 23), UINT8_C(105), UINT8_C(153), UINT8_C(228),
1147
+ UINT8_C(144), UINT8_C(204), UINT8_C(214), UINT8_C(202),
1148
+ UINT8_C(227), UINT8_C(255), UINT8_C( 22), UINT8_C(115),
1149
+ UINT8_C(131), UINT8_C(142), UINT8_C( 73), UINT8_C(133),
1150
+ UINT8_C( 47), UINT8_C(243), UINT8_C(254), UINT8_C(234),
1151
+ UINT8_C( 91), UINT8_C(217), UINT8_C(119), UINT8_C(247),
1152
+ UINT8_C(245), UINT8_C( 31), UINT8_C( 46), UINT8_C( 19)),
1153
+ simde_x_mm256_set_epu8(UINT8_C( 1), UINT8_C(248), UINT8_C( 3), UINT8_C( 9),
1154
+ UINT8_C( 3), UINT8_C( 87), UINT8_C(117), UINT8_C( 58),
1155
+ UINT8_C( 18), UINT8_C( 9), UINT8_C( 7), UINT8_C( 77),
1156
+ UINT8_C( 11), UINT8_C( 11), UINT8_C( 28), UINT8_C( 49),
1157
+ UINT8_C( 64), UINT8_C( 46), UINT8_C( 5), UINT8_C( 1),
1158
+ UINT8_C(115), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1),
1159
+ UINT8_C( 86), UINT8_C( 10), UINT8_C( 3), UINT8_C( 12),
1160
+ UINT8_C( 49), UINT8_C(155), UINT8_C( 1), UINT8_C( 3)),
1161
+ simde_x_mm256_set_epu8(UINT8_C(168), UINT8_C( 0), UINT8_C( 47), UINT8_C( 23),
1162
+ UINT8_C( 7), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3),
1163
+ UINT8_C( 8), UINT8_C( 22), UINT8_C( 30), UINT8_C( 2),
1164
+ UINT8_C( 20), UINT8_C( 23), UINT8_C( 0), UINT8_C( 2),
1165
+ UINT8_C( 2), UINT8_C( 3), UINT8_C( 14), UINT8_C(133),
1166
+ UINT8_C( 0), UINT8_C(121), UINT8_C(254), UINT8_C(234),
1167
+ UINT8_C( 1), UINT8_C( 21), UINT8_C( 39), UINT8_C( 20),
1168
+ UINT8_C( 5), UINT8_C( 0), UINT8_C( 46), UINT8_C( 6)) },
1169
+ { simde_x_mm256_set_epu8(UINT8_C(163), UINT8_C(117), UINT8_C( 13), UINT8_C( 71),
1170
+ UINT8_C(173), UINT8_C(230), UINT8_C(206), UINT8_C( 2),
1171
+ UINT8_C( 15), UINT8_C(252), UINT8_C( 14), UINT8_C(197),
1172
+ UINT8_C(249), UINT8_C(198), UINT8_C( 30), UINT8_C(180),
1173
+ UINT8_C(128), UINT8_C( 78), UINT8_C(184), UINT8_C(254),
1174
+ UINT8_C(184), UINT8_C(231), UINT8_C(238), UINT8_C( 30),
1175
+ UINT8_C(194), UINT8_C( 37), UINT8_C(226), UINT8_C( 86),
1176
+ UINT8_C(140), UINT8_C( 24), UINT8_C(144), UINT8_C( 16)),
1177
+ simde_x_mm256_set_epu8(UINT8_C( 48), UINT8_C( 1), UINT8_C( 7), UINT8_C( 6),
1178
+ UINT8_C(119), UINT8_C( 41), UINT8_C(111), UINT8_C( 8),
1179
+ UINT8_C(135), UINT8_C( 2), UINT8_C( 23), UINT8_C( 1),
1180
+ UINT8_C( 88), UINT8_C( 15), UINT8_C( 65), UINT8_C( 79),
1181
+ UINT8_C( 29), UINT8_C( 5), UINT8_C( 5), UINT8_C( 6),
1182
+ UINT8_C( 44), UINT8_C( 21), UINT8_C( 2), UINT8_C( 3),
1183
+ UINT8_C( 15), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3),
1184
+ UINT8_C( 1), UINT8_C( 10), UINT8_C( 1), UINT8_C( 55)),
1185
+ simde_x_mm256_set_epu8(UINT8_C( 3), UINT8_C(117), UINT8_C( 1), UINT8_C( 11),
1186
+ UINT8_C( 1), UINT8_C( 5), UINT8_C( 1), UINT8_C( 0),
1187
+ UINT8_C( 0), UINT8_C(126), UINT8_C( 0), UINT8_C(197),
1188
+ UINT8_C( 2), UINT8_C( 13), UINT8_C( 0), UINT8_C( 2),
1189
+ UINT8_C( 4), UINT8_C( 15), UINT8_C( 36), UINT8_C( 42),
1190
+ UINT8_C( 4), UINT8_C( 11), UINT8_C(119), UINT8_C( 10),
1191
+ UINT8_C( 12), UINT8_C( 37), UINT8_C( 75), UINT8_C( 28),
1192
+ UINT8_C(140), UINT8_C( 2), UINT8_C(144), UINT8_C( 0)) },
1193
+ { simde_x_mm256_set_epu8(UINT8_C(239), UINT8_C(204), UINT8_C( 51), UINT8_C(246),
1194
+ UINT8_C( 77), UINT8_C(149), UINT8_C( 40), UINT8_C( 86),
1195
+ UINT8_C( 29), UINT8_C( 8), UINT8_C(140), UINT8_C(202),
1196
+ UINT8_C(138), UINT8_C(208), UINT8_C(142), UINT8_C( 95),
1197
+ UINT8_C(247), UINT8_C(102), UINT8_C( 63), UINT8_C(232),
1198
+ UINT8_C(115), UINT8_C(187), UINT8_C(122), UINT8_C(179),
1199
+ UINT8_C( 81), UINT8_C(192), UINT8_C( 47), UINT8_C( 34),
1200
+ UINT8_C( 24), UINT8_C(133), UINT8_C( 98), UINT8_C(208)),
1201
+ simde_x_mm256_set_epu8(UINT8_C( 11), UINT8_C( 8), UINT8_C( 2), UINT8_C( 10),
1202
+ UINT8_C( 3), UINT8_C( 7), UINT8_C( 38), UINT8_C( 21),
1203
+ UINT8_C(247), UINT8_C( 14), UINT8_C( 4), UINT8_C( 3),
1204
+ UINT8_C( 85), UINT8_C( 59), UINT8_C( 41), UINT8_C( 1),
1205
+ UINT8_C( 1), UINT8_C(250), UINT8_C( 1), UINT8_C( 2),
1206
+ UINT8_C( 6), UINT8_C( 8), UINT8_C( 6), UINT8_C( 40),
1207
+ UINT8_C(136), UINT8_C( 10), UINT8_C( 29), UINT8_C( 7),
1208
+ UINT8_C( 36), UINT8_C( 8), UINT8_C( 1), UINT8_C( 7)),
1209
+ simde_x_mm256_set_epu8(UINT8_C( 21), UINT8_C( 25), UINT8_C( 25), UINT8_C( 24),
1210
+ UINT8_C( 25), UINT8_C( 21), UINT8_C( 1), UINT8_C( 4),
1211
+ UINT8_C( 0), UINT8_C( 0), UINT8_C( 35), UINT8_C( 67),
1212
+ UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 95),
1213
+ UINT8_C(247), UINT8_C( 0), UINT8_C( 63), UINT8_C(116),
1214
+ UINT8_C( 19), UINT8_C( 23), UINT8_C( 20), UINT8_C( 4),
1215
+ UINT8_C( 0), UINT8_C( 19), UINT8_C( 1), UINT8_C( 4),
1216
+ UINT8_C( 0), UINT8_C( 16), UINT8_C( 98), UINT8_C( 29)) },
1217
+ { simde_x_mm256_set_epu8(UINT8_C(179), UINT8_C(197), UINT8_C(124), UINT8_C(228),
1218
+ UINT8_C(210), UINT8_C(205), UINT8_C(251), UINT8_C( 37),
1219
+ UINT8_C( 37), UINT8_C( 57), UINT8_C( 27), UINT8_C( 38),
1220
+ UINT8_C( 13), UINT8_C(212), UINT8_C(201), UINT8_C(125),
1221
+ UINT8_C( 84), UINT8_C(229), UINT8_C( 76), UINT8_C(128),
1222
+ UINT8_C(139), UINT8_C(203), UINT8_C(238), UINT8_C(218),
1223
+ UINT8_C( 40), UINT8_C( 95), UINT8_C(243), UINT8_C(110),
1224
+ UINT8_C( 74), UINT8_C( 0), UINT8_C(215), UINT8_C( 43)),
1225
+ simde_x_mm256_set_epu8(UINT8_C( 2), UINT8_C( 2), UINT8_C( 4), UINT8_C( 5),
1226
+ UINT8_C( 7), UINT8_C( 2), UINT8_C(195), UINT8_C( 2),
1227
+ UINT8_C( 30), UINT8_C( 1), UINT8_C( 9), UINT8_C( 24),
1228
+ UINT8_C( 6), UINT8_C( 7), UINT8_C( 28), UINT8_C( 58),
1229
+ UINT8_C( 3), UINT8_C( 77), UINT8_C( 90), UINT8_C( 51),
1230
+ UINT8_C( 13), UINT8_C( 12), UINT8_C( 7), UINT8_C( 91),
1231
+ UINT8_C(243), UINT8_C( 40), UINT8_C( 1), UINT8_C( 45),
1232
+ UINT8_C( 77), UINT8_C( 45), UINT8_C( 60), UINT8_C( 3)),
1233
+ simde_x_mm256_set_epu8(UINT8_C( 89), UINT8_C( 98), UINT8_C( 31), UINT8_C( 45),
1234
+ UINT8_C( 30), UINT8_C(102), UINT8_C( 1), UINT8_C( 18),
1235
+ UINT8_C( 1), UINT8_C( 57), UINT8_C( 3), UINT8_C( 1),
1236
+ UINT8_C( 2), UINT8_C( 30), UINT8_C( 7), UINT8_C( 2),
1237
+ UINT8_C( 28), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2),
1238
+ UINT8_C( 10), UINT8_C( 16), UINT8_C( 34), UINT8_C( 2),
1239
+ UINT8_C( 0), UINT8_C( 2), UINT8_C(243), UINT8_C( 2),
1240
+ UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 14)) }
1241
+ };
1242
+
1243
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1244
+ simde__m256i r = simde_mm256_div_epu8(test_vec[i].a, test_vec[i].b);
1245
+ simde_assert_m256i_u8(r, ==, test_vec[i].r);
1246
+ }
1247
+
1248
+ return MUNIT_OK;
1249
+ }
1250
+
1251
+ static MunitResult
1252
+ test_simde_mm256_div_epu16(const MunitParameter params[], void* data) {
1253
+ (void) params;
1254
+ (void) data;
1255
+
1256
+ const struct {
1257
+ simde__m256i a;
1258
+ simde__m256i b;
1259
+ simde__m256i r;
1260
+ } test_vec[8] = {
1261
+ { simde_x_mm256_set_epu16(UINT16_C( 50042), UINT16_C( 33648), UINT16_C( 7535), UINT16_C( 12279),
1262
+ UINT16_C( 36071), UINT16_C( 18107), UINT16_C( 48674), UINT16_C( 48206),
1263
+ UINT16_C( 9011), UINT16_C( 45275), UINT16_C( 7845), UINT16_C( 54048),
1264
+ UINT16_C( 27322), UINT16_C( 31657), UINT16_C( 43497), UINT16_C( 33598)),
1265
+ simde_x_mm256_set_epu16(UINT16_C( 12011), UINT16_C( 249), UINT16_C( 5), UINT16_C( 2),
1266
+ UINT16_C( 1870), UINT16_C( 2904), UINT16_C( 1530), UINT16_C( 42479),
1267
+ UINT16_C( 63442), UINT16_C( 1039), UINT16_C( 54), UINT16_C( 1),
1268
+ UINT16_C( 98), UINT16_C( 7948), UINT16_C( 2053), UINT16_C( 29)),
1269
+ simde_x_mm256_set_epu16(UINT16_C( 4), UINT16_C( 135), UINT16_C( 1507), UINT16_C( 6139),
1270
+ UINT16_C( 19), UINT16_C( 6), UINT16_C( 31), UINT16_C( 1),
1271
+ UINT16_C( 0), UINT16_C( 43), UINT16_C( 145), UINT16_C( 54048),
1272
+ UINT16_C( 278), UINT16_C( 3), UINT16_C( 21), UINT16_C( 1158)) },
1273
+ { simde_x_mm256_set_epu16(UINT16_C( 31411), UINT16_C( 55001), UINT16_C( 38051), UINT16_C( 20389),
1274
+ UINT16_C( 61351), UINT16_C( 22045), UINT16_C( 61939), UINT16_C( 10168),
1275
+ UINT16_C( 65482), UINT16_C( 32951), UINT16_C( 59114), UINT16_C( 9472),
1276
+ UINT16_C( 21787), UINT16_C( 1387), UINT16_C( 60519), UINT16_C( 39038)),
1277
+ simde_x_mm256_set_epu16(UINT16_C( 11771), UINT16_C( 1), UINT16_C( 490), UINT16_C( 32408),
1278
+ UINT16_C( 2225), UINT16_C( 134), UINT16_C( 13968), UINT16_C( 1),
1279
+ UINT16_C( 387), UINT16_C( 14591), UINT16_C( 24), UINT16_C( 46),
1280
+ UINT16_C( 8450), UINT16_C( 1053), UINT16_C( 908), UINT16_C( 5686)),
1281
+ simde_x_mm256_set_epu16(UINT16_C( 2), UINT16_C( 55001), UINT16_C( 77), UINT16_C( 0),
1282
+ UINT16_C( 27), UINT16_C( 164), UINT16_C( 4), UINT16_C( 10168),
1283
+ UINT16_C( 169), UINT16_C( 2), UINT16_C( 2463), UINT16_C( 205),
1284
+ UINT16_C( 2), UINT16_C( 1), UINT16_C( 66), UINT16_C( 6)) },
1285
+ { simde_x_mm256_set_epu16(UINT16_C( 22899), UINT16_C( 630), UINT16_C( 34558), UINT16_C( 7884),
1286
+ UINT16_C( 39724), UINT16_C( 33230), UINT16_C( 54475), UINT16_C( 22805),
1287
+ UINT16_C( 61755), UINT16_C( 34661), UINT16_C( 28373), UINT16_C( 58279),
1288
+ UINT16_C( 22187), UINT16_C( 56981), UINT16_C( 43877), UINT16_C( 3469)),
1289
+ simde_x_mm256_set_epu16(UINT16_C( 12306), UINT16_C( 182), UINT16_C( 29239), UINT16_C( 4194),
1290
+ UINT16_C( 818), UINT16_C( 16), UINT16_C( 5), UINT16_C( 38),
1291
+ UINT16_C( 42688), UINT16_C( 8), UINT16_C( 1), UINT16_C( 96),
1292
+ UINT16_C( 3), UINT16_C( 1), UINT16_C( 508), UINT16_C( 1)),
1293
+ simde_x_mm256_set_epu16(UINT16_C( 1), UINT16_C( 3), UINT16_C( 1), UINT16_C( 1),
1294
+ UINT16_C( 48), UINT16_C( 2076), UINT16_C( 10895), UINT16_C( 600),
1295
+ UINT16_C( 1), UINT16_C( 4332), UINT16_C( 28373), UINT16_C( 607),
1296
+ UINT16_C( 7395), UINT16_C( 56981), UINT16_C( 86), UINT16_C( 3469)) },
1297
+ { simde_x_mm256_set_epu16(UINT16_C( 29363), UINT16_C( 50584), UINT16_C( 56168), UINT16_C( 44370),
1298
+ UINT16_C( 62910), UINT16_C( 23255), UINT16_C( 39479), UINT16_C( 21044),
1299
+ UINT16_C( 7491), UINT16_C( 25737), UINT16_C( 6938), UINT16_C( 40142),
1300
+ UINT16_C( 22210), UINT16_C( 63545), UINT16_C( 33358), UINT16_C( 9014)),
1301
+ simde_x_mm256_set_epu16(UINT16_C( 61), UINT16_C( 274), UINT16_C( 365), UINT16_C( 58937),
1302
+ UINT16_C( 2), UINT16_C( 172), UINT16_C( 432), UINT16_C( 2),
1303
+ UINT16_C( 957), UINT16_C( 351), UINT16_C( 18), UINT16_C( 12717),
1304
+ UINT16_C( 4), UINT16_C( 417), UINT16_C( 1), UINT16_C( 10550)),
1305
+ simde_x_mm256_set_epu16(UINT16_C( 481), UINT16_C( 184), UINT16_C( 153), UINT16_C( 0),
1306
+ UINT16_C( 31455), UINT16_C( 135), UINT16_C( 91), UINT16_C( 10522),
1307
+ UINT16_C( 7), UINT16_C( 73), UINT16_C( 385), UINT16_C( 3),
1308
+ UINT16_C( 5552), UINT16_C( 152), UINT16_C( 33358), UINT16_C( 0)) },
1309
+ { simde_x_mm256_set_epu16(UINT16_C( 22208), UINT16_C( 58940), UINT16_C( 24739), UINT16_C( 29405),
1310
+ UINT16_C( 9863), UINT16_C( 41917), UINT16_C( 30045), UINT16_C( 40634),
1311
+ UINT16_C( 50211), UINT16_C( 4668), UINT16_C( 42314), UINT16_C( 29370),
1312
+ UINT16_C( 57744), UINT16_C( 37787), UINT16_C( 17171), UINT16_C( 34222)),
1313
+ simde_x_mm256_set_epu16(UINT16_C( 4256), UINT16_C( 23971), UINT16_C( 171), UINT16_C( 12),
1314
+ UINT16_C( 8070), UINT16_C( 2906), UINT16_C( 22), UINT16_C( 107),
1315
+ UINT16_C( 3), UINT16_C( 1), UINT16_C( 28355), UINT16_C( 2210),
1316
+ UINT16_C( 1), UINT16_C( 1161), UINT16_C( 613), UINT16_C( 51426)),
1317
+ simde_x_mm256_set_epu16(UINT16_C( 5), UINT16_C( 2), UINT16_C( 144), UINT16_C( 2450),
1318
+ UINT16_C( 1), UINT16_C( 14), UINT16_C( 1365), UINT16_C( 379),
1319
+ UINT16_C( 16737), UINT16_C( 4668), UINT16_C( 1), UINT16_C( 13),
1320
+ UINT16_C( 57744), UINT16_C( 32), UINT16_C( 28), UINT16_C( 0)) },
1321
+ { simde_x_mm256_set_epu16(UINT16_C( 9143), UINT16_C( 55963), UINT16_C( 46820), UINT16_C( 55354),
1322
+ UINT16_C( 21540), UINT16_C( 21596), UINT16_C( 49435), UINT16_C( 42142),
1323
+ UINT16_C( 28170), UINT16_C( 3714), UINT16_C( 39462), UINT16_C( 28043),
1324
+ UINT16_C( 45359), UINT16_C( 22609), UINT16_C( 55149), UINT16_C( 21886)),
1325
+ simde_x_mm256_set_epu16(UINT16_C( 3121), UINT16_C( 103), UINT16_C( 1), UINT16_C( 283),
1326
+ UINT16_C( 201), UINT16_C( 53), UINT16_C( 25996), UINT16_C( 3169),
1327
+ UINT16_C( 1), UINT16_C( 2), UINT16_C( 38), UINT16_C( 24),
1328
+ UINT16_C( 55), UINT16_C( 25444), UINT16_C( 5182), UINT16_C( 9)),
1329
+ simde_x_mm256_set_epu16(UINT16_C( 2), UINT16_C( 543), UINT16_C( 46820), UINT16_C( 195),
1330
+ UINT16_C( 107), UINT16_C( 407), UINT16_C( 1), UINT16_C( 13),
1331
+ UINT16_C( 28170), UINT16_C( 1857), UINT16_C( 1038), UINT16_C( 1168),
1332
+ UINT16_C( 824), UINT16_C( 0), UINT16_C( 10), UINT16_C( 2431)) },
1333
+ { simde_x_mm256_set_epu16(UINT16_C( 51894), UINT16_C( 1840), UINT16_C( 33552), UINT16_C( 50070),
1334
+ UINT16_C( 16848), UINT16_C( 13340), UINT16_C( 25356), UINT16_C( 34016),
1335
+ UINT16_C( 61275), UINT16_C( 22886), UINT16_C( 28292), UINT16_C( 37845),
1336
+ UINT16_C( 1481), UINT16_C( 559), UINT16_C( 12899), UINT16_C( 38851)),
1337
+ simde_x_mm256_set_epu16(UINT16_C( 16266), UINT16_C( 376), UINT16_C( 62048), UINT16_C( 8),
1338
+ UINT16_C( 53), UINT16_C( 1573), UINT16_C( 8), UINT16_C( 212),
1339
+ UINT16_C( 15505), UINT16_C( 1), UINT16_C( 10), UINT16_C( 2744),
1340
+ UINT16_C( 2), UINT16_C( 5), UINT16_C( 4478), UINT16_C( 12656)),
1341
+ simde_x_mm256_set_epu16(UINT16_C( 3), UINT16_C( 4), UINT16_C( 0), UINT16_C( 6258),
1342
+ UINT16_C( 317), UINT16_C( 8), UINT16_C( 3169), UINT16_C( 160),
1343
+ UINT16_C( 3), UINT16_C( 22886), UINT16_C( 2829), UINT16_C( 13),
1344
+ UINT16_C( 740), UINT16_C( 111), UINT16_C( 2), UINT16_C( 3)) },
1345
+ { simde_x_mm256_set_epu16(UINT16_C( 40946), UINT16_C( 11832), UINT16_C( 52869), UINT16_C( 41324),
1346
+ UINT16_C( 41064), UINT16_C( 57085), UINT16_C( 14204), UINT16_C( 23869),
1347
+ UINT16_C( 30467), UINT16_C( 20149), UINT16_C( 58844), UINT16_C( 49602),
1348
+ UINT16_C( 36092), UINT16_C( 39146), UINT16_C( 62840), UINT16_C( 19573)),
1349
+ simde_x_mm256_set_epu16(UINT16_C( 7725), UINT16_C( 5897), UINT16_C( 81), UINT16_C( 199),
1350
+ UINT16_C( 33008), UINT16_C( 55443), UINT16_C( 925), UINT16_C( 4043),
1351
+ UINT16_C( 362), UINT16_C( 156), UINT16_C( 2592), UINT16_C( 29),
1352
+ UINT16_C( 213), UINT16_C( 14), UINT16_C( 39), UINT16_C( 178)),
1353
+ simde_x_mm256_set_epu16(UINT16_C( 5), UINT16_C( 2), UINT16_C( 652), UINT16_C( 207),
1354
+ UINT16_C( 1), UINT16_C( 1), UINT16_C( 15), UINT16_C( 5),
1355
+ UINT16_C( 84), UINT16_C( 129), UINT16_C( 22), UINT16_C( 1710),
1356
+ UINT16_C( 169), UINT16_C( 2796), UINT16_C( 1611), UINT16_C( 109)) }
1357
+ };
1358
+
1359
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1360
+ simde__m256i r = simde_mm256_div_epu16(test_vec[i].a, test_vec[i].b);
1361
+ simde_assert_m256i_u16(r, ==, test_vec[i].r);
1362
+ }
1363
+
1364
+ return MUNIT_OK;
1365
+ }
1366
+
1367
+ static MunitResult
1368
+ test_simde_mm256_div_epu32(const MunitParameter params[], void* data) {
1369
+ (void) params;
1370
+ (void) data;
1371
+
1372
+ const struct {
1373
+ simde__m256i a;
1374
+ simde__m256i b;
1375
+ simde__m256i r;
1376
+ } test_vec[8] = {
1377
+ { simde_x_mm256_set_epu32(UINT32_C( 621216267), UINT32_C(2973447507), UINT32_C(1814279233), UINT32_C(3673557536),
1378
+ UINT32_C(4015780858), UINT32_C(1070914538), UINT32_C(2707640519), UINT32_C(3041291274)),
1379
+ simde_x_mm256_set_epu32(UINT32_C( 122731), UINT32_C( 51630147), UINT32_C( 152670), UINT32_C( 7731229),
1380
+ UINT32_C( 711400), UINT32_C( 1744981), UINT32_C( 164943127), UINT32_C( 169494)),
1381
+ simde_x_mm256_set_epu32(UINT32_C( 5061), UINT32_C( 57), UINT32_C( 11883), UINT32_C( 475),
1382
+ UINT32_C( 5644), UINT32_C( 613), UINT32_C( 16), UINT32_C( 17943)) },
1383
+ { simde_x_mm256_set_epu32(UINT32_C(1084014678), UINT32_C(1666523830), UINT32_C(3454667769), UINT32_C(4029614313),
1384
+ UINT32_C(3425016021), UINT32_C(2449839571), UINT32_C(1601532569), UINT32_C(1519388398)),
1385
+ simde_x_mm256_set_epu32(UINT32_C( 130157), UINT32_C( 5585515), UINT32_C( 62691231), UINT32_C( 37123),
1386
+ UINT32_C( 2515600), UINT32_C( 106484982), UINT32_C(4168501606), UINT32_C( 2781814)),
1387
+ simde_x_mm256_set_epu32(UINT32_C( 8328), UINT32_C( 298), UINT32_C( 55), UINT32_C( 108547),
1388
+ UINT32_C( 1361), UINT32_C( 23), UINT32_C( 0), UINT32_C( 546)) },
1389
+ { simde_x_mm256_set_epu32(UINT32_C(2187853776), UINT32_C( 131263503), UINT32_C( 20338031), UINT32_C(3062800456),
1390
+ UINT32_C(1802896354), UINT32_C( 22231847), UINT32_C(3438214155), UINT32_C(1776513196)),
1391
+ simde_x_mm256_set_epu32(UINT32_C( 28353115), UINT32_C( 92496104), UINT32_C( 15335526), UINT32_C( 99105532),
1392
+ UINT32_C( 5905009), UINT32_C( 27824), UINT32_C( 28986), UINT32_C( 12459911)),
1393
+ simde_x_mm256_set_epu32(UINT32_C( 77), UINT32_C( 1), UINT32_C( 1), UINT32_C( 30),
1394
+ UINT32_C( 305), UINT32_C( 799), UINT32_C( 118616), UINT32_C( 142)) },
1395
+ { simde_x_mm256_set_epu32(UINT32_C( 524596333), UINT32_C(3965897825), UINT32_C(1593754725), UINT32_C( 694203496),
1396
+ UINT32_C(1917650066), UINT32_C(2692610113), UINT32_C(1620259645), UINT32_C( 607116294)),
1397
+ simde_x_mm256_set_epu32(UINT32_C( 29757558), UINT32_C( 80117), UINT32_C( 412054571), UINT32_C( 878110),
1398
+ UINT32_C(4124070325), UINT32_C( 8250706), UINT32_C( 7930575), UINT32_C( 51813)),
1399
+ simde_x_mm256_set_epu32(UINT32_C( 17), UINT32_C( 49501), UINT32_C( 3), UINT32_C( 790),
1400
+ UINT32_C( 0), UINT32_C( 326), UINT32_C( 204), UINT32_C( 11717)) },
1401
+ { simde_x_mm256_set_epu32(UINT32_C( 625862951), UINT32_C( 793130310), UINT32_C(2489185635), UINT32_C(2468815203),
1402
+ UINT32_C(3079066921), UINT32_C( 802958712), UINT32_C(1537818066), UINT32_C(1678295724)),
1403
+ simde_x_mm256_set_epu32(UINT32_C( 8259237), UINT32_C( 229091), UINT32_C( 7899398), UINT32_C( 41009690),
1404
+ UINT32_C( 26030333), UINT32_C( 228627), UINT32_C(1200021710), UINT32_C( 186204)),
1405
+ simde_x_mm256_set_epu32(UINT32_C( 75), UINT32_C( 3462), UINT32_C( 315), UINT32_C( 60),
1406
+ UINT32_C( 118), UINT32_C( 3512), UINT32_C( 1), UINT32_C( 9013)) },
1407
+ { simde_x_mm256_set_epu32(UINT32_C(3334078645), UINT32_C(2226952893), UINT32_C(1901933944), UINT32_C(3456551705),
1408
+ UINT32_C(3394846076), UINT32_C(2592342753), UINT32_C(1822000161), UINT32_C(3060682219)),
1409
+ simde_x_mm256_set_epu32(UINT32_C( 55529), UINT32_C( 95077), UINT32_C( 61849330), UINT32_C( 77269),
1410
+ UINT32_C( 181901), UINT32_C( 66287), UINT32_C( 46407), UINT32_C( 1962)),
1411
+ simde_x_mm256_set_epu32(UINT32_C( 60042), UINT32_C( 23422), UINT32_C( 30), UINT32_C( 44734),
1412
+ UINT32_C( 18663), UINT32_C( 39107), UINT32_C( 39261), UINT32_C( 1559980)) },
1413
+ { simde_x_mm256_set_epu32(UINT32_C(2418478797), UINT32_C(3856569345), UINT32_C(2562700829), UINT32_C(2670510577),
1414
+ UINT32_C(3958231909), UINT32_C(3386864730), UINT32_C(2249491002), UINT32_C( 367242130)),
1415
+ simde_x_mm256_set_epu32(UINT32_C( 106591767), UINT32_C( 591565864), UINT32_C( 241208), UINT32_C( 384474),
1416
+ UINT32_C( 63569588), UINT32_C(1007016971), UINT32_C( 701090048), UINT32_C( 4482965)),
1417
+ simde_x_mm256_set_epu32(UINT32_C( 22), UINT32_C( 6), UINT32_C( 10624), UINT32_C( 6945),
1418
+ UINT32_C( 62), UINT32_C( 3), UINT32_C( 3), UINT32_C( 81)) },
1419
+ { simde_x_mm256_set_epu32(UINT32_C(3497551851), UINT32_C(3538232808), UINT32_C(3581222707), UINT32_C(2092274030),
1420
+ UINT32_C(1202922035), UINT32_C(3381143079), UINT32_C(1645890362), UINT32_C(2497764821)),
1421
+ simde_x_mm256_set_epu32(UINT32_C( 7255461), UINT32_C( 387871), UINT32_C( 216379987), UINT32_C( 1108325),
1422
+ UINT32_C( 9779926), UINT32_C( 265173482), UINT32_C( 305369), UINT32_C(1628979148)),
1423
+ simde_x_mm256_set_epu32(UINT32_C( 482), UINT32_C( 9122), UINT32_C( 16), UINT32_C( 1887),
1424
+ UINT32_C( 122), UINT32_C( 12), UINT32_C( 5389), UINT32_C( 1)) }
1425
+ };
1426
+
1427
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1428
+ simde__m256i r = simde_mm256_div_epu32(test_vec[i].a, test_vec[i].b);
1429
+ simde_assert_m256i_u32(r, ==, test_vec[i].r);
1430
+ }
1431
+
1432
+ return MUNIT_OK;
1433
+ }
1434
+
1435
/* Table-driven munit test registered under the name mm256_div_epu64.
 *
 * Each of the 8 entries holds a dividend vector `a`, a divisor vector `b` and
 * the expected quotient vector `r`, all built with simde_x_mm256_set_epu64x.
 * The loop divides a by b and requires the result to equal r in every 64-bit
 * lane. `params` and `data` are unused. Returns MUNIT_OK once the loop has
 * run over every entry.
 *
 * NOTE(review): despite the function name and the unsigned setters, the loop
 * below calls simde_mm256_div_epi64 and compares with simde_assert_m256i_i64,
 * so simde_mm256_div_epu64 is never exercised here. The expected values also
 * appear to be signed quotients stored as their two's-complement bit
 * patterns: in the sixth entry, 11274709867061747164 / 4176708352330988763
 * expects 18446744073709551615 (-1 when read as int64), whereas the unsigned
 * quotient would be 2. Switching the call to the unsigned function would
 * therefore also require regenerating the `r` vectors - confirm intent.
 */
+ static MunitResult
1436
+ test_simde_mm256_div_epu64(const MunitParameter params[], void* data) {
1437
+ (void) params;
1438
+ (void) data;
1439
+
1440
+ const struct {
1441
+ simde__m256i a;
1442
+ simde__m256i b;
1443
+ simde__m256i r;
1444
+ } test_vec[8] = {
1445
+ { simde_x_mm256_set_epu64x(UINT64_C(10385902570114433083), UINT64_C(14228451038995253976),
1446
+ UINT64_C( 3524803476344021799), UINT64_C( 9008088981795720991)),
1447
+ simde_x_mm256_set_epu64x(UINT64_C( 11435629647830), UINT64_C( 134705148152),
1448
+ UINT64_C( 1685), UINT64_C( 72468903699)),
1449
+ simde_x_mm256_set_epu64x(UINT64_C(18446744073708846728), UINT64_C(18446744073678236607),
1450
+ UINT64_C( 2091871499313959), UINT64_C( 124302818)) },
1451
+ { simde_x_mm256_set_epu64x(UINT64_C( 2776707612149100363), UINT64_C(15446686956822865619),
1452
+ UINT64_C( 8116027459326381863), UINT64_C(10577862568627142107)),
1453
+ simde_x_mm256_set_epu64x(UINT64_C( 160900), UINT64_C( 876),
1454
+ UINT64_C( 6656645), UINT64_C( 198)),
1455
+ simde_x_mm256_set_epu64x(UINT64_C( 17257349982281), UINT64_C(18443319350973379601),
1456
+ UINT64_C( 1219236936824), UINT64_C(18407002247926307124)) },
1457
+ { simde_x_mm256_set_epu64x(UINT64_C(17966513918331168112), UINT64_C(15404442576328540960),
1458
+ UINT64_C( 1544001744444053712), UINT64_C(12311626015854130554)),
1459
+ simde_x_mm256_set_epu64x(UINT64_C( 73453582701), UINT64_C( 2241703492778),
1460
+ UINT64_C( 149), UINT64_C( 1898802076338580)),
1461
+ simde_x_mm256_set_epu64x(UINT64_C(18446744073703013744), UINT64_C(18446744073708194478),
1462
+ UINT64_C( 10362427815060763), UINT64_C(18446744073709548385)) },
1463
+ { simde_x_mm256_set_epu64x(UINT64_C( 4996618049503500636), UINT64_C( 3587306346705364576),
1464
+ UINT64_C( 1416661578746677042), UINT64_C(18012200189266188151)),
1465
+ simde_x_mm256_set_epu64x(UINT64_C( 9141117518131), UINT64_C( 259684114065326460),
1466
+ UINT64_C( 3735868918), UINT64_C( 13028085907926)),
1467
+ simde_x_mm256_set_epu64x(UINT64_C( 546609), UINT64_C( 13),
1468
+ UINT64_C( 379205376), UINT64_C(18446744073709518262)) },
1469
+ { simde_x_mm256_set_epu64x(UINT64_C(17900245410321819662), UINT64_C( 86463307544105486),
1470
+ UINT64_C( 7004808110937624000), UINT64_C( 5352056724630121100)),
1471
+ simde_x_mm256_set_epu64x(UINT64_C( 574976069), UINT64_C( 26168849408611714),
1472
+ UINT64_C( 479458176), UINT64_C( 85883846687)),
1473
+ simde_x_mm256_set_epu64x(UINT64_C(18446744072759079601), UINT64_C( 3),
1474
+ UINT64_C( 14609841820), UINT64_C( 62317384)) },
1475
+ { simde_x_mm256_set_epu64x(UINT64_C(18191047755947595201), UINT64_C(11274709867061747164),
1476
+ UINT64_C( 4957427800472277352), UINT64_C( 2636046644056480855)),
1477
+ simde_x_mm256_set_epu64x(UINT64_C( 455513034), UINT64_C( 4176708352330988763),
1478
+ UINT64_C( 255407), UINT64_C( 77468887445572755)),
1479
+ simde_x_mm256_set_epu64x(UINT64_C(18446744073148214621), UINT64_C(18446744073709551615),
1480
+ UINT64_C( 19409913590748), UINT64_C( 34)) },
1481
+ { simde_x_mm256_set_epu64x(UINT64_C(17236629464649076584), UINT64_C( 6716520602983844465),
1482
+ UINT64_C(12794135593178656259), UINT64_C( 3865374743078695737)),
1483
+ simde_x_mm256_set_epu64x(UINT64_C( 13893724010244), UINT64_C( 1),
1484
+ UINT64_C( 142890905), UINT64_C( 135073488234)),
1485
+ simde_x_mm256_set_epu64x(UINT64_C(18446744073709464519), UINT64_C( 6716520602983844465),
1486
+ UINT64_C(18446744034150641408), UINT64_C( 28616827)) },
1487
+ { simde_x_mm256_set_epu64x(UINT64_C( 3248934010021333275), UINT64_C( 8464322280604302303),
1488
+ UINT64_C(10783963704762759650), UINT64_C(14288989654597257942)),
1489
+ simde_x_mm256_set_epu64x(UINT64_C( 37187973814779), UINT64_C( 988730192),
1490
+ UINT64_C( 9409064941619), UINT64_C( 554649997)),
1491
+ simde_x_mm256_set_epu64x(UINT64_C( 87365), UINT64_C( 8560800862),
1492
+ UINT64_C(18446744073708737212), UINT64_C(18446744066213374853)) }
1493
+ };
1494
+
1495
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1496
/* NOTE(review): signed divide and signed comparison inside an "epu64" test;
 * see the header comment of this function. */
+ simde__m256i r = simde_mm256_div_epi64(test_vec[i].a, test_vec[i].b);
1497
+ simde_assert_m256i_i64(r, ==, test_vec[i].r);
1498
+ }
1499
+
1500
+ return MUNIT_OK;
1501
+ }
1502
+
1503
+ #endif /* defined(SIMDE_SVML_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
1504
+
1505
+ HEDLEY_DIAGNOSTIC_PUSH
1506
+ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
1507
+
1508
/* Registration table for the test cases of this file.
 *
 * The sixteen SIMDE_TESTS_DEFINE_TEST entries (eight mm_div_* and eight
 * mm256_div_* names) are compiled in only when SIMDE_SVML_NATIVE,
 * SIMDE_NO_NATIVE or SIMDE_ALWAYS_BUILD_NATIVE_TESTS is defined. The final
 * all-NULL entry is always present, so the array is never empty; presumably
 * it is the terminator munit looks for - confirm against the munit docs.
 * SIMDE_TESTS_DEFINE_TEST is defined elsewhere; it presumably maps each name
 * to the matching test_simde_<name> function - verify.
 */
+ static MunitTest test_suite_tests[] = {
1509
+ #if defined(SIMDE_SVML_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
1510
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epi8),
1511
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epi16),
1512
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epi32),
1513
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epi64),
1514
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epu8),
1515
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epu16),
1516
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epu32),
1517
+ SIMDE_TESTS_DEFINE_TEST(mm_div_epu64),
1518
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epi8),
1519
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epi16),
1520
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epi32),
1521
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epi64),
1522
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epu8),
1523
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epu16),
1524
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epu32),
1525
+ SIMDE_TESTS_DEFINE_TEST(mm256_div_epu64),
1526
+ #endif /* defined(SIMDE_SVML_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
1527
+
1528
/* All-NULL entry closing the table. */
+ { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
1529
+ };
1530
+
1531
/* Returns the address of a function-local static MunitSuite describing this
 * file's tests. The function name is produced by SIMDE_TESTS_GENERATE_SYMBOL
 * (defined elsewhere).
 *
 * The suite's first field is the string "/" followed by the stringified
 * SIMDE_TESTS_CURRENT_ISAX, and its test table is test_suite_tests. The
 * remaining initializers (NULL, 1, MUNIT_SUITE_OPTION_NONE) are presumably
 * "no child suites", "one iteration" and "no options" - confirm against the
 * MunitSuite definition. Because the object is static, the returned pointer
 * stays valid after the call returns.
 */
+ HEDLEY_C_DECL MunitSuite* SIMDE_TESTS_GENERATE_SYMBOL(suite)(void) {
1532
/* The (char*) cast drops const from the string literal; presumably this is
 * why cast-qual diagnostics are disabled around this part of the file. */
+ static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
1533
+
1534
+ return &suite;
1535
+ }
1536
+
1537
/* Stand-alone entry point, compiled only when SIMDE_TESTS_SINGLE_ISAX is
 * defined. It wraps test_suite_tests in a static suite whose first field is
 * the empty string, hands it to munit_suite_main together with argc/argv
 * (the second argument is NULL), and returns whatever munit_suite_main
 * returns.
 */
+ #if defined(SIMDE_TESTS_SINGLE_ISAX)
1538
+ int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) {
1539
+ static MunitSuite suite = { "", test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
1540
+
1541
+ return munit_suite_main(&suite, NULL, argc, argv);
1542
+ }
1543
+ #endif /* defined(SIMDE_TESTS_SINGLE_ISAX) */
1544
+
1545
+ HEDLEY_DIAGNOSTIC_POP