minimap2 0.2.24.3 → 0.2.24.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  3. data/ext/minimap2/lib/simde/COPYING +20 -0
  4. data/ext/minimap2/lib/simde/README.md +333 -0
  5. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  6. data/ext/minimap2/lib/simde/meson.build +33 -0
  7. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  8. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  9. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  10. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  11. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  12. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  13. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  14. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  15. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  16. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  17. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  18. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  19. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  20. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  21. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  29. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  30. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  31. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  32. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  33. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  34. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  35. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  36. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  37. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  38. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  39. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  40. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  41. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  42. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  43. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  44. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  45. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  46. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  47. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  48. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  49. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  50. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  51. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  52. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  53. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  54. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  55. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  56. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  57. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  58. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  59. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  60. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  61. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  62. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  63. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  64. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  65. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  66. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  67. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  68. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  69. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  70. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  71. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  72. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  73. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  74. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  75. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  76. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  77. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  78. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  79. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  80. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  81. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  82. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  83. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  84. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  85. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  86. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  87. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  88. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  89. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  90. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  91. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  92. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  93. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  94. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  95. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  96. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  97. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  98. data/lib/minimap2/aligner.rb +2 -2
  99. data/lib/minimap2/ffi/constants.rb +3 -0
  100. data/lib/minimap2/version.rb +1 -1
  101. metadata +99 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd42b0827b439d629ee498b2030da81f674f3682a818fafcb0987093bdb49e62
4
- data.tar.gz: fad5d4e9df7969c828afbdb1cb1a881b23aef2d0d0b61b124f1f8faa8cc3182a
3
+ metadata.gz: 5a1518fb8048d902865ee6771fedc9cb9b42693c984ec6616c2d1407597a9f26
4
+ data.tar.gz: 83501abbd0cedb037edb96c6a630b7c93adff87597950ef03359be41dad0710a
5
5
  SHA512:
6
- metadata.gz: 7866ba8829f4e8840ce752ae86e5c3fb1b4697e6ad538cbe80c09d93238f9a4cf59c22e52f23d3349eecefcba50bc1d610968c81db2ae0def98984dd83fecc8a
7
- data.tar.gz: 3609844781d7d9675175a791e2b20010d95e183920d891d0eceb924c6c408bfad7b2240659f1c3d07ce8eec6f2600830f23415333e6ca630c4df0a4b8ba39f3f
6
+ metadata.gz: d62d29fd4f5f8254f7fdaa9943cb1141a1fb2b832c33b30568fe659c6b4299a4827f596806c0d38368af53c63a48f1e1dcb2fe9562be7534fa3b1697a8b6b3de
7
+ data.tar.gz: 86e8179090acf840d615b3172fd1aa92e39c3b106c7629d079dfc473bb1a81180a922c13a19e60e41b54c6fda37534c6e61d9107f756bb87410ffa3fd87b93ce
@@ -0,0 +1,114 @@
1
+ # Contributing to SIMDe
2
+
3
+ First off, if you're even reading this, thank you! There is a lot of
4
+ work to do, and any help is appreciated.
5
+
6
+ If you haven't already, please read the
7
+ [README](https://github.com/nemequ/simde/blob/master/README.md). The
8
+ [wiki](https://github.com/nemequ/simde/wiki) also has some good
9
+ information, especially the
10
+ [FAQ](https://github.com/nemequ/simde/wiki/FAQ) and a guide on how to
11
+ [implement a new
12
+ function](https://github.com/nemequ/simde/wiki/Implementing-a-New-Function).
13
+
14
+ For information on developing for architectures you don't have access
15
+ to, please see the [Development
16
+ Environment](https://github.com/nemequ/simde/wiki/Development-Environment)
17
+ page on the wiki.
18
+
19
+ If you still have questions, or if anything below doesn't make sense
20
+ to you, please feel free to use the [issue
21
+ tracker](https://github.com/nemequ/simde/issues) or the [mailing
22
+ list](https://groups.google.com/forum/#!forum/simde) to ask. I know
23
+ the SIMDe documentation needs a lot of improvement, and asking
24
+ questions will help us understand what is missing, so please don't be
25
+ shy!
26
+
27
+ ## Building the Tests
28
+
29
+ SIMDe contains an extensive test suite used for development. Most
30
+ users will never need to build the suite, but if you're contributing
31
+ code to SIMDe you'll need to build them.
32
+
33
+ Here is the basic procedure for compiling the tests:
34
+
35
+ ```bash
36
+ mkdir test/build
37
+ cd test/build
38
+ CFLAGS="-march=native" CXXFLAGS="-march=native" cmake ..
39
+ make -j
40
+ ./run-tests
41
+ ```
42
+
43
+ Note that `-march=native` may not be the right flag for your compiler.
44
+ That should work for most compilers on x86/x86_64, though MSVC is an
45
+ exception (try `/arch:AVX2` instead of `-march=native`). On other
46
+ architectures please consult your compiler documentation to find out
47
+ what flags you should use to enable the SIMD extension for your target
48
+ platform. Here are a few to try:
49
+
50
+ * ARM:
51
+ * `-march=armv8-a+simd` (for ARMv8)
52
+ * `-march=armv7-a -mfpu=neon` (for ARMv7)
53
+ * POWER
54
+ * `-mcpu=native`
55
+
56
+ If you need a flag not listed above, please let us know so we can add
57
+ it to the list.
58
+
59
+ ## Coding Style
60
+
61
+ SIMDe uses two spaces for indentation. Please adjust your editor
62
+ accordingly.
63
+
64
+ The coding style for preprocessor macros is a bit mixed since I made
65
+ some changes mid-project. For new code, please indent the
66
+ preprocessor conditions before the hash to the same level as the
67
+ normal code would be, and indent the code inside of preprocessor
68
+ conditions as if the conditions were normal code. For example:
69
+
70
+ ```c
71
+ int
72
+ foo(void) {
73
+ #if 1
74
+ bar();
75
+ #else
76
+ baz();
77
+ #endif
78
+ }
79
+ ```
80
+
81
+ Other than that, please just try to follow the existing style. We'll
82
+ add new rules here as the need arises.
83
+
84
+ ## Commit Messages
85
+
86
+ Git commit messages should contain lines no longer than 72 characters.
87
+ The first line should always be a one-line summary of the commit, with
88
+ the relevant component followed by a colon and a space (if
89
+ applicable), then the summary.
90
+
91
+ If the one-line summary is insufficient to fully describe the changes,
92
+ further descriptive paragraphs should be added, separated by blank
93
+ lines.
94
+
95
+ For example:
96
+
97
+ ```
98
+ sse: add magical code to make everything go fast
99
+
100
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur
101
+ interdum scelerisque risus non ultricies. Vivamus id tristique mauris.
102
+ Mauris et augue vel urna aliquam posuere. Morbi in sem nec ante
103
+ ullamcorper ultrices in quis nibh. In felis velit, semper a mauris sed,
104
+ congue ullamcorper enim. Vestibulum ante ipsum primis in faucibus orci
105
+ luctus et ultrices posuere cubilia Curae; Nulla facilisi. Donec
106
+ scelerisque elit dui, et aliquet magna tincidunt eu.
107
+
108
+ Curabitur rhoncus lacus ac elit pulvinar, quis posuere ante ultrices.
109
+ Suspendisse at mauris vitae justo pretium tempor et in mauris. Nunc
110
+ facilisis nulla a ante tincidunt, imperdiet rhoncus metus interdum.
111
+ Vivamus sed nunc vel tellus porta consequat. Donec quis porttitor elit,
112
+ et cursus urna. Donec et sapien lorem. In imperdiet magna at aliquet
113
+ hendrerit.
114
+ ```
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,333 @@
1
+ # SIMD Everywhere
2
+ [![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/simd-everywhere/community)
3
+
4
+ The SIMDe header-only library provides fast, portable implementations of
5
+ [SIMD intrinsics](https://en.wikipedia.org/wiki/SIMD) on hardware which
6
+ doesn't natively support them, such as calling [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions)
7
+ functions on ARM. There is no performance penalty if the hardware
8
+ supports the native implementation (*e.g.*, SSE/[AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
9
+ runs at full speed on [x86](https://en.wikipedia.org/wiki/X86),
10
+ [NEON](https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(Neon)) on [ARM](https://en.wikipedia.org/wiki/ARM_architecture),
11
+ *etc.*).
12
+
13
+ This makes porting code to other architectures much easier in a few
14
+ key ways:
15
+
16
+ First, instead of forcing you to rewrite everything for each
17
+ architecture, SIMDe lets you get a port up and running almost
18
+ effortlessly. You can then start working on switching the most
19
+ performance-critical sections to native intrinsics, improving
20
+ performance gradually. SIMDe lets (for example) SSE/AVX and NEON code
21
+ exist side-by-side, in the same implementation.
22
+
23
+ Second, SIMDe makes it easier to write code targeting [ISA](https://en.wikipedia.org/wiki/Instruction_set_architecture)
24
+ extensions you don't have convenient access to. You can run NEON code on your
25
+ x86 machine *without an emulator*. Obviously you'll eventually want
26
+ to test on the actual hardware you're targeting, but for most
27
+ development, SIMDe can provide a much easier path.
28
+
29
+ SIMDe takes a very different approach from most other SIMD abstraction
30
+ layers in that it aims to expose the entire functionality of the
31
+ underlying instruction set. Instead of limiting functionality to the
32
+ lowest common denominator, SIMDe tries to minimize the amount of
33
+ effort required to port while still allowing you the space to optimize
34
+ as needed.
35
+
36
+ The current focus is on writing complete portable implementations,
37
+ though a large number of functions already have accelerated
38
+ implementations using one (or more) of the following:
39
+
40
+ * SIMD intrinsics from other ISA extensions (e.g., using NEON to
41
+ implement SSE).
42
+ * Compiler-specific vector extensions and built-ins such as
43
+ [`__builtin_shufflevector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector)
44
+ and
45
+ [`__builtin_convertvector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-convertvector)
46
+ * Compiler auto-vectorization hints, using:
47
+ * [OpenMP 4 SIMD](http://www.openmp.org/)
48
+ * [Cilk Plus](https://www.cilkplus.org/)
49
+ * [GCC loop-specific pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html)
50
+ * [clang pragma loop hint directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives)
51
+
52
+ For an example of a project using SIMDe, see
53
+ [LZSSE-SIMDe](https://github.com/nemequ/LZSSE-SIMDe).
54
+
55
+ You can [try SIMDe online](https://simde.netlify.com/godbolt/demo)
56
+ using Compiler Explorer and an amalgamated SIMDe header.
57
+
58
+ If you have any questions, please feel free to use the
59
+ [issue tracker](https://github.com/nemequ/simde/issues) or the
60
+ [mailing list](https://groups.google.com/forum/#!forum/simde).
61
+
62
+ ## Current Status
63
+
64
+ There are currently complete implementations of the following instruction
65
+ sets:
66
+
67
+ * [MMX](https://en.wikipedia.org/wiki/MMX_(instruction_set))
68
+ * [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions)
69
+ * [SSE2](https://en.wikipedia.org/wiki/SSE2)
70
+ * [SSE3](https://en.wikipedia.org/wiki/SSE3)
71
+ * [SSSE3](https://en.wikipedia.org/wiki/SSSE3)
72
+ * [SSE4.1](https://en.wikipedia.org/wiki/SSE4#SSE4.1)
73
+ * [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
74
+ * [FMA](https://en.wikipedia.org/wiki/FMA_instruction_set)
75
+
76
+ As well as partial support for many others; see the
77
+ [instruction-set-support](https://github.com/nemequ/simde/issues?q=is%3Aissue+is%3Aopen+label%3Ainstruction-set-support+sort%3Aupdated-desc)
78
+ label in the issue tracker for details on progress. If you'd like to
79
+ be notified when an instruction set is available you may subscribe to
80
+ the relevant issue.
81
+
82
+ If you have a project you're interested in using with SIMDe but we
83
+ don't yet support all the functions you need, please file an issue
84
+ with a list of what's missing so we know what to prioritize.
85
+
86
+ The `master` branch is protected so commits never reach it unless
87
+ they have passed extensive CI checks. Status badges don't really
88
+ make sense since they will always be green, but here are the links:
89
+
90
+ * [Travis CI](https://travis-ci.org/nemequ/simde)
91
+ * [AppVeyor](https://ci.appveyor.com/project/quixdb/simde)
92
+ * [GitHub Actions](https://github.com/nemequ/simde/actions)
93
+ * [Azure Pipelines](https://dev.azure.com/simd-everywhere/SIMDe/_build)
94
+ * [Drone CI](https://cloud.drone.io/nemequ/simde)
95
+
96
+ ## Contributing
97
+
98
+ First off, if you're reading this: thank you! Even considering
99
+ contributing to SIMDe is very much appreciated!
100
+
101
+ SIMDe is a fairly large undertaking; there are a *lot* of functions to
102
+ get through and a lot of opportunities for optimization on different
103
+ platforms, so we're very happy for any help you can provide.
104
+
105
+ Programmers of all skill levels are welcome; there are lots of tasks
106
+ which are pretty straightforward and don't require any special
107
+ expertise.
108
+
109
+ If you're not sure how you'd like to contribute, please consider taking
110
+ a look at [the issue tracker](https://github.com/nemequ/simde/issues).
111
+ There is a [good first issue](https://github.com/nemequ/simde/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
112
+ tag if you want to ease into your first contributions, but if you're
113
+ interested in something else please get in touch via the issue tracker;
114
+ we're happy to help you get a handle on whatever you are interested in.
115
+
116
+ If you're interested in implementing currently unimplemented functions,
117
+ there is [a
118
+ guide](https://github.com/nemequ/simde/wiki/Implementing-a-New-Function)
119
+ explaining how to add new functions and how to quickly and easily get
120
+ a test case in place. It's a bit rough right now, but if anything is
121
+ unclear please feel free to use the issue tracker to ask about
122
+ anything you're not clear on.
123
+
124
+ ## Usage
125
+
126
+ First, it is important to note that *you do not need two separate
127
+ versions* (one using SIMDe, the other native). If the native functions
128
+ are available SIMDe will use them, and compilers easily optimize away
129
+ any overhead from SIMDe; all they have to do is some basic inlining.
130
+ `-O2` should be enough, but we strongly recommend `-O3` (or whatever
131
+ flag instructs your compiler to aggressively optimize) since many of
132
+ the portable fallbacks are substantially faster with aggressive
133
+ auto-vectorization that isn't enabled at lower optimization levels.
134
+
135
+ Each instruction set has a separate file; `x86/mmx.h` for MMX,
136
+ `x86/sse.h` for SSE, `x86/sse2.h` for SSE2, and so on. Just include
137
+ the header for whichever instruction set(s) you want, and SIMDe will
138
+ provide the fastest implementation it can given which extensions
139
+ you've enabled in your compiler (e.g., if you want to use NEON to
140
+ implement SSE, you may need to pass something like `-mfpu=neon`
141
+ or `-march=armv8-a+simd`. See
142
+ [GCC ARM-Options](https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html)
143
+ for more information).
144
+
145
+ If you define `SIMDE_ENABLE_NATIVE_ALIASES` before including SIMDe
146
+ you can use the same names as the native functions. Unfortunately,
147
+ this is somewhat error-prone due to portability issues in the APIs, so
148
+ it's recommended to only do this for testing. When
149
+ `SIMDE_ENABLE_NATIVE_ALIASES` is undefined only the versions prefixed
150
+ with `simde_` will be available; for example, the MMX `_mm_add_pi8`
151
+ intrinsic becomes `simde_mm_add_pi8`, and `__m64` becomes `simde__m64`.
152
+
153
+ Since SIMDe is meant to be portable, many functions which assume types
154
+ are of a specific size have been altered to use fixed-width types
155
+ instead. For example, Intel's APIs use `char` for signed 8-bit
156
+ integers, but `char` on ARM is generally unsigned. SIMDe uses `int8_t`
157
+ to make the API portable, but that means your code may require some
158
+ minor changes (such as using `int8_t` instead of `char`) to work on
159
+ other platforms.
160
+
161
+ That said, the changes are usually quite minor. It's often enough to
162
+ just use search and replace; manual changes are required pretty
163
+ infrequently.
164
+
165
+ For best performance, in addition to `-O3` (or whatever your compiler's
166
+ equivalent is), you should enable OpenMP 4 SIMD support by defining
167
+ `SIMDE_ENABLE_OPENMP` before including any SIMDe headers, and
168
+ enabling OpenMP support in your compiler. GCC and ICC both support a
169
+ flag to enable only OpenMP SIMD support instead of full OpenMP (the OpenMP
170
+ SIMD support doesn't require the OpenMP run-time library); for GCC the
171
+ flag is `-fopenmp-simd`, for ICC `-qopenmp-simd`. SIMDe also supports
172
+ using [Cilk Plus](https://www.cilkplus.org/), [GCC loop-specific
173
+ pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html),
174
+ or [clang pragma loop hint
175
+ directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives),
176
+ though these are not nearly as effective as OpenMP SIMD and depending
177
+ on them will likely result in less efficient code.
178
+
179
+ ## Portability
180
+
181
+ ### Compilers
182
+
183
+ SIMDe does depend on some C99 features, though the subset supported by
184
+ MSVC also works. While we do our best to make sure we provide optimized
185
+ implementations where they are supported, SIMDe does contain portable
186
+ fallbacks which are designed to work on any C99 compiler.
187
+
188
+ Every commit is tested in CI on multiple compilers, platforms, and
189
+ configurations, and our test coverage is extremely extensive.
190
+ Currently tested compilers include:
191
+
192
+ * GCC versions back to 4.8
193
+ * Clang versions back to 7
194
+ * Microsoft Visual Studio back to 12 (2013)
195
+ * IBM XL C/C++
196
+ * Intel C/C++ Compiler (ICC)
197
+ * PGI C Compiler
198
+
199
+ I'm generally willing to accept patches to add support for other
200
+ compilers, as long as they're not too disruptive, *especially* if we
201
+ can get CI support going. We currently use Travis CI, AppVeyor, and
202
+ Microsoft Azure Pipelines, but other CI platforms can be added as
203
+ necessary.
204
+
205
+ ### Hardware
206
+
207
+ The following architectures are tested in CI for every commit:
208
+
209
+ * x86_64
210
+ * x86
211
+ * AArch64
212
+ * ARMv8
213
+ * ARMv7
214
+ * PPC64
215
+ * MIPS
216
+
217
+ We would love to add more, so patches are extremely welcome!
218
+
219
+ The tests currently contain some assumptions that they are running on
220
+ a little-endian CPU. We're working on this, but for now big endian
221
+ CPUs *should* work, but we can't promise.
222
+
223
+ ## Related Projects
224
+
225
+ * The "builtins" module in
226
+ [portable-snippets](https://github.com/nemequ/portable-snippets)
227
+ does much the same thing, but for compiler-specific intrinsics
228
+ (think `__builtin_clz` and `_BitScanForward`), **not** SIMD
229
+ intrinsics.
230
+ * Intel offers an emulator, the [Intel® Software Development
231
+ Emulator](https://software.intel.com/en-us/articles/intel-software-development-emulator/)
232
+ which can be used to develop software which uses Intel intrinsics
233
+ without having to own hardware which supports them, though it
234
+ doesn't help for deployment.
235
+ * [Iris](https://github.com/AlexYaruki/iris) is the only other project
236
+ I'm aware of which is attempting to create portable implementations
237
+ like SIMDe. SIMDe is much further along on the Intel side, but Iris
238
+ looks to be in better shape on ARM. C++-only, Apache 2.0 license.
239
+ AFAICT there are no accelerated fallbacks, nor is there a good way to
240
+ add them since it relies extensively on templates.
241
+ * There are a few projects trying to implement one set with another:
242
+ * [ARM_NEON_2_x86_SSE](https://github.com/intel/ARM_NEON_2_x86_SSE)
243
+ — implementing NEON using SSE. Quite extensive, Apache 2.0
244
+ license.
245
+ * [sse2neon](https://github.com/jratcliff63367/sse2neon) —
246
+ implementing SSE using NEON. This code has already been merged
247
+ into SIMDe.
248
+ * [veclib](https://github.com/IvantheDugtrio/veclib) — implementing
249
+ SSE2 using AltiVec/VMX, using a non-free IBM library called
250
+ [powerveclib](https://www.ibm.com/developerworks/community/groups/community/powerveclib/)
251
+ * [SSE-to-NEON](https://github.com/otim/SSE-to-NEON) — implementing
252
+ SSE with NEON. Non-free, C++.
253
+ * [arm-neon-tests](https://github.com/christophe-lyon/arm-neon-tests)
254
+ contains tests to verify NEON implementations.
255
+
256
+ If you know of any other related projects, please [let us
257
+ know](https://github.com/nemequ/simde/issues/new)!
258
+
259
+ ## Caveats
260
+
261
+ Sometimes features can't be emulated. If SIMDe is operating in native
262
+ mode the functions will work as expected, but if there is no native
263
+ support some caveats apply:
264
+
265
+ * Many functions require <math.h> and/or <fenv.h>. SIMDe will still
266
+ work without those headers, but the results of those functions are
267
+ undefined.
268
+ * x86 / x86_64
269
+ * SSE
270
+ * `SIMDE_MM_SET_ROUNDING_MODE()` will use `fesetround()`, altering
271
+ the global rounding mode.
272
+ * `simde_mm_getcsr` and `simde_mm_setcsr` only implement bits 13
273
+ and 14 (rounding mode).
274
+ * AVX
275
+ * `simde_mm256_test*` do not set the CF/ZF registers as there is
276
+ no portable way to implement that functionality.
277
+ * `simde_mm256_zeroall` and `simde_mm256_zeroupper` are not
278
+ implemented as there is no portable way to implement that
279
+ functionality.
280
+
281
+ Additionally, there are some known limitations which apply when using
282
+ native aliases (`SIMDE_ENABLE_NATIVE_ALIASES`):
283
+
284
+ * On Windows x86 (but not x86_64), some MMX functions and SSE/SSE2
285
+ functions which use MMX types (__m64) other than for pointers may
286
+ return incorrect results.
287
+
288
+ Also, as mentioned earlier, while some APIs make assumptions about
289
+ basic types (*e.g.*, `int` is 32 bits), SIMDe does not, so many types
290
+ have been altered to use portable fixed-width versions such as
291
+ `int32_t`.
292
+
293
+ If you find any other differences, please file an issue so we can either fix
294
+ it or add it to the list above.
295
+
296
+ ## Benefactors
297
+
298
+ SIMDe uses resources provided for free by a number of organizations.
299
+ While this shouldn't be taken to imply endorsement of SIMDe, we're
300
+ tremendously grateful for their support:
301
+
302
+ * [GitHub](https://github.com/) — hosts our source repository, issue
303
+ tracker, etc.
304
+ * [Travis CI](https://travis-ci.org/) — provides CI testing on
305
+ numerous platforms.
306
+ * [AppVeyor](https://www.appveyor.com/) — provides CI testing on
307
+ Windows.
308
+ * [Drone CI](https://drone.io/) — provides CI testing on 32-bit ARM
309
+ platforms, etc.
310
+ * [IntegriCloud](https://integricloud.com/) — provides access to a very
311
+ fast POWER9 server for developing AltiVec/VMX support.
312
+ * [GCC Compile Farm](https://gcc.gnu.org/wiki/CompileFarm) — provides
313
+ access to a wide range of machines with different architectures for
314
+ developing support for various ISA extensions.
315
+ * [CodeCov.io](https://codecov.io/) — provides code coverage analysis
316
+ for our test cases.
317
+
318
+ Without such organizations donating resources, SIMDe wouldn't be nearly
319
+ as useful or usable as it is today.
320
+
321
+ We would also like to thank anyone who has helped develop the myriad
322
+ of software on which SIMDe relies, including compilers and analysis
323
+ tools.
324
+
325
+ Finally, a special thank you to
326
+ [anyone who has contributed](https://github.com/nemequ/simde/graphs/contributors)
327
+ to SIMDe, filed bugs, provided suggestions, or helped with SIMDe
328
+ development in any way.
329
+
330
+ ## License
331
+
332
+ SIMDe is distributed under an MIT-style license; see COPYING for
333
+ details.
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/python3
2
+
3
+ # amalgamate.py
4
+ # Written by Evan Nemerson <evan@nemerson.com>
5
+ #
6
+ # To the extent possible under law, the author(s) have dedicated all
7
+ # copyright and related and neighboring rights to this software to
8
+ # the public domain worldwide. This software is distributed without
9
+ # any warranty.
10
+ #
11
+ # For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12
+ # SPDX-License-Identifier: CC0-1.0
13
+
14
+ # Quick and dirty script to amalgamate C into a single file. Includes
15
+ # using angle brackets (#include <foo.h>) will be preserved, but for
16
+ # includes using double quotes (#include "foo.h") the file will be
17
+ # included by this script.
18
+ #
19
+ # If you make any improvements please report them in the SIMDe issue
20
+ # tracker at <https://github.com/nemequ/simde/issues> or directly to
21
+ # the author so they can be merged back into the original version.
22
+
23
+ import sys, re, os, subprocess
24
+
25
+ amalgamate_include = re.compile('^\\s*#\\s*include\\s+\\"([^)]+)\\"\\s$')
26
+ already_included = []
27
+
28
+ def amalgamate(filename, stream):
29
+ full_path = os.path.realpath(os.path.realpath(filename))
30
+ srcdir = os.path.dirname(full_path)
31
+
32
+ if full_path not in already_included:
33
+ already_included.insert(-1, full_path)
34
+ with open(filename) as input_file:
35
+ stream.write('/* :: Begin ' + os.path.relpath(full_path) + ' :: */\n')
36
+
37
+ for source_line in input_file:
38
+ a9e_inc_m = amalgamate_include.match(source_line)
39
+ if a9e_inc_m:
40
+ amalgamate(os.path.join(srcdir, a9e_inc_m.group(1)), stream)
41
+ else:
42
+ stream.write(source_line)
43
+
44
+ stream.write('/* :: End ' + os.path.relpath(full_path) + ' :: */\n')
45
+
46
+ if len(sys.argv) != 2:
47
+ sys.stderr.write("USAGE: " + sys.argv[0] + ' SOURCE_FILE\n\n')
48
+ sys.stderr.write("This will print a copy of $SOURCE_FILE to stdout, while replacing\n")
49
+ sys.stderr.write("all '#include AMALGAMATE(file)' lines with copies of file.\n")
50
+
51
+ sys.exit(1)
52
+
53
+ print('/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */')
54
+
55
+ git_id = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()
56
+ print("/* {:s} */".format(git_id))
57
+
58
+ amalgamate(sys.argv[1], sys.stdout)
@@ -0,0 +1,33 @@
1
+ project('SIMDe', 'c', 'cpp',
2
+ default_options: ['c_std=c99'],
3
+ license: 'MIT',
4
+ version: '0.5.0')
5
+
6
+ cc = meson.get_compiler('c')
7
+ cxx = meson.get_compiler('cpp')
8
+
9
+ subdir('test')
10
+
11
+ install_headers(
12
+ [
13
+ 'simde/hedley.h',
14
+ 'simde/check.h',
15
+ 'simde/debug-trap.h',
16
+ 'simde/simde-arch.h',
17
+ 'simde/simde-common.h',
18
+
19
+ 'simde/x86/avx2.h',
20
+ 'simde/x86/avx512bw.h',
21
+ 'simde/x86/avx512vl.h',
22
+ 'simde/x86/avx512f.h',
23
+ 'simde/x86/fma.h',
24
+ 'simde/x86/mmx.h',
25
+ 'simde/x86/sse.h',
26
+ 'simde/x86/sse2.h',
27
+ 'simde/x86/sse3.h',
28
+ 'simde/x86/sse4.1.h',
29
+ 'simde/x86/sse4.2.h',
30
+ 'simde/x86/ssse3.h',
31
+ 'simde/x86/svml.h',
32
+ ],
33
+ subdir: 'simde')
@@ -0,0 +1,20 @@
1
+ [build]
2
+ publish = 'web'
3
+ command = 'mkdir -p web/amalgamated/x86 && cd simde && for header in x86/*.h; do ../amalgamate.py "$header" > ../web/amalgamated/"$header"; done'
4
+
5
+ [[headers]]
6
+ for = "/amalgamated/x86/*.h"
7
+ [headers.values]
8
+ Access-Control-Allow-Origin = "*"
9
+
10
+ [[redirects]]
11
+ from = "/godbolt/simple"
12
+ to = "https://godbolt.org/z/-twon_"
13
+ status = 302
14
+ force = true
15
+
16
+ [[redirects]]
17
+ from = "/godbolt/demo"
18
+ to = "https://godbolt.org/z/8cAgiy"
19
+ status = 302
20
+ force = true