minimap2 0.2.25.1 → 0.2.26.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/ext/Rakefile +2 -2
- data/ext/minimap2/NEWS.md +9 -0
- data/ext/minimap2/README.md +2 -2
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/minimap.h +1 -1
- data/ext/minimap2/minimap2.1 +1 -1
- data/ext/minimap2/misc/paftools.js +1 -1
- data/ext/minimap2/python/mappy.pyx +1 -1
- data/ext/minimap2/setup.py +22 -32
- data/lib/minimap2/version.rb +1 -1
- metadata +1 -97
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
- data/ext/minimap2/lib/simde/COPYING +0 -20
- data/ext/minimap2/lib/simde/README.md +0 -333
- data/ext/minimap2/lib/simde/amalgamate.py +0 -58
- data/ext/minimap2/lib/simde/meson.build +0 -33
- data/ext/minimap2/lib/simde/netlify.toml +0 -20
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
- data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
- data/ext/minimap2/lib/simde/simde/check.h +0 -267
- data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
- data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
- data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
- data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
- data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
- data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
- data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
- data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
- data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
- data/ext/minimap2/lib/simde/test/meson.build +0 -64
- data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
- data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
- data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
- data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
- data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
- data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
- data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
- data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
- data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
- data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
- data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
- data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
- data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
- data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
- data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
- data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
- data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
- data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
- data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
@@ -1,333 +0,0 @@
|
|
1
|
-
# SIMD Everywhere
|
2
|
-
[![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/simd-everywhere/community)
|
3
|
-
|
4
|
-
The SIMDe header-only library provides fast, portable implementations of
|
5
|
-
[SIMD intrinsics](https://en.wikipedia.org/wiki/SIMD) on hardware which
|
6
|
-
doesn't natively support them, such as calling [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions)
|
7
|
-
functions on ARM. There is no performance penalty if the hardware
|
8
|
-
supports the native implementation (*e.g.*, SSE/[AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
|
9
|
-
runs at full speed on [x86](https://en.wikipedia.org/wiki/X86),
|
10
|
-
[NEON](https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(Neon)) on [ARM](https://en.wikipedia.org/wiki/ARM_architecture),
|
11
|
-
*etc.*).
|
12
|
-
|
13
|
-
This makes porting code to other architectures much easier in a few
|
14
|
-
key ways:
|
15
|
-
|
16
|
-
First, instead of forcing you to rewrite everything for each
|
17
|
-
architecture, SIMDe lets you get a port up and running almost
|
18
|
-
effortlessly. You can then start working on switching the most
|
19
|
-
performance-critical sections to native intrinsics, improving
|
20
|
-
performance gradually. SIMDe lets (for example) SSE/AVX and NEON code
|
21
|
-
exist side-by-side, in the same implementation.
|
22
|
-
|
23
|
-
Second, SIMDe makes it easier to write code targeting [ISA](https://en.wikipedia.org/wiki/Instruction_set_architecture)
|
24
|
-
extensions you don't have convenient access to. You can run NEON code on your
|
25
|
-
x86 machine *without an emulator*. Obviously you'll eventually want
|
26
|
-
to test on the actual hardware you're targeting, but for most
|
27
|
-
development, SIMDe can provide a much easier path.
|
28
|
-
|
29
|
-
SIMDe takes a very different approach from most other SIMD abstraction
|
30
|
-
layers in that it aims to expose the entire functionality of the
|
31
|
-
underlying instruction set. Instead of limiting functionality to the
|
32
|
-
lowest common denominator, SIMDe tries to minimize the amount of
|
33
|
-
effort required to port while still allowing you the space to optimize
|
34
|
-
as needed.
|
35
|
-
|
36
|
-
The current focus is on writing complete portable implementations,
|
37
|
-
though a large number of functions already have accelerated
|
38
|
-
implementations using one (or more) of the following:
|
39
|
-
|
40
|
-
* SIMD intrinsics from other ISA extensions (e.g., using NEON to
|
41
|
-
implement SSE).
|
42
|
-
* Compiler-specific vector extensions and built-ins such as
|
43
|
-
[`__builtin_shufflevector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector)
|
44
|
-
and
|
45
|
-
[`__builtin_convertvector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-convertvector)
|
46
|
-
* Compiler auto-vectorization hints, using:
|
47
|
-
* [OpenMP 4 SIMD](http://www.openmp.org/)
|
48
|
-
* [Cilk Plus](https://www.cilkplus.org/)
|
49
|
-
* [GCC loop-specific pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html)
|
50
|
-
* [clang pragma loop hint directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives)
|
51
|
-
|
52
|
-
For an example of a project using SIMDe, see
|
53
|
-
[LZSSE-SIMDe](https://github.com/nemequ/LZSSE-SIMDe).
|
54
|
-
|
55
|
-
You can [try SIMDe online](https://simde.netlify.com/godbolt/demo)
|
56
|
-
using Compiler Explorer and an amalgamated SIMDe header.
|
57
|
-
|
58
|
-
If you have any questions, please feel free to use the
|
59
|
-
[issue tracker](https://github.com/nemequ/simde/issues) or the
|
60
|
-
[mailing list](https://groups.google.com/forum/#!forum/simde).
|
61
|
-
|
62
|
-
## Current Status
|
63
|
-
|
64
|
-
There are currently complete implementations of the following instruction
|
65
|
-
sets:
|
66
|
-
|
67
|
-
* [MMX](https://en.wikipedia.org/wiki/MMX_(instruction_set))
|
68
|
-
* [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions)
|
69
|
-
* [SSE2](https://en.wikipedia.org/wiki/SSE2)
|
70
|
-
* [SSE3](https://en.wikipedia.org/wiki/SSE3)
|
71
|
-
* [SSSE3](https://en.wikipedia.org/wiki/SSSE3)
|
72
|
-
* [SSE4.1](https://en.wikipedia.org/wiki/SSE4#SSE4.1)
|
73
|
-
* [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
|
74
|
-
* [FMA](https://en.wikipedia.org/wiki/FMA_instruction_set)
|
75
|
-
|
76
|
-
As well as partial support for many others; see the
|
77
|
-
[instruction-set-support](https://github.com/nemequ/simde/issues?q=is%3Aissue+is%3Aopen+label%3Ainstruction-set-support+sort%3Aupdated-desc)
|
78
|
-
label in the issue tracker for details on progress. If you'd like to
|
79
|
-
be notified when an instruction set is available you may subscribe to
|
80
|
-
the relevant issue.
|
81
|
-
|
82
|
-
If you have a project you're interested in using with SIMDe but we
|
83
|
-
don't yet support all the functions you need, please file an issue
|
84
|
-
with a list of what's missing so we know what to prioritize.
|
85
|
-
|
86
|
-
The `master` branch is protected so commits never reach it unless
|
87
|
-
they have passed extensive CI checks. Status badges don't really
|
88
|
-
make sense since they will always be green, but here are the links:
|
89
|
-
|
90
|
-
* [Travis CI](https://travis-ci.org/nemequ/simde)
|
91
|
-
* [AppVeyor](https://ci.appveyor.com/project/quixdb/simde)
|
92
|
-
* [GitHub Actions](https://github.com/nemequ/simde/actions)
|
93
|
-
* [Azure Pipelines](https://dev.azure.com/simd-everywhere/SIMDe/_build)
|
94
|
-
* [Drone CI](https://cloud.drone.io/nemequ/simde)
|
95
|
-
|
96
|
-
## Contributing
|
97
|
-
|
98
|
-
First off, if you're reading this: thank you! Even considering
|
99
|
-
contributing to SIMDe is very much appreciated!
|
100
|
-
|
101
|
-
SIMDe is a fairly large undertaking; there are a *lot* of functions to
|
102
|
-
get through and a lot of opportunities for optimization on different
|
103
|
-
platforms, so we're very happy for any help you can provide.
|
104
|
-
|
105
|
-
Programmers of all skill levels are welcome, there are lots of tasks
|
106
|
-
which are pretty straightforward and don't require any special
|
107
|
-
expertise.
|
108
|
-
|
109
|
-
If you're not sure how you'd like to contribute, please consider taking
|
110
|
-
a look at [the issue tracker](https://github.com/nemequ/simde/issues).
|
111
|
-
There is a [good first issue](https://github.com/nemequ/simde/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
|
112
|
-
tag if you want to ease into your first contributions, but if you're
|
113
|
-
interested in something else please get in touch via the issue tracker;
|
114
|
-
we're happy to help you get a handle on whatever you are interested in.
|
115
|
-
|
116
|
-
If you're interested in implementing currently unimplemented functions,
|
117
|
-
there is [a
|
118
|
-
guide](https://github.com/nemequ/simde/wiki/Implementing-a-New-Function)
|
119
|
-
explaining how to add new functions and how to quickly and easily get
|
120
|
-
a test case in place. It's a bit rough right now, but if anything is
|
121
|
-
unclear please feel free to use the issue tracker to ask about
|
122
|
-
anything you're not clear on.
|
123
|
-
|
124
|
-
## Usage
|
125
|
-
|
126
|
-
First, it is important to note that *you do not need two separate
|
127
|
-
versions* (one using SIMDe, the other native). If the native functions
|
128
|
-
are available SIMDe will use them, and compilers easily optimize away
|
129
|
-
any overhead from SIMDe; all they have to do is some basic inlining.
|
130
|
-
`-O2` should be enough, but we strongly recommend `-O3` (or whatever
|
131
|
-
flag instructs your compiler to aggressively optimize) since many of
|
132
|
-
the portable fallbacks are substantially faster with aggressive
|
133
|
-
auto-vectorization that isn't enabled at lower optimization levels.
|
134
|
-
|
135
|
-
Each instruction set has a separate file; `x86/mmx.h` for MMX,
|
136
|
-
`x86/sse.h` for SSE, `x86/sse2.h` for SSE2, and so on. Just include
|
137
|
-
the header for whichever instruction set(s) you want, and SIMDe will
|
138
|
-
provide the fastest implementation it can given which extensions
|
139
|
-
you've enabled in your compiler (i.e., if you want to use NEON to
|
140
|
-
implement SSE, you may need to pass something like `-mfpu=neon`
|
141
|
-
or `-march=armv8-a+simd`. See
|
142
|
-
[GCC ARM-Options](https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html)
|
143
|
-
for more information).
|
144
|
-
|
145
|
-
If you define `SIMDE_ENABLE_NATIVE_ALIASES` before including SIMDe
|
146
|
-
you can use the same names as the native functions. Unfortunately,
|
147
|
-
this is somewhat error-prone due to portability issues in the APIs, so
|
148
|
-
it's recommended to only do this for testing. When
|
149
|
-
`SIMDE_ENABLE_NATIVE_ALIASES` is undefined only the versions prefixed
|
150
|
-
with `simde_` will be available; for example, the MMX `_mm_add_pi8`
|
151
|
-
intrinsic becomes `simde_mm_add_pi8`, and `__m64` becomes `simde__m64`.
|
152
|
-
|
153
|
-
Since SIMDe is meant to be portable, many functions which assume types
|
154
|
-
are of a specific size have been altered to use fixed-width types
|
155
|
-
instead. For example, Intel's APIs use `char` for signed 8-bit
|
156
|
-
integers, but `char` on ARM is generally unsigned. SIMDe uses `int8_t`
|
157
|
-
to make the API portable, but that means your code may require some
|
158
|
-
minor changes (such as using `int8_t` instead of `char`) to work on
|
159
|
-
other platforms.
|
160
|
-
|
161
|
-
That said, the changes are usually quite minor. It's often enough to
|
162
|
-
just use search and replace, manual changes are required pretty
|
163
|
-
infrequently.
|
164
|
-
|
165
|
-
For best performance, in addition to `-O3` (or whatever your compiler's
|
166
|
-
equivalent is), you should enable OpenMP 4 SIMD support by defining
|
167
|
-
`SIMDE_ENABLE_OPENMP` before including any SIMDe headers, and
|
168
|
-
enabling OpenMP support in your compiler. GCC and ICC both support a
|
169
|
-
flag to enable only OpenMP SIMD support instead of full OpenMP (the OpenMP
|
170
|
-
SIMD support doesn't require the OpenMP run-time library); for GCC the
|
171
|
-
flag is `-fopenmp-simd`, for ICC `-qopenmp-simd`. SIMDe also supports
|
172
|
-
using [Cilk Plus](https://www.cilkplus.org/), [GCC loop-specific
|
173
|
-
pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html),
|
174
|
-
or [clang pragma loop hint
|
175
|
-
directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives),
|
176
|
-
though these are not nearly as effective as OpenMP SIMD and depending
|
177
|
-
on them will likely result in less efficient code.
|
178
|
-
|
179
|
-
## Portability
|
180
|
-
|
181
|
-
### Compilers
|
182
|
-
|
183
|
-
SIMDe does depend on some C99 features, though the subset supported by
|
184
|
-
MSVC also works. While we do our best to make sure we provide optimized
|
185
|
-
implementations where they are supported, SIMDe does contain portable
|
186
|
-
fallbacks which are designed to work on any C99 compiler.
|
187
|
-
|
188
|
-
Every commit is tested in CI on multiple compilers, platforms, and
|
189
|
-
configurations, and our test coverage is extremely extensive.
|
190
|
-
Currently tested compilers include:
|
191
|
-
|
192
|
-
* GCC versions back to 4.8
|
193
|
-
* Clang versions back to 7
|
194
|
-
* Microsoft Visual Studio back to 12 (2013)
|
195
|
-
* IBM XL C/C++
|
196
|
-
* Intel C/C++ Compiler (ICC)
|
197
|
-
* PGI C Compiler
|
198
|
-
|
199
|
-
I'm generally willing to accept patches to add support for other
|
200
|
-
compilers, as long as they're not too disruptive, *especially* if we
|
201
|
-
can get CI support going. We currently use Travis CI, AppVeyor, and
|
202
|
-
Microsoft Azure Pipelines, but other CI platforms can be added as
|
203
|
-
necessary.
|
204
|
-
|
205
|
-
### Hardware
|
206
|
-
|
207
|
-
The following architectures are tested in CI for every commit:
|
208
|
-
|
209
|
-
* x86_64
|
210
|
-
* x86
|
211
|
-
* AArch64
|
212
|
-
* ARMv8
|
213
|
-
* ARMv7
|
214
|
-
* PPC64
|
215
|
-
* MIPS
|
216
|
-
|
217
|
-
We would love to add more, so patches are extremely welcome!
|
218
|
-
|
219
|
-
The tests currently contain some assumptions that they are running on
|
220
|
-
a little-endian CPU. We're working on this, but for now big endian
|
221
|
-
CPUs *should* work, but we can't promise.
|
222
|
-
|
223
|
-
## Related Projects
|
224
|
-
|
225
|
-
* The "builtins" module in
|
226
|
-
[portable-snippets](https://github.com/nemequ/portable-snippets)
|
227
|
-
does much the same thing, but for compiler-specific intrinsics
|
228
|
-
(think `__builtin_clz` and `_BitScanForward`), **not** SIMD
|
229
|
-
intrinsics.
|
230
|
-
* Intel offers an emulator, the [Intel® Software Development
|
231
|
-
Emulator](https://software.intel.com/en-us/articles/intel-software-development-emulator/)
|
232
|
-
which can be used to develop software which uses Intel intrinsics
|
233
|
-
without having to own hardware which supports them, though it
|
234
|
-
doesn't help for deployment.
|
235
|
-
* [Iris](https://github.com/AlexYaruki/iris) is the only other project
|
236
|
-
I'm aware of which is attempting to create portable implementations
|
237
|
-
like SIMDe. SIMDe is much further along on the Intel side, but Iris
|
238
|
-
looks to be in better shape on ARM. C++-only, Apache 2.0 license.
|
239
|
-
AFAICT there are no accelerated fallbacks, nor is there a good way to
|
240
|
-
add them since it relies extensively on templates.
|
241
|
-
* There are a few projects trying to implement one set with another:
|
242
|
-
* [ARM_NEON_2_x86_SSE](https://github.com/intel/ARM_NEON_2_x86_SSE)
|
243
|
-
— implementing NEON using SSE. Quite extensive, Apache 2.0
|
244
|
-
license.
|
245
|
-
* [sse2neon](https://github.com/jratcliff63367/sse2neon) —
|
246
|
-
implementing SSE using NEON. This code has already been merged
|
247
|
-
into SIMDe.
|
248
|
-
* [veclib](https://github.com/IvantheDugtrio/veclib) — implementing
|
249
|
-
SSE2 using AltiVec/VMX, using a non-free IBM library called
|
250
|
-
[powerveclib](https://www.ibm.com/developerworks/community/groups/community/powerveclib/)
|
251
|
-
* [SSE-to-NEON](https://github.com/otim/SSE-to-NEON) — implementing
|
252
|
-
SSE with NEON. Non-free, C++.
|
253
|
-
* [arm-neon-tests](https://github.com/christophe-lyon/arm-neon-tests)
|
254
|
-
contains tests to verify NEON implementations.
|
255
|
-
|
256
|
-
If you know of any other related projects, please [let us
|
257
|
-
know](https://github.com/nemequ/simde/issues/new)!
|
258
|
-
|
259
|
-
## Caveats
|
260
|
-
|
261
|
-
Sometimes features can't be emulated. If SIMDe is operating in native
|
262
|
-
mode the functions will work as expected, but if there is no native
|
263
|
-
support some caveats apply:
|
264
|
-
|
265
|
-
* Many functions require <math.h> and/or <fenv.h>. SIMDe will still
|
266
|
-
work without those headers, but the results of those functions are
|
267
|
-
undefined.
|
268
|
-
* x86 / x86_64
|
269
|
-
* SSE
|
270
|
-
* `SIMDE_MM_SET_ROUNDING_MODE()` will use `fesetround()`, altering
|
271
|
-
the global rounding mode.
|
272
|
-
* `simde_mm_getcsr` and `simde_mm_setcsr` only implement bits 13
|
273
|
-
and 14 (rounding mode).
|
274
|
-
* AVX
|
275
|
-
* `simde_mm256_test*` do not set the CF/ZF registers as there is
|
276
|
-
no portable way to implement that functionality.
|
277
|
-
* `simde_mm256_zeroall` and `simde_mm256_zeroupper` are not
|
278
|
-
implemented as there is no portable way to implement that
|
279
|
-
functionality.
|
280
|
-
|
281
|
-
Additionally, there are some known limitations which apply when using
|
282
|
-
native aliases (`SIMDE_ENABLE_NATIVE_ALIASES`):
|
283
|
-
|
284
|
-
* On Windows x86 (but not x86_64), some MMX functions and SSE/SSE2
|
285
|
-
functions which use MMX types (__m64) other than for pointers may
|
286
|
-
return incorrect results.
|
287
|
-
|
288
|
-
Also, as mentioned earlier, while some APIs make assumptions about
|
289
|
-
basic types (*e.g.*, `int` is 32 bits), SIMDe does not, so many types
|
290
|
-
have been altered to use portable fixed-width versions such as
|
291
|
-
`int32_t`.
|
292
|
-
|
293
|
-
If you find any other differences, please file an issue so we can either fix
|
294
|
-
it or add it to the list above.
|
295
|
-
|
296
|
-
## Benefactors
|
297
|
-
|
298
|
-
SIMDe uses resources provided for free by a number of organizations.
|
299
|
-
While this shouldn't be taken to imply endorsement of SIMDe, we're
|
300
|
-
tremendously grateful for their support:
|
301
|
-
|
302
|
-
* [GitHub](https://github.com/) — hosts our source repository, issue
|
303
|
-
tracker, etc.
|
304
|
-
* [Travis CI](https://travis-ci.org/) — provides CI testing on
|
305
|
-
numerous platforms.
|
306
|
-
* [AppVeyor](https://www.appveyor.com/) — provides CI testing on
|
307
|
-
Windows.
|
308
|
-
* [Drone CI](https://drone.io/) — provides CI testing on ARM 32 bits
|
309
|
-
platform, etc.
|
310
|
-
* [IntegriCloud](https://integricloud.com/) — provides access to a very
|
311
|
-
fast POWER9 server for developing AltiVec/VMX support.
|
312
|
-
* [GCC Compile Farm](https://gcc.gnu.org/wiki/CompileFarm) — provides
|
313
|
-
access to a wide range of machines with different architectures for
|
314
|
-
developing support for various ISA extensions.
|
315
|
-
* [CodeCov.io](https://codecov.io/) — provides code coverage analysis
|
316
|
-
for our test cases.
|
317
|
-
|
318
|
-
Without such organizations donating resources, SIMDe wouldn't be nearly
|
319
|
-
as useful or usable as it is today.
|
320
|
-
|
321
|
-
We would also like to thank anyone who has helped develop the myriad
|
322
|
-
of software on which SIMDe relies, including compilers and analysis
|
323
|
-
tools.
|
324
|
-
|
325
|
-
Finally, a special thank you to
|
326
|
-
[anyone who has contributed](https://github.com/nemequ/simde/graphs/contributors)
|
327
|
-
to SIMDe, filed bugs, provided suggestions, or helped with SIMDe
|
328
|
-
development in any way.
|
329
|
-
|
330
|
-
## License
|
331
|
-
|
332
|
-
SIMDe is distributed under an MIT-style license; see COPYING for
|
333
|
-
details.
|
@@ -1,58 +0,0 @@
|
|
1
|
-
#!/usr/bin/python3
|
2
|
-
|
3
|
-
# amalgamate.py
|
4
|
-
# Written by Evan Nemerson <evan@nemerson.com>
|
5
|
-
#
|
6
|
-
# To the extent possible under law, the author(s) have dedicated all
|
7
|
-
# copyright and related and neighboring rights to this software to
|
8
|
-
# the public domain worldwide. This software is distributed without
|
9
|
-
# any warranty.
|
10
|
-
#
|
11
|
-
# For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
12
|
-
# SPDX-License-Identifier: CC0-1.0
|
13
|
-
|
14
|
-
# Quick and dirty script to amalgamate C into a single file. Includes
|
15
|
-
# using angle brackets (#include <foo.h>) will be preserved, but for
|
16
|
-
# includes using double quotes (#include "foo.h") the file will be
|
17
|
-
# included by this script.
|
18
|
-
#
|
19
|
-
# If you make any improvements please report them in the SIMDe issue
|
20
|
-
# tracker at <https://github.com/nemequ/simde/issues> or directly to
|
21
|
-
# the author so they can be merged back into the original version.
|
22
|
-
|
23
|
-
import sys, re, os, subprocess
|
24
|
-
|
25
|
-
amalgamate_include = re.compile('^\\s*#\\s*include\\s+\\"([^)]+)\\"\\s$')
|
26
|
-
already_included = []
|
27
|
-
|
28
|
-
def amalgamate(filename, stream):
|
29
|
-
full_path = os.path.realpath(os.path.realpath(filename))
|
30
|
-
srcdir = os.path.dirname(full_path)
|
31
|
-
|
32
|
-
if full_path not in already_included:
|
33
|
-
already_included.insert(-1, full_path)
|
34
|
-
with open(filename) as input_file:
|
35
|
-
stream.write('/* :: Begin ' + os.path.relpath(full_path) + ' :: */\n')
|
36
|
-
|
37
|
-
for source_line in input_file:
|
38
|
-
a9e_inc_m = amalgamate_include.match(source_line)
|
39
|
-
if a9e_inc_m:
|
40
|
-
amalgamate(os.path.join(srcdir, a9e_inc_m.group(1)), stream)
|
41
|
-
else:
|
42
|
-
stream.write(source_line)
|
43
|
-
|
44
|
-
stream.write('/* :: End ' + os.path.relpath(full_path) + ' :: */\n')
|
45
|
-
|
46
|
-
if len(sys.argv) != 2:
|
47
|
-
sys.stderr.write("USAGE: " + sys.argv[0] + ' SOURCE_FILE\n\n')
|
48
|
-
sys.stderr.write("This will print a copy of $SOURCE_FILE to stdout, while replacing\n")
|
49
|
-
sys.stderr.write("all '#include AMALGAMATE(file)' lines with copies of file.\n")
|
50
|
-
|
51
|
-
sys.exit(1)
|
52
|
-
|
53
|
-
print('/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */')
|
54
|
-
|
55
|
-
git_id = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()
|
56
|
-
print("/* {:s} */".format(git_id))
|
57
|
-
|
58
|
-
amalgamate(sys.argv[1], sys.stdout)
|
@@ -1,33 +0,0 @@
|
|
1
|
-
project('SIMDe', 'c', 'cpp',
|
2
|
-
default_options: ['c_std=c99'],
|
3
|
-
license: 'MIT',
|
4
|
-
version: '0.5.0')
|
5
|
-
|
6
|
-
cc = meson.get_compiler('c')
|
7
|
-
cxx = meson.get_compiler('cpp')
|
8
|
-
|
9
|
-
subdir('test')
|
10
|
-
|
11
|
-
install_headers(
|
12
|
-
[
|
13
|
-
'simde/hedley.h',
|
14
|
-
'simde/check.h',
|
15
|
-
'simde/debug-trap.h',
|
16
|
-
'simde/simde-arch.h',
|
17
|
-
'simde/simde-common.h',
|
18
|
-
|
19
|
-
'simde/x86/avx2.h',
|
20
|
-
'simde/x86/avx512bw.h',
|
21
|
-
'simde/x86/avx512vl.h',
|
22
|
-
'simde/x86/avx512f.h',
|
23
|
-
'simde/x86/fma.h',
|
24
|
-
'simde/x86/mmx.h',
|
25
|
-
'simde/x86/sse.h',
|
26
|
-
'simde/x86/sse2.h',
|
27
|
-
'simde/x86/sse3.h',
|
28
|
-
'simde/x86/sse4.1.h',
|
29
|
-
'simde/x86/sse4.2.h',
|
30
|
-
'simde/x86/ssse3.h',
|
31
|
-
'simde/x86/svml.h',
|
32
|
-
],
|
33
|
-
subdir: 'simde')
|
@@ -1,20 +0,0 @@
|
|
1
|
-
[build]
|
2
|
-
publish = 'web'
|
3
|
-
command = 'mkdir -p web/amalgamated/x86 && cd simde && for header in x86/*.h; do ../amalgamate.py "$header" > ../web/amalgamated/"$header"; done'
|
4
|
-
|
5
|
-
[[headers]]
|
6
|
-
for = "/amalgamated/x86/*.h"
|
7
|
-
[headers.values]
|
8
|
-
Access-Control-Allow-Origin = "*"
|
9
|
-
|
10
|
-
[[redirects]]
|
11
|
-
from = "/godbolt/simple"
|
12
|
-
to = "https://godbolt.org/z/-twon_"
|
13
|
-
status = 302
|
14
|
-
force = true
|
15
|
-
|
16
|
-
[[redirects]]
|
17
|
-
from = "/godbolt/demo"
|
18
|
-
to = "https://godbolt.org/z/8cAgiy"
|
19
|
-
status = 302
|
20
|
-
force = true
|
@@ -1,140 +0,0 @@
|
|
1
|
-
/* Copyright (c) 2018-2019 Evan Nemerson <evan@nemerson.com>
|
2
|
-
*
|
3
|
-
* Permission is hereby granted, free of charge, to any person
|
4
|
-
* obtaining a copy of this software and associated documentation
|
5
|
-
* files (the "Software"), to deal in the Software without
|
6
|
-
* restriction, including without limitation the rights to use, copy,
|
7
|
-
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
8
|
-
* of the Software, and to permit persons to whom the Software is
|
9
|
-
* furnished to do so, subject to the following conditions:
|
10
|
-
*
|
11
|
-
* The above copyright notice and this permission notice shall be
|
12
|
-
* included in all copies or substantial portions of the Software.
|
13
|
-
*
|
14
|
-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
-
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
18
|
-
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19
|
-
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
-
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
-
* SOFTWARE.
|
22
|
-
*/
|
23
|
-
|
24
|
-
#if !defined(SIMDE__INSIDE_NEON_H)
|
25
|
-
# error Do not include simde/arm/neon/int32x2.h directly; use simde/arm/neon.h.
|
26
|
-
#endif
|
27
|
-
|
28
|
-
#if !defined(SIMDE__NEON_FLOAT32X2_H)
|
29
|
-
#define SIMDE__NEON_FLOAT32X2_H
|
30
|
-
|
31
|
-
typedef union {
|
32
|
-
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
33
|
-
simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
34
|
-
#else
|
35
|
-
simde_float32 f32[2];
|
36
|
-
#endif
|
37
|
-
|
38
|
-
#if defined(SIMDE_NEON_NATIVE)
|
39
|
-
float32x2_t n;
|
40
|
-
#endif
|
41
|
-
|
42
|
-
#if defined(SIMDE_NEON_MMX)
|
43
|
-
__m64 mmx;
|
44
|
-
#endif
|
45
|
-
} simde_float32x2_t;
|
46
|
-
|
47
|
-
#if defined(SIMDE_NEON_NATIVE)
|
48
|
-
HEDLEY_STATIC_ASSERT(sizeof(float32x2_t) == sizeof(simde_float32x2_t), "float32x2_t size doesn't match simde_float32x2_t size");
|
49
|
-
#endif
|
50
|
-
HEDLEY_STATIC_ASSERT(8 == sizeof(simde_float32x2_t), "simde_float32x2_t size incorrect");
|
51
|
-
|
52
|
-
SIMDE__FUNCTION_ATTRIBUTES
|
53
|
-
simde_float32x2_t
|
54
|
-
simde_vadd_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
55
|
-
simde_float32x2_t r;
|
56
|
-
#if defined(SIMDE_NEON_NATIVE)
|
57
|
-
r.n = vadd_f32(a.n, b.n);
|
58
|
-
#elif defined(SIMDE_MMX_NATIVE)
|
59
|
-
r.mmx = _mm_add_ps(a.mmx, b.mmx);
|
60
|
-
#else
|
61
|
-
SIMDE__VECTORIZE
|
62
|
-
for (size_t i = 0 ; i < (sizeof(r.f32) / sizeof(r.f32[0])) ; i++) {
|
63
|
-
r.f32[i] = a.f32[i] + b.f32[i];
|
64
|
-
}
|
65
|
-
#endif
|
66
|
-
return r;
|
67
|
-
}
|
68
|
-
|
69
|
-
SIMDE__FUNCTION_ATTRIBUTES
|
70
|
-
simde_float32x2_t
|
71
|
-
simde_vld1_f32 (simde_float32 const ptr[2]) {
|
72
|
-
simde_float32x2_t r;
|
73
|
-
#if defined(SIMDE_NEON_NATIVE)
|
74
|
-
r.n = vld1_f32(ptr);
|
75
|
-
#else
|
76
|
-
SIMDE__VECTORIZE
|
77
|
-
for (size_t i = 0 ; i < (sizeof(r.f32) / sizeof(r.f32[0])) ; i++) {
|
78
|
-
r.f32[i] = ptr[i];
|
79
|
-
}
|
80
|
-
#endif
|
81
|
-
return r;
|
82
|
-
}
|
83
|
-
|
84
|
-
SIMDE__FUNCTION_ATTRIBUTES
|
85
|
-
simde_float32x2_t
|
86
|
-
simde_x_vload_f32 (simde_float32 l0, simde_float32 l1) {
|
87
|
-
simde_float32 v[] = { l0, l1 };
|
88
|
-
return simde_vld1_f32(v);
|
89
|
-
}
|
90
|
-
|
91
|
-
SIMDE__FUNCTION_ATTRIBUTES
|
92
|
-
simde_float32x2_t
|
93
|
-
simde_vdup_n_f32 (simde_float32 value) {
|
94
|
-
simde_float32x2_t r;
|
95
|
-
#if defined(SIMDE_NEON_NATIVE)
|
96
|
-
r.n = vdup_n_f32(value);
|
97
|
-
#else
|
98
|
-
SIMDE__VECTORIZE
|
99
|
-
for (size_t i = 0 ; i < (sizeof(r.f32) / sizeof(r.f32[0])) ; i++) {
|
100
|
-
r.f32[i] = value;
|
101
|
-
}
|
102
|
-
#endif
|
103
|
-
return r;
|
104
|
-
}
|
105
|
-
|
106
|
-
SIMDE__FUNCTION_ATTRIBUTES
|
107
|
-
simde_float32x2_t
|
108
|
-
simde_vmul_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
109
|
-
simde_float32x2_t r;
|
110
|
-
#if defined(SIMDE_NEON_NATIVE)
|
111
|
-
r.n = vmul_f32(a.n, b.n);
|
112
|
-
#elif defined(SIMDE_MMX_NATIVE)
|
113
|
-
r.mmx = _mm_mul_ps(a.mmx, b.mmx);
|
114
|
-
#else
|
115
|
-
SIMDE__VECTORIZE
|
116
|
-
for (size_t i = 0 ; i < (sizeof(r.f32) / sizeof(r.f32[0])) ; i++) {
|
117
|
-
r.f32[i] = a.f32[i] * b.f32[i];
|
118
|
-
}
|
119
|
-
#endif
|
120
|
-
return r;
|
121
|
-
}
|
122
|
-
|
123
|
-
SIMDE__FUNCTION_ATTRIBUTES
|
124
|
-
simde_float32x2_t
|
125
|
-
simde_vsub_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
126
|
-
simde_float32x2_t r;
|
127
|
-
#if defined(SIMDE_NEON_NATIVE)
|
128
|
-
r.n = vsub_f32(a.n, b.n);
|
129
|
-
#elif defined(SIMDE_MMX_NATIVE)
|
130
|
-
r.mmx = _mm_sub_ps(a.mmx, b.mmx);
|
131
|
-
#else
|
132
|
-
SIMDE__VECTORIZE
|
133
|
-
for (size_t i = 0 ; i < (sizeof(r.f32) / sizeof(r.f32[0])) ; i++) {
|
134
|
-
r.f32[i] = a.f32[i] - b.f32[i];
|
135
|
-
}
|
136
|
-
#endif
|
137
|
-
return r;
|
138
|
-
}
|
139
|
-
|
140
|
-
#endif
|