minimap2 0.2.25.0 → 0.2.25.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
@@ -0,0 +1,114 @@
|
|
1
|
+
# Contributing to SIMDe
|
2
|
+
|
3
|
+
First off, if you're even reading this, thank you! There is a lot of
|
4
|
+
work to do, and any help is appreciated.
|
5
|
+
|
6
|
+
If you haven't already, please read the
|
7
|
+
[README](https://github.com/nemequ/simde/blob/master/README.md). The
|
8
|
+
[wiki](https://github.com/nemequ/simde/wiki) also has some good
|
9
|
+
information, especially the
|
10
|
+
[FAQ](https://github.com/nemequ/simde/wiki/FAQ) and a guide on how to
|
11
|
+
[implement a new
|
12
|
+
function](https://github.com/nemequ/simde/wiki/Implementing-a-New-Function).
|
13
|
+
|
14
|
+
For information on developing for architectures you don't have access
|
15
|
+
to, please see the [Development
|
16
|
+
Environment](https://github.com/nemequ/simde/wiki/Development-Environment)
|
17
|
+
page on the wiki.
|
18
|
+
|
19
|
+
If you still have questions, or if anything below doesn't make sense
|
20
|
+
to you, please feel free to use the [issue
|
21
|
+
tracker](https://github.com/nemequ/simde/issues) or the [mailing
|
22
|
+
list](https://groups.google.com/forum/#!forum/simde) to ask. I know
|
23
|
+
the SIMDe documentation needs a lot of improvement, and asking
|
24
|
+
questions will help us understand what is missing, so please don't be
|
25
|
+
shy!
|
26
|
+
|
27
|
+
## Building the Tests
|
28
|
+
|
29
|
+
SIMDe contains an extensive test suite used for development. Most
|
30
|
+
users will never need to build the suite, but if you're contributing
|
31
|
+
code to SIMDe you'll need to build them.
|
32
|
+
|
33
|
+
Here is the basic procedure for compiling the tests:
|
34
|
+
|
35
|
+
```bash
|
36
|
+
mkdir test/build
|
37
|
+
cd test/build
|
38
|
+
CFLAGS="-march=native" CXXFLAGS="-march=native" cmake ..
|
39
|
+
make -j
|
40
|
+
./run-tests
|
41
|
+
```
|
42
|
+
|
43
|
+
Note that `-march=native` may not be the right flag for your compiler.
|
44
|
+
That should work for most compilers on x86/x86_64, though MSVC is an
|
45
|
+
exception (try `/arch:AVX2` instead of `-march=native`). On other
|
46
|
+
architectures please consult your compiler documentation to find out
|
47
|
+
what flags you should use to enable the SIMD extension for your target
|
48
|
+
platform. Here are a few to try:
|
49
|
+
|
50
|
+
* ARM:
|
51
|
+
* `-march=armv8-a+simd` (for ARMv8)
|
52
|
+
* `-march=armv7-a -mfpu=neon` (for ARMv7)
|
53
|
+
* POWER
|
54
|
+
* `-mcpu=native`
|
55
|
+
|
56
|
+
If you need a flag not listed above, please let us know so we can add
|
57
|
+
it to the list.
|
58
|
+
|
59
|
+
## Coding Style
|
60
|
+
|
61
|
+
SIMDe uses two spaces for indentation. Please adjust your editor
|
62
|
+
accordingly.
|
63
|
+
|
64
|
+
The coding style for preprocessor macros is a bit mixed since I made
|
65
|
+
some changes mid-project. For new code, please indent the
|
66
|
+
preprocessor conditions before the hash to the same level as the
|
67
|
+
normal code would be, and indent the code inside of preprocessor
|
68
|
+
conditions as if the conditions were normal code. For example:
|
69
|
+
|
70
|
+
```c
|
71
|
+
int
|
72
|
+
foo(void) {
|
73
|
+
#if 1
|
74
|
+
bar();
|
75
|
+
#else
|
76
|
+
baz();
|
77
|
+
#endif
|
78
|
+
}
|
79
|
+
```
|
80
|
+
|
81
|
+
Other than that, please just try to follow the existing style. We'll
|
82
|
+
add new rules here as the need arises.
|
83
|
+
|
84
|
+
## Commit Messages
|
85
|
+
|
86
|
+
Git commit messages should contain lines no longer than 72 characters.
|
87
|
+
The first line should always be a one-line summary of the commit, with
|
88
|
+
the relevant component followed by a colon and a space (if
|
89
|
+
applicable), then the summary.
|
90
|
+
|
91
|
+
If the one-line summary is insufficient to fully describe the changes,
|
92
|
+
further descriptive paragraphs should be added, separated by blank
|
93
|
+
lines.
|
94
|
+
|
95
|
+
For example:
|
96
|
+
|
97
|
+
```
|
98
|
+
sse: add magical code to make everything go fast
|
99
|
+
|
100
|
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur
|
101
|
+
interdum scelerisque risus non ultricies. Vivamus id tristique mauris.
|
102
|
+
Mauris et augue vel urna aliquam posuere. Morbi in sem nec ante
|
103
|
+
ullamcorper ultrices in quis nibh. In felis velit, semper a mauris sed,
|
104
|
+
congue ullamcorper enim. Vestibulum ante ipsum primis in faucibus orci
|
105
|
+
luctus et ultrices posuere cubilia Curae; Nulla facilisi. Donec
|
106
|
+
scelerisque elit dui, et aliquet magna tincidunt eu.
|
107
|
+
|
108
|
+
Curabitur rhoncus lacus ac elit pulvinar, quis posuere ante ultrices.
|
109
|
+
Suspendisse at mauris vitae justo pretium tempor et in mauris. Nunc
|
110
|
+
facilisis nulla a ante tincidunt, imperdiet rhoncus metus interdum.
|
111
|
+
Vivamus sed nunc vel tellus porta consequat. Donec quis porttitor elit,
|
112
|
+
et cursus urna. Donec et sapien lorem. In imperdiet magna at aliquet
|
113
|
+
hendrerit.
|
114
|
+
```
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,333 @@
|
|
1
|
+
# SIMD Everywhere
|
2
|
+
[![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/simd-everywhere/community)
|
3
|
+
|
4
|
+
The SIMDe header-only library provides fast, portable implementations of
|
5
|
+
[SIMD intrinsics](https://en.wikipedia.org/wiki/SIMD) on hardware which
|
6
|
+
doesn't natively support them, such as calling [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions)
|
7
|
+
functions on ARM. There is no performance penalty if the hardware
|
8
|
+
supports the native implementation (*e.g.*, SSE/[AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
|
9
|
+
runs at full speed on [x86](https://en.wikipedia.org/wiki/X86),
|
10
|
+
[NEON](https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(Neon)) on [ARM](https://en.wikipedia.org/wiki/ARM_architecture),
|
11
|
+
*etc.*).
|
12
|
+
|
13
|
+
This makes porting code to other architectures much easier in a few
|
14
|
+
key ways:
|
15
|
+
|
16
|
+
First, instead of forcing you to rewrite everything for each
|
17
|
+
architecture, SIMDe lets you get a port up and running almost
|
18
|
+
effortlessly. You can then start working on switching the most
|
19
|
+
performance-critical sections to native intrinsics, improving
|
20
|
+
performance gradually. SIMDe lets (for example) SSE/AVX and NEON code
|
21
|
+
exist side-by-side, in the same implementation.
|
22
|
+
|
23
|
+
Second, SIMDe makes it easier to write code targeting [ISA](https://en.wikipedia.org/wiki/Instruction_set_architecture)
|
24
|
+
extensions you don't have convenient access to. You can run NEON code on your
|
25
|
+
x86 machine *without an emulator*. Obviously you'll eventually want
|
26
|
+
to test on the actual hardware you're targeting, but for most
|
27
|
+
development, SIMDe can provide a much easier path.
|
28
|
+
|
29
|
+
SIMDe takes a very different approach from most other SIMD abstraction
|
30
|
+
layers in that it aims to expose the entire functionality of the
|
31
|
+
underlying instruction set. Instead of limiting functionality to the
|
32
|
+
lowest common denominator, SIMDe tries to minimize the amount of
|
33
|
+
effort required to port while still allowing you the space to optimize
|
34
|
+
as needed.
|
35
|
+
|
36
|
+
The current focus is on writing complete portable implementations,
|
37
|
+
though a large number of functions already have accelerated
|
38
|
+
implementations using one (or more) of the following:
|
39
|
+
|
40
|
+
* SIMD intrinsics from other ISA extensions (e.g., using NEON to
|
41
|
+
implement SSE).
|
42
|
+
* Compiler-specific vector extensions and built-ins such as
|
43
|
+
[`__builtin_shufflevector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector)
|
44
|
+
and
|
45
|
+
[`__builtin_convertvector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-convertvector)
|
46
|
+
* Compiler auto-vectorization hints, using:
|
47
|
+
* [OpenMP 4 SIMD](http://www.openmp.org/)
|
48
|
+
* [Cilk Plus](https://www.cilkplus.org/)
|
49
|
+
* [GCC loop-specific pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html)
|
50
|
+
* [clang pragma loop hint directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives)
|
51
|
+
|
52
|
+
For an example of a project using SIMDe, see
|
53
|
+
[LZSSE-SIMDe](https://github.com/nemequ/LZSSE-SIMDe).
|
54
|
+
|
55
|
+
You can [try SIMDe online](https://simde.netlify.com/godbolt/demo)
|
56
|
+
using Compiler Explorer and an amalgamated SIMDe header.
|
57
|
+
|
58
|
+
If you have any questions, please feel free to use the
|
59
|
+
[issue tracker](https://github.com/nemequ/simde/issues) or the
|
60
|
+
[mailing list](https://groups.google.com/forum/#!forum/simde).
|
61
|
+
|
62
|
+
## Current Status
|
63
|
+
|
64
|
+
There are currently complete implementations of the following instruction
|
65
|
+
sets:
|
66
|
+
|
67
|
+
* [MMX](https://en.wikipedia.org/wiki/MMX_(instruction_set))
|
68
|
+
* [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions)
|
69
|
+
* [SSE2](https://en.wikipedia.org/wiki/SSE2)
|
70
|
+
* [SSE3](https://en.wikipedia.org/wiki/SSE3)
|
71
|
+
* [SSSE3](https://en.wikipedia.org/wiki/SSSE3)
|
72
|
+
* [SSE4.1](https://en.wikipedia.org/wiki/SSE4#SSE4.1)
|
73
|
+
* [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
|
74
|
+
* [FMA](https://en.wikipedia.org/wiki/FMA_instruction_set)
|
75
|
+
|
76
|
+
As well as partial support for many others; see the
|
77
|
+
[instruction-set-support](https://github.com/nemequ/simde/issues?q=is%3Aissue+is%3Aopen+label%3Ainstruction-set-support+sort%3Aupdated-desc)
|
78
|
+
label in the issue tracker for details on progress. If you'd like to
|
79
|
+
be notified when an instruction set is available you may subscribe to
|
80
|
+
the relevant issue.
|
81
|
+
|
82
|
+
If you have a project you're interested in using with SIMDe but we
|
83
|
+
don't yet support all the functions you need, please file an issue
|
84
|
+
with a list of what's missing so we know what to prioritize.
|
85
|
+
|
86
|
+
The `master` branch is protected so commits never reach it unless
|
87
|
+
they have passed extensive CI checks. Status badges don't really
|
88
|
+
make sense since they will always be green, but here are the links:
|
89
|
+
|
90
|
+
* [Travis CI](https://travis-ci.org/nemequ/simde)
|
91
|
+
* [AppVeyor](https://ci.appveyor.com/project/quixdb/simde)
|
92
|
+
* [GitHub Actions](https://github.com/nemequ/simde/actions)
|
93
|
+
* [Azure Pipelines](https://dev.azure.com/simd-everywhere/SIMDe/_build)
|
94
|
+
* [Drone CI](https://cloud.drone.io/nemequ/simde)
|
95
|
+
|
96
|
+
## Contributing
|
97
|
+
|
98
|
+
First off, if you're reading this: thank you! Even considering
|
99
|
+
contributing to SIMDe is very much appreciated!
|
100
|
+
|
101
|
+
SIMDe is a fairly large undertaking; there are a *lot* of functions to
|
102
|
+
get through and a lot of opportunities for optimization on different
|
103
|
+
platforms, so we're very happy for any help you can provide.
|
104
|
+
|
105
|
+
Programmers of all skill levels are welcome; there are lots of tasks
|
106
|
+
which are pretty straightforward and don't require any special
|
107
|
+
expertise.
|
108
|
+
|
109
|
+
If you're not sure how you'd like to contribute, please consider taking
|
110
|
+
a look at [the issue tracker](https://github.com/nemequ/simde/issues).
|
111
|
+
There is a [good first issue](https://github.com/nemequ/simde/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
|
112
|
+
tag if you want to ease into your first contributions, but if you're
|
113
|
+
interested in something else please get in touch via the issue tracker;
|
114
|
+
we're happy to help you get a handle on whatever you are interested in.
|
115
|
+
|
116
|
+
If you're interested in implementing currently unimplemented functions,
|
117
|
+
there is [a
|
118
|
+
guide](https://github.com/nemequ/simde/wiki/Implementing-a-New-Function)
|
119
|
+
explaining how to add new functions and how to quickly and easily get
|
120
|
+
a test case in place. It's a bit rough right now, but if anything is
|
121
|
+
unclear please feel free to use the issue tracker to ask about
|
122
|
+
anything you're not clear on.
|
123
|
+
|
124
|
+
## Usage
|
125
|
+
|
126
|
+
First, it is important to note that *you do not need two separate
|
127
|
+
versions* (one using SIMDe, the other native). If the native functions
|
128
|
+
are available SIMDe will use them, and compilers easily optimize away
|
129
|
+
any overhead from SIMDe; all they have to do is some basic inlining.
|
130
|
+
`-O2` should be enough, but we strongly recommend `-O3` (or whatever
|
131
|
+
flag instructs your compiler to aggressively optimize) since many of
|
132
|
+
the portable fallbacks are substantially faster with aggressive
|
133
|
+
auto-vectorization that isn't enabled at lower optimization levels.
|
134
|
+
|
135
|
+
Each instruction set has a separate file; `x86/mmx.h` for MMX,
|
136
|
+
`x86/sse.h` for SSE, `x86/sse2.h` for SSE2, and so on. Just include
|
137
|
+
the header for whichever instruction set(s) you want, and SIMDe will
|
138
|
+
provide the fastest implementation it can given which extensions
|
139
|
+
you've enabled in your compiler (e.g., if you want to use NEON to
|
140
|
+
implement SSE, you may need to pass something like `-mfpu=neon`
|
141
|
+
or `-march=armv8-a+simd`. See
|
142
|
+
[GCC ARM-Options](https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html)
|
143
|
+
for more information).
|
144
|
+
|
145
|
+
If you define `SIMDE_ENABLE_NATIVE_ALIASES` before including SIMDe
|
146
|
+
you can use the same names as the native functions. Unfortunately,
|
147
|
+
this is somewhat error-prone due to portability issues in the APIs, so
|
148
|
+
it's recommended to only do this for testing. When
|
149
|
+
`SIMDE_ENABLE_NATIVE_ALIASES` is undefined only the versions prefixed
|
150
|
+
with `simde_` will be available; for example, the MMX `_mm_add_pi8`
|
151
|
+
intrinsic becomes `simde_mm_add_pi8`, and `__m64` becomes `simde__m64`.
|
152
|
+
|
153
|
+
Since SIMDe is meant to be portable, many functions which assume types
|
154
|
+
are of a specific size have been altered to use fixed-width types
|
155
|
+
instead. For example, Intel's APIs use `char` for signed 8-bit
|
156
|
+
integers, but `char` on ARM is generally unsigned. SIMDe uses `int8_t`
|
157
|
+
to make the API portable, but that means your code may require some
|
158
|
+
minor changes (such as using `int8_t` instead of `char`) to work on
|
159
|
+
other platforms.
|
160
|
+
|
161
|
+
That said, the changes are usually quite minor. It's often enough to
|
162
|
+
just use search and replace; manual changes are required pretty
|
163
|
+
infrequently.
|
164
|
+
|
165
|
+
For best performance, in addition to `-O3` (or whatever your compiler's
|
166
|
+
equivalent is), you should enable OpenMP 4 SIMD support by defining
|
167
|
+
`SIMDE_ENABLE_OPENMP` before including any SIMDe headers, and
|
168
|
+
enabling OpenMP support in your compiler. GCC and ICC both support a
|
169
|
+
flag to enable only OpenMP SIMD support instead of full OpenMP (the OpenMP
|
170
|
+
SIMD support doesn't require the OpenMP run-time library); for GCC the
|
171
|
+
flag is `-fopenmp-simd`, for ICC `-qopenmp-simd`. SIMDe also supports
|
172
|
+
using [Cilk Plus](https://www.cilkplus.org/), [GCC loop-specific
|
173
|
+
pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html),
|
174
|
+
or [clang pragma loop hint
|
175
|
+
directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives),
|
176
|
+
though these are not nearly as effective as OpenMP SIMD and depending
|
177
|
+
on them will likely result in less efficient code.
|
178
|
+
|
179
|
+
## Portability
|
180
|
+
|
181
|
+
### Compilers
|
182
|
+
|
183
|
+
SIMDe does depend on some C99 features, though the subset supported by
|
184
|
+
MSVC also works. While we do our best to make sure we provide optimized
|
185
|
+
implementations where they are supported, SIMDe does contain portable
|
186
|
+
fallbacks which are designed to work on any C99 compiler.
|
187
|
+
|
188
|
+
Every commit is tested in CI on multiple compilers, platforms, and
|
189
|
+
configurations, and our test coverage is extremely extensive.
|
190
|
+
Currently tested compilers include:
|
191
|
+
|
192
|
+
* GCC versions back to 4.8
|
193
|
+
* Clang versions back to 7
|
194
|
+
* Microsoft Visual Studio back to 12 (2013)
|
195
|
+
* IBM XL C/C++
|
196
|
+
* Intel C/C++ Compiler (ICC)
|
197
|
+
* PGI C Compiler
|
198
|
+
|
199
|
+
I'm generally willing to accept patches to add support for other
|
200
|
+
compilers, as long as they're not too disruptive, *especially* if we
|
201
|
+
can get CI support going. We currently use Travis CI, AppVeyor, and
|
202
|
+
Microsoft Azure Pipelines, but other CI platforms can be added as
|
203
|
+
necessary.
|
204
|
+
|
205
|
+
### Hardware
|
206
|
+
|
207
|
+
The following architectures are tested in CI for every commit:
|
208
|
+
|
209
|
+
* x86_64
|
210
|
+
* x86
|
211
|
+
* AArch64
|
212
|
+
* ARMv8
|
213
|
+
* ARMv7
|
214
|
+
* PPC64
|
215
|
+
* MIPS
|
216
|
+
|
217
|
+
We would love to add more, so patches are extremely welcome!
|
218
|
+
|
219
|
+
The tests currently contain some assumptions that they are running on
|
220
|
+
a little-endian CPU. We're working on this, but for now big endian
|
221
|
+
CPUs *should* work, but we can't promise.
|
222
|
+
|
223
|
+
## Related Projects
|
224
|
+
|
225
|
+
* The "builtins" module in
|
226
|
+
[portable-snippets](https://github.com/nemequ/portable-snippets)
|
227
|
+
does much the same thing, but for compiler-specific intrinsics
|
228
|
+
(think `__builtin_clz` and `_BitScanForward`), **not** SIMD
|
229
|
+
intrinsics.
|
230
|
+
* Intel offers an emulator, the [Intel® Software Development
|
231
|
+
Emulator](https://software.intel.com/en-us/articles/intel-software-development-emulator/)
|
232
|
+
which can be used to develop software which uses Intel intrinsics
|
233
|
+
without having to own hardware which supports them, though it
|
234
|
+
doesn't help for deployment.
|
235
|
+
* [Iris](https://github.com/AlexYaruki/iris) is the only other project
|
236
|
+
I'm aware of which is attempting to create portable implementations
|
237
|
+
like SIMDe. SIMDe is much further along on the Intel side, but Iris
|
238
|
+
looks to be in better shape on ARM. C++-only, Apache 2.0 license.
|
239
|
+
AFAICT there are no accelerated fallbacks, nor is there a good way to
|
240
|
+
add them since it relies extensively on templates.
|
241
|
+
* There are a few projects trying to implement one set with another:
|
242
|
+
* [ARM_NEON_2_x86_SSE](https://github.com/intel/ARM_NEON_2_x86_SSE)
|
243
|
+
— implementing NEON using SSE. Quite extensive, Apache 2.0
|
244
|
+
license.
|
245
|
+
* [sse2neon](https://github.com/jratcliff63367/sse2neon) —
|
246
|
+
implementing SSE using NEON. This code has already been merged
|
247
|
+
into SIMDe.
|
248
|
+
* [veclib](https://github.com/IvantheDugtrio/veclib) — implementing
|
249
|
+
SSE2 using AltiVec/VMX, using a non-free IBM library called
|
250
|
+
[powerveclib](https://www.ibm.com/developerworks/community/groups/community/powerveclib/)
|
251
|
+
* [SSE-to-NEON](https://github.com/otim/SSE-to-NEON) — implementing
|
252
|
+
SSE with NEON. Non-free, C++.
|
253
|
+
* [arm-neon-tests](https://github.com/christophe-lyon/arm-neon-tests)
|
254
|
+
contains tests to verify NEON implementations.
|
255
|
+
|
256
|
+
If you know of any other related projects, please [let us
|
257
|
+
know](https://github.com/nemequ/simde/issues/new)!
|
258
|
+
|
259
|
+
## Caveats
|
260
|
+
|
261
|
+
Sometimes features can't be emulated.  If SIMDe is operating in native
|
262
|
+
mode the functions will work as expected, but if there is no native
|
263
|
+
support some caveats apply:
|
264
|
+
|
265
|
+
* Many functions require <math.h> and/or <fenv.h>. SIMDe will still
|
266
|
+
work without those headers, but the results of those functions are
|
267
|
+
undefined.
|
268
|
+
* x86 / x86_64
|
269
|
+
* SSE
|
270
|
+
* `SIMDE_MM_SET_ROUNDING_MODE()` will use `fesetround()`, altering
|
271
|
+
the global rounding mode.
|
272
|
+
* `simde_mm_getcsr` and `simde_mm_setcsr` only implement bits 13
|
273
|
+
and 14 (rounding mode).
|
274
|
+
* AVX
|
275
|
+
* `simde_mm256_test*` do not set the CF/ZF registers as there is
|
276
|
+
no portable way to implement that functionality.
|
277
|
+
* `simde_mm256_zeroall` and `simde_mm256_zeroupper` are not
|
278
|
+
implemented as there is no portable way to implement that
|
279
|
+
functionality.
|
280
|
+
|
281
|
+
Additionally, there are some known limitations which apply when using
|
282
|
+
native aliases (`SIMDE_ENABLE_NATIVE_ALIASES`):
|
283
|
+
|
284
|
+
* On Windows x86 (but not x86_64), some MMX functions and SSE/SSE2
|
285
|
+
functions which use MMX types (__m64) other than for pointers may
|
286
|
+
return incorrect results.
|
287
|
+
|
288
|
+
Also, as mentioned earlier, while some APIs make assumptions about
|
289
|
+
basic types (*e.g.*, `int` is 32 bits), SIMDe does not, so many types
|
290
|
+
have been altered to use portable fixed-width versions such as
|
291
|
+
`int32_t`.
|
292
|
+
|
293
|
+
If you find any other differences, please file an issue so we can either fix
|
294
|
+
it or add it to the list above.
|
295
|
+
|
296
|
+
## Benefactors
|
297
|
+
|
298
|
+
SIMDe uses resources provided for free by a number of organizations.
|
299
|
+
While this shouldn't be taken to imply endorsement of SIMDe, we're
|
300
|
+
tremendously grateful for their support:
|
301
|
+
|
302
|
+
* [GitHub](https://github.com/) — hosts our source repository, issue
|
303
|
+
tracker, etc.
|
304
|
+
* [Travis CI](https://travis-ci.org/) — provides CI testing on
|
305
|
+
numerous platforms.
|
306
|
+
* [AppVeyor](https://www.appveyor.com/) — provides CI testing on
|
307
|
+
Windows.
|
308
|
+
* [Drone CI](https://drone.io/) — provides CI testing on ARM 32 bits
|
309
|
+
platform, etc.
|
310
|
+
* [IntegriCloud](https://integricloud.com/) — provides access to a very
|
311
|
+
fast POWER9 server for developing AltiVec/VMX support.
|
312
|
+
* [GCC Compile Farm](https://gcc.gnu.org/wiki/CompileFarm) — provides
|
313
|
+
access to a wide range of machines with different architectures for
|
314
|
+
developing support for various ISA extensions.
|
315
|
+
* [CodeCov.io](https://codecov.io/) — provides code coverage analysis
|
316
|
+
for our test cases.
|
317
|
+
|
318
|
+
Without such organizations donating resources, SIMDe wouldn't be nearly
|
319
|
+
as useful or usable as it is today.
|
320
|
+
|
321
|
+
We would also like to thank anyone who has helped develop the myriad
|
322
|
+
of software on which SIMDe relies, including compilers and analysis
|
323
|
+
tools.
|
324
|
+
|
325
|
+
Finally, a special thank you to
|
326
|
+
[anyone who has contributed](https://github.com/nemequ/simde/graphs/contributors)
|
327
|
+
to SIMDe, filed bugs, provided suggestions, or helped with SIMDe
|
328
|
+
development in any way.
|
329
|
+
|
330
|
+
## License
|
331
|
+
|
332
|
+
SIMDe is distributed under an MIT-style license; see COPYING for
|
333
|
+
details.
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/python3
|
2
|
+
|
3
|
+
# amalgamate.py
|
4
|
+
# Written by Evan Nemerson <evan@nemerson.com>
|
5
|
+
#
|
6
|
+
# To the extent possible under law, the author(s) have dedicated all
|
7
|
+
# copyright and related and neighboring rights to this software to
|
8
|
+
# the public domain worldwide. This software is distributed without
|
9
|
+
# any warranty.
|
10
|
+
#
|
11
|
+
# For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
12
|
+
# SPDX-License-Identifier: CC0-1.0
|
13
|
+
|
14
|
+
# Quick and dirty script to amalgamate C into a single file. Includes
|
15
|
+
# using angle brackets (#include <foo.h>) will be preserved, but for
|
16
|
+
# includes using double quotes (#include "foo.h") the file will be
|
17
|
+
# included by this script.
|
18
|
+
#
|
19
|
+
# If you make any improvements please report them in the SIMDe issue
|
20
|
+
# tracker at <https://github.com/nemequ/simde/issues> or directly to
|
21
|
+
# the author so they can be merged back into the original version.
|
22
|
+
|
23
|
+
import sys, re, os, subprocess
|
24
|
+
|
25
|
+
# Matches a quoted include such as `#include "foo.h"` and captures the path
# between the double quotes.  Angle-bracket includes never match, so they are
# copied through unchanged.  The capture stops at the first closing quote, and
# trailing whitespace is optional so that a final line without a newline (or
# one with several trailing blanks) is still recognised.
amalgamate_include = re.compile(r'^\s*#\s*include\s+"([^"]+)"\s*$')

# Canonical paths of every file expanded so far.  Acts as an include guard:
# each file is emitted at most once per run.
already_included = set()


def amalgamate(filename, stream):
    """Write `filename` to `stream`, expanding quoted includes in place.

    Each line matching `#include "path"` is replaced by the recursively
    amalgamated contents of `path`, resolved relative to the directory of
    the file that contains the include.  All other lines are copied
    verbatim.  The contents of each file are wrapped in
    `/* :: Begin <path> :: */` and `/* :: End <path> :: */` marker lines,
    where `<path>` is relative to the current working directory.

    A file whose canonical path has already been emitted is skipped
    entirely: nothing is written for it, and the include line that named
    it is dropped.

    Args:
        filename: Path of the source file to expand.
        stream: Writable text stream that receives the output.

    Raises:
        OSError: If `filename` (or a file it includes) cannot be opened.
    """
    full_path = os.path.realpath(filename)
    if full_path in already_included:
        return
    # Record the file before reading it so that circular includes terminate.
    already_included.add(full_path)

    srcdir = os.path.dirname(full_path)
    display_path = os.path.relpath(full_path)

    with open(filename) as input_file:
        stream.write('/* :: Begin ' + display_path + ' :: */\n')

        for source_line in input_file:
            include_match = amalgamate_include.match(source_line)
            if include_match:
                # Quoted includes are resolved next to the including file.
                amalgamate(os.path.join(srcdir, include_match.group(1)), stream)
            else:
                stream.write(source_line)

        stream.write('/* :: End ' + display_path + ' :: */\n')
|
45
|
+
|
46
|
+
# Script entry point: exactly one argument, the file to amalgamate, is
# required.  Anything else prints the usage text to stderr and exits with
# status 1.
if len(sys.argv) != 2:
    sys.stderr.write("USAGE: " + sys.argv[0] + ' SOURCE_FILE\n\n')
    sys.stderr.write("This will print a copy of $SOURCE_FILE to stdout, while replacing\n")
    # The script expands double-quoted includes, so the usage text says so.
    sys.stderr.write("all '#include \"file\"' lines with copies of file.\n")

    sys.exit(1)

print('/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */')

# Stamp the output with the current commit id.  check_output raises if
# `git rev-parse HEAD` exits with a non-zero status.
git_id = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()
print("/* {:s} */".format(git_id))

amalgamate(sys.argv[1], sys.stdout)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
project('SIMDe', 'c', 'cpp',
|
2
|
+
default_options: ['c_std=c99'],
|
3
|
+
license: 'MIT',
|
4
|
+
version: '0.5.0')
|
5
|
+
|
6
|
+
cc = meson.get_compiler('c')
|
7
|
+
cxx = meson.get_compiler('cpp')
|
8
|
+
|
9
|
+
subdir('test')
|
10
|
+
|
11
|
+
install_headers(
|
12
|
+
[
|
13
|
+
'simde/hedley.h',
|
14
|
+
'simde/check.h',
|
15
|
+
'simde/debug-trap.h',
|
16
|
+
'simde/simde-arch.h',
|
17
|
+
'simde/simde-common.h',
|
18
|
+
|
19
|
+
'simde/x86/avx2.h',
|
20
|
+
'simde/x86/avx512bw.h',
|
21
|
+
'simde/x86/avx512vl.h',
|
22
|
+
'simde/x86/avx512f.h',
|
23
|
+
'simde/x86/fma.h',
|
24
|
+
'simde/x86/mmx.h',
|
25
|
+
'simde/x86/sse.h',
|
26
|
+
'simde/x86/sse2.h',
|
27
|
+
'simde/x86/sse3.h',
|
28
|
+
'simde/x86/sse4.1.h',
|
29
|
+
'simde/x86/sse4.2.h',
|
30
|
+
'simde/x86/ssse3.h',
|
31
|
+
'simde/x86/svml.h',
|
32
|
+
],
|
33
|
+
subdir: 'simde')
|
@@ -0,0 +1,20 @@
|
|
1
|
+
[build]
|
2
|
+
publish = 'web'
|
3
|
+
command = 'mkdir -p web/amalgamated/x86 && cd simde && for header in x86/*.h; do ../amalgamate.py "$header" > ../web/amalgamated/"$header"; done'
|
4
|
+
|
5
|
+
[[headers]]
|
6
|
+
for = "/amalgamated/x86/*.h"
|
7
|
+
[headers.values]
|
8
|
+
Access-Control-Allow-Origin = "*"
|
9
|
+
|
10
|
+
[[redirects]]
|
11
|
+
from = "/godbolt/simple"
|
12
|
+
to = "https://godbolt.org/z/-twon_"
|
13
|
+
status = 302
|
14
|
+
force = true
|
15
|
+
|
16
|
+
[[redirects]]
|
17
|
+
from = "/godbolt/demo"
|
18
|
+
to = "https://godbolt.org/z/8cAgiy"
|
19
|
+
status = 302
|
20
|
+
force = true
|