libdeflate 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79)
  1. checksums.yaml +5 -5
  2. data/.github/workflows/test.yml +34 -0
  3. data/README.md +1 -6
  4. data/ext/libdeflate/extconf.rb +18 -7
  5. data/ext/libdeflate/libdeflate_ext.c +17 -17
  6. data/lib/libdeflate/version.rb +1 -1
  7. data/libdeflate.gemspec +2 -1
  8. metadata +13 -84
  9. data/.gitmodules +0 -3
  10. data/.travis.yml +0 -5
  11. data/ext/libdeflate/libdeflate/.gitignore +0 -19
  12. data/ext/libdeflate/libdeflate/COPYING +0 -21
  13. data/ext/libdeflate/libdeflate/Makefile +0 -231
  14. data/ext/libdeflate/libdeflate/Makefile.msc +0 -64
  15. data/ext/libdeflate/libdeflate/NEWS +0 -57
  16. data/ext/libdeflate/libdeflate/README.md +0 -170
  17. data/ext/libdeflate/libdeflate/common/common_defs.h +0 -351
  18. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +0 -134
  19. data/ext/libdeflate/libdeflate/common/compiler_msc.h +0 -95
  20. data/ext/libdeflate/libdeflate/lib/adler32.c +0 -213
  21. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +0 -281
  22. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +0 -57
  23. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +0 -13
  24. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +0 -357
  25. data/ext/libdeflate/libdeflate/lib/crc32.c +0 -368
  26. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +0 -286
  27. data/ext/libdeflate/libdeflate/lib/crc32_table.h +0 -526
  28. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +0 -404
  29. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +0 -2817
  30. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +0 -14
  31. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +0 -66
  32. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +0 -889
  33. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +0 -95
  34. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +0 -45
  35. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +0 -130
  36. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +0 -405
  37. data/ext/libdeflate/libdeflate/lib/lib_common.h +0 -35
  38. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +0 -53
  39. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +0 -205
  40. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +0 -61
  41. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +0 -53
  42. data/ext/libdeflate/libdeflate/lib/unaligned.h +0 -202
  43. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +0 -169
  44. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +0 -48
  45. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +0 -87
  46. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +0 -21
  47. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +0 -91
  48. data/ext/libdeflate/libdeflate/libdeflate.h +0 -274
  49. data/ext/libdeflate/libdeflate/programs/benchmark.c +0 -558
  50. data/ext/libdeflate/libdeflate/programs/checksum.c +0 -197
  51. data/ext/libdeflate/libdeflate/programs/detect.sh +0 -62
  52. data/ext/libdeflate/libdeflate/programs/gzip.c +0 -603
  53. data/ext/libdeflate/libdeflate/programs/prog_util.c +0 -530
  54. data/ext/libdeflate/libdeflate/programs/prog_util.h +0 -162
  55. data/ext/libdeflate/libdeflate/programs/test_checksums.c +0 -135
  56. data/ext/libdeflate/libdeflate/programs/tgetopt.c +0 -118
  57. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +0 -12
  58. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +0 -40
  59. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  60. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +0 -28
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +0 -3
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +0 -28
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +0 -14
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +0 -28
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +0 -3
  67. data/ext/libdeflate/libdeflate/tools/android_build.sh +0 -104
  68. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +0 -76
  69. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +0 -30
  70. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +0 -108
  71. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +0 -100
  72. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +0 -412
  73. data/ext/libdeflate/libdeflate/tools/make-windows-releases +0 -21
  74. data/ext/libdeflate/libdeflate/tools/mips_build.sh +0 -9
  75. data/ext/libdeflate/libdeflate/tools/msc_test.bat +0 -3
  76. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +0 -23
  77. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +0 -37
  78. data/ext/libdeflate/libdeflate/tools/run_tests.sh +0 -305
  79. data/ext/libdeflate/libdeflate/tools/windows_build.sh +0 -10
@@ -1,64 +0,0 @@
1
- #
2
- # Makefile for the Microsoft toolchain
3
- #
4
- # Usage:
5
- # nmake /f Makefile.msc
6
- #
7
-
8
- .SUFFIXES: .c .obj .dllobj
9
-
10
- CC = cl
11
- LD = link
12
- AR = lib
13
- CFLAGS = /MD /O2 -I. -Icommon
14
- LDFLAGS =
15
-
16
- STATIC_LIB = libdeflatestatic.lib
17
- SHARED_LIB = libdeflate.dll
18
- IMPORT_LIB = libdeflate.lib
19
-
20
- STATIC_LIB_OBJ = \
21
- lib/aligned_malloc.obj \
22
- lib/adler32.obj \
23
- lib/crc32.obj \
24
- lib/deflate_compress.obj \
25
- lib/deflate_decompress.obj \
26
- lib/gzip_compress.obj \
27
- lib/gzip_decompress.obj \
28
- lib/x86_cpu_features.obj \
29
- lib/zlib_compress.obj \
30
- lib/zlib_decompress.obj
31
-
32
- SHARED_LIB_OBJ = $(STATIC_LIB_OBJ:.obj=.dllobj)
33
-
34
- PROG_COMMON_OBJ = programs/prog_util.obj \
35
- programs/tgetopt.obj \
36
- $(STATIC_LIB)
37
-
38
- PROG_CFLAGS = $(CFLAGS) -Iprograms
39
-
40
- all: $(STATIC_LIB) $(SHARED_LIB) $(IMPORT_LIB) gzip.exe gunzip.exe
41
-
42
- .c.obj:
43
- $(CC) -c /Fo$@ $(CFLAGS) $**
44
-
45
- .c.dllobj:
46
- $(CC) -c /Fo$@ $(CFLAGS) /DLIBDEFLATE_DLL $**
47
-
48
- $(STATIC_LIB): $(STATIC_LIB_OBJ)
49
- $(AR) $(ARFLAGS) -out:$@ $(STATIC_LIB_OBJ)
50
-
51
- $(SHARED_LIB): $(SHARED_LIB_OBJ)
52
- $(LD) $(LDFLAGS) -out:$@ -dll -implib:$(IMPORT_LIB) $(SHARED_LIB_OBJ)
53
-
54
- $(IMPORT_LIB): $(SHARED_LIB)
55
-
56
- gzip.exe:programs/gzip.obj $(PROG_COMMON_OBJ)
57
- $(LD) $(LDFLAGS) -out:$@ $**
58
-
59
- gunzip.exe:gzip.exe
60
- copy $** $@
61
-
62
- clean:
63
- -del *.dll *.exe *.exp libdeflate.lib libdeflatestatic.lib gzip.lib \
64
- lib\*.obj lib\*.dllobj programs\*.obj 2>nul
@@ -1,57 +0,0 @@
1
- Version 0.7:
2
- Fixed a very rare bug that caused data to be compressed incorrectly.
3
- The bug affected compression levels 7 and below since libdeflate v0.2.
4
- Although there have been no user reports of the bug, and I believe it
5
- would have been highly unlikely to encounter on realistic data, it could
6
- occur on data specially crafted to reproduce it.
7
-
8
- Fixed a compilation error when building with clang 3.7.
9
-
10
- Version 0.6:
11
- Various improvements to the gzip program's behavior.
12
-
13
- Faster CRC-32 on AVX-capable processors.
14
-
15
- Other minor changes.
16
-
17
- Version 0.5:
18
- The CRC-32 checksum algorithm has been optimized with carryless
19
- multiplication instructions for x86_64 (PCLMUL). This speeds up gzip
20
- compression and decompression.
21
-
22
- Build fixes for certain platforms and compilers.
23
-
24
- Added more test programs and scripts.
25
-
26
- libdeflate is now entirely MIT-licensed.
27
-
28
- Version 0.4:
29
- The Adler-32 checksum algorithm has been optimized with vector
30
- instructions for x86_64 (SSE2 and AVX2) and ARM (NEON). This speeds up
31
- zlib compression and decompression.
32
-
33
- To avoid naming collisions, functions and definitions in libdeflate's
34
- API have been renamed to be prefixed with "libdeflate_" or
35
- "LIBDEFLATE_". Programs using the old API will need to be updated.
36
-
37
- Various bug fixes and other improvements.
38
-
39
- Version 0.3:
40
- Some bug fixes and other minor changes.
41
-
42
- Version 0.2:
43
- Implemented a new block splitting algorithm which typically improves the
44
- compression ratio slightly at all compression levels.
45
-
46
- The compressor now outputs each block using the cheapest type (dynamic
47
- Huffman, static Huffman, or uncompressed).
48
-
49
- The gzip program has received an overhaul and now behaves more like the
50
- standard version.
51
-
52
- Build system updates, including: some build options were changed and
53
- some build options were removed, and the default 'make' target now
54
- includes the gzip program as well as the library.
55
-
56
- Version 0.1:
57
- Initial official release.
@@ -1,170 +0,0 @@
1
- # Overview
2
-
3
- libdeflate is a library for fast, whole-buffer DEFLATE-based compression and
4
- decompression.
5
-
6
- The supported formats are:
7
-
8
- - DEFLATE (raw)
9
- - zlib (a.k.a. DEFLATE with a zlib wrapper)
10
- - gzip (a.k.a. DEFLATE with a gzip wrapper)
11
-
12
- libdeflate is heavily optimized. It is significantly faster than the zlib
13
- library, both for compression and decompression, and especially on x86
14
- processors. In addition, libdeflate provides optional high compression modes
15
- that provide a better compression ratio than zlib's "level 9".
16
-
17
- libdeflate itself is a library, but the following command-line programs which
18
- use this library are also provided:
19
-
20
- * gzip (or gunzip), a program which mostly behaves like the standard equivalent,
21
- except that it does not yet have good streaming support and therefore does not
22
- yet support very large files
23
- * benchmark, a program for benchmarking in-memory compression and decompression
24
-
25
- # Building
26
-
27
- ## For UNIX
28
-
29
- Just run `make`. You need GNU Make and either GCC or Clang. GCC is recommended
30
- because it builds slightly faster binaries. There is no `make install` yet;
31
- just copy the file(s) to where you want.
32
-
33
- By default, all targets are built, including the library and programs, with the
34
- exception of the `benchmark` program. `make help` shows the available targets.
35
- There are also several options which can be set on the `make` command line. See
36
- the Makefile for details.
37
-
38
- ## For Windows
39
-
40
- MinGW (GCC) is the recommended compiler to use when building binaries for
41
- Windows. MinGW can be used on either Windows or Linux. On Windows, you'll need
42
- the compiler as well as GNU Make and basic UNIX tools such as `sh`. This is
43
- most easily set up with Cygwin, but some standalone MinGW distributions for
44
- Windows also work. Or, on Linux, you'll need to install the `mingw-w64-gcc` or
45
- similarly-named package. Once ready, do the build using a command like:
46
-
47
- $ make CC=x86_64-w64-mingw32-gcc
48
-
49
- Some MinGW distributions for Windows may require `CC=gcc` instead.
50
-
51
- Windows binaries prebuilt with MinGW may also be downloaded from
52
- https://github.com/ebiggers/libdeflate/releases.
53
-
54
- Alternatively, a separate Makefile, `Makefile.msc`, is provided for the tools
55
- that come with Visual Studio, for those who strongly prefer that toolchain.
56
-
57
- As usual, 64-bit binaries are faster than 32-bit binaries and should be
58
- preferred whenever possible.
59
-
60
- # API
61
-
62
- libdeflate has a simple API that is not zlib-compatible. You can create
63
- compressors and decompressors and use them to compress or decompress buffers.
64
- See libdeflate.h for details.
65
-
66
- There is currently no support for streaming. This has been considered, but it
67
- always significantly increases complexity and slows down fast paths.
68
- Unfortunately, at this point it remains a future TODO. So: if your application
69
- compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a
70
- great choice for you; that's what it's designed to do. This is perfect for
71
- certain use cases such as transparent filesystem compression. But if your
72
- application compresses large files as a single compressed stream, similarly to
73
- the `gzip` program, then libdeflate isn't for you.
74
-
75
- Note that with chunk-based compression, you generally should have the
76
- uncompressed size of each chunk stored outside of the compressed data itself.
77
- This enables you to allocate an output buffer of the correct size without
78
- guessing. However, libdeflate's decompression routines do optionally provide
79
- the actual number of output bytes in case you need it.
80
-
81
- # DEFLATE vs. zlib vs. gzip
82
-
83
- The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib
84
- format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format
85
- ([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with
86
- each other as well as with the [zlib software library](http://zlib.net), which
87
- actually supports all three formats. libdeflate (this library) also supports
88
- all three formats.
89
-
90
- Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different
91
- wrappers for this stream. Both zlib and gzip include checksums, but gzip can
92
- include extra information such as the original filename. Generally, you should
93
- choose a format as follows:
94
-
95
- - If you are compressing whole files with no subdivisions, similar to the `gzip`
96
- program, you probably should use the gzip format.
97
- - Otherwise, if you don't need the features of the gzip header and footer but do
98
- still want a checksum for corruption detection, you probably should use the
99
- zlib format.
100
- - Otherwise, you probably should use raw DEFLATE. This is ideal if you don't
101
- need checksums, e.g. because they're simply not needed for your use case or
102
- because you already compute your own checksums that are stored separately from
103
- the compressed stream.
104
-
105
- Note that gzip and zlib streams can be distinguished from each other based on
106
- their starting bytes, but this is not necessarily true of raw DEFLATE streams.
107
-
108
- # Compression levels
109
-
110
- An often-underappreciated fact of compression formats such as DEFLATE is that
111
- there are an enormous number of different ways that a given input could be
112
- compressed. Different algorithms and different amounts of computation time will
113
- result in different compression ratios, while remaining equally compatible with
114
- the decompressor.
115
-
116
- For this reason, the commonly used zlib library provides nine compression
117
- levels. Level 1 is the fastest but provides the worst compression; level 9
118
- provides the best compression but is the slowest. It defaults to level 6.
119
- libdeflate uses this same design but is designed to improve on both zlib's
120
- performance *and* compression ratio at every compression level. In addition,
121
- libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
122
- minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
123
- significantly improve on zlib's compression ratio.
124
-
125
- If you are using DEFLATE (or zlib, or gzip) in your application, you should test
126
- different levels to see which works best for your application.
127
-
128
- # Motivation
129
-
130
- Despite DEFLATE's widespread use mainly through the zlib library, in the
131
- compression community this format from the early 1990s is often considered
132
- obsolete. And in a few significant ways, it is.
133
-
134
- So why implement DEFLATE at all, instead of focusing entirely on
135
- bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]?
136
-
137
- To do something better, you need to understand what came before. And it turns
138
- out that most ideas from DEFLATE are still relevant. Many of the newer formats
139
- share a similar structure to DEFLATE, with different tweaks. The effects of
140
- trivial but very useful tweaks, such as increasing the sliding window size, are
141
- often confused with the effects of nontrivial but less useful tweaks. And
142
- actually, many of these formats are similar enough that common algorithms and
143
- optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
144
-
145
- In addition, comparing compressors fairly is difficult because the performance
146
- of a compressor depends heavily on optimizations which are not intrinsic to the
147
- compression format itself. In this respect, the zlib library sometimes compares
148
- poorly to certain newer code because zlib is not well optimized for modern
149
- processors. libdeflate addresses this by providing an optimized DEFLATE
150
- implementation which can be used for benchmarking purposes. And, of course,
151
- real applications can use it as well.
152
-
153
- That being said, I have also started [a separate
154
- project](https://github.com/ebiggers/xpack) for an experimental, more modern
155
- compression format.
156
-
157
- # License
158
-
159
- libdeflate is [MIT-licensed](COPYING).
160
-
161
- Additional notes (informational only):
162
-
163
- - I am not aware of any patents covering libdeflate.
164
-
165
- - Old versions of libdeflate were public domain; I only started copyrighting
166
- changes in newer versions. Portions of the source code that have not been
167
- changed since being released in a public domain version can theoretically
168
- still be used as public domain if you want to. But for practical purposes, it
169
- probably would be easier to just take the MIT license option, which is nearly
170
- the same anyway.
@@ -1,351 +0,0 @@
1
- /*
2
- * common_defs.h
3
- *
4
- * Copyright 2016 Eric Biggers
5
- *
6
- * Permission is hereby granted, free of charge, to any person
7
- * obtaining a copy of this software and associated documentation
8
- * files (the "Software"), to deal in the Software without
9
- * restriction, including without limitation the rights to use,
10
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- * copies of the Software, and to permit persons to whom the
12
- * Software is furnished to do so, subject to the following
13
- * conditions:
14
- *
15
- * The above copyright notice and this permission notice shall be
16
- * included in all copies or substantial portions of the Software.
17
- *
18
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
- * OTHER DEALINGS IN THE SOFTWARE.
26
- */
27
-
28
- #ifndef COMMON_COMMON_DEFS_H
29
- #define COMMON_COMMON_DEFS_H
30
-
31
- #ifdef __GNUC__
32
- # include "compiler_gcc.h"
33
- #elif defined(_MSC_VER)
34
- # include "compiler_msc.h"
35
- #else
36
- # pragma message("Unrecognized compiler. Please add a header file for your compiler. Compilation will proceed, but performance may suffer!")
37
- #endif
38
-
39
- /* ========================================================================== */
40
- /* Type definitions */
41
- /* ========================================================================== */
42
-
43
- #include <stddef.h> /* size_t */
44
-
45
- #ifndef __bool_true_false_are_defined
46
- # include <stdbool.h> /* bool */
47
- #endif
48
-
49
- /* Fixed-width integer types */
50
- #ifndef PRIu32
51
- # include <inttypes.h>
52
- #endif
53
- typedef uint8_t u8;
54
- typedef uint16_t u16;
55
- typedef uint32_t u32;
56
- typedef uint64_t u64;
57
- typedef int8_t s8;
58
- typedef int16_t s16;
59
- typedef int32_t s32;
60
- typedef int64_t s64;
61
-
62
- /*
63
- * Word type of the target architecture. Use 'size_t' instead of 'unsigned
64
- * long' to account for platforms such as Windows that use 32-bit 'unsigned
65
- * long' on 64-bit architectures.
66
- */
67
- typedef size_t machine_word_t;
68
-
69
- /* Number of bytes in a word */
70
- #define WORDBYTES ((int)sizeof(machine_word_t))
71
-
72
- /* Number of bits in a word */
73
- #define WORDBITS (8 * WORDBYTES)
74
-
75
- /* ========================================================================== */
76
- /* Optional compiler features */
77
- /* ========================================================================== */
78
-
79
- /* LIBEXPORT - export a function from a shared library */
80
- #ifndef LIBEXPORT
81
- # define LIBEXPORT
82
- #endif
83
-
84
- /* inline - suggest that a function be inlined */
85
- #ifndef inline
86
- # define inline
87
- #endif
88
-
89
- /* forceinline - force a function to be inlined, if possible */
90
- #ifndef forceinline
91
- # define forceinline inline
92
- #endif
93
-
94
- /* restrict - annotate a non-aliased pointer */
95
- #ifndef restrict
96
- # define restrict
97
- #endif
98
-
99
- /* likely(expr) - hint that an expression is usually true */
100
- #ifndef likely
101
- # define likely(expr) (expr)
102
- #endif
103
-
104
- /* unlikely(expr) - hint that an expression is usually false */
105
- #ifndef unlikely
106
- # define unlikely(expr) (expr)
107
- #endif
108
-
109
- /* prefetchr(addr) - prefetch into L1 cache for read */
110
- #ifndef prefetchr
111
- # define prefetchr(addr)
112
- #endif
113
-
114
- /* prefetchw(addr) - prefetch into L1 cache for write */
115
- #ifndef prefetchw
116
- # define prefetchw(addr)
117
- #endif
118
-
119
- /* Does the compiler support the 'target' function attribute? */
120
- #ifndef COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
121
- # define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
122
- #endif
123
-
124
- /* Are target-specific intrinsics supported in 'target' attribute functions? */
125
- #ifndef COMPILER_SUPPORTS_TARGET_INTRINSICS
126
- # define COMPILER_SUPPORTS_TARGET_INTRINSICS 0
127
- #endif
128
-
129
- /* Which targets are supported with the 'target' function attribute? */
130
- #ifndef COMPILER_SUPPORTS_PCLMUL_TARGET
131
- # define COMPILER_SUPPORTS_PCLMUL_TARGET 0
132
- #endif
133
- #ifndef COMPILER_SUPPORTS_BMI2_TARGET
134
- # define COMPILER_SUPPORTS_BMI2_TARGET 0
135
- #endif
136
- #ifndef COMPILER_SUPPORTS_AVX_TARGET
137
- # define COMPILER_SUPPORTS_AVX_TARGET 0
138
- #endif
139
- #ifndef COMPILER_SUPPORTS_AVX2_TARGET
140
- # define COMPILER_SUPPORTS_AVX2_TARGET 0
141
- #endif
142
-
143
- /* _aligned_attribute(n) - declare that the annotated variable, or variables of
144
- * the annotated type, are to be aligned on n-byte boundaries */
145
- #ifndef _aligned_attribute
146
- #endif
147
-
148
- /* ========================================================================== */
149
- /* Miscellaneous macros */
150
- /* ========================================================================== */
151
-
152
- #define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
153
- #define MIN(a, b) ((a) <= (b) ? (a) : (b))
154
- #define MAX(a, b) ((a) >= (b) ? (a) : (b))
155
- #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
156
- #define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
157
- #define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
158
-
159
- /* ========================================================================== */
160
- /* Endianness handling */
161
- /* ========================================================================== */
162
-
163
- /*
164
- * CPU_IS_LITTLE_ENDIAN() - a macro which evaluates to 1 if the CPU is little
165
- * endian or 0 if it is big endian. The macro should be defined in a way such
166
- * that the compiler can evaluate it at compilation time. If not defined, a
167
- * fallback is used.
168
- */
169
- #ifndef CPU_IS_LITTLE_ENDIAN
170
- static forceinline int CPU_IS_LITTLE_ENDIAN(void)
171
- {
172
- union {
173
- unsigned int v;
174
- unsigned char b;
175
- } u;
176
- u.v = 1;
177
- return u.b;
178
- }
179
- #endif
180
-
181
- /* bswap16(n) - swap the bytes of a 16-bit integer */
182
- #ifndef bswap16
183
- static forceinline u16 bswap16(u16 n)
184
- {
185
- return (n << 8) | (n >> 8);
186
- }
187
- #endif
188
-
189
- /* bswap32(n) - swap the bytes of a 32-bit integer */
190
- #ifndef bswap32
191
- static forceinline u32 bswap32(u32 n)
192
- {
193
- return ((n & 0x000000FF) << 24) |
194
- ((n & 0x0000FF00) << 8) |
195
- ((n & 0x00FF0000) >> 8) |
196
- ((n & 0xFF000000) >> 24);
197
- }
198
- #endif
199
-
200
- /* bswap64(n) - swap the bytes of a 64-bit integer */
201
- #ifndef bswap64
202
- static forceinline u64 bswap64(u64 n)
203
- {
204
- return ((n & 0x00000000000000FF) << 56) |
205
- ((n & 0x000000000000FF00) << 40) |
206
- ((n & 0x0000000000FF0000) << 24) |
207
- ((n & 0x00000000FF000000) << 8) |
208
- ((n & 0x000000FF00000000) >> 8) |
209
- ((n & 0x0000FF0000000000) >> 24) |
210
- ((n & 0x00FF000000000000) >> 40) |
211
- ((n & 0xFF00000000000000) >> 56);
212
- }
213
- #endif
214
-
215
- #define le16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n))
216
- #define le32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n))
217
- #define le64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n))
218
- #define be16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n))
219
- #define be32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n))
220
- #define be64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap64(n) : (n))
221
-
222
- /* ========================================================================== */
223
- /* Unaligned memory accesses */
224
- /* ========================================================================== */
225
-
226
- /*
227
- * UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
228
- * can be performed efficiently on the target platform.
229
- */
230
- #ifndef UNALIGNED_ACCESS_IS_FAST
231
- # define UNALIGNED_ACCESS_IS_FAST 0
232
- #endif
233
-
234
- /*
235
- * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
236
- * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
237
- * which load and store variables of type 'type' from/to unaligned memory
238
- * addresses. If not defined, a fallback is used.
239
- */
240
- #ifndef DEFINE_UNALIGNED_TYPE
241
-
242
- /*
243
- * Although memcpy() may seem inefficient, it *usually* gets optimized
244
- * appropriately by modern compilers. It's portable and may be the best we can
245
- * do for a fallback...
246
- */
247
- #include <string.h>
248
-
249
- #define DEFINE_UNALIGNED_TYPE(type) \
250
- \
251
- static forceinline type \
252
- load_##type##_unaligned(const void *p) \
253
- { \
254
- type v; \
255
- memcpy(&v, p, sizeof(v)); \
256
- return v; \
257
- } \
258
- \
259
- static forceinline void \
260
- store_##type##_unaligned(type v, void *p) \
261
- { \
262
- memcpy(p, &v, sizeof(v)); \
263
- }
264
-
265
- #endif /* !DEFINE_UNALIGNED_TYPE */
266
-
267
- /* ========================================================================== */
268
- /* Bit scan functions */
269
- /* ========================================================================== */
270
-
271
- /*
272
- * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
273
- * significant end) of the *most* significant 1 bit in the input value. The
274
- * input value must be nonzero!
275
- */
276
-
277
- #ifndef bsr32
278
- static forceinline unsigned
279
- bsr32(u32 n)
280
- {
281
- unsigned i = 0;
282
- while ((n >>= 1) != 0)
283
- i++;
284
- return i;
285
- }
286
- #endif
287
-
288
- #ifndef bsr64
289
- static forceinline unsigned
290
- bsr64(u64 n)
291
- {
292
- unsigned i = 0;
293
- while ((n >>= 1) != 0)
294
- i++;
295
- return i;
296
- }
297
- #endif
298
-
299
- static forceinline unsigned
300
- bsrw(machine_word_t n)
301
- {
302
- STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
303
- if (WORDBITS == 32)
304
- return bsr32(n);
305
- else
306
- return bsr64(n);
307
- }
308
-
309
- /*
310
- * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
311
- * significant end) of the *least* significant 1 bit in the input value. The
312
- * input value must be nonzero!
313
- */
314
-
315
- #ifndef bsf32
316
- static forceinline unsigned
317
- bsf32(u32 n)
318
- {
319
- unsigned i = 0;
320
- while ((n & 1) == 0) {
321
- i++;
322
- n >>= 1;
323
- }
324
- return i;
325
- }
326
- #endif
327
-
328
- #ifndef bsf64
329
- static forceinline unsigned
330
- bsf64(u64 n)
331
- {
332
- unsigned i = 0;
333
- while ((n & 1) == 0) {
334
- i++;
335
- n >>= 1;
336
- }
337
- return i;
338
- }
339
- #endif
340
-
341
- static forceinline unsigned
342
- bsfw(machine_word_t n)
343
- {
344
- STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
345
- if (WORDBITS == 32)
346
- return bsf32(n);
347
- else
348
- return bsf64(n);
349
- }
350
-
351
- #endif /* COMMON_COMMON_DEFS_H */