libdeflate 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +1 -0
  6. data/.rubocop_todo.yml +9 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +52 -0
  11. data/Rakefile +15 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/libdeflate/extconf.rb +14 -0
  15. data/ext/libdeflate/libdeflate/.gitignore +19 -0
  16. data/ext/libdeflate/libdeflate/COPYING +21 -0
  17. data/ext/libdeflate/libdeflate/Makefile +231 -0
  18. data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
  19. data/ext/libdeflate/libdeflate/NEWS +57 -0
  20. data/ext/libdeflate/libdeflate/README.md +170 -0
  21. data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
  22. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
  23. data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
  24. data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
  25. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
  26. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
  27. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
  28. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
  29. data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
  30. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
  31. data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
  32. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
  33. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
  34. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
  35. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
  36. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
  37. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
  38. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
  39. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
  40. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
  41. data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
  42. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
  43. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
  44. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
  45. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
  46. data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
  47. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
  48. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
  49. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
  50. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
  51. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
  52. data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
  53. data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
  54. data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
  55. data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
  56. data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
  57. data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
  58. data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
  59. data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
  60. data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
  67. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  68. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
  69. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
  70. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
  71. data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
  72. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
  73. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
  74. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
  75. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
  76. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
  77. data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
  78. data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
  79. data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
  80. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
  81. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
  82. data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
  83. data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
  84. data/ext/libdeflate/libdeflate_ext.c +389 -0
  85. data/ext/libdeflate/libdeflate_ext.h +8 -0
  86. data/lib/libdeflate.rb +2 -0
  87. data/lib/libdeflate/version.rb +3 -0
  88. data/libdeflate.gemspec +33 -0
  89. metadata +230 -0
@@ -0,0 +1,64 @@
1
+ #
2
+ # Makefile for the Microsoft toolchain
3
+ #
4
+ # Usage:
5
+ # nmake /f Makefile.msc
6
+ #
7
+
8
+ .SUFFIXES: .c .obj .dllobj
9
+
10
+ CC = cl
11
+ LD = link
12
+ AR = lib
13
+ CFLAGS = /MD /O2 -I. -Icommon
14
+ LDFLAGS =
15
+
16
+ STATIC_LIB = libdeflatestatic.lib
17
+ SHARED_LIB = libdeflate.dll
18
+ IMPORT_LIB = libdeflate.lib
19
+
20
+ STATIC_LIB_OBJ = \
21
+ lib/aligned_malloc.obj \
22
+ lib/adler32.obj \
23
+ lib/crc32.obj \
24
+ lib/deflate_compress.obj \
25
+ lib/deflate_decompress.obj \
26
+ lib/gzip_compress.obj \
27
+ lib/gzip_decompress.obj \
28
+ lib/x86_cpu_features.obj \
29
+ lib/zlib_compress.obj \
30
+ lib/zlib_decompress.obj
31
+
32
+ SHARED_LIB_OBJ = $(STATIC_LIB_OBJ:.obj=.dllobj)
33
+
34
+ PROG_COMMON_OBJ = programs/prog_util.obj \
35
+ programs/tgetopt.obj \
36
+ $(STATIC_LIB)
37
+
38
+ PROG_CFLAGS = $(CFLAGS) -Iprograms
39
+
40
+ all: $(STATIC_LIB) $(SHARED_LIB) $(IMPORT_LIB) gzip.exe gunzip.exe
41
+
42
+ .c.obj:
43
+ $(CC) -c /Fo$@ $(CFLAGS) $**
44
+
45
+ .c.dllobj:
46
+ $(CC) -c /Fo$@ $(CFLAGS) /DLIBDEFLATE_DLL $**
47
+
48
+ $(STATIC_LIB): $(STATIC_LIB_OBJ)
49
+ $(AR) $(ARFLAGS) -out:$@ $(STATIC_LIB_OBJ)
50
+
51
+ $(SHARED_LIB): $(SHARED_LIB_OBJ)
52
+ $(LD) $(LDFLAGS) -out:$@ -dll -implib:$(IMPORT_LIB) $(SHARED_LIB_OBJ)
53
+
54
+ $(IMPORT_LIB): $(SHARED_LIB)
55
+
56
+ gzip.exe:programs/gzip.obj $(PROG_COMMON_OBJ)
57
+ $(LD) $(LDFLAGS) -out:$@ $**
58
+
59
+ gunzip.exe:gzip.exe
60
+ copy $** $@
61
+
62
+ clean:
63
+ -del *.dll *.exe *.exp libdeflate.lib libdeflatestatic.lib gzip.lib \
64
+ lib\*.obj lib\*.dllobj programs\*.obj 2>nul
@@ -0,0 +1,57 @@
1
+ Version 0.7:
2
+ Fixed a very rare bug that caused data to be compressed incorrectly.
3
+ The bug affected compression levels 7 and below since libdeflate v0.2.
4
+ Although there have been no user reports of the bug, and I believe it
5
+ would have been highly unlikely to encounter on realistic data, it could
6
+ occur on data specially crafted to reproduce it.
7
+
8
+ Fixed a compilation error when building with clang 3.7.
9
+
10
+ Version 0.6:
11
+ Various improvements to the gzip program's behavior.
12
+
13
+ Faster CRC-32 on AVX-capable processors.
14
+
15
+ Other minor changes.
16
+
17
+ Version 0.5:
18
+ The CRC-32 checksum algorithm has been optimized with carryless
19
+ multiplication instructions for x86_64 (PCLMUL). This speeds up gzip
20
+ compression and decompression.
21
+
22
+ Build fixes for certain platforms and compilers.
23
+
24
+ Added more test programs and scripts.
25
+
26
+ libdeflate is now entirely MIT-licensed.
27
+
28
+ Version 0.4:
29
+ The Adler-32 checksum algorithm has been optimized with vector
30
+ instructions for x86_64 (SSE2 and AVX2) and ARM (NEON). This speeds up
31
+ zlib compression and decompression.
32
+
33
+ To avoid naming collisions, functions and definitions in libdeflate's
34
+ API have been renamed to be prefixed with "libdeflate_" or
35
+ "LIBDEFLATE_". Programs using the old API will need to be updated.
36
+
37
+ Various bug fixes and other improvements.
38
+
39
+ Version 0.3:
40
+ Some bug fixes and other minor changes.
41
+
42
+ Version 0.2:
43
+ Implemented a new block splitting algorithm which typically improves the
44
+ compression ratio slightly at all compression levels.
45
+
46
+ The compressor now outputs each block using the cheapest type (dynamic
47
+ Huffman, static Huffman, or uncompressed).
48
+
49
+ The gzip program has received an overhaul and now behaves more like the
50
+ standard version.
51
+
52
+ Build system updates, including: some build options were changed and
53
+ some build options were removed, and the default 'make' target now
54
+ includes the gzip program as well as the library.
55
+
56
+ Version 0.1:
57
+ Initial official release.
@@ -0,0 +1,170 @@
1
+ # Overview
2
+
3
+ libdeflate is a library for fast, whole-buffer DEFLATE-based compression and
4
+ decompression.
5
+
6
+ The supported formats are:
7
+
8
+ - DEFLATE (raw)
9
+ - zlib (a.k.a. DEFLATE with a zlib wrapper)
10
+ - gzip (a.k.a. DEFLATE with a gzip wrapper)
11
+
12
+ libdeflate is heavily optimized. It is significantly faster than the zlib
13
+ library, both for compression and decompression, and especially on x86
14
+ processors. In addition, libdeflate provides optional high compression modes
15
+ that provide a better compression ratio than zlib's "level 9".
16
+
17
+ libdeflate itself is a library, but the following command-line programs which
18
+ use this library are also provided:
19
+
20
+ * gzip (or gunzip), a program which mostly behaves like the standard equivalent,
21
+ except that it does not yet have good streaming support and therefore does not
22
+ yet support very large files
23
+ * benchmark, a program for benchmarking in-memory compression and decompression
24
+
25
+ # Building
26
+
27
+ ## For UNIX
28
+
29
+ Just run `make`. You need GNU Make and either GCC or Clang. GCC is recommended
30
+ because it builds slightly faster binaries. There is no `make install` yet;
31
+ just copy the file(s) to where you want.
32
+
33
+ By default, all targets are built, including the library and programs, with the
34
+ exception of the `benchmark` program. `make help` shows the available targets.
35
+ There are also several options which can be set on the `make` command line. See
36
+ the Makefile for details.
37
+
38
+ ## For Windows
39
+
40
+ MinGW (GCC) is the recommended compiler to use when building binaries for
41
+ Windows. MinGW can be used on either Windows or Linux. On Windows, you'll need
42
+ the compiler as well as GNU Make and basic UNIX tools such as `sh`. This is
43
+ most easily set up with Cygwin, but some standalone MinGW distributions for
44
+ Windows also work. Or, on Linux, you'll need to install the `mingw-w64-gcc` or
45
+ similarly-named package. Once ready, do the build using a command like:
46
+
47
+ $ make CC=x86_64-w64-mingw32-gcc
48
+
49
+ Some MinGW distributions for Windows may require `CC=gcc` instead.
50
+
51
+ Windows binaries prebuilt with MinGW may also be downloaded from
52
+ https://github.com/ebiggers/libdeflate/releases.
53
+
54
+ Alternatively, a separate Makefile, `Makefile.msc`, is provided for the tools
55
+ that come with Visual Studio, for those who strongly prefer that toolchain.
56
+
57
+ As usual, 64-bit binaries are faster than 32-bit binaries and should be
58
+ preferred whenever possible.
59
+
60
+ # API
61
+
62
+ libdeflate has a simple API that is not zlib-compatible. You can create
63
+ compressors and decompressors and use them to compress or decompress buffers.
64
+ See libdeflate.h for details.
65
+
66
+ There is currently no support for streaming. This has been considered, but it
67
+ always significantly increases complexity and slows down fast paths.
68
+ Unfortunately, at this point it remains a future TODO. So: if your application
69
+ compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a
70
+ great choice for you; that's what it's designed to do. This is perfect for
71
+ certain use cases such as transparent filesystem compression. But if your
72
+ application compresses large files as a single compressed stream, similarly to
73
+ the `gzip` program, then libdeflate isn't for you.
74
+
75
+ Note that with chunk-based compression, you generally should have the
76
+ uncompressed size of each chunk stored outside of the compressed data itself.
77
+ This enables you to allocate an output buffer of the correct size without
78
+ guessing. However, libdeflate's decompression routines do optionally provide
79
+ the actual number of output bytes in case you need it.
80
+
81
+ # DEFLATE vs. zlib vs. gzip
82
+
83
+ The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib
84
+ format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format
85
+ ([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with
86
+ each other as well as with the [zlib software library](http://zlib.net), which
87
+ actually supports all three formats. libdeflate (this library) also supports
88
+ all three formats.
89
+
90
+ Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different
91
+ wrappers for this stream. Both zlib and gzip include checksums, but gzip can
92
+ include extra information such as the original filename. Generally, you should
93
+ choose a format as follows:
94
+
95
+ - If you are compressing whole files with no subdivisions, similar to the `gzip`
96
+ program, you probably should use the gzip format.
97
+ - Otherwise, if you don't need the features of the gzip header and footer but do
98
+ still want a checksum for corruption detection, you probably should use the
99
+ zlib format.
100
+ - Otherwise, you probably should use raw DEFLATE. This is ideal if you don't
101
+ need checksums, e.g. because they're simply not needed for your use case or
102
+ because you already compute your own checksums that are stored separately from
103
+ the compressed stream.
104
+
105
+ Note that gzip and zlib streams can be distinguished from each other based on
106
+ their starting bytes, but this is not necessarily true of raw DEFLATE streams.
107
+
108
+ # Compression levels
109
+
110
+ An often-underappreciated fact of compression formats such as DEFLATE is that
111
+ there are an enormous number of different ways that a given input could be
112
+ compressed. Different algorithms and different amounts of computation time will
113
+ result in different compression ratios, while remaining equally compatible with
114
+ the decompressor.
115
+
116
+ For this reason, the commonly used zlib library provides nine compression
117
+ levels. Level 1 is the fastest but provides the worst compression; level 9
118
+ provides the best compression but is the slowest. It defaults to level 6.
119
+ libdeflate uses this same design but is designed to improve on both zlib's
120
+ performance *and* compression ratio at every compression level. In addition,
121
+ libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
122
+ minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
123
+ significantly improve on zlib's compression ratio.
124
+
125
+ If you are using DEFLATE (or zlib, or gzip) in your application, you should test
126
+ different levels to see which works best for your application.
127
+
128
+ # Motivation
129
+
130
+ Despite DEFLATE's widespread use mainly through the zlib library, in the
131
+ compression community this format from the early 1990s is often considered
132
+ obsolete. And in a few significant ways, it is.
133
+
134
+ So why implement DEFLATE at all, instead of focusing entirely on
135
+ bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]?
136
+
137
+ To do something better, you need to understand what came before. And it turns
138
+ out that most ideas from DEFLATE are still relevant. Many of the newer formats
139
+ share a similar structure to DEFLATE, with different tweaks. The effects of
140
+ trivial but very useful tweaks, such as increasing the sliding window size, are
141
+ often confused with the effects of nontrivial but less useful tweaks. And
142
+ actually, many of these formats are similar enough that common algorithms and
143
+ optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
144
+
145
+ In addition, comparing compressors fairly is difficult because the performance
146
+ of a compressor depends heavily on optimizations which are not intrinsic to the
147
+ compression format itself. In this respect, the zlib library sometimes compares
148
+ poorly to certain newer code because zlib is not well optimized for modern
149
+ processors. libdeflate addresses this by providing an optimized DEFLATE
150
+ implementation which can be used for benchmarking purposes. And, of course,
151
+ real applications can use it as well.
152
+
153
+ That being said, I have also started [a separate
154
+ project](https://github.com/ebiggers/xpack) for an experimental, more modern
155
+ compression format.
156
+
157
+ # License
158
+
159
+ libdeflate is [MIT-licensed](COPYING).
160
+
161
+ Additional notes (informational only):
162
+
163
+ - I am not aware of any patents covering libdeflate.
164
+
165
+ - Old versions of libdeflate were public domain; I only started copyrighting
166
+ changes in newer versions. Portions of the source code that have not been
167
+ changed since being released in a public domain version can theoretically
168
+ still be used as public domain if you want to. But for practical purposes, it
169
+ probably would be easier to just take the MIT license option, which is nearly
170
+ the same anyway.
@@ -0,0 +1,351 @@
1
+ /*
2
+ * common_defs.h
3
+ *
4
+ * Copyright 2016 Eric Biggers
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person
7
+ * obtaining a copy of this software and associated documentation
8
+ * files (the "Software"), to deal in the Software without
9
+ * restriction, including without limitation the rights to use,
10
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following
13
+ * conditions:
14
+ *
15
+ * The above copyright notice and this permission notice shall be
16
+ * included in all copies or substantial portions of the Software.
17
+ *
18
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ */
27
+
28
+ #ifndef COMMON_COMMON_DEFS_H
29
+ #define COMMON_COMMON_DEFS_H
30
+
31
+ #ifdef __GNUC__
32
+ # include "compiler_gcc.h"
33
+ #elif defined(_MSC_VER)
34
+ # include "compiler_msc.h"
35
+ #else
36
+ # pragma message("Unrecognized compiler. Please add a header file for your compiler. Compilation will proceed, but performance may suffer!")
37
+ #endif
38
+
39
+ /* ========================================================================== */
40
+ /* Type definitions */
41
+ /* ========================================================================== */
42
+
43
+ #include <stddef.h> /* size_t */
44
+
45
+ #ifndef __bool_true_false_are_defined
46
+ # include <stdbool.h> /* bool */
47
+ #endif
48
+
49
+ /* Fixed-width integer types */
50
+ #ifndef PRIu32
51
+ # include <inttypes.h>
52
+ #endif
53
+ typedef uint8_t u8;
54
+ typedef uint16_t u16;
55
+ typedef uint32_t u32;
56
+ typedef uint64_t u64;
57
+ typedef int8_t s8;
58
+ typedef int16_t s16;
59
+ typedef int32_t s32;
60
+ typedef int64_t s64;
61
+
62
+ /*
63
+ * Word type of the target architecture. Use 'size_t' instead of 'unsigned
64
+ * long' to account for platforms such as Windows that use 32-bit 'unsigned
65
+ * long' on 64-bit architectures.
66
+ */
67
+ typedef size_t machine_word_t;
68
+
69
+ /* Number of bytes in a word */
70
+ #define WORDBYTES ((int)sizeof(machine_word_t))
71
+
72
+ /* Number of bits in a word */
73
+ #define WORDBITS (8 * WORDBYTES)
74
+
75
+ /* ========================================================================== */
76
+ /* Optional compiler features */
77
+ /* ========================================================================== */
78
+
79
+ /* LIBEXPORT - export a function from a shared library */
80
+ #ifndef LIBEXPORT
81
+ # define LIBEXPORT
82
+ #endif
83
+
84
+ /* inline - suggest that a function be inlined */
85
+ #ifndef inline
86
+ # define inline
87
+ #endif
88
+
89
+ /* forceinline - force a function to be inlined, if possible */
90
+ #ifndef forceinline
91
+ # define forceinline inline
92
+ #endif
93
+
94
+ /* restrict - annotate a non-aliased pointer */
95
+ #ifndef restrict
96
+ # define restrict
97
+ #endif
98
+
99
+ /* likely(expr) - hint that an expression is usually true */
100
+ #ifndef likely
101
+ # define likely(expr) (expr)
102
+ #endif
103
+
104
+ /* unlikely(expr) - hint that an expression is usually false */
105
+ #ifndef unlikely
106
+ # define unlikely(expr) (expr)
107
+ #endif
108
+
109
+ /* prefetchr(addr) - prefetch into L1 cache for read */
110
+ #ifndef prefetchr
111
+ # define prefetchr(addr)
112
+ #endif
113
+
114
+ /* prefetchw(addr) - prefetch into L1 cache for write */
115
+ #ifndef prefetchw
116
+ # define prefetchw(addr)
117
+ #endif
118
+
119
+ /* Does the compiler support the 'target' function attribute? */
120
+ #ifndef COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
121
+ # define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
122
+ #endif
123
+
124
+ /* Are target-specific intrinsics supported in 'target' attribute functions? */
125
+ #ifndef COMPILER_SUPPORTS_TARGET_INTRINSICS
126
+ # define COMPILER_SUPPORTS_TARGET_INTRINSICS 0
127
+ #endif
128
+
129
+ /* Which targets are supported with the 'target' function attribute? */
130
+ #ifndef COMPILER_SUPPORTS_PCLMUL_TARGET
131
+ # define COMPILER_SUPPORTS_PCLMUL_TARGET 0
132
+ #endif
133
+ #ifndef COMPILER_SUPPORTS_BMI2_TARGET
134
+ # define COMPILER_SUPPORTS_BMI2_TARGET 0
135
+ #endif
136
+ #ifndef COMPILER_SUPPORTS_AVX_TARGET
137
+ # define COMPILER_SUPPORTS_AVX_TARGET 0
138
+ #endif
139
+ #ifndef COMPILER_SUPPORTS_AVX2_TARGET
140
+ # define COMPILER_SUPPORTS_AVX2_TARGET 0
141
+ #endif
142
+
143
+ /* _aligned_attribute(n) - declare that the annotated variable, or variables of
144
+ * the annotated type, are to be aligned on n-byte boundaries. No fallback is defined here: the macro stays undefined unless a compiler header provides it. */
145
+ #ifndef _aligned_attribute
146
+ #endif
147
+
148
+ /* ========================================================================== */
149
+ /* Miscellaneous macros */
150
+ /* ========================================================================== */
151
+
152
+ #define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
153
+ #define MIN(a, b) ((a) <= (b) ? (a) : (b))
154
+ #define MAX(a, b) ((a) >= (b) ? (a) : (b))
155
+ #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
156
+ #define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
157
+ #define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
158
+
159
+ /* ========================================================================== */
160
+ /* Endianness handling */
161
+ /* ========================================================================== */
162
+
163
+ /*
164
+ * CPU_IS_LITTLE_ENDIAN() - a macro which evaluates to 1 if the CPU is little
165
+ * endian or 0 if it is big endian. The macro should be defined in a way such
166
+ * that the compiler can evaluate it at compilation time. If not defined, a
167
+ * fallback is used.
168
+ */
169
+ #ifndef CPU_IS_LITTLE_ENDIAN
170
+ static forceinline int CPU_IS_LITTLE_ENDIAN(void)
171
+ {
172
+ union {
173
+ unsigned int v;
174
+ unsigned char b;
175
+ } u;
176
+ u.v = 1;
177
+ return u.b;
178
+ }
179
+ #endif
180
+
181
+ /* bswap16(n) - swap the bytes of a 16-bit integer */
182
+ #ifndef bswap16
183
+ static forceinline u16 bswap16(u16 n)
184
+ {
185
+ return (n << 8) | (n >> 8);
186
+ }
187
+ #endif
188
+
189
+ /* bswap32(n) - swap the bytes of a 32-bit integer */
190
+ #ifndef bswap32
191
+ static forceinline u32 bswap32(u32 n)
192
+ {
193
+ return ((n & 0x000000FF) << 24) |
194
+ ((n & 0x0000FF00) << 8) |
195
+ ((n & 0x00FF0000) >> 8) |
196
+ ((n & 0xFF000000) >> 24);
197
+ }
198
+ #endif
199
+
200
+ /* bswap64(n) - swap the bytes of a 64-bit integer */
201
+ #ifndef bswap64
202
+ static forceinline u64 bswap64(u64 n)
203
+ {
204
+ return ((n & 0x00000000000000FF) << 56) |
205
+ ((n & 0x000000000000FF00) << 40) |
206
+ ((n & 0x0000000000FF0000) << 24) |
207
+ ((n & 0x00000000FF000000) << 8) |
208
+ ((n & 0x000000FF00000000) >> 8) |
209
+ ((n & 0x0000FF0000000000) >> 24) |
210
+ ((n & 0x00FF000000000000) >> 40) |
211
+ ((n & 0xFF00000000000000) >> 56);
212
+ }
213
+ #endif
214
+
215
+ #define le16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n))
216
+ #define le32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n))
217
+ #define le64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n))
218
+ #define be16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n))
219
+ #define be32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n))
220
+ #define be64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap64(n) : (n))
221
+
222
+ /* ========================================================================== */
223
+ /* Unaligned memory accesses */
224
+ /* ========================================================================== */
225
+
226
+ /*
227
+ * UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
228
+ * can be performed efficiently on the target platform.
229
+ */
230
+ #ifndef UNALIGNED_ACCESS_IS_FAST
231
+ # define UNALIGNED_ACCESS_IS_FAST 0
232
+ #endif
233
+
234
+ /*
235
+ * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
236
+ * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
237
+ * which load and store variables of type 'type' from/to unaligned memory
238
+ * addresses. If not defined, a fallback is used.
239
+ */
240
+ #ifndef DEFINE_UNALIGNED_TYPE
241
+
242
+ /*
243
+ * Although memcpy() may seem inefficient, it *usually* gets optimized
244
+ * appropriately by modern compilers. It's portable and may be the best we can
245
+ * do for a fallback...
246
+ */
247
+ #include <string.h>
248
+
249
+ #define DEFINE_UNALIGNED_TYPE(type) \
250
+ \
251
+ static forceinline type \
252
+ load_##type##_unaligned(const void *p) \
253
+ { \
254
+ type v; \
255
+ memcpy(&v, p, sizeof(v)); \
256
+ return v; \
257
+ } \
258
+ \
259
+ static forceinline void \
260
+ store_##type##_unaligned(type v, void *p) \
261
+ { \
262
+ memcpy(p, &v, sizeof(v)); \
263
+ }
264
+
265
+ #endif /* !DEFINE_UNALIGNED_TYPE */
266
+
267
+ /* ========================================================================== */
268
+ /* Bit scan functions */
269
+ /* ========================================================================== */
270
+
271
+ /*
272
+ * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
273
+ * significant end) of the *most* significant 1 bit in the input value. The
274
+ * input value must be nonzero!
275
+ */
276
+
277
+ #ifndef bsr32
278
+ static forceinline unsigned
279
+ bsr32(u32 n)
280
+ {
281
+ unsigned i = 0;
282
+ while ((n >>= 1) != 0)
283
+ i++;
284
+ return i;
285
+ }
286
+ #endif
287
+
288
+ #ifndef bsr64
289
+ static forceinline unsigned
290
+ bsr64(u64 n)
291
+ {
292
+ unsigned i = 0;
293
+ while ((n >>= 1) != 0)
294
+ i++;
295
+ return i;
296
+ }
297
+ #endif
298
+
299
+ static forceinline unsigned
300
+ bsrw(machine_word_t n)
301
+ {
302
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
303
+ if (WORDBITS == 32)
304
+ return bsr32(n);
305
+ else
306
+ return bsr64(n);
307
+ }
308
+
309
+ /*
310
+ * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
311
+ * significant end) of the *least* significant 1 bit in the input value. The
312
+ * input value must be nonzero!
313
+ */
314
+
315
+ #ifndef bsf32
316
+ static forceinline unsigned
317
+ bsf32(u32 n)
318
+ {
319
+ unsigned i = 0;
320
+ while ((n & 1) == 0) {
321
+ i++;
322
+ n >>= 1;
323
+ }
324
+ return i;
325
+ }
326
+ #endif
327
+
328
+ #ifndef bsf64
329
+ static forceinline unsigned
330
+ bsf64(u64 n)
331
+ {
332
+ unsigned i = 0;
333
+ while ((n & 1) == 0) {
334
+ i++;
335
+ n >>= 1;
336
+ }
337
+ return i;
338
+ }
339
+ #endif
340
+
341
+ static forceinline unsigned
342
+ bsfw(machine_word_t n)
343
+ {
344
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
345
+ if (WORDBITS == 32)
346
+ return bsf32(n);
347
+ else
348
+ return bsf64(n);
349
+ }
350
+
351
+ #endif /* COMMON_COMMON_DEFS_H */