libdeflate 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +52 -0
- data/Rakefile +15 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/libdeflate/extconf.rb +14 -0
- data/ext/libdeflate/libdeflate/.gitignore +19 -0
- data/ext/libdeflate/libdeflate/COPYING +21 -0
- data/ext/libdeflate/libdeflate/Makefile +231 -0
- data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
- data/ext/libdeflate/libdeflate/NEWS +57 -0
- data/ext/libdeflate/libdeflate/README.md +170 -0
- data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
- data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
- data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
- data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
- data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
- data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
- data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
- data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
- data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
- data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
- data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
- data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
- data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
- data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
- data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
- data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
- data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
- data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
- data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
- data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
- data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
- data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
- data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
- data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
- data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
- data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
- data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
- data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
- data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
- data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
- data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
- data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
- data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
- data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
- data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
- data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
- data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
- data/ext/libdeflate/libdeflate_ext.c +389 -0
- data/ext/libdeflate/libdeflate_ext.h +8 -0
- data/lib/libdeflate.rb +2 -0
- data/lib/libdeflate/version.rb +3 -0
- data/libdeflate.gemspec +33 -0
- metadata +230 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
#
|
2
|
+
# Makefile for the Microsoft toolchain
|
3
|
+
#
|
4
|
+
# Usage:
|
5
|
+
# nmake /f Makefile.msc
|
6
|
+
#
|
7
|
+
|
8
|
+
.SUFFIXES: .c .obj .dllobj
|
9
|
+
|
10
|
+
CC = cl
|
11
|
+
LD = link
|
12
|
+
AR = lib
|
13
|
+
CFLAGS = /MD /O2 -I. -Icommon
|
14
|
+
LDFLAGS =
|
15
|
+
|
16
|
+
STATIC_LIB = libdeflatestatic.lib
|
17
|
+
SHARED_LIB = libdeflate.dll
|
18
|
+
IMPORT_LIB = libdeflate.lib
|
19
|
+
|
20
|
+
STATIC_LIB_OBJ = \
|
21
|
+
lib/aligned_malloc.obj \
|
22
|
+
lib/adler32.obj \
|
23
|
+
lib/crc32.obj \
|
24
|
+
lib/deflate_compress.obj \
|
25
|
+
lib/deflate_decompress.obj \
|
26
|
+
lib/gzip_compress.obj \
|
27
|
+
lib/gzip_decompress.obj \
|
28
|
+
lib/x86_cpu_features.obj \
|
29
|
+
lib/zlib_compress.obj \
|
30
|
+
lib/zlib_decompress.obj
|
31
|
+
|
32
|
+
SHARED_LIB_OBJ = $(STATIC_LIB_OBJ:.obj=.dllobj)
|
33
|
+
|
34
|
+
PROG_COMMON_OBJ = programs/prog_util.obj \
|
35
|
+
programs/tgetopt.obj \
|
36
|
+
$(STATIC_LIB)
|
37
|
+
|
38
|
+
PROG_CFLAGS = $(CFLAGS) -Iprograms
|
39
|
+
|
40
|
+
all: $(STATIC_LIB) $(SHARED_LIB) $(IMPORT_LIB) gzip.exe gunzip.exe
|
41
|
+
|
42
|
+
.c.obj:
|
43
|
+
$(CC) -c /Fo$@ $(CFLAGS) $**
|
44
|
+
|
45
|
+
.c.dllobj:
|
46
|
+
$(CC) -c /Fo$@ $(CFLAGS) /DLIBDEFLATE_DLL $**
|
47
|
+
|
48
|
+
$(STATIC_LIB): $(STATIC_LIB_OBJ)
|
49
|
+
$(AR) $(ARFLAGS) -out:$@ $(STATIC_LIB_OBJ)
|
50
|
+
|
51
|
+
$(SHARED_LIB): $(SHARED_LIB_OBJ)
|
52
|
+
$(LD) $(LDFLAGS) -out:$@ -dll -implib:$(IMPORT_LIB) $(SHARED_LIB_OBJ)
|
53
|
+
|
54
|
+
$(IMPORT_LIB): $(SHARED_LIB)
|
55
|
+
|
56
|
+
gzip.exe:programs/gzip.obj $(PROG_COMMON_OBJ)
|
57
|
+
$(LD) $(LDFLAGS) -out:$@ $**
|
58
|
+
|
59
|
+
gunzip.exe:gzip.exe
|
60
|
+
copy $** $@
|
61
|
+
|
62
|
+
clean:
|
63
|
+
-del *.dll *.exe *.exp libdeflate.lib libdeflatestatic.lib gzip.lib \
|
64
|
+
lib\*.obj lib\*.dllobj programs\*.obj 2>nul
|
@@ -0,0 +1,57 @@
|
|
1
|
+
Version 0.7:
|
2
|
+
Fixed a very rare bug that caused data to be compressed incorrectly.
|
3
|
+
The bug affected compression levels 7 and below since libdeflate v0.2.
|
4
|
+
Although there have been no user reports of the bug, and I believe it
|
5
|
+
would have been highly unlikely to encounter on realistic data, it could
|
6
|
+
occur on data specially crafted to reproduce it.
|
7
|
+
|
8
|
+
Fixed a compilation error when building with clang 3.7.
|
9
|
+
|
10
|
+
Version 0.6:
|
11
|
+
Various improvements to the gzip program's behavior.
|
12
|
+
|
13
|
+
Faster CRC-32 on AVX-capable processors.
|
14
|
+
|
15
|
+
Other minor changes.
|
16
|
+
|
17
|
+
Version 0.5:
|
18
|
+
The CRC-32 checksum algorithm has been optimized with carryless
|
19
|
+
multiplication instructions for x86_64 (PCLMUL). This speeds up gzip
|
20
|
+
compression and decompression.
|
21
|
+
|
22
|
+
Build fixes for certain platforms and compilers.
|
23
|
+
|
24
|
+
Added more test programs and scripts.
|
25
|
+
|
26
|
+
libdeflate is now entirely MIT-licensed.
|
27
|
+
|
28
|
+
Version 0.4:
|
29
|
+
The Adler-32 checksum algorithm has been optimized with vector
|
30
|
+
instructions for x86_64 (SSE2 and AVX2) and ARM (NEON). This speeds up
|
31
|
+
zlib compression and decompression.
|
32
|
+
|
33
|
+
To avoid naming collisions, functions and definitions in libdeflate's
|
34
|
+
API have been renamed to be prefixed with "libdeflate_" or
|
35
|
+
"LIBDEFLATE_". Programs using the old API will need to be updated.
|
36
|
+
|
37
|
+
Various bug fixes and other improvements.
|
38
|
+
|
39
|
+
Version 0.3:
|
40
|
+
Some bug fixes and other minor changes.
|
41
|
+
|
42
|
+
Version 0.2:
|
43
|
+
Implemented a new block splitting algorithm which typically improves the
|
44
|
+
compression ratio slightly at all compression levels.
|
45
|
+
|
46
|
+
The compressor now outputs each block using the cheapest type (dynamic
|
47
|
+
Huffman, static Huffman, or uncompressed).
|
48
|
+
|
49
|
+
The gzip program has received an overhaul and now behaves more like the
|
50
|
+
standard version.
|
51
|
+
|
52
|
+
Build system updates, including: some build options were changed and
|
53
|
+
some build options were removed, and the default 'make' target now
|
54
|
+
includes the gzip program as well as the library.
|
55
|
+
|
56
|
+
Version 0.1:
|
57
|
+
Initial official release.
|
@@ -0,0 +1,170 @@
|
|
1
|
+
# Overview
|
2
|
+
|
3
|
+
libdeflate is a library for fast, whole-buffer DEFLATE-based compression and
|
4
|
+
decompression.
|
5
|
+
|
6
|
+
The supported formats are:
|
7
|
+
|
8
|
+
- DEFLATE (raw)
|
9
|
+
- zlib (a.k.a. DEFLATE with a zlib wrapper)
|
10
|
+
- gzip (a.k.a. DEFLATE with a gzip wrapper)
|
11
|
+
|
12
|
+
libdeflate is heavily optimized. It is significantly faster than the zlib
|
13
|
+
library, both for compression and decompression, and especially on x86
|
14
|
+
processors. In addition, libdeflate provides optional high compression modes
|
15
|
+
that provide a better compression ratio than zlib's "level 9".
|
16
|
+
|
17
|
+
libdeflate itself is a library, but the following command-line programs which
|
18
|
+
use this library are also provided:
|
19
|
+
|
20
|
+
* gzip (or gunzip), a program which mostly behaves like the standard equivalent,
|
21
|
+
except that it does not yet have good streaming support and therefore does not
|
22
|
+
yet support very large files
|
23
|
+
* benchmark, a program for benchmarking in-memory compression and decompression
|
24
|
+
|
25
|
+
# Building
|
26
|
+
|
27
|
+
## For UNIX
|
28
|
+
|
29
|
+
Just run `make`. You need GNU Make and either GCC or Clang. GCC is recommended
|
30
|
+
because it builds slightly faster binaries. There is no `make install` yet;
|
31
|
+
just copy the file(s) to where you want.
|
32
|
+
|
33
|
+
By default, all targets are built, including the library and programs, with the
|
34
|
+
exception of the `benchmark` program. `make help` shows the available targets.
|
35
|
+
There are also several options which can be set on the `make` command line. See
|
36
|
+
the Makefile for details.
|
37
|
+
|
38
|
+
## For Windows
|
39
|
+
|
40
|
+
MinGW (GCC) is the recommended compiler to use when building binaries for
|
41
|
+
Windows. MinGW can be used on either Windows or Linux. On Windows, you'll need
|
42
|
+
the compiler as well as GNU Make and basic UNIX tools such as `sh`. This is
|
43
|
+
most easily set up with Cygwin, but some standalone MinGW distributions for
|
44
|
+
Windows also work. Or, on Linux, you'll need to install the `mingw-w64-gcc` or
|
45
|
+
similarly-named package. Once ready, do the build using a command like:
|
46
|
+
|
47
|
+
$ make CC=x86_64-w64-mingw32-gcc
|
48
|
+
|
49
|
+
Some MinGW distributions for Windows may require `CC=gcc` instead.
|
50
|
+
|
51
|
+
Windows binaries prebuilt with MinGW may also be downloaded from
|
52
|
+
https://github.com/ebiggers/libdeflate/releases.
|
53
|
+
|
54
|
+
Alternatively, a separate Makefile, `Makefile.msc`, is provided for the tools
|
55
|
+
that come with Visual Studio, for those who strongly prefer that toolchain.
|
56
|
+
|
57
|
+
As usual, 64-bit binaries are faster than 32-bit binaries and should be
|
58
|
+
preferred whenever possible.
|
59
|
+
|
60
|
+
# API
|
61
|
+
|
62
|
+
libdeflate has a simple API that is not zlib-compatible. You can create
|
63
|
+
compressors and decompressors and use them to compress or decompress buffers.
|
64
|
+
See libdeflate.h for details.
|
65
|
+
|
66
|
+
There is currently no support for streaming. This has been considered, but it
|
67
|
+
always significantly increases complexity and slows down fast paths.
|
68
|
+
Unfortunately, at this point it remains a future TODO. So: if your application
|
69
|
+
compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a
|
70
|
+
great choice for you; that's what it's designed to do. This is perfect for
|
71
|
+
certain use cases such as transparent filesystem compression. But if your
|
72
|
+
application compresses large files as a single compressed stream, similarly to
|
73
|
+
the `gzip` program, then libdeflate isn't for you.
|
74
|
+
|
75
|
+
Note that with chunk-based compression, you generally should have the
|
76
|
+
uncompressed size of each chunk stored outside of the compressed data itself.
|
77
|
+
This enables you to allocate an output buffer of the correct size without
|
78
|
+
guessing. However, libdeflate's decompression routines do optionally provide
|
79
|
+
the actual number of output bytes in case you need it.
|
80
|
+
|
81
|
+
# DEFLATE vs. zlib vs. gzip
|
82
|
+
|
83
|
+
The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib
|
84
|
+
format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format
|
85
|
+
([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with
|
86
|
+
each other as well as with the [zlib software library](http://zlib.net), which
|
87
|
+
actually supports all three formats. libdeflate (this library) also supports
|
88
|
+
all three formats.
|
89
|
+
|
90
|
+
Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different
|
91
|
+
wrappers for this stream. Both zlib and gzip include checksums, but gzip can
|
92
|
+
include extra information such as the original filename. Generally, you should
|
93
|
+
choose a format as follows:
|
94
|
+
|
95
|
+
- If you are compressing whole files with no subdivisions, similar to the `gzip`
|
96
|
+
program, you probably should use the gzip format.
|
97
|
+
- Otherwise, if you don't need the features of the gzip header and footer but do
|
98
|
+
still want a checksum for corruption detection, you probably should use the
|
99
|
+
zlib format.
|
100
|
+
- Otherwise, you probably should use raw DEFLATE. This is ideal if you don't
|
101
|
+
need checksums, e.g. because they're simply not needed for your use case or
|
102
|
+
because you already compute your own checksums that are stored separately from
|
103
|
+
the compressed stream.
|
104
|
+
|
105
|
+
Note that gzip and zlib streams can be distinguished from each other based on
|
106
|
+
their starting bytes, but this is not necessarily true of raw DEFLATE streams.
|
107
|
+
|
108
|
+
# Compression levels
|
109
|
+
|
110
|
+
An often-underappreciated fact of compression formats such as DEFLATE is that
|
111
|
+
there are an enormous number of different ways that a given input could be
|
112
|
+
compressed. Different algorithms and different amounts of computation time will
|
113
|
+
result in different compression ratios, while remaining equally compatible with
|
114
|
+
the decompressor.
|
115
|
+
|
116
|
+
For this reason, the commonly used zlib library provides nine compression
|
117
|
+
levels. Level 1 is the fastest but provides the worst compression; level 9
|
118
|
+
provides the best compression but is the slowest. It defaults to level 6.
|
119
|
+
libdeflate uses this same design but is designed to improve on both zlib's
|
120
|
+
performance *and* compression ratio at every compression level. In addition,
|
121
|
+
libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
|
122
|
+
minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
|
123
|
+
significantly improve on zlib's compression ratio.
|
124
|
+
|
125
|
+
If you are using DEFLATE (or zlib, or gzip) in your application, you should test
|
126
|
+
different levels to see which works best for your application.
|
127
|
+
|
128
|
+
# Motivation
|
129
|
+
|
130
|
+
Despite DEFLATE's widespread use mainly through the zlib library, in the
|
131
|
+
compression community this format from the early 1990s is often considered
|
132
|
+
obsolete. And in a few significant ways, it is.
|
133
|
+
|
134
|
+
So why implement DEFLATE at all, instead of focusing entirely on
|
135
|
+
bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]?
|
136
|
+
|
137
|
+
To do something better, you need to understand what came before. And it turns
|
138
|
+
out that most ideas from DEFLATE are still relevant. Many of the newer formats
|
139
|
+
share a structure similar to DEFLATE's, with different tweaks. The effects of
|
140
|
+
trivial but very useful tweaks, such as increasing the sliding window size, are
|
141
|
+
often confused with the effects of nontrivial but less useful tweaks. And
|
142
|
+
actually, many of these formats are similar enough that common algorithms and
|
143
|
+
optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
|
144
|
+
|
145
|
+
In addition, comparing compressors fairly is difficult because the performance
|
146
|
+
of a compressor depends heavily on optimizations which are not intrinsic to the
|
147
|
+
compression format itself. In this respect, the zlib library sometimes compares
|
148
|
+
poorly to certain newer code because zlib is not well optimized for modern
|
149
|
+
processors. libdeflate addresses this by providing an optimized DEFLATE
|
150
|
+
implementation which can be used for benchmarking purposes. And, of course,
|
151
|
+
real applications can use it as well.
|
152
|
+
|
153
|
+
That being said, I have also started [a separate
|
154
|
+
project](https://github.com/ebiggers/xpack) for an experimental, more modern
|
155
|
+
compression format.
|
156
|
+
|
157
|
+
# License
|
158
|
+
|
159
|
+
libdeflate is [MIT-licensed](COPYING).
|
160
|
+
|
161
|
+
Additional notes (informational only):
|
162
|
+
|
163
|
+
- I am not aware of any patents covering libdeflate.
|
164
|
+
|
165
|
+
- Old versions of libdeflate were public domain; I only started copyrighting
|
166
|
+
changes in newer versions. Portions of the source code that have not been
|
167
|
+
changed since being released in a public domain version can theoretically
|
168
|
+
still be used as public domain if you want to. But for practical purposes, it
|
169
|
+
probably would be easier to just take the MIT license option, which is nearly
|
170
|
+
the same anyway.
|
@@ -0,0 +1,351 @@
|
|
1
|
+
/*
|
2
|
+
* common_defs.h
|
3
|
+
*
|
4
|
+
* Copyright 2016 Eric Biggers
|
5
|
+
*
|
6
|
+
* Permission is hereby granted, free of charge, to any person
|
7
|
+
* obtaining a copy of this software and associated documentation
|
8
|
+
* files (the "Software"), to deal in the Software without
|
9
|
+
* restriction, including without limitation the rights to use,
|
10
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11
|
+
* copies of the Software, and to permit persons to whom the
|
12
|
+
* Software is furnished to do so, subject to the following
|
13
|
+
* conditions:
|
14
|
+
*
|
15
|
+
* The above copyright notice and this permission notice shall be
|
16
|
+
* included in all copies or substantial portions of the Software.
|
17
|
+
*
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
19
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
20
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
21
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
22
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
23
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
24
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#ifndef COMMON_COMMON_DEFS_H
|
29
|
+
#define COMMON_COMMON_DEFS_H
|
30
|
+
|
31
|
+
#ifdef __GNUC__
|
32
|
+
# include "compiler_gcc.h"
|
33
|
+
#elif defined(_MSC_VER)
|
34
|
+
# include "compiler_msc.h"
|
35
|
+
#else
|
36
|
+
# pragma message("Unrecognized compiler. Please add a header file for your compiler. Compilation will proceed, but performance may suffer!")
|
37
|
+
#endif
|
38
|
+
|
39
|
+
/* ========================================================================== */
|
40
|
+
/* Type definitions */
|
41
|
+
/* ========================================================================== */
|
42
|
+
|
43
|
+
#include <stddef.h> /* size_t */
|
44
|
+
|
45
|
+
#ifndef __bool_true_false_are_defined
|
46
|
+
# include <stdbool.h> /* bool */
|
47
|
+
#endif
|
48
|
+
|
49
|
+
/* Fixed-width integer types */
|
50
|
+
#ifndef PRIu32
|
51
|
+
# include <inttypes.h>
|
52
|
+
#endif
|
53
|
+
typedef uint8_t u8;
|
54
|
+
typedef uint16_t u16;
|
55
|
+
typedef uint32_t u32;
|
56
|
+
typedef uint64_t u64;
|
57
|
+
typedef int8_t s8;
|
58
|
+
typedef int16_t s16;
|
59
|
+
typedef int32_t s32;
|
60
|
+
typedef int64_t s64;
|
61
|
+
|
62
|
+
/*
|
63
|
+
* Word type of the target architecture. Use 'size_t' instead of 'unsigned
|
64
|
+
* long' to account for platforms such as Windows that use 32-bit 'unsigned
|
65
|
+
* long' on 64-bit architectures.
|
66
|
+
*/
|
67
|
+
typedef size_t machine_word_t;
|
68
|
+
|
69
|
+
/* Number of bytes in a word */
|
70
|
+
#define WORDBYTES ((int)sizeof(machine_word_t))
|
71
|
+
|
72
|
+
/* Number of bits in a word */
|
73
|
+
#define WORDBITS (8 * WORDBYTES)
|
74
|
+
|
75
|
+
/* ========================================================================== */
|
76
|
+
/* Optional compiler features */
|
77
|
+
/* ========================================================================== */
|
78
|
+
|
79
|
+
/* LIBEXPORT - export a function from a shared library */
|
80
|
+
#ifndef LIBEXPORT
|
81
|
+
# define LIBEXPORT
|
82
|
+
#endif
|
83
|
+
|
84
|
+
/* inline - suggest that a function be inlined */
|
85
|
+
#ifndef inline
|
86
|
+
# define inline
|
87
|
+
#endif
|
88
|
+
|
89
|
+
/* forceinline - force a function to be inlined, if possible */
|
90
|
+
#ifndef forceinline
|
91
|
+
# define forceinline inline
|
92
|
+
#endif
|
93
|
+
|
94
|
+
/* restrict - annotate a non-aliased pointer */
|
95
|
+
#ifndef restrict
|
96
|
+
# define restrict
|
97
|
+
#endif
|
98
|
+
|
99
|
+
/* likely(expr) - hint that an expression is usually true */
|
100
|
+
#ifndef likely
|
101
|
+
# define likely(expr) (expr)
|
102
|
+
#endif
|
103
|
+
|
104
|
+
/* unlikely(expr) - hint that an expression is usually false */
|
105
|
+
#ifndef unlikely
|
106
|
+
# define unlikely(expr) (expr)
|
107
|
+
#endif
|
108
|
+
|
109
|
+
/* prefetchr(addr) - prefetch into L1 cache for read */
|
110
|
+
#ifndef prefetchr
|
111
|
+
# define prefetchr(addr)
|
112
|
+
#endif
|
113
|
+
|
114
|
+
/* prefetchw(addr) - prefetch into L1 cache for write */
|
115
|
+
#ifndef prefetchw
|
116
|
+
# define prefetchw(addr)
|
117
|
+
#endif
|
118
|
+
|
119
|
+
/* Does the compiler support the 'target' function attribute? */
|
120
|
+
#ifndef COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
|
121
|
+
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
|
122
|
+
#endif
|
123
|
+
|
124
|
+
/* Are target-specific intrinsics supported in 'target' attribute functions? */
|
125
|
+
#ifndef COMPILER_SUPPORTS_TARGET_INTRINSICS
|
126
|
+
# define COMPILER_SUPPORTS_TARGET_INTRINSICS 0
|
127
|
+
#endif
|
128
|
+
|
129
|
+
/* Which targets are supported with the 'target' function attribute? */
|
130
|
+
#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET
|
131
|
+
# define COMPILER_SUPPORTS_PCLMUL_TARGET 0
|
132
|
+
#endif
|
133
|
+
#ifndef COMPILER_SUPPORTS_BMI2_TARGET
|
134
|
+
# define COMPILER_SUPPORTS_BMI2_TARGET 0
|
135
|
+
#endif
|
136
|
+
#ifndef COMPILER_SUPPORTS_AVX_TARGET
|
137
|
+
# define COMPILER_SUPPORTS_AVX_TARGET 0
|
138
|
+
#endif
|
139
|
+
#ifndef COMPILER_SUPPORTS_AVX2_TARGET
|
140
|
+
# define COMPILER_SUPPORTS_AVX2_TARGET 0
|
141
|
+
#endif
|
142
|
+
|
143
|
+
/* _aligned_attribute(n) - declare that the annotated variable, or variables of
|
144
|
+
* the annotated type, are to be aligned on n-byte boundaries */
|
145
|
+
#ifndef _aligned_attribute
|
146
|
+
#endif
|
147
|
+
|
148
|
+
/* ========================================================================== */
|
149
|
+
/* Miscellaneous macros */
|
150
|
+
/* ========================================================================== */
|
151
|
+
|
152
|
+
#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
|
153
|
+
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
|
154
|
+
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
|
155
|
+
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
|
156
|
+
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
|
157
|
+
/*
 * ALIGN(n, a) - round 'n' up to the next multiple of 'a', where 'a' must be a
 * power of 2.
 *
 * The rounding is done by subtracting the remainder rather than masking with
 * ~((a) - 1).  The complement is computed in the type of 'a', so when 'a' is
 * an unsigned type narrower than 'n' (e.g. ALIGN(u64_value, 8u)) the mask
 * would be zero-extended and would wrongly clear the upper bits of the result.
 * The result has the same type as before (the usual arithmetic conversion of
 * 'n' and 'a').  Both arguments are evaluated more than once, so they must not
 * have side effects.
 */
#define ALIGN(n, a)		(((n) + (a) - 1) - (((n) + (a) - 1) & ((a) - 1)))
|
158
|
+
|
159
|
+
/* ========================================================================== */
|
160
|
+
/* Endianness handling */
|
161
|
+
/* ========================================================================== */
|
162
|
+
|
163
|
+
/*
|
164
|
+
* CPU_IS_LITTLE_ENDIAN() - a macro which evaluates to 1 if the CPU is little
|
165
|
+
* endian or 0 if it is big endian. The macro should be defined in a way such
|
166
|
+
* that the compiler can evaluate it at compilation time. If not defined, a
|
167
|
+
* fallback is used.
|
168
|
+
*/
|
169
|
+
#ifndef CPU_IS_LITTLE_ENDIAN
|
170
|
+
/*
 * Fallback byte-order probe: store 1 in an unsigned int and return the byte
 * that shares its lowest address.  That byte is 1 when the least significant
 * byte is stored first (little endian) and 0 otherwise, assuming unsigned int
 * is wider than one byte.
 */
static forceinline int CPU_IS_LITTLE_ENDIAN(void)
{
	union {
		unsigned int word;
		unsigned char first_byte;
	} probe;

	probe.word = 1;
	return probe.first_byte;
}
|
179
|
+
#endif
|
180
|
+
|
181
|
+
/* bswap16(n) - swap the bytes of a 16-bit integer */
|
182
|
+
#ifndef bswap16
|
183
|
+
/* Fallback 16-bit byte swap: exchange the low byte and the high byte of n. */
static forceinline u16 bswap16(u16 n)
{
	const u16 low_to_high = (u16)(n << 8);	/* low byte moved up */
	const u16 high_to_low = (u16)(n >> 8);	/* high byte moved down */

	return low_to_high | high_to_low;
}
|
187
|
+
#endif
|
188
|
+
|
189
|
+
/* bswap32(n) - swap the bytes of a 32-bit integer */
|
190
|
+
#ifndef bswap32
|
191
|
+
/*
 * Fallback 32-bit byte swap: isolate each of the four bytes of n in place,
 * then shift every byte to its mirrored position and combine them.
 */
static forceinline u32 bswap32(u32 n)
{
	const u32 byte0 = n & 0x000000FF;	/* least significant byte */
	const u32 byte1 = n & 0x0000FF00;
	const u32 byte2 = n & 0x00FF0000;
	const u32 byte3 = n & 0xFF000000;	/* most significant byte */

	return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | (byte3 >> 24);
}
|
198
|
+
#endif
|
199
|
+
|
200
|
+
/* bswap64(n) - swap the bytes of a 64-bit integer */
|
201
|
+
#ifndef bswap64
|
202
|
+
/*
 * Fallback 64-bit byte swap: shift n so that each source byte lands on its
 * mirrored position, keep only that byte with a mask, and OR the eight
 * results together.  The outermost two shifts need no mask because the
 * shift itself discards every other byte.
 */
static forceinline u64 bswap64(u64 n)
{
	return (n << 56) |
	       ((n << 40) & 0x00FF000000000000) |
	       ((n << 24) & 0x0000FF0000000000) |
	       ((n <<  8) & 0x000000FF00000000) |
	       ((n >>  8) & 0x00000000FF000000) |
	       ((n >> 24) & 0x0000000000FF0000) |
	       ((n >> 40) & 0x000000000000FF00) |
	       (n >> 56);
}
|
213
|
+
#endif
|
214
|
+
|
215
|
+
#define le16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n))
|
216
|
+
#define le32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n))
|
217
|
+
#define le64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n))
|
218
|
+
#define be16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n))
|
219
|
+
#define be32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n))
|
220
|
+
#define be64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap64(n) : (n))
|
221
|
+
|
222
|
+
/* ========================================================================== */
|
223
|
+
/* Unaligned memory accesses */
|
224
|
+
/* ========================================================================== */
|
225
|
+
|
226
|
+
/*
|
227
|
+
* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
|
228
|
+
* can be performed efficiently on the target platform.
|
229
|
+
*/
|
230
|
+
#ifndef UNALIGNED_ACCESS_IS_FAST
|
231
|
+
# define UNALIGNED_ACCESS_IS_FAST 0
|
232
|
+
#endif
|
233
|
+
|
234
|
+
/*
|
235
|
+
* DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
|
236
|
+
* defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
|
237
|
+
* which load and store variables of type 'type' from/to unaligned memory
|
238
|
+
* addresses. If not defined, a fallback is used.
|
239
|
+
*/
|
240
|
+
#ifndef DEFINE_UNALIGNED_TYPE
|
241
|
+
|
242
|
+
/*
|
243
|
+
* Although memcpy() may seem inefficient, it *usually* gets optimized
|
244
|
+
* appropriately by modern compilers. It's portable and may be the best we can
|
245
|
+
* do for a fallback...
|
246
|
+
*/
|
247
|
+
#include <string.h>
|
248
|
+
|
249
|
+
/*
 * Portable fallback.  DEFINE_UNALIGNED_TYPE(type) expands to two helpers:
 *
 *   load_<type>_unaligned(p)     - return the 'type' value stored at p
 *   store_<type>_unaligned(v, p) - write the 'type' value v to p
 *
 * Both copy the bytes of the value with memcpy(), so p does not need to be
 * aligned for 'type'.
 */
#define DEFINE_UNALIGNED_TYPE(type)				\
								\
static forceinline type						\
load_##type##_unaligned(const void *p)				\
{								\
	type value;						\
								\
	memcpy(&value, p, sizeof(type));			\
	return value;						\
}								\
								\
static forceinline void						\
store_##type##_unaligned(type v, void *p)			\
{								\
	memcpy(p, &v, sizeof(type));				\
}
|
264
|
+
|
265
|
+
#endif /* !DEFINE_UNALIGNED_TYPE */
|
266
|
+
|
267
|
+
/* ========================================================================== */
|
268
|
+
/* Bit scan functions */
|
269
|
+
/* ========================================================================== */
|
270
|
+
|
271
|
+
/*
|
272
|
+
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
|
273
|
+
* significant end) of the *most* significant 1 bit in the input value. The
|
274
|
+
* input value must be nonzero!
|
275
|
+
*/
|
276
|
+
|
277
|
+
#ifndef bsr32
|
278
|
+
static forceinline unsigned
|
279
|
+
bsr32(u32 n)
|
280
|
+
{
|
281
|
+
unsigned i = 0;
|
282
|
+
while ((n >>= 1) != 0)
|
283
|
+
i++;
|
284
|
+
return i;
|
285
|
+
}
|
286
|
+
#endif
|
287
|
+
|
288
|
+
#ifndef bsr64
|
289
|
+
static forceinline unsigned
|
290
|
+
bsr64(u64 n)
|
291
|
+
{
|
292
|
+
unsigned i = 0;
|
293
|
+
while ((n >>= 1) != 0)
|
294
|
+
i++;
|
295
|
+
return i;
|
296
|
+
}
|
297
|
+
#endif
|
298
|
+
|
299
|
+
static forceinline unsigned
|
300
|
+
bsrw(machine_word_t n)
|
301
|
+
{
|
302
|
+
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
303
|
+
if (WORDBITS == 32)
|
304
|
+
return bsr32(n);
|
305
|
+
else
|
306
|
+
return bsr64(n);
|
307
|
+
}
|
308
|
+
|
309
|
+
/*
|
310
|
+
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
|
311
|
+
* significant end) of the *least* significant 1 bit in the input value. The
|
312
|
+
* input value must be nonzero!
|
313
|
+
*/
|
314
|
+
|
315
|
+
#ifndef bsf32
|
316
|
+
static forceinline unsigned
|
317
|
+
bsf32(u32 n)
|
318
|
+
{
|
319
|
+
unsigned i = 0;
|
320
|
+
while ((n & 1) == 0) {
|
321
|
+
i++;
|
322
|
+
n >>= 1;
|
323
|
+
}
|
324
|
+
return i;
|
325
|
+
}
|
326
|
+
#endif
|
327
|
+
|
328
|
+
#ifndef bsf64
|
329
|
+
static forceinline unsigned
|
330
|
+
bsf64(u64 n)
|
331
|
+
{
|
332
|
+
unsigned i = 0;
|
333
|
+
while ((n & 1) == 0) {
|
334
|
+
i++;
|
335
|
+
n >>= 1;
|
336
|
+
}
|
337
|
+
return i;
|
338
|
+
}
|
339
|
+
#endif
|
340
|
+
|
341
|
+
static forceinline unsigned
|
342
|
+
bsfw(machine_word_t n)
|
343
|
+
{
|
344
|
+
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
345
|
+
if (WORDBITS == 32)
|
346
|
+
return bsf32(n);
|
347
|
+
else
|
348
|
+
return bsf64(n);
|
349
|
+
}
|
350
|
+
|
351
|
+
#endif /* COMMON_COMMON_DEFS_H */
|