libdeflate 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/test.yml +34 -0
- data/README.md +1 -6
- data/ext/libdeflate/extconf.rb +18 -7
- data/ext/libdeflate/libdeflate_ext.c +17 -17
- data/lib/libdeflate/version.rb +1 -1
- data/libdeflate.gemspec +2 -1
- metadata +13 -84
- data/.gitmodules +0 -3
- data/.travis.yml +0 -5
- data/ext/libdeflate/libdeflate/.gitignore +0 -19
- data/ext/libdeflate/libdeflate/COPYING +0 -21
- data/ext/libdeflate/libdeflate/Makefile +0 -231
- data/ext/libdeflate/libdeflate/Makefile.msc +0 -64
- data/ext/libdeflate/libdeflate/NEWS +0 -57
- data/ext/libdeflate/libdeflate/README.md +0 -170
- data/ext/libdeflate/libdeflate/common/common_defs.h +0 -351
- data/ext/libdeflate/libdeflate/common/compiler_gcc.h +0 -134
- data/ext/libdeflate/libdeflate/common/compiler_msc.h +0 -95
- data/ext/libdeflate/libdeflate/lib/adler32.c +0 -213
- data/ext/libdeflate/libdeflate/lib/adler32_impl.h +0 -281
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +0 -57
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +0 -13
- data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +0 -357
- data/ext/libdeflate/libdeflate/lib/crc32.c +0 -368
- data/ext/libdeflate/libdeflate/lib/crc32_impl.h +0 -286
- data/ext/libdeflate/libdeflate/lib/crc32_table.h +0 -526
- data/ext/libdeflate/libdeflate/lib/decompress_impl.h +0 -404
- data/ext/libdeflate/libdeflate/lib/deflate_compress.c +0 -2817
- data/ext/libdeflate/libdeflate/lib/deflate_compress.h +0 -14
- data/ext/libdeflate/libdeflate/lib/deflate_constants.h +0 -66
- data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +0 -889
- data/ext/libdeflate/libdeflate/lib/gzip_compress.c +0 -95
- data/ext/libdeflate/libdeflate/lib/gzip_constants.h +0 -45
- data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +0 -130
- data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +0 -405
- data/ext/libdeflate/libdeflate/lib/lib_common.h +0 -35
- data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +0 -53
- data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +0 -205
- data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +0 -61
- data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +0 -53
- data/ext/libdeflate/libdeflate/lib/unaligned.h +0 -202
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +0 -169
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +0 -48
- data/ext/libdeflate/libdeflate/lib/zlib_compress.c +0 -87
- data/ext/libdeflate/libdeflate/lib/zlib_constants.h +0 -21
- data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +0 -91
- data/ext/libdeflate/libdeflate/libdeflate.h +0 -274
- data/ext/libdeflate/libdeflate/programs/benchmark.c +0 -558
- data/ext/libdeflate/libdeflate/programs/checksum.c +0 -197
- data/ext/libdeflate/libdeflate/programs/detect.sh +0 -62
- data/ext/libdeflate/libdeflate/programs/gzip.c +0 -603
- data/ext/libdeflate/libdeflate/programs/prog_util.c +0 -530
- data/ext/libdeflate/libdeflate/programs/prog_util.h +0 -162
- data/ext/libdeflate/libdeflate/programs/test_checksums.c +0 -135
- data/ext/libdeflate/libdeflate/programs/tgetopt.c +0 -118
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +0 -12
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +0 -40
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +0 -28
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +0 -3
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +0 -28
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +0 -14
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +0 -28
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +0 -3
- data/ext/libdeflate/libdeflate/tools/android_build.sh +0 -104
- data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +0 -76
- data/ext/libdeflate/libdeflate/tools/exec_tests.sh +0 -30
- data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +0 -108
- data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +0 -100
- data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +0 -412
- data/ext/libdeflate/libdeflate/tools/make-windows-releases +0 -21
- data/ext/libdeflate/libdeflate/tools/mips_build.sh +0 -9
- data/ext/libdeflate/libdeflate/tools/msc_test.bat +0 -3
- data/ext/libdeflate/libdeflate/tools/pgo_build.sh +0 -23
- data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +0 -37
- data/ext/libdeflate/libdeflate/tools/run_tests.sh +0 -305
- data/ext/libdeflate/libdeflate/tools/windows_build.sh +0 -10
@@ -1,64 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Makefile for the Microsoft toolchain
|
3
|
-
#
|
4
|
-
# Usage:
|
5
|
-
# nmake /f Makefile.msc
|
6
|
-
#
|
7
|
-
|
8
|
-
# Register the extra .dllobj suffix so that the two inference rules below
# (.c -> .obj and .c -> .dllobj) are both recognized.
.SUFFIXES: .c .obj .dllobj

# ---------------------------------------------------------------------------
# Tools and flags
# ---------------------------------------------------------------------------
CC = cl
LD = link
AR = lib
CFLAGS = /MD /O2 -I. -Icommon
LDFLAGS =

# ---------------------------------------------------------------------------
# Build products
# ---------------------------------------------------------------------------
STATIC_LIB = libdeflatestatic.lib
SHARED_LIB = libdeflate.dll
IMPORT_LIB = libdeflate.lib

# ---------------------------------------------------------------------------
# Object lists
# ---------------------------------------------------------------------------
STATIC_LIB_OBJ = \
	lib/aligned_malloc.obj \
	lib/adler32.obj \
	lib/crc32.obj \
	lib/deflate_compress.obj \
	lib/deflate_decompress.obj \
	lib/gzip_compress.obj \
	lib/gzip_decompress.obj \
	lib/x86_cpu_features.obj \
	lib/zlib_compress.obj \
	lib/zlib_decompress.obj

# The DLL is built from the same sources, compiled a second time into
# .dllobj files with LIBDEFLATE_DLL defined (see the .c.dllobj rule).
SHARED_LIB_OBJ = $(STATIC_LIB_OBJ:.obj=.dllobj)

# Objects shared by the command-line programs; they link the static library.
PROG_COMMON_OBJ = \
	programs/prog_util.obj \
	programs/tgetopt.obj \
	$(STATIC_LIB)

# NOTE(review): PROG_CFLAGS is defined here but no rule in this file uses it;
# the programs are compiled by the generic .c.obj rule with plain CFLAGS.
PROG_CFLAGS = $(CFLAGS) -Iprograms

# ---------------------------------------------------------------------------
# Targets
# ---------------------------------------------------------------------------
all: $(STATIC_LIB) $(SHARED_LIB) $(IMPORT_LIB) gzip.exe gunzip.exe

# Object for the static library and the programs.
.c.obj:
	$(CC) -c /Fo$@ $(CFLAGS) $**

# Object for the DLL.
.c.dllobj:
	$(CC) -c /Fo$@ $(CFLAGS) /DLIBDEFLATE_DLL $**

$(STATIC_LIB): $(STATIC_LIB_OBJ)
	$(AR) $(ARFLAGS) -out:$@ $(STATIC_LIB_OBJ)

# Linking the DLL also writes the import library (-implib).
$(SHARED_LIB): $(SHARED_LIB_OBJ)
	$(LD) $(LDFLAGS) -out:$@ -dll -implib:$(IMPORT_LIB) $(SHARED_LIB_OBJ)

# No commands: the import library is produced by the $(SHARED_LIB) rule.
$(IMPORT_LIB): $(SHARED_LIB)

gzip.exe: programs/gzip.obj $(PROG_COMMON_OBJ)
	$(LD) $(LDFLAGS) -out:$@ $**

# gunzip.exe is simply a copy of gzip.exe.
gunzip.exe: gzip.exe
	copy $** $@

# The leading '-' lets the build continue when del reports an error.
clean:
	-del *.dll *.exe *.exp libdeflate.lib libdeflatestatic.lib gzip.lib \
		lib\*.obj lib\*.dllobj programs\*.obj 2>nul
|
@@ -1,57 +0,0 @@
|
|
1
|
-
Version 0.7:
|
2
|
-
Fixed a very rare bug that caused data to be compressed incorrectly.
|
3
|
-
The bug affected compression levels 7 and below since libdeflate v0.2.
|
4
|
-
Although there have been no user reports of the bug, and I believe it
|
5
|
-
would have been highly unlikely to encounter on realistic data, it could
|
6
|
-
occur on data specially crafted to reproduce it.
|
7
|
-
|
8
|
-
Fixed a compilation error when building with clang 3.7.
|
9
|
-
|
10
|
-
Version 0.6:
|
11
|
-
Various improvements to the gzip program's behavior.
|
12
|
-
|
13
|
-
Faster CRC-32 on AVX-capable processors.
|
14
|
-
|
15
|
-
Other minor changes.
|
16
|
-
|
17
|
-
Version 0.5:
|
18
|
-
The CRC-32 checksum algorithm has been optimized with carryless
|
19
|
-
multiplication instructions for x86_64 (PCLMUL). This speeds up gzip
|
20
|
-
compression and decompression.
|
21
|
-
|
22
|
-
Build fixes for certain platforms and compilers.
|
23
|
-
|
24
|
-
Added more test programs and scripts.
|
25
|
-
|
26
|
-
libdeflate is now entirely MIT-licensed.
|
27
|
-
|
28
|
-
Version 0.4:
|
29
|
-
The Adler-32 checksum algorithm has been optimized with vector
|
30
|
-
instructions for x86_64 (SSE2 and AVX2) and ARM (NEON). This speeds up
|
31
|
-
zlib compression and decompression.
|
32
|
-
|
33
|
-
To avoid naming collisions, functions and definitions in libdeflate's
|
34
|
-
API have been renamed to be prefixed with "libdeflate_" or
|
35
|
-
"LIBDEFLATE_". Programs using the old API will need to be updated.
|
36
|
-
|
37
|
-
Various bug fixes and other improvements.
|
38
|
-
|
39
|
-
Version 0.3:
|
40
|
-
Some bug fixes and other minor changes.
|
41
|
-
|
42
|
-
Version 0.2:
|
43
|
-
Implemented a new block splitting algorithm which typically improves the
|
44
|
-
compression ratio slightly at all compression levels.
|
45
|
-
|
46
|
-
The compressor now outputs each block using the cheapest type (dynamic
|
47
|
-
Huffman, static Huffman, or uncompressed).
|
48
|
-
|
49
|
-
The gzip program has received an overhaul and now behaves more like the
|
50
|
-
standard version.
|
51
|
-
|
52
|
-
Build system updates, including: some build options were changed and
|
53
|
-
some build options were removed, and the default 'make' target now
|
54
|
-
includes the gzip program as well as the library.
|
55
|
-
|
56
|
-
Version 0.1:
|
57
|
-
Initial official release.
|
@@ -1,170 +0,0 @@
|
|
1
|
-
# Overview
|
2
|
-
|
3
|
-
libdeflate is a library for fast, whole-buffer DEFLATE-based compression and
|
4
|
-
decompression.
|
5
|
-
|
6
|
-
The supported formats are:
|
7
|
-
|
8
|
-
- DEFLATE (raw)
|
9
|
-
- zlib (a.k.a. DEFLATE with a zlib wrapper)
|
10
|
-
- gzip (a.k.a. DEFLATE with a gzip wrapper)
|
11
|
-
|
12
|
-
libdeflate is heavily optimized. It is significantly faster than the zlib
|
13
|
-
library, both for compression and decompression, and especially on x86
|
14
|
-
processors. In addition, libdeflate provides optional high compression modes
|
15
|
-
that provide a better compression ratio than zlib's "level 9".
|
16
|
-
|
17
|
-
libdeflate itself is a library, but the following command-line programs which
|
18
|
-
use this library are also provided:
|
19
|
-
|
20
|
-
* gzip (or gunzip), a program which mostly behaves like the standard equivalent,
|
21
|
-
except that it does not yet have good streaming support and therefore does not
|
22
|
-
yet support very large files
|
23
|
-
* benchmark, a program for benchmarking in-memory compression and decompression
|
24
|
-
|
25
|
-
# Building
|
26
|
-
|
27
|
-
## For UNIX
|
28
|
-
|
29
|
-
Just run `make`. You need GNU Make and either GCC or Clang. GCC is recommended
|
30
|
-
because it builds slightly faster binaries. There is no `make install` yet;
|
31
|
-
just copy the file(s) to where you want.
|
32
|
-
|
33
|
-
By default, all targets are built, including the library and programs, with the
|
34
|
-
exception of the `benchmark` program. `make help` shows the available targets.
|
35
|
-
There are also several options which can be set on the `make` command line. See
|
36
|
-
the Makefile for details.
|
37
|
-
|
38
|
-
## For Windows
|
39
|
-
|
40
|
-
MinGW (GCC) is the recommended compiler to use when building binaries for
|
41
|
-
Windows. MinGW can be used on either Windows or Linux. On Windows, you'll need
|
42
|
-
the compiler as well as GNU Make and basic UNIX tools such as `sh`. This is
|
43
|
-
most easily set up with Cygwin, but some standalone MinGW distributions for
|
44
|
-
Windows also work. Or, on Linux, you'll need to install the `mingw-w64-gcc` or
|
45
|
-
similarly-named package. Once ready, do the build using a command like:
|
46
|
-
|
47
|
-
$ make CC=x86_64-w64-mingw32-gcc
|
48
|
-
|
49
|
-
Some MinGW distributions for Windows may require `CC=gcc` instead.
|
50
|
-
|
51
|
-
Windows binaries prebuilt with MinGW may also be downloaded from
|
52
|
-
https://github.com/ebiggers/libdeflate/releases.
|
53
|
-
|
54
|
-
Alternatively, a separate Makefile, `Makefile.msc`, is provided for the tools
|
55
|
-
that come with Visual Studio, for those who strongly prefer that toolchain.
|
56
|
-
|
57
|
-
As usual, 64-bit binaries are faster than 32-bit binaries and should be
|
58
|
-
preferred whenever possible.
|
59
|
-
|
60
|
-
# API
|
61
|
-
|
62
|
-
libdeflate has a simple API that is not zlib-compatible. You can create
|
63
|
-
compressors and decompressors and use them to compress or decompress buffers.
|
64
|
-
See libdeflate.h for details.
|
65
|
-
|
66
|
-
There is currently no support for streaming. This has been considered, but it
|
67
|
-
always significantly increases complexity and slows down fast paths.
|
68
|
-
Unfortunately, at this point it remains a future TODO. So: if your application
|
69
|
-
compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a
|
70
|
-
great choice for you; that's what it's designed to do. This is perfect for
|
71
|
-
certain use cases such as transparent filesystem compression. But if your
|
72
|
-
application compresses large files as a single compressed stream, similarly to
|
73
|
-
the `gzip` program, then libdeflate isn't for you.
|
74
|
-
|
75
|
-
Note that with chunk-based compression, you generally should have the
|
76
|
-
uncompressed size of each chunk stored outside of the compressed data itself.
|
77
|
-
This enables you to allocate an output buffer of the correct size without
|
78
|
-
guessing. However, libdeflate's decompression routines do optionally provide
|
79
|
-
the actual number of output bytes in case you need it.
|
80
|
-
|
81
|
-
# DEFLATE vs. zlib vs. gzip
|
82
|
-
|
83
|
-
The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib
|
84
|
-
format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format
|
85
|
-
([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with
|
86
|
-
each other as well as with the [zlib software library](http://zlib.net), which
|
87
|
-
actually supports all three formats. libdeflate (this library) also supports
|
88
|
-
all three formats.
|
89
|
-
|
90
|
-
Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different
|
91
|
-
wrappers for this stream. Both zlib and gzip include checksums, but gzip can
|
92
|
-
include extra information such as the original filename. Generally, you should
|
93
|
-
choose a format as follows:
|
94
|
-
|
95
|
-
- If you are compressing whole files with no subdivisions, similar to the `gzip`
|
96
|
-
program, you probably should use the gzip format.
|
97
|
-
- Otherwise, if you don't need the features of the gzip header and footer but do
|
98
|
-
still want a checksum for corruption detection, you probably should use the
|
99
|
-
zlib format.
|
100
|
-
- Otherwise, you probably should use raw DEFLATE. This is ideal if you don't
|
101
|
-
need checksums, e.g. because they're simply not needed for your use case or
|
102
|
-
because you already compute your own checksums that are stored separately from
|
103
|
-
the compressed stream.
|
104
|
-
|
105
|
-
Note that gzip and zlib streams can be distinguished from each other based on
|
106
|
-
their starting bytes, but this is not necessarily true of raw DEFLATE streams.
|
107
|
-
|
108
|
-
# Compression levels
|
109
|
-
|
110
|
-
An often-underappreciated fact of compression formats such as DEFLATE is that
|
111
|
-
there are an enormous number of different ways that a given input could be
|
112
|
-
compressed. Different algorithms and different amounts of computation time will
|
113
|
-
result in different compression ratios, while remaining equally compatible with
|
114
|
-
the decompressor.
|
115
|
-
|
116
|
-
For this reason, the commonly used zlib library provides nine compression
|
117
|
-
levels. Level 1 is the fastest but provides the worst compression; level 9
|
118
|
-
provides the best compression but is the slowest. It defaults to level 6.
|
119
|
-
libdeflate uses this same design but is designed to improve on both zlib's
|
120
|
-
performance *and* compression ratio at every compression level. In addition,
|
121
|
-
libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
|
122
|
-
minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
|
123
|
-
significantly improve on zlib's compression ratio.
|
124
|
-
|
125
|
-
If you are using DEFLATE (or zlib, or gzip) in your application, you should test
|
126
|
-
different levels to see which works best for your application.
|
127
|
-
|
128
|
-
# Motivation
|
129
|
-
|
130
|
-
Despite DEFLATE's widespread use mainly through the zlib library, in the
|
131
|
-
compression community this format from the early 1990s is often considered
|
132
|
-
obsolete. And in a few significant ways, it is.
|
133
|
-
|
134
|
-
So why implement DEFLATE at all, instead of focusing entirely on
|
135
|
-
bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]?
|
136
|
-
|
137
|
-
To do something better, you need to understand what came before. And it turns
|
138
|
-
out that most ideas from DEFLATE are still relevant. Many of the newer formats
|
139
|
-
share a similar structure to DEFLATE, with different tweaks. The effects of
|
140
|
-
trivial but very useful tweaks, such as increasing the sliding window size, are
|
141
|
-
often confused with the effects of nontrivial but less useful tweaks. And
|
142
|
-
actually, many of these formats are similar enough that common algorithms and
|
143
|
-
optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
|
144
|
-
|
145
|
-
In addition, comparing compressors fairly is difficult because the performance
|
146
|
-
of a compressor depends heavily on optimizations which are not intrinsic to the
|
147
|
-
compression format itself. In this respect, the zlib library sometimes compares
|
148
|
-
poorly to certain newer code because zlib is not well optimized for modern
|
149
|
-
processors. libdeflate addresses this by providing an optimized DEFLATE
|
150
|
-
implementation which can be used for benchmarking purposes. And, of course,
|
151
|
-
real applications can use it as well.
|
152
|
-
|
153
|
-
That being said, I have also started [a separate
|
154
|
-
project](https://github.com/ebiggers/xpack) for an experimental, more modern
|
155
|
-
compression format.
|
156
|
-
|
157
|
-
# License
|
158
|
-
|
159
|
-
libdeflate is [MIT-licensed](COPYING).
|
160
|
-
|
161
|
-
Additional notes (informational only):
|
162
|
-
|
163
|
-
- I am not aware of any patents covering libdeflate.
|
164
|
-
|
165
|
-
- Old versions of libdeflate were public domain; I only started copyrighting
|
166
|
-
changes in newer versions. Portions of the source code that have not been
|
167
|
-
changed since being released in a public domain version can theoretically
|
168
|
-
still be used as public domain if you want to. But for practical purposes, it
|
169
|
-
probably would be easier to just take the MIT license option, which is nearly
|
170
|
-
the same anyway.
|
@@ -1,351 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* common_defs.h
|
3
|
-
*
|
4
|
-
* Copyright 2016 Eric Biggers
|
5
|
-
*
|
6
|
-
* Permission is hereby granted, free of charge, to any person
|
7
|
-
* obtaining a copy of this software and associated documentation
|
8
|
-
* files (the "Software"), to deal in the Software without
|
9
|
-
* restriction, including without limitation the rights to use,
|
10
|
-
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11
|
-
* copies of the Software, and to permit persons to whom the
|
12
|
-
* Software is furnished to do so, subject to the following
|
13
|
-
* conditions:
|
14
|
-
*
|
15
|
-
* The above copyright notice and this permission notice shall be
|
16
|
-
* included in all copies or substantial portions of the Software.
|
17
|
-
*
|
18
|
-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
19
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
20
|
-
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
21
|
-
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
22
|
-
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
23
|
-
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
24
|
-
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
25
|
-
* OTHER DEALINGS IN THE SOFTWARE.
|
26
|
-
*/
|
27
|
-
|
28
|
-
#ifndef COMMON_COMMON_DEFS_H
|
29
|
-
#define COMMON_COMMON_DEFS_H
|
30
|
-
|
31
|
-
#ifdef __GNUC__
|
32
|
-
# include "compiler_gcc.h"
|
33
|
-
#elif defined(_MSC_VER)
|
34
|
-
# include "compiler_msc.h"
|
35
|
-
#else
|
36
|
-
# pragma message("Unrecognized compiler. Please add a header file for your compiler. Compilation will proceed, but performance may suffer!")
|
37
|
-
#endif
|
38
|
-
|
39
|
-
/* ========================================================================== */
|
40
|
-
/* Type definitions */
|
41
|
-
/* ========================================================================== */
|
42
|
-
|
43
|
-
#include <stddef.h> /* size_t */
|
44
|
-
|
45
|
-
#ifndef __bool_true_false_are_defined
|
46
|
-
# include <stdbool.h> /* bool */
|
47
|
-
#endif
|
48
|
-
|
49
|
-
/* Fixed-width integer types */
|
50
|
-
#ifndef PRIu32
|
51
|
-
# include <inttypes.h>
|
52
|
-
#endif
|
53
|
-
/*
 * Short names for the exact-width integer types, listed in unsigned/signed
 * pairs of increasing width: 'uN' is unsigned, 'sN' is signed.
 */
typedef uint8_t		u8;
typedef int8_t		s8;

typedef uint16_t	u16;
typedef int16_t		s16;

typedef uint32_t	u32;
typedef int32_t		s32;

typedef uint64_t	u64;
typedef int64_t		s64;
|
61
|
-
|
62
|
-
/*
|
63
|
-
* Word type of the target architecture. Use 'size_t' instead of 'unsigned
|
64
|
-
* long' to account for platforms such as Windows that use 32-bit 'unsigned
|
65
|
-
* long' on 64-bit architectures.
|
66
|
-
*/
|
67
|
-
typedef size_t machine_word_t;

/*
 * Size of a machine word, in bytes and in bits.  Both expand to expressions
 * of type 'int' (note the cast), not 'size_t'.
 */
#define WORDBYTES	((int)sizeof(machine_word_t))
#define WORDBITS	(WORDBYTES * 8)
|
74
|
-
|
75
|
-
/* ========================================================================== */
|
76
|
-
/* Optional compiler features */
|
77
|
-
/* ========================================================================== */
|
78
|
-
|
79
|
-
/*
 * Each macro below is a fallback: it is defined only if nothing earlier
 * (presumably the compiler-specific header included at the top of this file)
 * has defined it already.
 */

/* LIBEXPORT - export a function from a shared library */
#ifndef LIBEXPORT
#  define LIBEXPORT
#endif

/*
 * inline - suggest that a function be inlined
 *
 * 'inline' is a keyword in C99 and later (and in C++), so it is erased only
 * for older C compilers.  Erasing it unconditionally would throw away
 * inlining of the 'static forceinline' helpers in this header and would define
 * a keyword-named macro before standard headers are included, which the C
 * standard does not allow.
 */
#ifndef inline
#  if !defined(__cplusplus) && \
      !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#    define inline
#  endif
#endif

/* forceinline - force a function to be inlined, if possible */
#ifndef forceinline
#  define forceinline inline
#endif

/*
 * restrict - annotate a non-aliased pointer
 *
 * 'restrict' is a keyword in C99 and later, so (as with 'inline' above) it
 * is erased only when compiling as something older.
 */
#ifndef restrict
#  if !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#    define restrict
#  endif
#endif

/* likely(expr) - hint that an expression is usually true */
#ifndef likely
#  define likely(expr)		(expr)
#endif

/* unlikely(expr) - hint that an expression is usually false */
#ifndef unlikely
#  define unlikely(expr)	(expr)
#endif

/* prefetchr(addr) - prefetch into L1 cache for read (fallback: no-op) */
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/* prefetchw(addr) - prefetch into L1 cache for write (fallback: no-op) */
#ifndef prefetchw
#  define prefetchw(addr)
#endif

/* Does the compiler support the 'target' function attribute? */
#ifndef COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
#  define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
#endif

/* Are target-specific intrinsics supported in 'target' attribute functions? */
#ifndef COMPILER_SUPPORTS_TARGET_INTRINSICS
#  define COMPILER_SUPPORTS_TARGET_INTRINSICS 0
#endif

/* Which targets are supported with the 'target' function attribute? */
#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET
#  define COMPILER_SUPPORTS_PCLMUL_TARGET 0
#endif
#ifndef COMPILER_SUPPORTS_BMI2_TARGET
#  define COMPILER_SUPPORTS_BMI2_TARGET 0
#endif
#ifndef COMPILER_SUPPORTS_AVX_TARGET
#  define COMPILER_SUPPORTS_AVX_TARGET 0
#endif
#ifndef COMPILER_SUPPORTS_AVX2_TARGET
#  define COMPILER_SUPPORTS_AVX2_TARGET 0
#endif

/*
 * _aligned_attribute(n) - declare that the annotated variable, or variables of
 * the annotated type, are to be aligned on n-byte boundaries.
 *
 * No fallback is provided here: if nothing defined it earlier, it stays
 * undefined.
 */
#ifndef _aligned_attribute
#endif
|
147
|
-
|
148
|
-
/* ========================================================================== */
|
149
|
-
/* Miscellaneous macros */
|
150
|
-
/* ========================================================================== */
|
151
|
-
|
152
|
-
/* Number of elements in array A.  A must be a real array, not a pointer. */
#define ARRAY_LEN(A)		(sizeof(A) / sizeof((A)[0]))

/*
 * Smaller / larger of two values.  The selected argument is evaluated twice,
 * so do not pass expressions with side effects.
 */
#define MIN(a, b)		((a) <= (b) ? (a) : (b))
#define MAX(a, b)		((a) >= (b) ? (a) : (b))

/* n / d, rounded up (intended for non-negative n and positive d) */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/*
 * Compile-time assertion in expression form: when 'expr' is false the array
 * type has size -1, which is a compile error.
 */
#define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))

/*
 * ALIGN(n, a) - round n up to the next multiple of a, where a is a power of 2.
 *
 * The mask is built from '(1 ? (a) : (n))', which has the value of 'a' but
 * the common arithmetic type of 'a' and 'n'; 'n' is not evaluated there.
 * Without this, an unsigned 'a' narrower than 'n' (e.g. ALIGN(u64, 8u))
 * yields a zero-extended mask that clears the upper bits of the result.
 */
#define ALIGN(n, a)		(((n) + (a) - 1) & ~((1 ? (a) : (n)) - 1))
|
158
|
-
|
159
|
-
/* ========================================================================== */
|
160
|
-
/* Endianness handling */
|
161
|
-
/* ========================================================================== */
|
162
|
-
|
163
|
-
/*
|
164
|
-
* CPU_IS_LITTLE_ENDIAN() - a macro which evaluates to 1 if the CPU is little
|
165
|
-
* endian or 0 if it is big endian. The macro should be defined in a way such
|
166
|
-
* that the compiler can evaluate it at compilation time. If not defined, a
|
167
|
-
* fallback is used.
|
168
|
-
*/
|
169
|
-
#ifndef CPU_IS_LITTLE_ENDIAN
|
170
|
-
static forceinline int CPU_IS_LITTLE_ENDIAN(void)
|
171
|
-
{
|
172
|
-
union {
|
173
|
-
unsigned int v;
|
174
|
-
unsigned char b;
|
175
|
-
} u;
|
176
|
-
u.v = 1;
|
177
|
-
return u.b;
|
178
|
-
}
|
179
|
-
#endif
|
180
|
-
|
181
|
-
/* bswap16(n) - swap the bytes of a 16-bit integer */
|
182
|
-
#ifndef bswap16
|
183
|
-
static forceinline u16 bswap16(u16 n)
|
184
|
-
{
|
185
|
-
return (n << 8) | (n >> 8);
|
186
|
-
}
|
187
|
-
#endif
|
188
|
-
|
189
|
-
/* bswap32(n) - swap the bytes of a 32-bit integer */
|
190
|
-
#ifndef bswap32
|
191
|
-
static forceinline u32 bswap32(u32 n)
|
192
|
-
{
|
193
|
-
return ((n & 0x000000FF) << 24) |
|
194
|
-
((n & 0x0000FF00) << 8) |
|
195
|
-
((n & 0x00FF0000) >> 8) |
|
196
|
-
((n & 0xFF000000) >> 24);
|
197
|
-
}
|
198
|
-
#endif
|
199
|
-
|
200
|
-
/* bswap64(n) - swap the bytes of a 64-bit integer */
|
201
|
-
#ifndef bswap64
|
202
|
-
static forceinline u64 bswap64(u64 n)
|
203
|
-
{
|
204
|
-
return ((n & 0x00000000000000FF) << 56) |
|
205
|
-
((n & 0x000000000000FF00) << 40) |
|
206
|
-
((n & 0x0000000000FF0000) << 24) |
|
207
|
-
((n & 0x00000000FF000000) << 8) |
|
208
|
-
((n & 0x000000FF00000000) >> 8) |
|
209
|
-
((n & 0x0000FF0000000000) >> 24) |
|
210
|
-
((n & 0x00FF000000000000) >> 40) |
|
211
|
-
((n & 0xFF00000000000000) >> 56);
|
212
|
-
}
|
213
|
-
#endif
|
214
|
-
|
215
|
-
/*
 * leNN_bswap(n) / beNN_bswap(n) - convert an NN-bit value between CPU byte
 * order and little endian / big endian byte order.  Each macro yields 'n'
 * unchanged when the CPU already uses that byte order and bswapNN(n)
 * otherwise, so the same macro serves both conversion directions.
 */

/* 16-bit */
#define le16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n))
#define be16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n))

/* 32-bit */
#define le32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n))
#define be32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n))

/* 64-bit */
#define le64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n))
#define be64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap64(n) : (n))
|
221
|
-
|
222
|
-
/* ========================================================================== */
|
223
|
-
/* Unaligned memory accesses */
|
224
|
-
/* ========================================================================== */
|
225
|
-
|
226
|
-
/*
|
227
|
-
* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
|
228
|
-
* can be performed efficiently on the target platform.
|
229
|
-
*/
|
230
|
-
#if !defined(UNALIGNED_ACCESS_IS_FAST)
   /* Nothing earlier set it, so default to 0 (not fast) */
#  define UNALIGNED_ACCESS_IS_FAST 0
#endif
|
233
|
-
|
234
|
-
/*
|
235
|
-
* DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
|
236
|
-
* defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
|
237
|
-
* which load and store variables of type 'type' from/to unaligned memory
|
238
|
-
* addresses. If not defined, a fallback is used.
|
239
|
-
*/
|
240
|
-
#ifndef DEFINE_UNALIGNED_TYPE

/*
 * Portable fallback built on memcpy().  It looks slow, but modern compilers
 * *usually* turn a fixed-size memcpy() like this into a plain load or store,
 * and it is about the best that can be done without compiler-specific help.
 */
#include <string.h>

/*
 * Expands to two functions for the integer type 'type':
 *   load_<type>_unaligned(p)     - return the 'type' value stored at p
 *   store_<type>_unaligned(v, p) - write the 'type' value v to p
 * Neither requires p to be aligned.
 */
#define DEFINE_UNALIGNED_TYPE(type)				\
								\
static forceinline type						\
load_##type##_unaligned(const void *p)				\
{								\
	type value;						\
								\
	memcpy(&value, p, sizeof(type));			\
	return value;						\
}								\
								\
static forceinline void						\
store_##type##_unaligned(type v, void *p)			\
{								\
	memcpy(p, &v, sizeof(type));				\
}

#endif /* !DEFINE_UNALIGNED_TYPE */
|
266
|
-
|
267
|
-
/* ========================================================================== */
|
268
|
-
/* Bit scan functions */
|
269
|
-
/* ========================================================================== */
|
270
|
-
|
271
|
-
/*
|
272
|
-
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
|
273
|
-
* significant end) of the *most* significant 1 bit in the input value. The
|
274
|
-
* input value must be nonzero!
|
275
|
-
*/
|
276
|
-
|
277
|
-
#ifndef bsr32
|
278
|
-
static forceinline unsigned
|
279
|
-
bsr32(u32 n)
|
280
|
-
{
|
281
|
-
unsigned i = 0;
|
282
|
-
while ((n >>= 1) != 0)
|
283
|
-
i++;
|
284
|
-
return i;
|
285
|
-
}
|
286
|
-
#endif
|
287
|
-
|
288
|
-
#ifndef bsr64
|
289
|
-
static forceinline unsigned
|
290
|
-
bsr64(u64 n)
|
291
|
-
{
|
292
|
-
unsigned i = 0;
|
293
|
-
while ((n >>= 1) != 0)
|
294
|
-
i++;
|
295
|
-
return i;
|
296
|
-
}
|
297
|
-
#endif
|
298
|
-
|
299
|
-
static forceinline unsigned
|
300
|
-
bsrw(machine_word_t n)
|
301
|
-
{
|
302
|
-
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
303
|
-
if (WORDBITS == 32)
|
304
|
-
return bsr32(n);
|
305
|
-
else
|
306
|
-
return bsr64(n);
|
307
|
-
}
|
308
|
-
|
309
|
-
/*
|
310
|
-
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
|
311
|
-
* significant end) of the *least* significant 1 bit in the input value. The
|
312
|
-
* input value must be nonzero!
|
313
|
-
*/
|
314
|
-
|
315
|
-
#ifndef bsf32
|
316
|
-
static forceinline unsigned
|
317
|
-
bsf32(u32 n)
|
318
|
-
{
|
319
|
-
unsigned i = 0;
|
320
|
-
while ((n & 1) == 0) {
|
321
|
-
i++;
|
322
|
-
n >>= 1;
|
323
|
-
}
|
324
|
-
return i;
|
325
|
-
}
|
326
|
-
#endif
|
327
|
-
|
328
|
-
#ifndef bsf64
|
329
|
-
static forceinline unsigned
|
330
|
-
bsf64(u64 n)
|
331
|
-
{
|
332
|
-
unsigned i = 0;
|
333
|
-
while ((n & 1) == 0) {
|
334
|
-
i++;
|
335
|
-
n >>= 1;
|
336
|
-
}
|
337
|
-
return i;
|
338
|
-
}
|
339
|
-
#endif
|
340
|
-
|
341
|
-
static forceinline unsigned
|
342
|
-
bsfw(machine_word_t n)
|
343
|
-
{
|
344
|
-
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
345
|
-
if (WORDBITS == 32)
|
346
|
-
return bsf32(n);
|
347
|
-
else
|
348
|
-
return bsf64(n);
|
349
|
-
}
|
350
|
-
|
351
|
-
#endif /* COMMON_COMMON_DEFS_H */
|