deflate-ruby 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +95 -92
  3. data/LICENSE.txt +6 -6
  4. data/README.md +87 -65
  5. data/Rakefile +23 -0
  6. data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_impl.h → adler32_impl.h} +8 -7
  7. data/ext/deflate_ruby/common_defs.h +748 -0
  8. data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.c → cpu_features.c} +46 -16
  9. data/ext/deflate_ruby/{libdeflate/lib/x86/cpu_features.h → cpu_features.h} +2 -1
  10. data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_impl.h → crc32_impl.h} +22 -23
  11. data/ext/deflate_ruby/{libdeflate/lib/crc32_multipliers.h → crc32_multipliers.h} +2 -4
  12. data/ext/deflate_ruby/{libdeflate/lib/x86/crc32_pclmul_template.h → crc32_pclmul_template.h} +23 -94
  13. data/ext/deflate_ruby/{libdeflate/lib/crc32_tables.h → crc32_tables.h} +1 -1
  14. data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.c → deflate_compress.c} +59 -60
  15. data/ext/deflate_ruby/deflate_ruby.c +392 -218
  16. data/ext/deflate_ruby/deflate_ruby.h +6 -0
  17. data/ext/deflate_ruby/extconf.rb +35 -25
  18. data/ext/deflate_ruby/libdeflate/adler32.c +162 -0
  19. data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/adler32_impl.h +14 -7
  20. data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/crc32_impl.h +25 -31
  21. data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_helpers.h +156 -0
  22. data/ext/deflate_ruby/libdeflate/arm/crc32_pmull_wide.h +226 -0
  23. data/ext/deflate_ruby/libdeflate/bt_matchfinder.h +342 -0
  24. data/ext/deflate_ruby/libdeflate/common_defs.h +2 -1
  25. data/ext/deflate_ruby/libdeflate/cpu_features_common.h +93 -0
  26. data/ext/deflate_ruby/libdeflate/crc32.c +262 -0
  27. data/ext/deflate_ruby/libdeflate/crc32_multipliers.h +375 -0
  28. data/ext/deflate_ruby/libdeflate/crc32_tables.h +587 -0
  29. data/ext/deflate_ruby/libdeflate/decompress_template.h +777 -0
  30. data/ext/deflate_ruby/libdeflate/deflate_compress.c +4128 -0
  31. data/ext/deflate_ruby/libdeflate/deflate_compress.h +15 -0
  32. data/ext/deflate_ruby/libdeflate/deflate_constants.h +56 -0
  33. data/ext/deflate_ruby/libdeflate/deflate_decompress.c +1208 -0
  34. data/ext/deflate_ruby/libdeflate/gzip_compress.c +90 -0
  35. data/ext/deflate_ruby/libdeflate/gzip_constants.h +45 -0
  36. data/ext/deflate_ruby/libdeflate/gzip_decompress.c +144 -0
  37. data/ext/deflate_ruby/libdeflate/hc_matchfinder.h +401 -0
  38. data/ext/deflate_ruby/libdeflate/ht_matchfinder.h +234 -0
  39. data/ext/deflate_ruby/libdeflate/lib_common.h +106 -0
  40. data/ext/deflate_ruby/libdeflate/libdeflate.h +2 -2
  41. data/ext/deflate_ruby/libdeflate/{lib/matchfinder_common.h → matchfinder_common.h} +3 -3
  42. data/ext/deflate_ruby/libdeflate/x86/adler32_impl.h +135 -0
  43. data/ext/deflate_ruby/libdeflate/x86/adler32_template.h +518 -0
  44. data/ext/deflate_ruby/libdeflate/x86/cpu_features.c +213 -0
  45. data/ext/deflate_ruby/libdeflate/x86/cpu_features.h +170 -0
  46. data/ext/deflate_ruby/libdeflate/x86/crc32_impl.h +159 -0
  47. data/ext/deflate_ruby/libdeflate/x86/crc32_pclmul_template.h +424 -0
  48. data/ext/deflate_ruby/libdeflate/x86/decompress_impl.h +57 -0
  49. data/ext/deflate_ruby/libdeflate.h +411 -0
  50. data/ext/deflate_ruby/matchfinder_common.h +224 -0
  51. data/ext/deflate_ruby/matchfinder_impl.h +122 -0
  52. data/ext/deflate_ruby/utils.c +141 -0
  53. data/ext/deflate_ruby/zlib_compress.c +82 -0
  54. data/ext/deflate_ruby/zlib_constants.h +21 -0
  55. data/ext/deflate_ruby/zlib_decompress.c +104 -0
  56. data/lib/deflate_ruby/version.rb +1 -1
  57. data/lib/deflate_ruby.rb +1 -63
  58. data/sig/deflate_ruby.rbs +4 -0
  59. data/test/test_deflate_ruby.rb +220 -0
  60. data/test/test_helper.rb +6 -0
  61. metadata +89 -144
  62. data/ext/deflate_ruby/libdeflate/CMakeLists.txt +0 -270
  63. data/ext/deflate_ruby/libdeflate/NEWS.md +0 -494
  64. data/ext/deflate_ruby/libdeflate/README.md +0 -228
  65. data/ext/deflate_ruby/libdeflate/libdeflate-config.cmake.in +0 -3
  66. data/ext/deflate_ruby/libdeflate/libdeflate.pc.in +0 -18
  67. data/ext/deflate_ruby/libdeflate/programs/CMakeLists.txt +0 -105
  68. data/ext/deflate_ruby/libdeflate/programs/benchmark.c +0 -696
  69. data/ext/deflate_ruby/libdeflate/programs/checksum.c +0 -218
  70. data/ext/deflate_ruby/libdeflate/programs/config.h.in +0 -19
  71. data/ext/deflate_ruby/libdeflate/programs/gzip.c +0 -688
  72. data/ext/deflate_ruby/libdeflate/programs/prog_util.c +0 -521
  73. data/ext/deflate_ruby/libdeflate/programs/prog_util.h +0 -225
  74. data/ext/deflate_ruby/libdeflate/programs/test_checksums.c +0 -200
  75. data/ext/deflate_ruby/libdeflate/programs/test_custom_malloc.c +0 -155
  76. data/ext/deflate_ruby/libdeflate/programs/test_incomplete_codes.c +0 -385
  77. data/ext/deflate_ruby/libdeflate/programs/test_invalid_streams.c +0 -130
  78. data/ext/deflate_ruby/libdeflate/programs/test_litrunlen_overflow.c +0 -72
  79. data/ext/deflate_ruby/libdeflate/programs/test_overread.c +0 -95
  80. data/ext/deflate_ruby/libdeflate/programs/test_slow_decompression.c +0 -472
  81. data/ext/deflate_ruby/libdeflate/programs/test_trailing_bytes.c +0 -151
  82. data/ext/deflate_ruby/libdeflate/programs/test_util.c +0 -237
  83. data/ext/deflate_ruby/libdeflate/programs/test_util.h +0 -61
  84. data/ext/deflate_ruby/libdeflate/programs/tgetopt.c +0 -118
  85. data/ext/deflate_ruby/libdeflate/scripts/android_build.sh +0 -118
  86. data/ext/deflate_ruby/libdeflate/scripts/android_tests.sh +0 -69
  87. data/ext/deflate_ruby/libdeflate/scripts/benchmark.sh +0 -10
  88. data/ext/deflate_ruby/libdeflate/scripts/checksum.sh +0 -10
  89. data/ext/deflate_ruby/libdeflate/scripts/checksum_benchmarks.sh +0 -253
  90. data/ext/deflate_ruby/libdeflate/scripts/cmake-helper.sh +0 -17
  91. data/ext/deflate_ruby/libdeflate/scripts/deflate_benchmarks.sh +0 -119
  92. data/ext/deflate_ruby/libdeflate/scripts/exec_tests.sh +0 -38
  93. data/ext/deflate_ruby/libdeflate/scripts/gen-release-archives.sh +0 -37
  94. data/ext/deflate_ruby/libdeflate/scripts/gen_bitreverse_tab.py +0 -19
  95. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_multipliers.c +0 -199
  96. data/ext/deflate_ruby/libdeflate/scripts/gen_crc32_tables.c +0 -105
  97. data/ext/deflate_ruby/libdeflate/scripts/gen_default_litlen_costs.py +0 -44
  98. data/ext/deflate_ruby/libdeflate/scripts/gen_offset_slot_map.py +0 -29
  99. data/ext/deflate_ruby/libdeflate/scripts/gzip_tests.sh +0 -523
  100. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/corpus/0 +0 -0
  101. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_compress/fuzz.c +0 -95
  102. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/corpus/0 +0 -3
  103. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/deflate_decompress/fuzz.c +0 -62
  104. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/fuzz.sh +0 -108
  105. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/corpus/0 +0 -0
  106. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/gzip_decompress/fuzz.c +0 -19
  107. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/corpus/0 +0 -3
  108. data/ext/deflate_ruby/libdeflate/scripts/libFuzzer/zlib_decompress/fuzz.c +0 -19
  109. data/ext/deflate_ruby/libdeflate/scripts/run_tests.sh +0 -416
  110. data/ext/deflate_ruby/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake +0 -8
  111. data/ext/deflate_ruby/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake +0 -8
  112. /data/ext/deflate_ruby/{libdeflate/lib/adler32.c → adler32.c} +0 -0
  113. /data/ext/deflate_ruby/{libdeflate/lib/x86/adler32_template.h → adler32_template.h} +0 -0
  114. /data/ext/deflate_ruby/{libdeflate/lib/bt_matchfinder.h → bt_matchfinder.h} +0 -0
  115. /data/ext/deflate_ruby/{libdeflate/lib/cpu_features_common.h → cpu_features_common.h} +0 -0
  116. /data/ext/deflate_ruby/{libdeflate/lib/crc32.c → crc32.c} +0 -0
  117. /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_helpers.h → crc32_pmull_helpers.h} +0 -0
  118. /data/ext/deflate_ruby/{libdeflate/lib/arm/crc32_pmull_wide.h → crc32_pmull_wide.h} +0 -0
  119. /data/ext/deflate_ruby/{libdeflate/lib/x86/decompress_impl.h → decompress_impl.h} +0 -0
  120. /data/ext/deflate_ruby/{libdeflate/lib/decompress_template.h → decompress_template.h} +0 -0
  121. /data/ext/deflate_ruby/{libdeflate/lib/deflate_compress.h → deflate_compress.h} +0 -0
  122. /data/ext/deflate_ruby/{libdeflate/lib/deflate_constants.h → deflate_constants.h} +0 -0
  123. /data/ext/deflate_ruby/{libdeflate/lib/deflate_decompress.c → deflate_decompress.c} +0 -0
  124. /data/ext/deflate_ruby/{libdeflate/lib/gzip_compress.c → gzip_compress.c} +0 -0
  125. /data/ext/deflate_ruby/{libdeflate/lib/gzip_constants.h → gzip_constants.h} +0 -0
  126. /data/ext/deflate_ruby/{libdeflate/lib/gzip_decompress.c → gzip_decompress.c} +0 -0
  127. /data/ext/deflate_ruby/{libdeflate/lib/hc_matchfinder.h → hc_matchfinder.h} +0 -0
  128. /data/ext/deflate_ruby/{libdeflate/lib/ht_matchfinder.h → ht_matchfinder.h} +0 -0
  129. /data/ext/deflate_ruby/{libdeflate/lib/lib_common.h → lib_common.h} +0 -0
  130. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.c +0 -0
  131. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/cpu_features.h +0 -0
  132. /data/ext/deflate_ruby/libdeflate/{lib/arm → arm}/matchfinder_impl.h +0 -0
  133. /data/ext/deflate_ruby/libdeflate/{lib/riscv → riscv}/matchfinder_impl.h +0 -0
  134. /data/ext/deflate_ruby/libdeflate/{lib/utils.c → utils.c} +0 -0
  135. /data/ext/deflate_ruby/libdeflate/{lib/x86 → x86}/matchfinder_impl.h +0 -0
  136. /data/ext/deflate_ruby/libdeflate/{lib/zlib_compress.c → zlib_compress.c} +0 -0
  137. /data/ext/deflate_ruby/libdeflate/{lib/zlib_constants.h → zlib_constants.h} +0 -0
  138. /data/ext/deflate_ruby/libdeflate/{lib/zlib_decompress.c → zlib_decompress.c} +0 -0
@@ -1,253 +0,0 @@
1
- #!/bin/bash
2
-
3
- set -eu -o pipefail
4
-
5
- __have_cpu_feature() {
6
- local feature="$1"
7
- local tag
8
- case $ARCH in
9
- arm*|aarch*)
10
- tag="Features"
11
- ;;
12
- *)
13
- tag="flags"
14
- ;;
15
- esac
16
- grep -q "^$tag"$'[ \t]'"*:.*\<$feature\>" /proc/cpuinfo
17
- }
18
-
19
- have_cpu_features() {
20
- local feature
21
- for feature; do
22
- __have_cpu_feature "$feature" || return 1
23
- done
24
- }
25
-
26
- make_and_test() {
27
- # Build the checksum program and tests. Set the special test support
28
- # flag to get support for LIBDEFLATE_DISABLE_CPU_FEATURES.
29
- rm -rf build
30
- CFLAGS="$CFLAGS -DTEST_SUPPORT__DO_NOT_USE=1" \
31
- cmake -B build -G Ninja -DLIBDEFLATE_BUILD_TESTS=1 \
32
- "${EXTRA_CMAKE_FLAGS[@]}" > /dev/null
33
- cmake --build build > /dev/null
34
-
35
- # Run the checksum tests, for good measure. (This isn't actually part
36
- # of the benchmarking.)
37
- ./build/programs/test_checksums > /dev/null
38
- }
39
-
40
- __do_benchmark() {
41
- local impl="$1" speed
42
- shift
43
- local flags=("$@")
44
-
45
- speed=$(./build/programs/checksum "${CKSUM_FLAGS[@]}" \
46
- "${flags[@]}" -t "$FILE" | \
47
- grep -o '[0-9]\+ MB/s' | grep -o '[0-9]\+')
48
- printf "%-60s%-10s\n" "$CKSUM_NAME ($impl)" "$speed"
49
- }
50
-
51
- do_benchmark() {
52
- local impl="$1"
53
-
54
- CFLAGS="${EXTRA_CFLAGS[*]}" make_and_test
55
- if [ "$impl" = zlib ]; then
56
- __do_benchmark "$impl" "-Z"
57
- else
58
- __do_benchmark "libdeflate, $impl"
59
- if $ENABLE_32BIT; then
60
- CFLAGS="-m32 ${EXTRA_CFLAGS[*]}" make_and_test
61
- __do_benchmark "libdeflate, $impl, 32-bit"
62
- fi
63
- fi
64
- }
65
-
66
- sort_by_speed() {
67
- awk '{print $NF, $0}' | sort -nr | cut -f2- -d' '
68
- }
69
-
70
- disable_cpu_feature() {
71
- LIBDEFLATE_DISABLE_CPU_FEATURES+=",$1"
72
- shift
73
- if (( $# > 0 )); then
74
- EXTRA_CFLAGS+=("$@")
75
- fi
76
- }
77
-
78
- cleanup() {
79
- if $USING_TMPFILE; then
80
- rm "$FILE"
81
- fi
82
- }
83
-
84
- ARCH="$(uname -m)"
85
- USING_TMPFILE=false
86
- EXTRA_CMAKE_FLAGS=()
87
- ENABLE_32BIT=false
88
-
89
- trap cleanup EXIT
90
-
91
- longopts="help"
92
- longopts+=",cmake-flag:"
93
- longopts+=",enable-32bit"
94
-
95
- usage() {
96
- echo "Usage: $0 [--cmake-flag=FLAG]... [--enable-32bit] [FILE]"
97
- }
98
-
99
- if ! options=$(getopt -o "" -l "$longopts" -- "$@"); then
100
- usage 1>&2
101
- exit 1
102
- fi
103
- eval set -- "$options"
104
- while (( $# >= 1 )); do
105
- case "$1" in
106
- --cmake-flag)
107
- EXTRA_CMAKE_FLAGS+=("$2")
108
- shift
109
- ;;
110
- --enable-32bit)
111
- ENABLE_32BIT=true
112
- ;;
113
- --help)
114
- usage
115
- exit 0
116
- ;;
117
- --)
118
- shift
119
- break
120
- ;;
121
- *)
122
- echo 1>&2 "Invalid option: '$1'"
123
- usage 1>&2
124
- exit 1
125
- ;;
126
- esac
127
- shift
128
- done
129
-
130
- if (( $# == 0 )); then
131
- # Generate default test data file.
132
- FILE=$(mktemp -t checksum_testdata.XXXXXXXXXX)
133
- USING_TMPFILE=true
134
- echo "Generating 250 MB test file: $FILE"
135
- head -c 250000000 /dev/urandom > "$FILE"
136
- elif (( $# == 1 )); then
137
- FILE="$1"
138
- else
139
- usage 1>&2
140
- exit 1
141
- fi
142
-
143
- cat << EOF
144
- Method Speed (MB/s)
145
- ------ ------------
146
- EOF
147
-
148
- # CRC-32
149
- CKSUM_NAME="CRC-32"
150
- CKSUM_FLAGS=()
151
- EXTRA_CFLAGS=()
152
- export LIBDEFLATE_DISABLE_CPU_FEATURES=""
153
- {
154
- case $ARCH in
155
- i386|x86_64)
156
- if have_cpu_features vpclmulqdq pclmulqdq avx512bw avx512vl; then
157
- do_benchmark "VPCLMULQDQ/AVX512/VL512"
158
- disable_cpu_feature zmm
159
- do_benchmark "VPCLMULQDQ/AVX512/VL256"
160
- disable_cpu_feature avx512vl "-mno-avx512vl"
161
- disable_cpu_feature avx512bw "-mno-avx512bw"
162
- fi
163
- if have_cpu_features vpclmulqdq pclmulqdq avx2; then
164
- do_benchmark "VPCLMULQDQ/AVX2"
165
- disable_cpu_feature vpclmulqdq "-mno-vpclmulqdq"
166
- fi
167
- if have_cpu_features pclmulqdq avx; then
168
- do_benchmark "PCLMULQDQ/AVX"
169
- disable_cpu_feature avx "-mno-avx"
170
- fi
171
- if have_cpu_features pclmulqdq; then
172
- do_benchmark "PCLMULQDQ"
173
- disable_cpu_feature pclmulqdq "-mno-pclmul"
174
- fi
175
- ;;
176
- aarch*)
177
- EXTRA_CFLAGS=("-march=armv8-a")
178
- if have_cpu_features pmull crc32 sha3; then
179
- do_benchmark "pmullx12_crc_eor3"
180
- disable_cpu_feature sha3
181
- fi
182
- if have_cpu_features pmull crc32; then
183
- do_benchmark "pmullx12_crc"
184
- disable_cpu_feature prefer_pmull
185
- do_benchmark "crc_pmullcombine"
186
- fi
187
- if have_cpu_features crc32; then
188
- do_benchmark "crc"
189
- disable_cpu_feature crc32
190
- fi
191
- if have_cpu_features pmull; then
192
- do_benchmark "pmull4x"
193
- disable_cpu_feature pmull
194
- fi
195
- ;;
196
- esac
197
- do_benchmark "generic"
198
- do_benchmark "zlib"
199
- } | sort_by_speed
200
-
201
- # Adler-32
202
- CKSUM_NAME="Adler-32"
203
- CKSUM_FLAGS=(-A)
204
- EXTRA_CFLAGS=()
205
- export LIBDEFLATE_DISABLE_CPU_FEATURES=""
206
- echo
207
- {
208
- case $ARCH in
209
- i386|x86_64)
210
- if have_cpu_features avx512bw avx512_vnni; then
211
- do_benchmark "AVX512VNNI/VL512"
212
- disable_cpu_feature zmm
213
- if have_cpu_features avx512vl; then
214
- do_benchmark "AVX512VNNI/VL256"
215
- fi
216
- disable_cpu_feature avx512_vnni "-mno-avx512vnni"
217
- disable_cpu_feature avx512bw "-mno-avx512bw"
218
- fi
219
- if have_cpu_features avx2 avx_vnni; then
220
- do_benchmark "AVX-VNNI"
221
- disable_cpu_feature avx_vnni "-mno-avxvnni"
222
- fi
223
- if have_cpu_features avx2; then
224
- do_benchmark "AVX2"
225
- disable_cpu_feature avx2 "-mno-avx2"
226
- fi
227
- if have_cpu_features sse2; then
228
- do_benchmark "SSE2"
229
- disable_cpu_feature sse2 "-mno-sse2"
230
- fi
231
- ;;
232
- arm*)
233
- if have_cpu_features neon; then
234
- do_benchmark "NEON"
235
- disable_cpu_feature neon "-mfpu=vfpv3"
236
- fi
237
- ;;
238
- aarch*)
239
- EXTRA_CFLAGS=("-march=armv8-a")
240
- if have_cpu_features asimd asimddp; then
241
- do_benchmark "DOTPROD"
242
- disable_cpu_feature dotprod
243
- fi
244
- if have_cpu_features asimd; then
245
- do_benchmark "NEON"
246
- disable_cpu_feature neon
247
- EXTRA_CFLAGS=("-march=armv8-a+nosimd")
248
- fi
249
- ;;
250
- esac
251
- do_benchmark "generic"
252
- do_benchmark "zlib"
253
- } | sort_by_speed
@@ -1,17 +0,0 @@
1
- #!/bin/sh
2
-
3
- # This script ensures that the 'build' directory has been created and configured
4
- # with the given CMake options and environment.
5
-
6
- set -e
7
-
8
- TOPDIR="$(dirname "$0")"/..
9
- BUILDDIR="$TOPDIR"/build
10
-
11
- flags=$(env; echo "@CMAKEOPTS@=$*")
12
- if [ "$flags" != "$(cat "$BUILDDIR"/.flags 2>/dev/null || true)" ]; then
13
- rm -rf "$BUILDDIR"/CMakeCache.txt "$BUILDDIR"/CMakeFiles
14
- mkdir -p "$BUILDDIR"
15
- cmake -S "$TOPDIR" -B "$BUILDDIR" "$@"
16
- echo "$flags" > "$BUILDDIR"/.flags
17
- fi
@@ -1,119 +0,0 @@
1
- #!/bin/bash
2
-
3
- set -eu -o pipefail
4
- topdir="$(dirname "$0")/.."
5
- tmpfile=$(mktemp)
6
- trap 'rm -f $tmpfile' EXIT
7
-
8
- run_benchmark()
9
- {
10
- local best_ctime=1000000000
11
- local i
12
-
13
- for i in $(seq "$NUM_ITERATIONS"); do
14
- "$@" > "$tmpfile"
15
- csize=$(awk '/Compressed/{print $4}' "$tmpfile")
16
- ctime=$(awk '/Compression time/{print $3}' "$tmpfile")
17
- if (( ctime < best_ctime )); then
18
- best_ctime=$ctime
19
- fi
20
- : "$i" # make shellcheck happy
21
- done
22
- CSIZE=$csize
23
- CTIME=$best_ctime
24
- }
25
-
26
- multifile()
27
- {
28
- local file results cmd best em
29
-
30
- NUM_ITERATIONS=1
31
-
32
- echo "File | zlib -6 | zlib -9 | libdeflate -6 | libdeflate -9 | libdeflate -12"
33
- echo "-----|---------|---------|---------------|---------------|---------------"
34
-
35
- for file in "$@"; do
36
- echo -n "$(basename "$file")"
37
- results=()
38
- cmd=("$topdir/build/programs/benchmark"
39
- -s"$(stat -c "%s" "$file")" "$file")
40
- run_benchmark "${cmd[@]}" -Y -6
41
- results+=("$CSIZE")
42
- run_benchmark "${cmd[@]}" -Y -6
43
- results+=("$CSIZE")
44
- run_benchmark "${cmd[@]}" -6
45
- results+=("$CSIZE")
46
- run_benchmark "${cmd[@]}" -9
47
- results+=("$CSIZE")
48
- run_benchmark "${cmd[@]}" -12
49
- results+=("$CSIZE")
50
- best=2000000000
51
- for result in "${results[@]}"; do
52
- if (( result < best)); then
53
- best=$result
54
- fi
55
- done
56
- for result in "${results[@]}"; do
57
- if (( result == best )); then
58
- em="**"
59
- else
60
- em=""
61
- fi
62
- echo -n " | ${em}${result}${em}"
63
- done
64
- echo
65
- done
66
- }
67
-
68
- single_file()
69
- {
70
- local file=$1
71
- local usize args
72
- local include_old=false
73
-
74
- usize=$(stat -c "%s" "$file")
75
- : ${NUM_ITERATIONS:=3}
76
-
77
- if [ -e "$topdir/benchmark-old" ]; then
78
- include_old=true
79
- fi
80
- echo -n "Level | libdeflate (new) "
81
- if $include_old; then
82
- echo -n "| libdeflate (old) "
83
- fi
84
- echo "| zlib"
85
- echo -n "------|------------------"
86
- if $include_old; then
87
- echo -n "|------------------"
88
- fi
89
- echo "|-----"
90
- for level in {1..12}; do
91
- echo -n "$level"
92
- args=("$file" -s "$usize" "-$level")
93
-
94
- run_benchmark "$topdir/build/programs/benchmark" "${args[@]}"
95
- echo -n " | $CSIZE / $CTIME"
96
-
97
- if $include_old; then
98
- run_benchmark "$topdir/benchmark-old" "${args[@]}"
99
- echo -n " | $CSIZE / $CTIME"
100
- fi
101
-
102
- if (( level > 9 )); then
103
- echo -n " | N/A"
104
- else
105
- run_benchmark "$topdir/build/programs/benchmark" \
106
- "${args[@]}" -Y
107
- echo -n " | $CSIZE / $CTIME"
108
- fi
109
- echo
110
- done
111
- }
112
-
113
- if (( $# > 1 )); then
114
- multifile "$@"
115
- elif (( $# == 1 )); then
116
- single_file "$@"
117
- else
118
- echo 1>&2 "Usage: $0 FILE..."
119
- fi
@@ -1,38 +0,0 @@
1
- #!/bin/sh
2
- #
3
- # Helper script used by run_tests.sh and android_tests.sh,
4
- # not intended to be run directly
5
- #
6
-
7
- set -eu
8
-
9
- DIR=${1:-.}
10
-
11
- cd "$DIR"
12
-
13
- run_cmd() {
14
- echo "$WRAPPER $*"
15
- $WRAPPER "$@" > /dev/null
16
- }
17
-
18
- for prog in ./test_*; do
19
- run_cmd "$prog"
20
- done
21
-
22
- for format in '' '-g' '-z'; do
23
- for ref_impl in '' '-Y' '-Z'; do
24
- run_cmd ./benchmark $format $ref_impl "$TESTDATA"
25
- done
26
- done
27
- for level in 0 1 3 7 9; do
28
- for ref_impl in '' '-Y'; do
29
- run_cmd ./benchmark -$level $ref_impl "$TESTDATA"
30
- done
31
- done
32
- for level in 0 1 3 7 9 12; do
33
- for ref_impl in '' '-Z'; do
34
- run_cmd ./benchmark -$level $ref_impl "$TESTDATA"
35
- done
36
- done
37
-
38
- echo "exec_tests finished successfully" # Needed for 'adb shell'
@@ -1,37 +0,0 @@
1
- #!/bin/bash
2
-
3
- set -eu -o pipefail
4
-
5
- # This script generates source and binary archives that should be posted for
6
- # each new release of libdeflate.
7
-
8
- prefix="libdeflate-$(git describe HEAD | sed 's/^v//')"
9
-
10
- # Generate source code archive libdeflate-*.tar.gz
11
- tarball="${prefix}.tar.gz"
12
- echo "Generating $tarball"
13
- git archive --format=tar --prefix="${prefix}/" HEAD \
14
- | libdeflate-gzip -12 > "$tarball"
15
-
16
- # Generate Windows binary releases libdeflate-*-windows-*-bin.zip
17
- for arch in 'i686' 'x86_64'; do
18
- dir=${prefix}-windows-${arch}-bin
19
- zipfile="${dir}.zip"
20
- echo "Generating $zipfile"
21
- rm -rf build "$dir" "$zipfile"
22
- CFLAGS="-Werror" ${arch}-w64-mingw32-cmake -B build -G Ninja \
23
- -DLIBDEFLATE_BUILD_TESTS=1 > /dev/null
24
- cmake --build build > /dev/null
25
- mkdir "$dir"
26
- cp libdeflate.h build/libdeflate.{dll,dll.a,a} \
27
- build/programs/{benchmark,checksum}.exe "$dir"
28
- cp build/programs/libdeflate-gzip.exe "$dir"/gzip.exe
29
- cp build/programs/libdeflate-gzip.exe "$dir"/gunzip.exe
30
- ${arch}-w64-mingw32-strip "$dir"/libdeflate.dll "$dir"/*.exe
31
- for file in COPYING NEWS.md README.md; do
32
- sed < $file > "$dir/${file}.txt" -e 's/$/\r/g'
33
- done
34
- (cd "$dir" && zip -q -r "../${zipfile}" .)
35
- done
36
-
37
- echo "Successfully generated release archives"
@@ -1,19 +0,0 @@
1
- #!/usr/bin/env python3
2
- #
3
- # This script computes a table that maps each byte to its bitwise reverse.
4
-
5
- def reverse_byte(v):
6
- return sum(1 << (7 - bit) for bit in range(8) if (v & (1 << bit)) != 0)
7
-
8
- tab = [reverse_byte(v) for v in range(256)]
9
-
10
- print('static const u8 bitreverse_tab[256] = {')
11
- for i in range(0, len(tab), 8):
12
- print('\t', end='')
13
- for j, v in enumerate(tab[i:i+8]):
14
- print(f'0x{v:02x},', end='')
15
- if j == 7:
16
- print('')
17
- else:
18
- print(' ', end='')
19
- print('};')
@@ -1,199 +0,0 @@
1
- /*
2
- * gen_crc32_multipliers.c
3
- *
4
- * Copyright 2016 Eric Biggers
5
- *
6
- * Permission is hereby granted, free of charge, to any person
7
- * obtaining a copy of this software and associated documentation
8
- * files (the "Software"), to deal in the Software without
9
- * restriction, including without limitation the rights to use,
10
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- * copies of the Software, and to permit persons to whom the
12
- * Software is furnished to do so, subject to the following
13
- * conditions:
14
- *
15
- * The above copyright notice and this permission notice shall be
16
- * included in all copies or substantial portions of the Software.
17
- *
18
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
- * OTHER DEALINGS IN THE SOFTWARE.
26
- */
27
-
28
- /*
29
- * This program computes the constant multipliers needed for "folding" over
30
- * various distances with the gzip CRC-32. Each such multiplier is x^D mod G(x)
31
- * for some distance D, in bits, over which the folding is occurring.
32
- *
33
- * Folding works as follows: let A(x) be a polynomial (possibly reduced
34
- * partially or fully mod G(x)) for part of the message, and let B(x) be a
35
- * polynomial (possibly reduced partially or fully mod G(x)) for a later part of
36
- * the message. The unreduced combined polynomial is A(x)*x^D + B(x), where D
37
- * is the number of bits separating the two parts of the message plus len(B(x)).
38
- * Since mod G(x) can be applied at any point, x^D mod G(x) can be precomputed
39
- * and used instead of x^D unreduced. That allows the combined polynomial to be
40
- * computed relatively easily in a partially-reduced form A(x)*(x^D mod G(x)) +
41
- * B(x), with length max(len(A(x)) + 31, len(B(x))). This does require doing a
42
- * polynomial multiplication (carryless multiplication).
43
- *
44
- * "Folding" in this way can be used for the entire CRC computation except the
45
- * final reduction to 32 bits; this works well when CPU support for carryless
46
- * multiplication is available. It can also be used to combine CRCs of
47
- * different parts of the message that were computed using a different method.
48
- *
49
- * Note that the gzip CRC-32 uses bit-reversed polynomials. I.e., the low order
50
- * bits are really the high order polynomial coefficients.
51
- */
52
-
53
- #include <inttypes.h>
54
- #include <stdio.h>
55
-
56
- #include "../common_defs.h"
57
-
58
- /* The generator polynomial G(x) for the gzip CRC-32 */
59
- #define CRCPOLY 0xEDB88320 /* G(x) without x^32 term */
60
- #define CRCPOLY_FULL (((u64)CRCPOLY << 1) | 1) /* G(x) */
61
-
62
- /* Compute x^D mod G(x) */
63
- static u32
64
- compute_xD_modG(size_t D)
65
- {
66
- /* Start with x^0 mod G(x) */
67
- u32 remainder = 0x80000000;
68
-
69
- /* Each iteration, 'remainder' becomes x^i mod G(x) */
70
- for (size_t i = 1; i <= D; i++)
71
- remainder = (remainder >> 1) ^ ((remainder & 1) ? CRCPOLY : 0);
72
-
73
- /* Now 'remainder' is x^D mod G(x) */
74
- return remainder;
75
- }
76
-
77
- /* Compute floor(x^64 / G(x)) */
78
- static u64
79
- compute_x64_div_G(void)
80
- {
81
- u64 quotient = 0;
82
- u64 dividend = 0x1;
83
-
84
- for (int i = 0; i < 64 - 32 + 1; i++) {
85
- if ((dividend >> i) & 1) {
86
- quotient |= (u64)1 << i;
87
- dividend ^= CRCPOLY_FULL << i;
88
- }
89
- }
90
-
91
- return quotient;
92
- }
93
-
94
- static void
95
- gen_vec_folding_constants(void)
96
- {
97
- /*
98
- * Compute the multipliers needed for CRC-32 folding with carryless
99
- * multiplication instructions that operate on the 64-bit halves of
100
- * 128-bit segments. Using the terminology from earlier, for each 64-bit
101
- * fold len(A(x)) = 64, and len(B(x)) = 95 since a 64-bit polynomial
102
- * multiplied by a 32-bit one produces a 95-bit one. When A(x) is the
103
- * low order polynomial half of a 128-bit segments (high order physical
104
- * half), the separation between the message parts is the total length
105
- * of the 128-bit segments separating the values. When A(x) is the high
106
- * order polynomial half, the separation is 64 bits greater.
107
- */
108
- for (int i = 1; i <= 32; i++) {
109
- const int sep_lo = 128 * (i - 1);
110
- const int sep_hi = sep_lo + 64;
111
- const int len_B = 95;
112
- int D;
113
-
114
- /* A(x) = high 64 polynomial bits (low 64 physical bits) */
115
- D = sep_hi + len_B;
116
- printf("#define CRC32_X%d_MODG 0x%08"PRIx32" /* x^%d mod G(x) */\n",
117
- D, compute_xD_modG(D), D);
118
-
119
- /* A(x) = low 64 polynomial bits (high 64 physical bits) */
120
- D = sep_lo + len_B;
121
- printf("#define CRC32_X%d_MODG 0x%08"PRIx32" /* x^%d mod G(x) */\n",
122
- D, compute_xD_modG(D), D);
123
- printf("\n");
124
- }
125
-
126
- /* Multiplier for final 96 => 64 bit fold */
127
- printf("#define CRC32_X63_MODG 0x%08"PRIx32" /* x^63 mod G(x) */\n",
128
- compute_xD_modG(63));
129
-
130
- /*
131
- * Constants for final 64 => 32 bit reduction. These constants are the
132
- * odd ones out, as this final reduction step can't use the regular CRC
133
- * folding described above. It uses Barrett reduction instead.
134
- */
135
- printf("#define CRC32_BARRETT_CONSTANT_1 0x%016"PRIx64"ULL /* floor(x^64 / G(x)) */\n",
136
- compute_x64_div_G());
137
- printf("#define CRC32_BARRETT_CONSTANT_2 0x%016"PRIx64"ULL /* G(x) */\n",
138
- CRCPOLY_FULL);
139
- printf("#define CRC32_BARRETT_CONSTANTS { CRC32_BARRETT_CONSTANT_1, CRC32_BARRETT_CONSTANT_2 }\n");
140
- }
141
-
142
- /* Multipliers for combining the CRCs of separate chunks */
143
- static void
144
- gen_chunk_constants(void)
145
- {
146
- const size_t num_chunks = 4;
147
- const size_t table_len = 129;
148
- const size_t min_chunk_len = 128;
149
-
150
- printf("#define CRC32_NUM_CHUNKS %zu\n", num_chunks);
151
- printf("#define CRC32_MIN_VARIABLE_CHUNK_LEN %zuUL\n", min_chunk_len);
152
- printf("#define CRC32_MAX_VARIABLE_CHUNK_LEN %zuUL\n",
153
- (table_len - 1) * min_chunk_len);
154
- printf("\n");
155
- printf("/* Multipliers for implementations that use a variable chunk length */\n");
156
- printf("static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {\n");
157
- printf("\t{ 0 /* unused row */ },\n");
158
- for (size_t i = 1; i < table_len; i++) {
159
- const size_t chunk_len = i*min_chunk_len;
160
-
161
- printf("\t/* chunk_len=%zu */\n", chunk_len);
162
- printf("\t{ ");
163
- for (size_t j = num_chunks - 1; j >= 1; j--) {
164
- const size_t D = (j * 8 * chunk_len) - 33;
165
-
166
- printf("0x%08"PRIx32" /* x^%zu mod G(x) */, ",
167
- compute_xD_modG(D), D);
168
- }
169
- printf("},\n");
170
- }
171
- printf("};\n");
172
- printf("\n");
173
-
174
- printf("/* Multipliers for implementations that use a large fixed chunk length */\n");
175
- const size_t fixed_chunk_len = 32768;
176
- printf("#define CRC32_FIXED_CHUNK_LEN %zuUL\n", fixed_chunk_len);
177
- for (int j = 1; j < num_chunks; j++) {
178
- const size_t D = (j * 8 * fixed_chunk_len) - 33;
179
-
180
- printf("#define CRC32_FIXED_CHUNK_MULT_%d 0x%08"PRIx32" /* x^%zu mod G(x) */\n",
181
- j, compute_xD_modG(D), D);
182
- }
183
- }
184
-
185
- int
186
- main(void)
187
- {
188
- printf("/*\n"
189
- " * crc32_multipliers.h - constants for CRC-32 folding\n"
190
- " *\n"
191
- " * THIS FILE WAS GENERATED BY gen_crc32_multipliers.c. DO NOT EDIT.\n"
192
- " */\n"
193
- "\n");
194
-
195
- gen_vec_folding_constants();
196
- printf("\n");
197
- gen_chunk_constants();
198
- return 0;
199
- }