ob64 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/ob64/version.rb +1 -1
- data/ob64.gemspec +2 -0
- data/vendor/libbase64/.gitignore +12 -0
- data/vendor/libbase64/.travis.yml +71 -0
- data/vendor/libbase64/CMakeLists.txt +264 -0
- data/vendor/libbase64/LICENSE +28 -0
- data/vendor/libbase64/Makefile +93 -0
- data/vendor/libbase64/README.md +474 -0
- data/vendor/libbase64/base64-benchmarks.png +0 -0
- data/vendor/libbase64/bin/base64.c +132 -0
- data/vendor/libbase64/cmake/Modules/TargetArch.cmake +29 -0
- data/vendor/libbase64/cmake/Modules/TargetSIMDInstructionSet.cmake +34 -0
- data/vendor/libbase64/cmake/base64-config.cmake.in +5 -0
- data/vendor/libbase64/cmake/config.h.in +25 -0
- data/vendor/libbase64/cmake/test-arch.c +35 -0
- data/vendor/libbase64/include/libbase64.h +145 -0
- data/vendor/libbase64/lib/arch/avx/codec.c +42 -0
- data/vendor/libbase64/lib/arch/avx2/codec.c +42 -0
- data/vendor/libbase64/lib/arch/avx2/dec_loop.c +110 -0
- data/vendor/libbase64/lib/arch/avx2/dec_reshuffle.c +34 -0
- data/vendor/libbase64/lib/arch/avx2/enc_loop.c +89 -0
- data/vendor/libbase64/lib/arch/avx2/enc_reshuffle.c +83 -0
- data/vendor/libbase64/lib/arch/avx2/enc_translate.c +30 -0
- data/vendor/libbase64/lib/arch/generic/32/dec_loop.c +86 -0
- data/vendor/libbase64/lib/arch/generic/32/enc_loop.c +73 -0
- data/vendor/libbase64/lib/arch/generic/64/enc_loop.c +77 -0
- data/vendor/libbase64/lib/arch/generic/codec.c +39 -0
- data/vendor/libbase64/lib/arch/generic/dec_head.c +37 -0
- data/vendor/libbase64/lib/arch/generic/dec_tail.c +91 -0
- data/vendor/libbase64/lib/arch/generic/enc_head.c +24 -0
- data/vendor/libbase64/lib/arch/generic/enc_tail.c +34 -0
- data/vendor/libbase64/lib/arch/neon32/codec.c +72 -0
- data/vendor/libbase64/lib/arch/neon32/dec_loop.c +106 -0
- data/vendor/libbase64/lib/arch/neon32/enc_loop.c +58 -0
- data/vendor/libbase64/lib/arch/neon32/enc_reshuffle.c +54 -0
- data/vendor/libbase64/lib/arch/neon32/enc_translate.c +57 -0
- data/vendor/libbase64/lib/arch/neon64/codec.c +70 -0
- data/vendor/libbase64/lib/arch/neon64/dec_loop.c +129 -0
- data/vendor/libbase64/lib/arch/neon64/enc_loop.c +66 -0
- data/vendor/libbase64/lib/arch/neon64/enc_reshuffle.c +54 -0
- data/vendor/libbase64/lib/arch/sse41/codec.c +42 -0
- data/vendor/libbase64/lib/arch/sse42/codec.c +42 -0
- data/vendor/libbase64/lib/arch/ssse3/codec.c +42 -0
- data/vendor/libbase64/lib/arch/ssse3/dec_loop.c +173 -0
- data/vendor/libbase64/lib/arch/ssse3/dec_reshuffle.c +33 -0
- data/vendor/libbase64/lib/arch/ssse3/enc_loop.c +67 -0
- data/vendor/libbase64/lib/arch/ssse3/enc_reshuffle.c +48 -0
- data/vendor/libbase64/lib/arch/ssse3/enc_translate.c +33 -0
- data/vendor/libbase64/lib/codec_choose.c +281 -0
- data/vendor/libbase64/lib/codecs.h +65 -0
- data/vendor/libbase64/lib/env.h +67 -0
- data/vendor/libbase64/lib/exports.txt +7 -0
- data/vendor/libbase64/lib/lib.c +164 -0
- data/vendor/libbase64/lib/lib_openmp.c +149 -0
- data/vendor/libbase64/lib/tables/.gitignore +1 -0
- data/vendor/libbase64/lib/tables/Makefile +17 -0
- data/vendor/libbase64/lib/tables/table_dec_32bit.h +393 -0
- data/vendor/libbase64/lib/tables/table_enc_12bit.h +1031 -0
- data/vendor/libbase64/lib/tables/table_enc_12bit.py +45 -0
- data/vendor/libbase64/lib/tables/table_generator.c +184 -0
- data/vendor/libbase64/lib/tables/tables.c +40 -0
- data/vendor/libbase64/lib/tables/tables.h +23 -0
- metadata +64 -4
@@ -0,0 +1,474 @@
|
|
1
|
+
# Fast Base64 stream encoder/decoder
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/aklomp/base64.png?branch=master)](https://travis-ci.org/aklomp/base64)
|
4
|
+
|
5
|
+
This is an implementation of a base64 stream encoding/decoding library in C99
|
6
|
+
with SIMD (AVX2, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
|
7
|
+
[OpenMP](http://www.openmp.org) acceleration. It also contains wrapper functions
|
8
|
+
to encode/decode simple length-delimited strings. This library aims to be:
|
9
|
+
|
10
|
+
- FAST;
|
11
|
+
- easy to use;
|
12
|
+
- elegant.
|
13
|
+
|
14
|
+
On x86, the library does runtime feature detection. The first time it's called,
|
15
|
+
the library will determine the appropriate encoding/decoding routines for the
|
16
|
+
machine. It then remembers them for the lifetime of the program. If your
|
17
|
+
processor supports AVX2, SSSE3, SSE4.1, SSE4.2 or AVX instructions, the library
|
18
|
+
will pick an optimized codec that lets it encode/decode 12 or 24 bytes at a
|
19
|
+
time, which gives a speedup of four or more times compared to the "plain"
|
20
|
+
bytewise codec.
|
21
|
+
|
22
|
+
NEON support is hardcoded to on or off at compile time, because portable
|
23
|
+
runtime feature detection is unavailable on ARM.
|
24
|
+
|
25
|
+
Even if your processor does not support SIMD instructions, this is a very fast
|
26
|
+
library. The fallback routine can process 32 or 64 bits of input in one round,
|
27
|
+
depending on your processor's word width, which still makes it significantly
|
28
|
+
faster than naive bytewise implementations. On some 64-bit machines, the 64-bit
|
29
|
+
routines even outperform the SSSE3 ones.
|
30
|
+
|
31
|
+
To the author's knowledge, at the time of original release, this was the only
|
32
|
+
Base64 library to offer SIMD acceleration. The author wrote
|
33
|
+
[an article](http://www.alfredklomp.com/programming/sse-base64) explaining one
|
34
|
+
possible SIMD approach to encoding/decoding Base64. The article can help figure
|
35
|
+
out what the code is doing, and why.
|
36
|
+
|
37
|
+
Notable features:
|
38
|
+
|
39
|
+
- Really fast on x86 and ARM systems by using SIMD vector processing;
|
40
|
+
- Can use [OpenMP](http://www.openmp.org) for even more parallel speedups;
|
41
|
+
- Really fast on other 32 or 64-bit platforms through optimized routines;
|
42
|
+
- Reads/writes blocks of streaming data;
|
43
|
+
- Does not dynamically allocate memory;
|
44
|
+
- Valid C99 that compiles with pedantic options on;
|
45
|
+
- Re-entrant and threadsafe;
|
46
|
+
- Unit tested;
|
47
|
+
- Uses Duff's Device.
|
48
|
+
|
49
|
+
## Acknowledgements
|
50
|
+
|
51
|
+
The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by
|
52
|
+
[Inkymail](https://github.com/inkymail/base64), who, in their fork, also
|
53
|
+
implemented some additional features. Their work is slowly being backported
|
54
|
+
into this project.
|
55
|
+
|
56
|
+
The SSSE3 and AVX2 codecs were substantially improved by using some very clever
|
57
|
+
optimizations described by Wojciech Muła in a
|
58
|
+
[series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of
|
59
|
+
[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html).
|
60
|
+
His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64).
|
61
|
+
|
62
|
+
The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl).
|
63
|
+
|
64
|
+
## Building
|
65
|
+
|
66
|
+
The `lib` directory contains the code for the actual library.
|
67
|
+
Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`.
|
68
|
+
The first is a single, self-contained object file that you can link into your own project.
|
69
|
+
The second is a standalone test binary that works similarly to the `base64` system utility.
|
70
|
+
|
71
|
+
The matching header file needed to use this library is in `include/libbase64.h`.
|
72
|
+
|
73
|
+
To compile just the "plain" library without SIMD codecs, type:
|
74
|
+
|
75
|
+
```sh
|
76
|
+
make lib/libbase64.o
|
77
|
+
```
|
78
|
+
|
79
|
+
Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS`,
|
80
|
+
`SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
|
81
|
+
A typical build invocation on x86 looks like this:
|
82
|
+
|
83
|
+
```sh
|
84
|
+
AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make lib/libbase64.o
|
85
|
+
```
|
86
|
+
|
87
|
+
### AVX2
|
88
|
+
|
89
|
+
To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`.
|
90
|
+
Example:
|
91
|
+
|
92
|
+
```sh
|
93
|
+
AVX2_CFLAGS=-mavx2 make
|
94
|
+
```
|
95
|
+
|
96
|
+
The codec will only be used if runtime feature detection shows that the target machine supports AVX2.
|
97
|
+
|
98
|
+
### SSSE3
|
99
|
+
|
100
|
+
To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`.
|
101
|
+
Example:
|
102
|
+
|
103
|
+
```sh
|
104
|
+
SSSE3_CFLAGS=-mssse3 make
|
105
|
+
```
|
106
|
+
|
107
|
+
The codec will only be used if runtime feature detection shows that the target machine supports SSSE3.
|
108
|
+
|
109
|
+
### NEON
|
110
|
+
|
111
|
+
This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups.
|
112
|
+
These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with an uint32/64 codec to stay fast on smaller inputs!
|
113
|
+
|
114
|
+
Use LLVM/Clang for compiling the NEON codecs.
|
115
|
+
The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vst4q_u8()`, and the generated assembly code is of low quality.
|
116
|
+
NEON intrinsics are a known weak area of GCC.
|
117
|
+
Clang does a better job.
|
118
|
+
|
119
|
+
NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time.
|
120
|
+
But you can do your own runtime detection.
|
121
|
+
You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`.
|
122
|
+
|
123
|
+
These are your options:
|
124
|
+
|
125
|
+
1. Don't include NEON support;
|
126
|
+
2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`;
|
127
|
+
3. build everything with NEON support and make it the default;
|
128
|
+
4. build everything with NEON support, but don't make it the default (which makes no sense).
|
129
|
+
|
130
|
+
For option 1, simply don't specify any NEON-specific compiler flags at all, like so:
|
131
|
+
|
132
|
+
```sh
|
133
|
+
CC=clang CFLAGS="-march=armv6" make
|
134
|
+
```
|
135
|
+
|
136
|
+
For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support.
|
137
|
+
The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7.
|
138
|
+
It will also make the NEON codec the default.
|
139
|
+
For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag.
|
140
|
+
No ARMv7/NEON code will then be touched.
|
141
|
+
|
142
|
+
```sh
|
143
|
+
CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make
|
144
|
+
```
|
145
|
+
|
146
|
+
For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`.
|
147
|
+
This example works for the Raspberry Pi 2B V1.1, which has NEON support:
|
148
|
+
|
149
|
+
```sh
|
150
|
+
CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make
|
151
|
+
```
|
152
|
+
|
153
|
+
To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub.
|
154
|
+
(The AArch64 target has mandatory NEON64 support.)
|
155
|
+
Example:
|
156
|
+
|
157
|
+
```sh
|
158
|
+
CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make
|
159
|
+
```
|
160
|
+
|
161
|
+
### OpenMP
|
162
|
+
|
163
|
+
To enable OpenMP on GCC you need to build with `-fopenmp`. This can be achieved by setting the `OPENMP` environment variable to `1`.
|
164
|
+
|
165
|
+
Example:
|
166
|
+
|
167
|
+
```sh
|
168
|
+
OPENMP=1 make
|
169
|
+
```
|
170
|
+
|
171
|
+
This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`.
|
172
|
+
|
173
|
+
By default the number of parallel threads will be equal to the number of cores of the processor.
|
174
|
+
On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance.
|
175
|
+
|
176
|
+
To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance
|
177
|
+
|
178
|
+
```sh
|
179
|
+
OMP_DISPLAY_ENV=VERBOSE test/benchmark
|
180
|
+
```
|
181
|
+
|
182
|
+
To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance
|
183
|
+
|
184
|
+
```sh
|
185
|
+
OMP_THREAD_LIMIT=2 test/benchmark
|
186
|
+
```
|
187
|
+
|
188
|
+
An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled:
|
189
|
+
|
190
|
+
```sh
|
191
|
+
make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test
|
192
|
+
```
|
193
|
+
|
194
|
+
## API reference
|
195
|
+
|
196
|
+
Strings are represented as a pointer and a length; they are not
|
197
|
+
zero-terminated. This was a conscious design decision. In the decoding step,
|
198
|
+
relying on zero-termination would make no sense since the output could contain
|
199
|
+
legitimate zero bytes. In the encoding step, returning the length saves the
|
200
|
+
overhead of calling `strlen()` on the output. If you insist on the trailing
|
201
|
+
zero, you can easily add it yourself at the given offset.
|
202
|
+
|
203
|
+
### Flags
|
204
|
+
|
205
|
+
Some API calls take a `flags` argument.
|
206
|
+
That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build.
|
207
|
+
Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available.
|
208
|
+
The following constants can be used:
|
209
|
+
|
210
|
+
- `BASE64_FORCE_AVX2`
|
211
|
+
- `BASE64_FORCE_NEON32`
|
212
|
+
- `BASE64_FORCE_NEON64`
|
213
|
+
- `BASE64_FORCE_PLAIN`
|
214
|
+
- `BASE64_FORCE_SSSE3`
|
215
|
+
- `BASE64_FORCE_SSE41`
|
216
|
+
- `BASE64_FORCE_SSE42`
|
217
|
+
- `BASE64_FORCE_AVX`
|
218
|
+
|
219
|
+
Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms.
|
220
|
+
|
221
|
+
### Encoding
|
222
|
+
|
223
|
+
#### base64_encode
|
224
|
+
|
225
|
+
```c
|
226
|
+
void base64_encode
|
227
|
+
( const char *src
|
228
|
+
, size_t srclen
|
229
|
+
, char *out
|
230
|
+
, size_t *outlen
|
231
|
+
, int flags
|
232
|
+
) ;
|
233
|
+
```
|
234
|
+
|
235
|
+
Wrapper function to encode a plain string of given length.
|
236
|
+
Output is written to `out` without trailing zero.
|
237
|
+
Output length in bytes is written to `outlen`.
|
238
|
+
The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input.
|
239
|
+
|
240
|
+
#### base64_stream_encode_init
|
241
|
+
|
242
|
+
```c
|
243
|
+
void base64_stream_encode_init
|
244
|
+
( struct base64_state *state
|
245
|
+
, int flags
|
246
|
+
) ;
|
247
|
+
```
|
248
|
+
|
249
|
+
Call this before calling `base64_stream_encode()` to init the state.
|
250
|
+
|
251
|
+
#### base64_stream_encode
|
252
|
+
|
253
|
+
```c
|
254
|
+
void base64_stream_encode
|
255
|
+
( struct base64_state *state
|
256
|
+
, const char *src
|
257
|
+
, size_t srclen
|
258
|
+
, char *out
|
259
|
+
, size_t *outlen
|
260
|
+
) ;
|
261
|
+
```
|
262
|
+
|
263
|
+
Encodes the block of data of given length at `src`, into the buffer at `out`.
|
264
|
+
Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin.
|
265
|
+
Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
|
266
|
+
Does not zero-terminate or finalize the output.
|
267
|
+
|
268
|
+
#### base64_stream_encode_final
|
269
|
+
|
270
|
+
```c
|
271
|
+
void base64_stream_encode_final
|
272
|
+
( struct base64_state *state
|
273
|
+
, char *out
|
274
|
+
, size_t *outlen
|
275
|
+
) ;
|
276
|
+
```
|
277
|
+
|
278
|
+
Finalizes the output begun by previous calls to `base64_stream_encode()`.
|
279
|
+
Adds the required end-of-stream markers if appropriate.
|
280
|
+
`outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero).
|
281
|
+
|
282
|
+
### Decoding
|
283
|
+
|
284
|
+
#### base64_decode
|
285
|
+
|
286
|
+
```c
|
287
|
+
int base64_decode
|
288
|
+
( const char *src
|
289
|
+
, size_t srclen
|
290
|
+
, char *out
|
291
|
+
, size_t *outlen
|
292
|
+
, int flags
|
293
|
+
) ;
|
294
|
+
```
|
295
|
+
|
296
|
+
Wrapper function to decode a plain string of given length.
|
297
|
+
Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`.
|
298
|
+
The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input.
|
299
|
+
Returns `1` for success, and `0` when a decode error has occurred due to invalid input.
|
300
|
+
Returns `-1` if the chosen codec is not included in the current build.
|
301
|
+
|
302
|
+
#### base64_stream_decode_init
|
303
|
+
|
304
|
+
```c
|
305
|
+
void base64_stream_decode_init
|
306
|
+
( struct base64_state *state
|
307
|
+
, int flags
|
308
|
+
) ;
|
309
|
+
```
|
310
|
+
|
311
|
+
Call this before calling `base64_stream_decode()` to init the state.
|
312
|
+
|
313
|
+
#### base64_stream_decode
|
314
|
+
|
315
|
+
```c
|
316
|
+
int base64_stream_decode
|
317
|
+
( struct base64_state *state
|
318
|
+
, const char *src
|
319
|
+
, size_t srclen
|
320
|
+
, char *out
|
321
|
+
, size_t *outlen
|
322
|
+
) ;
|
323
|
+
```
|
324
|
+
|
325
|
+
Decodes the block of data of given length at `src`, into the buffer at `out`.
|
326
|
+
Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin.
|
327
|
+
Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
|
328
|
+
Does not zero-terminate the output.
|
329
|
+
Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character.
|
330
|
+
Returns -1 if the chosen codec is not included in the current build.
|
331
|
+
Used by the test harness to check whether a codec is available for testing.
|
332
|
+
|
333
|
+
## Examples
|
334
|
+
|
335
|
+
A simple example of encoding a static string to base64 and printing the output
|
336
|
+
to stdout:
|
337
|
+
|
338
|
+
```c
|
339
|
+
#include <stdio.h> /* fwrite */
|
340
|
+
#include "libbase64.h"
|
341
|
+
|
342
|
+
int main ()
|
343
|
+
{
|
344
|
+
char src[] = "hello world";
|
345
|
+
char out[20];
|
346
|
+
size_t srclen = sizeof(src) - 1;
|
347
|
+
size_t outlen;
|
348
|
+
|
349
|
+
base64_encode(src, srclen, out, &outlen, 0);
|
350
|
+
|
351
|
+
fwrite(out, outlen, 1, stdout);
|
352
|
+
|
353
|
+
return 0;
|
354
|
+
}
|
355
|
+
```
|
356
|
+
|
357
|
+
A simple example (no error checking, etc) of stream encoding standard input to
|
358
|
+
standard output:
|
359
|
+
|
360
|
+
```c
|
361
|
+
#include <stdio.h>
|
362
|
+
#include "libbase64.h"
|
363
|
+
|
364
|
+
int main ()
|
365
|
+
{
|
366
|
+
size_t nread, nout;
|
367
|
+
char buf[12000], out[16000];
|
368
|
+
struct base64_state state;
|
369
|
+
|
370
|
+
// Initialize stream encoder:
|
371
|
+
base64_stream_encode_init(&state, 0);
|
372
|
+
|
373
|
+
// Read contents of stdin into buffer:
|
374
|
+
while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) {
|
375
|
+
|
376
|
+
// Encode buffer:
|
377
|
+
base64_stream_encode(&state, buf, nread, out, &nout);
|
378
|
+
|
379
|
+
// If there's output, print it to stdout:
|
380
|
+
if (nout) {
|
381
|
+
fwrite(out, nout, 1, stdout);
|
382
|
+
}
|
383
|
+
|
384
|
+
// If the end of the input was reached, exit the loop:
|
385
|
+
if (feof(stdin)) {
|
386
|
+
break;
|
387
|
+
}
|
388
|
+
}
|
389
|
+
|
390
|
+
// Finalize encoding:
|
391
|
+
base64_stream_encode_final(&state, out, &nout);
|
392
|
+
|
393
|
+
// If the finalizing resulted in extra output bytes, print them:
|
394
|
+
if (nout) {
|
395
|
+
fwrite(out, nout, 1, stdout);
|
396
|
+
}
|
397
|
+
|
398
|
+
return 0;
|
399
|
+
}
|
400
|
+
```
|
401
|
+
|
402
|
+
Also see `bin/base64.c` for a simple re-implementation of the `base64` utility.
|
403
|
+
A file or standard input is fed through the encoder/decoder, and the output is
|
404
|
+
written to standard output.
|
405
|
+
|
406
|
+
## Tests
|
407
|
+
|
408
|
+
See `test/` for a small test suite. Testing is automated with [Travis
|
409
|
+
CI](https://travis-ci.org/aklomp/base64), which builds and tests the code
|
410
|
+
across various architectures.
|
411
|
+
|
412
|
+
## Benchmarks
|
413
|
+
|
414
|
+
Benchmarks can be run with the built-in benchmark program as follows:
|
415
|
+
|
416
|
+
```sh
|
417
|
+
make -C test benchmark <buildflags> && test/benchmark
|
418
|
+
```
|
419
|
+
|
420
|
+
It will run an encoding and decoding benchmark for all of the compiled-in codecs.
|
421
|
+
|
422
|
+
The tables below contain some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer.
|
423
|
+
|
424
|
+
\*: Update needed
|
425
|
+
|
426
|
+
x86 processors
|
427
|
+
|
428
|
+
| Processor | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX enc | AVX dec | AVX2 enc | AVX2 dec |
|
429
|
+
|-------------------------------------------|----------:|----------:|----------:|----------:|--------:|--------:|---------:|---------:|
|
430
|
+
| i7-4771 @ 3.5 GHz | 833\* | 1111\* | 3333\* | 4444\* | TBD | TBD | 4999\* | 6666\* |
|
431
|
+
| i7-4770 @ 3.4 GHz DDR1600 | 1790\* | 3038\* | 4899\* | 4043\* | 4796\* | 5709\* | 4681\* | 6386\* |
|
432
|
+
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1784\* | 3041\* | 4945\* | 4035\* | 4776\* | 5719\* | 4661\* | 6294\* |
|
433
|
+
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3401\* | 5729\* | 5489\* | 7444\* | 5003\* | 8624\* | 5105\* | 8558\* |
|
434
|
+
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4884\* | 7099\* | 4917\* | 7057\* | 4799\* | 7143\* | 4902\* | 7219\* |
|
435
|
+
| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 5212\* | 8849\* | 5284\* | 9099\* | 5289\* | 9220\* | 4849\* | 9200\* |
|
436
|
+
| i7-4870HQ @ 2.5 GHz | 1471\* | 3066\* | 6721\* | 6962\* | 7015\* | 8267\* | 8328\* | 11576\* |
|
437
|
+
| i5-4590S @ 3.0 GHz | 3356 | 3197 | 4363 | 6104 | 4243 | 6233 | 4160 | 6344 |
|
438
|
+
| Xeon X5570 @ 2.93 GHz | 2161 | 1508 | 3160 | 3915 | - | - | - | - |
|
439
|
+
| Pentium4 @ 3.4 GHz | 896 | 740 | - | - | - | - | - | - |
|
440
|
+
| Atom N270 | 243 | 266 | 508 | 387 | - | - | - | - |
|
441
|
+
| AMD E-450 | 645 | 564 | 625 | 634 | - | - | - | - |
|
442
|
+
| Intel Edison @ 500 MHz | 79\* | 92\* | 152\* | 172\* | - | - | - | - |
|
443
|
+
| Intel Edison @ 500 MHz OPENMP 2 thread | 158\* | 184\* | 300\* | 343\* | - | - | - | - |
|
444
|
+
| Intel Edison @ 500 MHz (x86-64) | 97\* | 146\* | 197\* | 207\* | - | - | - | - |
|
445
|
+
| Intel Edison @ 500 MHz (x86-64) 2 thread | 193\* | 288\* | 389\* | 410\* | - | - | - | - |
|
446
|
+
|
447
|
+
ARM processors
|
448
|
+
|
449
|
+
| Processor | Plain enc | Plain dec | NEON32 enc | NEON32 dec | NEON64 enc | NEON64 dec |
|
450
|
+
|-------------------------------------------|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
|
451
|
+
| Raspberry PI B+ V1.2 | 46\* | 40\* | - | - | - | - |
|
452
|
+
| Raspberry PI 2 B V1.1 | 85 | 141 | 282 | 225 | - | - |
|
453
|
+
| Apple iPhone SE armv7 | 1056\* | 895\* | 2943\* | 2618\* | - | - |
|
454
|
+
| Apple iPhone SE arm64 | 1061\* | 1239\* | - | - | 4098\* | 3983\* |
|
455
|
+
|
456
|
+
PowerPC processors
|
457
|
+
|
458
|
+
| Processor | Plain enc | Plain dec |
|
459
|
+
|-------------------------------------------|----------:|----------:|
|
460
|
+
| PowerPC E6500 @ 1.8GHz | 270\* | 265\* |
|
461
|
+
|
462
|
+
|
463
|
+
Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varying buffer sizes:
|
464
|
+
![Benchmarks](base64-benchmarks.png)
|
465
|
+
|
466
|
+
Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec.
|
467
|
+
Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead.
|
468
|
+
To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading.
|
469
|
+
|
470
|
+
## License
|
471
|
+
|
472
|
+
This repository is licensed under the
|
473
|
+
[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the
|
474
|
+
LICENSE file.
|
Binary file
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#include <stddef.h> // size_t
|
2
|
+
#include <stdio.h> // fopen()
|
3
|
+
#include <string.h> // strlen()
|
4
|
+
#include <getopt.h>
|
5
|
+
#include "../include/libbase64.h"
|
6
|
+
|
7
|
+
// Number of input bytes requested from the stream per fread() call.
// Parenthesized so the macro expands as a single value wherever it is used
// (e.g. `x % BUFSIZE` or `BUFSIZE / n`), not only in the expressions below.
#define BUFSIZE (1024 * 1024)

// Input chunk buffer, filled by fread().
static char buf[BUFSIZE];

// Output buffer for one converted chunk.
static char out[(BUFSIZE * 5) / 3];  // Technically 4/3 of input, but take some margin

// Byte count returned by the most recent fread().
size_t nread;

// Byte count reported by the most recent encode/decode call.
size_t nout;
|
13
|
+
|
14
|
+
static int
|
15
|
+
enc (FILE *fp)
|
16
|
+
{
|
17
|
+
int ret = 1;
|
18
|
+
struct base64_state state;
|
19
|
+
size_t acc = 0;
|
20
|
+
|
21
|
+
base64_stream_encode_init(&state, 0);
|
22
|
+
|
23
|
+
while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
|
24
|
+
base64_stream_encode(&state, buf, nread, out + acc, &nout);
|
25
|
+
if (nout) {
|
26
|
+
fwrite(out + acc, nout, 1, stdout);
|
27
|
+
acc += nout;
|
28
|
+
}
|
29
|
+
if (feof(fp)) {
|
30
|
+
break;
|
31
|
+
}
|
32
|
+
}
|
33
|
+
if (ferror(fp)) {
|
34
|
+
fprintf(stderr, "read error\n");
|
35
|
+
ret = 0;
|
36
|
+
goto out;
|
37
|
+
}
|
38
|
+
base64_stream_encode_final(&state, out + acc, &nout);
|
39
|
+
|
40
|
+
if (nout) {
|
41
|
+
fwrite(out + acc, nout, 1, stdout);
|
42
|
+
}
|
43
|
+
out: fclose(fp);
|
44
|
+
fclose(stdout);
|
45
|
+
return ret;
|
46
|
+
}
|
47
|
+
|
48
|
+
static int
|
49
|
+
dec (FILE *fp)
|
50
|
+
{
|
51
|
+
int ret = 1;
|
52
|
+
struct base64_state state;
|
53
|
+
size_t acc = 0;
|
54
|
+
|
55
|
+
base64_stream_decode_init(&state, 0);
|
56
|
+
|
57
|
+
while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
|
58
|
+
if (!base64_stream_decode(&state, buf, nread, out + acc, &nout)) {
|
59
|
+
fprintf(stderr, "decoding error\n");
|
60
|
+
ret = 0;
|
61
|
+
goto out;
|
62
|
+
}
|
63
|
+
if (nout) {
|
64
|
+
fwrite(out + acc, nout, 1, stdout);
|
65
|
+
acc += nout;
|
66
|
+
}
|
67
|
+
if (feof(fp)) {
|
68
|
+
break;
|
69
|
+
}
|
70
|
+
}
|
71
|
+
if (ferror(fp)) {
|
72
|
+
fprintf(stderr, "read error\n");
|
73
|
+
ret = 0;
|
74
|
+
}
|
75
|
+
out: fclose(fp);
|
76
|
+
fclose(stdout);
|
77
|
+
return ret;
|
78
|
+
}
|
79
|
+
|
80
|
+
// Command-line entry point.
//
// Usage: <program> [-d|--decode] [<file>|-]
//
// With no file argument, or with "-", input is read from stdin. With `-d` /
// `--decode` the input is Base64-decoded, otherwise it is Base64-encoded.
// The result is written to stdout; all diagnostics go to stderr so that they
// can never end up mixed into the converted data.
//
// Returns 0 on success and 1 on a usage error, an unopenable file, or a
// failed conversion.
int
main (int argc, char **argv)
{
    char *file;
    FILE *fp;
    int decode = 0;

    // Parse options:
    for (;;)
    {
        int c;
        int opt_index = 0;
        static struct option opt_long[] = {
            { "decode", 0, 0, 'd' },
            { 0, 0, 0, 0 }
        };
        if ((c = getopt_long(argc, argv, "d", opt_long, &opt_index)) == -1) {
            break;
        }
        switch (c)
        {
            case 'd':
                decode = 1;
                break;

            // Anything else is an option we do not know; refuse to run
            // rather than silently ignoring it:
            default:
                fprintf(stderr, "Usage: %s <file>\n", argv[0]);
                return 1;
        }
    }

    // No options left on command line? Read from stdin:
    if (optind >= argc) {
        fp = stdin;
    }

    // One option left on command line? Treat it as a file:
    else if (optind + 1 == argc) {
        file = argv[optind];
        if (strcmp(file, "-") == 0) {
            fp = stdin;
        }
        else if ((fp = fopen(file, "rb")) == NULL) {
            fprintf(stderr, "cannot open %s\n", file);
            return 1;
        }
    }

    // More than one option left on command line? Syntax error:
    else {
        fprintf(stderr, "Usage: %s <file>\n", argv[0]);
        return 1;
    }

    // Invert return codes to create shell return code:
    return (decode) ? !dec(fp) : !enc(fp);
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
2
|
+
#
|
3
|
+
# To the extent possible under law, the author(s) have dedicated all
|
4
|
+
# copyright and related and neighboring rights to this software to the
|
5
|
+
# public domain worldwide. This software is distributed without any warranty.
|
6
|
+
#
|
7
|
+
# You should have received a copy of the CC0 Public Domain Dedication
|
8
|
+
# along with this software. If not, see
|
9
|
+
#
|
10
|
+
# http://creativecommons.org/publicdomain/zero/1.0/
|
11
|
+
#
|
12
|
+
########################################################################
|
13
|
+
|
14
|
+
# Path of the C source file that detect_target_architecture() passes to
# try_compile(). It is resolved relative to the directory of this module file
# at the time the module is processed.
set(TARGET_ARCHITECTURE_TEST_FILE "${CMAKE_CURRENT_LIST_DIR}/../test-arch.c")
|
15
|
+
|
16
|
+
# detect_target_architecture(<output-variable>)
#
# Runs try_compile() on TARGET_ARCHITECTURE_TEST_FILE and searches the
# captured compiler output for a marker of the form "##arch=<name>##".
# <name> is stored in <output-variable> in the caller's scope; the variable
# is set to an empty string when the output contains no marker.
#
# The success/failure result of the compilation itself is not used; only the
# captured log is inspected.
#
# A warning is issued when no marker is found or when the marker reports
# "unknown".
#
# Example:
#   detect_target_architecture(_TARGET_ARCH)
function(detect_target_architecture OUTPUT_VARIABLE)
    message(STATUS "${CMAKE_CURRENT_LIST_DIR}")
    try_compile(_IGNORED "${CMAKE_CURRENT_BINARY_DIR}"
        "${TARGET_ARCHITECTURE_TEST_FILE}"
        OUTPUT_VARIABLE _LOG
    )

    string(REGEX MATCH "##arch=([^#]+)##" _ARCH_MARKER "${_LOG}")

    # Only trust the capture group when the pattern actually matched, so a
    # value left over from an earlier, unrelated regex can never be returned.
    set(_ARCH "")
    if (NOT _ARCH_MARKER STREQUAL "")
        set(_ARCH "${CMAKE_MATCH_1}")
    endif()

    set(${OUTPUT_VARIABLE} "${_ARCH}" PARENT_SCOPE)
    if (_ARCH STREQUAL "" OR _ARCH STREQUAL "unknown")
        message(WARNING "could not detect the target architecture.")
    endif()
endfunction()
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Written in 2016-2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
|
2
|
+
#
|
3
|
+
# To the extent possible under law, the author(s) have dedicated all
|
4
|
+
# copyright and related and neighboring rights to this software to the
|
5
|
+
# public domain worldwide. This software is distributed without any warranty.
|
6
|
+
#
|
7
|
+
# You should have received a copy of the CC0 Public Domain Dedication
|
8
|
+
# along with this software. If not, see
|
9
|
+
#
|
10
|
+
# http://creativecommons.org/publicdomain/zero/1.0/
|
11
|
+
#
|
12
|
+
########################################################################
|
13
|
+
|
14
|
+
########################################################################
|
15
|
+
# compiler flags definition
|
16
|
+
# define_SIMD_compile_flags()
#
# Sets, in the caller's scope, the per-codec compiler switch variables
#   COMPILE_FLAGS_SSSE3, COMPILE_FLAGS_SSE41, COMPILE_FLAGS_SSE42,
#   COMPILE_FLAGS_AVX, COMPILE_FLAGS_AVX2 and (GNU-style compilers only)
#   COMPILE_FLAGS_NEON32
# according to the C compiler in use. This is deliberately a macro rather
# than a function so that the variables land directly in the caller's scope.
# For compilers that match neither branch, no variable is set.
macro(define_SIMD_compile_flags)
    if (CMAKE_C_COMPILER_ID STREQUAL "GNU"
        OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
        OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
        # x86
        set(COMPILE_FLAGS_SSSE3 "-mssse3")
        set(COMPILE_FLAGS_SSE41 "-msse4.1")
        set(COMPILE_FLAGS_SSE42 "-msse4.2")
        set(COMPILE_FLAGS_AVX "-mavx")
        set(COMPILE_FLAGS_AVX2 "-mavx2")

        # arm
        set(COMPILE_FLAGS_NEON32 "-mfpu=neon")
    elseif(MSVC)
        # A single-space placeholder is used for the SSSE3/SSE4.x codecs
        # instead of a real switch.
        # NOTE(review): presumably kept non-empty so that callers can treat
        # the codec as enabled -- confirm against the consuming CMakeLists.
        set(COMPILE_FLAGS_SSSE3 " ")
        set(COMPILE_FLAGS_SSE41 " ")
        set(COMPILE_FLAGS_SSE42 " ")

        # The AVX codec must be built with /arch:AVX, not /arch:AVX2:
        # otherwise the compiler may emit AVX2 instructions into code that is
        # meant to run on AVX-only processors.
        set(COMPILE_FLAGS_AVX "/arch:AVX")
        set(COMPILE_FLAGS_AVX2 "/arch:AVX2")
    endif()
endmacro()
|