ob64 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +1 -1
  4. data/lib/ob64/version.rb +1 -1
  5. data/ob64.gemspec +2 -0
  6. data/vendor/libbase64/.gitignore +12 -0
  7. data/vendor/libbase64/.travis.yml +71 -0
  8. data/vendor/libbase64/CMakeLists.txt +264 -0
  9. data/vendor/libbase64/LICENSE +28 -0
  10. data/vendor/libbase64/Makefile +93 -0
  11. data/vendor/libbase64/README.md +474 -0
  12. data/vendor/libbase64/base64-benchmarks.png +0 -0
  13. data/vendor/libbase64/bin/base64.c +132 -0
  14. data/vendor/libbase64/cmake/Modules/TargetArch.cmake +29 -0
  15. data/vendor/libbase64/cmake/Modules/TargetSIMDInstructionSet.cmake +34 -0
  16. data/vendor/libbase64/cmake/base64-config.cmake.in +5 -0
  17. data/vendor/libbase64/cmake/config.h.in +25 -0
  18. data/vendor/libbase64/cmake/test-arch.c +35 -0
  19. data/vendor/libbase64/include/libbase64.h +145 -0
  20. data/vendor/libbase64/lib/arch/avx/codec.c +42 -0
  21. data/vendor/libbase64/lib/arch/avx2/codec.c +42 -0
  22. data/vendor/libbase64/lib/arch/avx2/dec_loop.c +110 -0
  23. data/vendor/libbase64/lib/arch/avx2/dec_reshuffle.c +34 -0
  24. data/vendor/libbase64/lib/arch/avx2/enc_loop.c +89 -0
  25. data/vendor/libbase64/lib/arch/avx2/enc_reshuffle.c +83 -0
  26. data/vendor/libbase64/lib/arch/avx2/enc_translate.c +30 -0
  27. data/vendor/libbase64/lib/arch/generic/32/dec_loop.c +86 -0
  28. data/vendor/libbase64/lib/arch/generic/32/enc_loop.c +73 -0
  29. data/vendor/libbase64/lib/arch/generic/64/enc_loop.c +77 -0
  30. data/vendor/libbase64/lib/arch/generic/codec.c +39 -0
  31. data/vendor/libbase64/lib/arch/generic/dec_head.c +37 -0
  32. data/vendor/libbase64/lib/arch/generic/dec_tail.c +91 -0
  33. data/vendor/libbase64/lib/arch/generic/enc_head.c +24 -0
  34. data/vendor/libbase64/lib/arch/generic/enc_tail.c +34 -0
  35. data/vendor/libbase64/lib/arch/neon32/codec.c +72 -0
  36. data/vendor/libbase64/lib/arch/neon32/dec_loop.c +106 -0
  37. data/vendor/libbase64/lib/arch/neon32/enc_loop.c +58 -0
  38. data/vendor/libbase64/lib/arch/neon32/enc_reshuffle.c +54 -0
  39. data/vendor/libbase64/lib/arch/neon32/enc_translate.c +57 -0
  40. data/vendor/libbase64/lib/arch/neon64/codec.c +70 -0
  41. data/vendor/libbase64/lib/arch/neon64/dec_loop.c +129 -0
  42. data/vendor/libbase64/lib/arch/neon64/enc_loop.c +66 -0
  43. data/vendor/libbase64/lib/arch/neon64/enc_reshuffle.c +54 -0
  44. data/vendor/libbase64/lib/arch/sse41/codec.c +42 -0
  45. data/vendor/libbase64/lib/arch/sse42/codec.c +42 -0
  46. data/vendor/libbase64/lib/arch/ssse3/codec.c +42 -0
  47. data/vendor/libbase64/lib/arch/ssse3/dec_loop.c +173 -0
  48. data/vendor/libbase64/lib/arch/ssse3/dec_reshuffle.c +33 -0
  49. data/vendor/libbase64/lib/arch/ssse3/enc_loop.c +67 -0
  50. data/vendor/libbase64/lib/arch/ssse3/enc_reshuffle.c +48 -0
  51. data/vendor/libbase64/lib/arch/ssse3/enc_translate.c +33 -0
  52. data/vendor/libbase64/lib/codec_choose.c +281 -0
  53. data/vendor/libbase64/lib/codecs.h +65 -0
  54. data/vendor/libbase64/lib/env.h +67 -0
  55. data/vendor/libbase64/lib/exports.txt +7 -0
  56. data/vendor/libbase64/lib/lib.c +164 -0
  57. data/vendor/libbase64/lib/lib_openmp.c +149 -0
  58. data/vendor/libbase64/lib/tables/.gitignore +1 -0
  59. data/vendor/libbase64/lib/tables/Makefile +17 -0
  60. data/vendor/libbase64/lib/tables/table_dec_32bit.h +393 -0
  61. data/vendor/libbase64/lib/tables/table_enc_12bit.h +1031 -0
  62. data/vendor/libbase64/lib/tables/table_enc_12bit.py +45 -0
  63. data/vendor/libbase64/lib/tables/table_generator.c +184 -0
  64. data/vendor/libbase64/lib/tables/tables.c +40 -0
  65. data/vendor/libbase64/lib/tables/tables.h +23 -0
  66. metadata +64 -4
@@ -0,0 +1,474 @@
1
+ # Fast Base64 stream encoder/decoder
2
+
3
+ [![Build Status](https://travis-ci.org/aklomp/base64.png?branch=master)](https://travis-ci.org/aklomp/base64)
4
+
5
+ This is an implementation of a base64 stream encoding/decoding library in C99
6
+ with SIMD (AVX2, NEON, AArch64/NEON, SSSE3, SSE4.1, SSE4.2, AVX) and
7
+ [OpenMP](http://www.openmp.org) acceleration. It also contains wrapper functions
8
+ to encode/decode simple length-delimited strings. This library aims to be:
9
+
10
+ - FAST;
11
+ - easy to use;
12
+ - elegant.
13
+
14
+ On x86, the library does runtime feature detection. The first time it's called,
15
+ the library will determine the appropriate encoding/decoding routines for the
16
+ machine. It then remembers them for the lifetime of the program. If your
17
+ processor supports AVX2, SSSE3, SSE4.1, SSE4.2 or AVX instructions, the library
18
+ will pick an optimized codec that lets it encode/decode 12 or 24 bytes at a
19
+ time, which gives a speedup of four or more times compared to the "plain"
20
+ bytewise codec.
21
+
22
+ NEON support is hardcoded to on or off at compile time, because portable
23
+ runtime feature detection is unavailable on ARM.
24
+
25
+ Even if your processor does not support SIMD instructions, this is a very fast
26
+ library. The fallback routine can process 32 or 64 bits of input in one round,
27
+ depending on your processor's word width, which still makes it significantly
28
+ faster than naive bytewise implementations. On some 64-bit machines, the 64-bit
29
+ routines even outperform the SSSE3 ones.
30
+
31
+ To the author's knowledge, at the time of original release, this was the only
32
+ Base64 library to offer SIMD acceleration. The author wrote
33
+ [an article](http://www.alfredklomp.com/programming/sse-base64) explaining one
34
+ possible SIMD approach to encoding/decoding Base64. The article can help figure
35
+ out what the code is doing, and why.
36
+
37
+ Notable features:
38
+
39
+ - Really fast on x86 and ARM systems by using SIMD vector processing;
40
+ - Can use [OpenMP](http://www.openmp.org) for even more parallel speedups;
41
+ - Really fast on other 32 or 64-bit platforms through optimized routines;
42
+ - Reads/writes blocks of streaming data;
43
+ - Does not dynamically allocate memory;
44
+ - Valid C99 that compiles with pedantic options on;
45
+ - Re-entrant and threadsafe;
46
+ - Unit tested;
47
+ - Uses Duff's Device.
48
+
49
+ ## Acknowledgements
50
+
51
+ The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by
52
+ [Inkymail](https://github.com/inkymail/base64), who, in their fork, also
53
+ implemented some additional features. Their work is slowly being backported
54
+ into this project.
55
+
56
+ The SSSE3 and AVX2 codecs were substantially improved by using some very clever
57
+ optimizations described by Wojciech Muła in a
58
+ [series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of
59
+ [articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html).
60
+ His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64).
61
+
62
+ The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl).
63
+
64
+ ## Building
65
+
66
+ The `lib` directory contains the code for the actual library.
67
+ Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`.
68
+ The first is a single, self-contained object file that you can link into your own project.
69
+ The second is a standalone test binary that works similarly to the `base64` system utility.
70
+
71
+ The matching header file needed to use this library is in `include/libbase64.h`.
72
+
73
+ To compile just the "plain" library without SIMD codecs, type:
74
+
75
+ ```sh
76
+ make lib/libbase64.o
77
+ ```
78
+
79
+ Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS`,
80
+ `SSSE3_CFLAGS`, `SSE41_CFLAGS`, `SSE42_CFLAGS` and/or `AVX_CFLAGS` environment variables.
81
+ A typical build invocation on x86 looks like this:
82
+
83
+ ```sh
84
+ AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make lib/libbase64.o
85
+ ```
86
+
87
+ ### AVX2
88
+
89
+ To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`.
90
+ Example:
91
+
92
+ ```sh
93
+ AVX2_CFLAGS=-mavx2 make
94
+ ```
95
+
96
+ The codec will only be used if runtime feature detection shows that the target machine supports AVX2.
97
+
98
+ ### SSSE3
99
+
100
+ To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`.
101
+ Example:
102
+
103
+ ```sh
104
+ SSSE3_CFLAGS=-mssse3 make
105
+ ```
106
+
107
+ The codec will only be used if runtime feature detection shows that the target machine supports SSSE3.
108
+
109
+ ### NEON
110
+
111
+ This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups.
112
+ These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with a uint32/64 codec to stay fast on smaller inputs!
113
+
114
+ Use LLVM/Clang for compiling the NEON codecs.
115
+ The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vst4q_u8()`, and the generated assembly code is of low quality.
116
+ NEON intrinsics are a known weak area of GCC.
117
+ Clang does a better job.
118
+
119
+ NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time.
120
+ But you can do your own runtime detection.
121
+ You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`.
122
+
123
+ These are your options:
124
+
125
+ 1. Don't include NEON support;
126
+ 2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`;
127
+ 3. build everything with NEON support and make it the default;
128
+ 4. build everything with NEON support, but don't make it the default (which makes no sense).
129
+
130
+ For option 1, simply don't specify any NEON-specific compiler flags at all, like so:
131
+
132
+ ```sh
133
+ CC=clang CFLAGS="-march=armv6" make
134
+ ```
135
+
136
+ For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support.
137
+ The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7.
138
+ It will also make the NEON codec the default.
139
+ For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag.
140
+ No ARMv7/NEON code will then be touched.
141
+
142
+ ```sh
143
+ CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make
144
+ ```
145
+
146
+ For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`.
147
+ This example works for the Raspberry Pi 2B V1.1, which has NEON support:
148
+
149
+ ```sh
150
+ CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make
151
+ ```
152
+
153
+ To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub.
154
+ (The AArch64 target has mandatory NEON64 support.)
155
+ Example:
156
+
157
+ ```sh
158
+ CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make
159
+ ```
160
+
161
+ ### OpenMP
162
+
163
+ To enable OpenMP on GCC you need to build with `-fopenmp`. This can be done by setting the `OPENMP` environment variable to `1`.
164
+
165
+ Example:
166
+
167
+ ```sh
168
+ OPENMP=1 make
169
+ ```
170
+
171
+ This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`.
172
+
173
+ By default the number of parallel threads will be equal to the number of cores of the processor.
174
+ On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance.
175
+
176
+ To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance
177
+
178
+ ```sh
179
+ OMP_DISPLAY_ENV=VERBOSE test/benchmark
180
+ ```
181
+
182
+ To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance
183
+
184
+ ```sh
185
+ OMP_THREAD_LIMIT=2 test/benchmark
186
+ ```
187
+
188
+ An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled:
189
+
190
+ ```sh
191
+ make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test
192
+ ```
193
+
194
+ ## API reference
195
+
196
+ Strings are represented as a pointer and a length; they are not
197
+ zero-terminated. This was a conscious design decision. In the decoding step,
198
+ relying on zero-termination would make no sense since the output could contain
199
+ legitimate zero bytes. In the encoding step, returning the length saves the
200
+ overhead of calling `strlen()` on the output. If you insist on the trailing
201
+ zero, you can easily add it yourself at the given offset.
202
+
203
+ ### Flags
204
+
205
+ Some API calls take a `flags` argument.
206
+ That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build.
207
+ Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available.
208
+ The following constants can be used:
209
+
210
+ - `BASE64_FORCE_AVX2`
211
+ - `BASE64_FORCE_NEON32`
212
+ - `BASE64_FORCE_NEON64`
213
+ - `BASE64_FORCE_PLAIN`
214
+ - `BASE64_FORCE_SSSE3`
215
+ - `BASE64_FORCE_SSE41`
216
+ - `BASE64_FORCE_SSE42`
217
+ - `BASE64_FORCE_AVX`
218
+
219
+ Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms.
220
+
221
+ ### Encoding
222
+
223
+ #### base64_encode
224
+
225
+ ```c
226
+ void base64_encode
227
+ ( const char *src
228
+ , size_t srclen
229
+ , char *out
230
+ , size_t *outlen
231
+ , int flags
232
+ ) ;
233
+ ```
234
+
235
+ Wrapper function to encode a plain string of given length.
236
+ Output is written to `out` without trailing zero.
237
+ Output length in bytes is written to `outlen`.
238
+ The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input.
239
+
240
+ #### base64_stream_encode_init
241
+
242
+ ```c
243
+ void base64_stream_encode_init
244
+ ( struct base64_state *state
245
+ , int flags
246
+ ) ;
247
+ ```
248
+
249
+ Call this before calling `base64_stream_encode()` to init the state.
250
+
251
+ #### base64_stream_encode
252
+
253
+ ```c
254
+ void base64_stream_encode
255
+ ( struct base64_state *state
256
+ , const char *src
257
+ , size_t srclen
258
+ , char *out
259
+ , size_t *outlen
260
+ ) ;
261
+ ```
262
+
263
+ Encodes the block of data of given length at `src`, into the buffer at `out`.
264
+ Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin.
265
+ Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
266
+ Does not zero-terminate or finalize the output.
267
+
268
+ #### base64_stream_encode_final
269
+
270
+ ```c
271
+ void base64_stream_encode_final
272
+ ( struct base64_state *state
273
+ , char *out
274
+ , size_t *outlen
275
+ ) ;
276
+ ```
277
+
278
+ Finalizes the output begun by previous calls to `base64_stream_encode()`.
279
+ Adds the required end-of-stream markers if appropriate.
280
+ `outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero).
281
+
282
+ ### Decoding
283
+
284
+ #### base64_decode
285
+
286
+ ```c
287
+ int base64_decode
288
+ ( const char *src
289
+ , size_t srclen
290
+ , char *out
291
+ , size_t *outlen
292
+ , int flags
293
+ ) ;
294
+ ```
295
+
296
+ Wrapper function to decode a plain string of given length.
297
+ Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`.
298
+ The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input.
299
+ Returns `1` for success, and `0` when a decode error has occurred due to invalid input.
300
+ Returns `-1` if the chosen codec is not included in the current build.
301
+
302
+ #### base64_stream_decode_init
303
+
304
+ ```c
305
+ void base64_stream_decode_init
306
+ ( struct base64_state *state
307
+ , int flags
308
+ ) ;
309
+ ```
310
+
311
+ Call this before calling `base64_stream_decode()` to init the state.
312
+
313
+ #### base64_stream_decode
314
+
315
+ ```c
316
+ int base64_stream_decode
317
+ ( struct base64_state *state
318
+ , const char *src
319
+ , size_t srclen
320
+ , char *out
321
+ , size_t *outlen
322
+ ) ;
323
+ ```
324
+
325
+ Decodes the block of data of given length at `src`, into the buffer at `out`.
326
+ Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin.
327
+ Places the number of new bytes written into `outlen` (which is set to zero when the function starts).
328
+ Does not zero-terminate the output.
329
+ Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character.
330
+ Returns -1 if the chosen codec is not included in the current build.
331
+ Used by the test harness to check whether a codec is available for testing.
332
+
333
+ ## Examples
334
+
335
+ A simple example of encoding a static string to base64 and printing the output
336
+ to stdout:
337
+
338
+ ```c
339
+ #include <stdio.h> /* fwrite */
340
+ #include "libbase64.h"
341
+
342
+ int main ()
343
+ {
344
+ char src[] = "hello world";
345
+ char out[20];
346
+ size_t srclen = sizeof(src) - 1;
347
+ size_t outlen;
348
+
349
+ base64_encode(src, srclen, out, &outlen, 0);
350
+
351
+ fwrite(out, outlen, 1, stdout);
352
+
353
+ return 0;
354
+ }
355
+ ```
356
+
357
+ A simple example (no error checking, etc) of stream encoding standard input to
358
+ standard output:
359
+
360
+ ```c
361
+ #include <stdio.h>
362
+ #include "libbase64.h"
363
+
364
+ int main ()
365
+ {
366
+ size_t nread, nout;
367
+ char buf[12000], out[16000];
368
+ struct base64_state state;
369
+
370
+ // Initialize stream encoder:
371
+ base64_stream_encode_init(&state, 0);
372
+
373
+ // Read contents of stdin into buffer:
374
+ while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) {
375
+
376
+ // Encode buffer:
377
+ base64_stream_encode(&state, buf, nread, out, &nout);
378
+
379
+ // If there's output, print it to stdout:
380
+ if (nout) {
381
+ fwrite(out, nout, 1, stdout);
382
+ }
383
+
384
+ // If the end of the input was reached, exit the loop:
385
+ if (feof(stdin)) {
386
+ break;
387
+ }
388
+ }
389
+
390
+ // Finalize encoding:
391
+ base64_stream_encode_final(&state, out, &nout);
392
+
393
+ // If the finalizing resulted in extra output bytes, print them:
394
+ if (nout) {
395
+ fwrite(out, nout, 1, stdout);
396
+ }
397
+
398
+ return 0;
399
+ }
400
+ ```
401
+
402
+ Also see `bin/base64.c` for a simple re-implementation of the `base64` utility.
403
+ A file or standard input is fed through the encoder/decoder, and the output is
404
+ written to standard output.
405
+
406
+ ## Tests
407
+
408
+ See `test/` for a small test suite. Testing is automated with [Travis
409
+ CI](https://travis-ci.org/aklomp/base64), which builds and tests the code
410
+ across various architectures.
411
+
412
+ ## Benchmarks
413
+
414
+ Benchmarks can be run with the built-in benchmark program as follows:
415
+
416
+ ```sh
417
+ make -C test benchmark <buildflags> && test/benchmark
418
+ ```
419
+
420
+ It will run an encoding and decoding benchmark for all of the compiled-in codecs.
421
+
422
+ The tables below contain some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer.
423
+
424
+ \*: Update needed
425
+
426
+ x86 processors
427
+
428
+ | Processor | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX enc | AVX dec | AVX2 enc | AVX2 dec |
429
+ |-------------------------------------------|----------:|----------:|----------:|----------:|--------:|--------:|---------:|---------:|
430
+ | i7-4771 @ 3.5 GHz | 833\* | 1111\* | 3333\* | 4444\* | TBD | TBD | 4999\* | 6666\* |
431
+ | i7-4770 @ 3.4 GHz DDR1600 | 1790\* | 3038\* | 4899\* | 4043\* | 4796\* | 5709\* | 4681\* | 6386\* |
432
+ | i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1784\* | 3041\* | 4945\* | 4035\* | 4776\* | 5719\* | 4661\* | 6294\* |
433
+ | i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3401\* | 5729\* | 5489\* | 7444\* | 5003\* | 8624\* | 5105\* | 8558\* |
434
+ | i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4884\* | 7099\* | 4917\* | 7057\* | 4799\* | 7143\* | 4902\* | 7219\* |
435
+ | i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 5212\* | 8849\* | 5284\* | 9099\* | 5289\* | 9220\* | 4849\* | 9200\* |
436
+ | i7-4870HQ @ 2.5 GHz | 1471\* | 3066\* | 6721\* | 6962\* | 7015\* | 8267\* | 8328\* | 11576\* |
437
+ | i5-4590S @ 3.0 GHz | 3356 | 3197 | 4363 | 6104 | 4243 | 6233 | 4160 | 6344 |
438
+ | Xeon X5570 @ 2.93 GHz | 2161 | 1508 | 3160 | 3915 | - | - | - | - |
439
+ | Pentium4 @ 3.4 GHz | 896 | 740 | - | - | - | - | - | - |
440
+ | Atom N270 | 243 | 266 | 508 | 387 | - | - | - | - |
441
+ | AMD E-450 | 645 | 564 | 625 | 634 | - | - | - | - |
442
+ | Intel Edison @ 500 MHz | 79\* | 92\* | 152\* | 172\* | - | - | - | - |
443
+ | Intel Edison @ 500 MHz OPENMP 2 thread | 158\* | 184\* | 300\* | 343\* | - | - | - | - |
444
+ | Intel Edison @ 500 MHz (x86-64) | 97\* | 146\* | 197\* | 207\* | - | - | - | - |
445
+ | Intel Edison @ 500 MHz (x86-64) 2 thread | 193\* | 288\* | 389\* | 410\* | - | - | - | - |
446
+
447
+ ARM processors
448
+
449
+ | Processor | Plain enc | Plain dec | NEON32 enc | NEON32 dec | NEON64 enc | NEON64 dec |
450
+ |-------------------------------------------|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
451
+ | Raspberry PI B+ V1.2 | 46\* | 40\* | - | - | - | - |
452
+ | Raspberry PI 2 B V1.1 | 85 | 141 | 282 | 225 | - | - |
453
+ | Apple iPhone SE armv7 | 1056\* | 895\* | 2943\* | 2618\* | - | - |
454
+ | Apple iPhone SE arm64 | 1061\* | 1239\* | - | - | 4098\* | 3983\* |
455
+
456
+ PowerPC processors
457
+
458
+ | Processor | Plain enc | Plain dec |
459
+ |-------------------------------------------|----------:|----------:|
460
+ | PowerPC E6500 @ 1.8GHz | 270\* | 265\* |
461
+
462
+
463
+ Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varying buffer sizes:
464
+ ![Benchmarks](base64-benchmarks.png)
465
+
466
+ Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec.
467
+ Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead.
468
+ To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading.
469
+
470
+ ## License
471
+
472
+ This repository is licensed under the
473
+ [BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the
474
+ LICENSE file.
@@ -0,0 +1,132 @@
1
+ #include <stddef.h> // size_t
2
+ #include <stdio.h> // fopen()
3
+ #include <string.h> // strlen()
4
+ #include <getopt.h>
5
+ #include "../include/libbase64.h"
6
+
7
// Number of input bytes read per iteration. The expansion is parenthesized so
// the macro stays a single value inside larger expressions (for example on
// the right-hand side of a division).
#define BUFSIZE (1024 * 1024)

// Input buffer: holds one chunk read from the input file.
static char buf[BUFSIZE];

// Output buffer: holds the encoded or decoded form of one input chunk.
static char out[(BUFSIZE * 5) / 3];	// Technically 4/3 of input, but take some margin

// Number of bytes returned by the most recent fread() call.
size_t nread;

// Number of bytes produced by the most recent codec call.
size_t nout;
13
+
14
+ static int
15
+ enc (FILE *fp)
16
+ {
17
+ int ret = 1;
18
+ struct base64_state state;
19
+ size_t acc = 0;
20
+
21
+ base64_stream_encode_init(&state, 0);
22
+
23
+ while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
24
+ base64_stream_encode(&state, buf, nread, out + acc, &nout);
25
+ if (nout) {
26
+ fwrite(out + acc, nout, 1, stdout);
27
+ acc += nout;
28
+ }
29
+ if (feof(fp)) {
30
+ break;
31
+ }
32
+ }
33
+ if (ferror(fp)) {
34
+ fprintf(stderr, "read error\n");
35
+ ret = 0;
36
+ goto out;
37
+ }
38
+ base64_stream_encode_final(&state, out + acc, &nout);
39
+
40
+ if (nout) {
41
+ fwrite(out + acc, nout, 1, stdout);
42
+ }
43
+ out: fclose(fp);
44
+ fclose(stdout);
45
+ return ret;
46
+ }
47
+
48
+ static int
49
+ dec (FILE *fp)
50
+ {
51
+ int ret = 1;
52
+ struct base64_state state;
53
+ size_t acc = 0;
54
+
55
+ base64_stream_decode_init(&state, 0);
56
+
57
+ while ((nread = fread(buf, 1, BUFSIZE, fp)) > 0) {
58
+ if (!base64_stream_decode(&state, buf, nread, out + acc, &nout)) {
59
+ fprintf(stderr, "decoding error\n");
60
+ ret = 0;
61
+ goto out;
62
+ }
63
+ if (nout) {
64
+ fwrite(out + acc, nout, 1, stdout);
65
+ acc += nout;
66
+ }
67
+ if (feof(fp)) {
68
+ break;
69
+ }
70
+ }
71
+ if (ferror(fp)) {
72
+ fprintf(stderr, "read error\n");
73
+ ret = 0;
74
+ }
75
+ out: fclose(fp);
76
+ fclose(stdout);
77
+ return ret;
78
+ }
79
+
80
// Command-line entry point.
//
// Encodes the input by default; with `-d` or `--decode` it decodes instead.
// The input is the single file named on the command line, or stdin when no
// file is given or the file name is "-". Output goes to stdout.
//
// Returns 0 on success and 1 on failure, suitable as a shell exit code.
int
main (int argc, char **argv)
{
	char *file;
	FILE *fp;
	int decode = 0;

	// Parse options:
	for (;;)
	{
		int c;
		int opt_index = 0;
		static struct option opt_long[] = {
			{ "decode", 0, 0, 'd' },
			{ 0, 0, 0, 0 }
		};
		if ((c = getopt_long(argc, argv, "d", opt_long, &opt_index)) == -1) {
			break;
		}
		switch (c)
		{
			case 'd':
				decode = 1;
				break;
		}
	}

	// No options left on command line? Read from stdin:
	if (optind >= argc) {
		fp = stdin;
	}

	// One option left on command line? Treat it as a file:
	else if (optind + 1 == argc) {
		file = argv[optind];
		if (strcmp(file, "-") == 0) {
			fp = stdin;
		}
		else if ((fp = fopen(file, "rb")) == NULL) {
			// Diagnostics go to stderr: stdout carries the
			// encoded/decoded data and may be redirected to a file.
			fprintf(stderr, "cannot open %s\n", file);
			return 1;
		}
	}

	// More than one option left on command line? Syntax error:
	else {
		fprintf(stderr, "Usage: %s <file>\n", argv[0]);
		return 1;
	}

	// Invert return codes to create shell return code:
	return (decode) ? !dec(fp) : !enc(fp);
}
@@ -0,0 +1,29 @@
1
+ # Written in 2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
2
+ #
3
+ # To the extent possible under law, the author(s) have dedicated all
4
+ # copyright and related and neighboring rights to this software to the
5
+ # public domain worldwide. This software is distributed without any warranty.
6
+ #
7
+ # You should have received a copy of the CC0 Public Domain Dedication
8
+ # along with this software. If not, see
9
+ #
10
+ # http://creativecommons.org/publicdomain/zero/1.0/
11
+ #
12
+ ########################################################################
13
+
14
# C source file that is compiled to probe the target architecture. It is
# resolved relative to this module: one directory up, named test-arch.c.
set(TARGET_ARCHITECTURE_TEST_FILE "${CMAKE_CURRENT_LIST_DIR}/../test-arch.c")

# detect_target_architecture(<output-variable>)
#
# Compiles TARGET_ARCHITECTURE_TEST_FILE with try_compile() and searches the
# captured compiler output for a marker of the form "##arch=<name>##".
# <name> is stored in <output-variable> in the caller's scope. If the output
# contains no marker, <output-variable> is set to the empty string.
#
# A warning is issued when the detected name is "unknown" or when no marker
# was found at all.
function(detect_target_architecture OUTPUT_VARIABLE)
    message(STATUS "${CMAKE_CURRENT_LIST_DIR}")
    try_compile(_IGNORED "${CMAKE_CURRENT_BINARY_DIR}"
        "${TARGET_ARCHITECTURE_TEST_FILE}"
        OUTPUT_VARIABLE _LOG
    )

    string(REGEX MATCH "##arch=([^#]+)##" _ARCH_MARKER "${_LOG}")

    # Only trust the capture group when the expression actually matched.
    set(_ARCH "")
    if (NOT "${_ARCH_MARKER}" STREQUAL "")
        set(_ARCH "${CMAKE_MATCH_1}")
    endif()

    set(${OUTPUT_VARIABLE} "${_ARCH}" PARENT_SCOPE)

    # Quoted operands keep if() from re-interpreting the detected name as a
    # variable reference.
    if ("${_ARCH}" STREQUAL "" OR "${_ARCH}" STREQUAL "unknown")
        message(WARNING "could not detect the target architecture.")
    endif()
endfunction()
@@ -0,0 +1,34 @@
1
+ # Written in 2016-2017 by Henrik Steffen Gaßmann henrik@gassmann.onl
2
+ #
3
+ # To the extent possible under law, the author(s) have dedicated all
4
+ # copyright and related and neighboring rights to this software to the
5
+ # public domain worldwide. This software is distributed without any warranty.
6
+ #
7
+ # You should have received a copy of the CC0 Public Domain Dedication
8
+ # along with this software. If not, see
9
+ #
10
+ # http://creativecommons.org/publicdomain/zero/1.0/
11
+ #
12
+ ########################################################################
13
+
14
########################################################################
# compiler flags definition
#
# define_SIMD_compile_flags()
#
# Sets the COMPILE_FLAGS_<CODEC> variables to the compiler switch that enables
# the matching instruction set. This is a macro, so the variables are set
# directly in the caller's scope. No variables are set for compilers that are
# neither GNU, Clang, AppleClang nor MSVC.
macro(define_SIMD_compile_flags)
    if (CMAKE_C_COMPILER_ID STREQUAL "GNU"
        OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
        OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
        # x86
        set(COMPILE_FLAGS_SSSE3 "-mssse3")
        set(COMPILE_FLAGS_SSE41 "-msse4.1")
        set(COMPILE_FLAGS_SSE42 "-msse4.2")
        set(COMPILE_FLAGS_AVX "-mavx")
        set(COMPILE_FLAGS_AVX2 "-mavx2")

        #arm
        set(COMPILE_FLAGS_NEON32 "-mfpu=neon")
    elseif(MSVC)
        # A single space is used where no switch is passed.
        # NOTE(review): presumably this keeps the value non-empty for the
        # consumer of these variables - confirm against CMakeLists.txt.
        set(COMPILE_FLAGS_SSSE3 " ")
        set(COMPILE_FLAGS_SSE41 " ")
        set(COMPILE_FLAGS_SSE42 " ")
        # The AVX codec must be limited to AVX: /arch:AVX2 would allow the
        # compiler to emit AVX2 instructions into code that is meant to run
        # on AVX-only processors.
        set(COMPILE_FLAGS_AVX "/arch:AVX")
        set(COMPILE_FLAGS_AVX2 "/arch:AVX2")
    endif()
endmacro()
@@ -0,0 +1,5 @@
1
# Package configuration template for base64.
#
# NOTE(review): @PACKAGE_INIT@ is a placeholder replaced when this template is
# configured, presumably by configure_package_config_file(), which would also
# provide check_required_components() - confirm against CMakeLists.txt.
@PACKAGE_INIT@

# Load the targets file that sits in the same directory as this config file.
include("${CMAKE_CURRENT_LIST_DIR}/base64-targets.cmake")

# Validate the components requested for the base64 package.
check_required_components(base64)