ob64 0.1.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +20 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +18 -1
- data/{LICENSE.txt → LICENSE} +1 -1
- data/README.md +34 -2
- data/benchmark.rb +42 -3
- data/ext/ob64/ob64_ext.c +5 -3
- data/lib/ob64/core_ext.rb +2 -0
- data/lib/ob64/version.rb +1 -1
- data/lib/ob64.rb +52 -0
- data/ob64.gemspec +12 -6
- data/vendor/libbase64/.gitignore +12 -0
- data/vendor/libbase64/.travis.yml +71 -0
- data/vendor/libbase64/CMakeLists.txt +264 -0
- data/vendor/libbase64/LICENSE +28 -0
- data/vendor/libbase64/Makefile +93 -0
- data/vendor/libbase64/README.md +474 -0
- data/vendor/libbase64/base64-benchmarks.png +0 -0
- data/vendor/libbase64/bin/base64.c +132 -0
- data/vendor/libbase64/cmake/Modules/TargetArch.cmake +29 -0
- data/vendor/libbase64/cmake/Modules/TargetSIMDInstructionSet.cmake +34 -0
- data/vendor/libbase64/cmake/base64-config.cmake.in +5 -0
- data/vendor/libbase64/cmake/config.h.in +25 -0
- data/vendor/libbase64/cmake/test-arch.c +35 -0
- data/vendor/libbase64/include/libbase64.h +145 -0
- data/vendor/libbase64/lib/arch/avx/codec.c +42 -0
- data/vendor/libbase64/lib/arch/avx2/codec.c +42 -0
- data/vendor/libbase64/lib/arch/avx2/dec_loop.c +110 -0
- data/vendor/libbase64/lib/arch/avx2/dec_reshuffle.c +34 -0
- data/vendor/libbase64/lib/arch/avx2/enc_loop.c +89 -0
- data/vendor/libbase64/lib/arch/avx2/enc_reshuffle.c +83 -0
- data/vendor/libbase64/lib/arch/avx2/enc_translate.c +30 -0
- data/vendor/libbase64/lib/arch/generic/32/dec_loop.c +86 -0
- data/vendor/libbase64/lib/arch/generic/32/enc_loop.c +73 -0
- data/vendor/libbase64/lib/arch/generic/64/enc_loop.c +77 -0
- data/vendor/libbase64/lib/arch/generic/codec.c +39 -0
- data/vendor/libbase64/lib/arch/generic/dec_head.c +37 -0
- data/vendor/libbase64/lib/arch/generic/dec_tail.c +91 -0
- data/vendor/libbase64/lib/arch/generic/enc_head.c +24 -0
- data/vendor/libbase64/lib/arch/generic/enc_tail.c +34 -0
- data/vendor/libbase64/lib/arch/neon32/codec.c +72 -0
- data/vendor/libbase64/lib/arch/neon32/dec_loop.c +106 -0
- data/vendor/libbase64/lib/arch/neon32/enc_loop.c +58 -0
- data/vendor/libbase64/lib/arch/neon32/enc_reshuffle.c +54 -0
- data/vendor/libbase64/lib/arch/neon32/enc_translate.c +57 -0
- data/vendor/libbase64/lib/arch/neon64/codec.c +70 -0
- data/vendor/libbase64/lib/arch/neon64/dec_loop.c +129 -0
- data/vendor/libbase64/lib/arch/neon64/enc_loop.c +66 -0
- data/vendor/libbase64/lib/arch/neon64/enc_reshuffle.c +54 -0
- data/vendor/libbase64/lib/arch/sse41/codec.c +42 -0
- data/vendor/libbase64/lib/arch/sse42/codec.c +42 -0
- data/vendor/libbase64/lib/arch/ssse3/codec.c +42 -0
- data/vendor/libbase64/lib/arch/ssse3/dec_loop.c +173 -0
- data/vendor/libbase64/lib/arch/ssse3/dec_reshuffle.c +33 -0
- data/vendor/libbase64/lib/arch/ssse3/enc_loop.c +67 -0
- data/vendor/libbase64/lib/arch/ssse3/enc_reshuffle.c +48 -0
- data/vendor/libbase64/lib/arch/ssse3/enc_translate.c +33 -0
- data/vendor/libbase64/lib/codec_choose.c +281 -0
- data/vendor/libbase64/lib/codecs.h +65 -0
- data/vendor/libbase64/lib/env.h +67 -0
- data/vendor/libbase64/lib/exports.txt +7 -0
- data/vendor/libbase64/lib/lib.c +164 -0
- data/vendor/libbase64/lib/lib_openmp.c +149 -0
- data/vendor/libbase64/lib/tables/.gitignore +1 -0
- data/vendor/libbase64/lib/tables/Makefile +17 -0
- data/vendor/libbase64/lib/tables/table_dec_32bit.h +393 -0
- data/vendor/libbase64/lib/tables/table_enc_12bit.h +1031 -0
- data/vendor/libbase64/lib/tables/table_enc_12bit.py +45 -0
- data/vendor/libbase64/lib/tables/table_generator.c +184 -0
- data/vendor/libbase64/lib/tables/tables.c +40 -0
- data/vendor/libbase64/lib/tables/tables.h +23 -0
- metadata +67 -6
- data/.byebug_history +0 -72
- data/.envrc +0 -1
@@ -0,0 +1,67 @@
|
|
1
|
+
#ifndef BASE64_ENV_H
#define BASE64_ENV_H

// This header file contains macro definitions that describe certain aspects of
// the compile-time environment. Compatibility and portability macros go here.
//
// It provides:
//   BASE64_LITTLE_ENDIAN  1 on little-endian targets, 0 otherwise
//   BASE64_HTOBE32/64     host-to-big-endian conversion of 32/64-bit values
//   BASE64_WORDSIZE       machine word size in bits (0 if it cannot be found)
//   BASE64_AEOF/EOF       stream end markers
//   BASE64_FALLTHROUGH    annotation for deliberate switch fallthrough

// Define machine endianness. BASE64_LITTLE_ENDIAN is defined exactly once so
// that no combination of predefined macros can redefine it to another value.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
// GCC, and Clang versions that provide the GCC-style macros:
#  if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define BASE64_LITTLE_ENDIAN 1
#  else
#    define BASE64_LITTLE_ENDIAN 0
#  endif
#elif defined(__BIG_ENDIAN__)
// Compilers that only announce big-endian targets through this macro:
#  define BASE64_LITTLE_ENDIAN 0
#else
// __LITTLE_ENDIAN__ is defined, or nothing is known about the byte order
// (for instance MSVC++). Little-endian was the outcome for this case before,
// so keep it:
#  define BASE64_LITTLE_ENDIAN 1
#endif

// MSVC++ needs intrin.h for _byteswap_uint64 (issue #68):
#if BASE64_LITTLE_ENDIAN && defined(_MSC_VER)
#  include <intrin.h>
#endif

// Endian conversion functions:
#if BASE64_LITTLE_ENDIAN
#  ifdef _MSC_VER
// Microsoft Visual C++:
#    define BASE64_HTOBE32(x)	_byteswap_ulong(x)
#    define BASE64_HTOBE64(x)	_byteswap_uint64(x)
#  else
// GCC and Clang:
#    define BASE64_HTOBE32(x)	__builtin_bswap32(x)
#    define BASE64_HTOBE64(x)	__builtin_bswap64(x)
#  endif
#else
// No conversion needed:
#  define BASE64_HTOBE32(x)	(x)
#  define BASE64_HTOBE64(x)	(x)
#endif

// Detect word size. __WORDSIZE is only available when a C library header
// that defines it has already been included, so fall back to macros that the
// compiler itself predefines before giving up:
#if defined(_INTEGRAL_MAX_BITS)
#  define BASE64_WORDSIZE _INTEGRAL_MAX_BITS
#elif defined(__WORDSIZE)
#  define BASE64_WORDSIZE __WORDSIZE
#elif defined(__SIZE_WIDTH__)
#  define BASE64_WORDSIZE __SIZE_WIDTH__
#elif defined(__SIZEOF_POINTER__)
// Pointer size in bytes; assumes 8-bit bytes:
#  define BASE64_WORDSIZE (__SIZEOF_POINTER__ * 8)
#else
// Unknown. An undefined __WORDSIZE used to evaluate to 0 inside #if, so keep
// that result but make it explicit:
#  define BASE64_WORDSIZE 0
#endif

// End-of-file definitions.
// Almost end-of-file when waiting for the last '=' character:
#define BASE64_AEOF 1
// End-of-file when stream end has been reached or invalid input provided:
#define BASE64_EOF 2

// GCC 7 defaults to issuing a warning for fallthrough in switch statements,
// unless the fallthrough cases are marked with an attribute. As we use
// fallthrough deliberately, define an alias for the attribute. The expansion
// already ends in a semicolon, so it is used without one:
#if defined(__GNUC__) && (__GNUC__ >= 7)
#  define BASE64_FALLTHROUGH  __attribute__((fallthrough));
#else
#  define BASE64_FALLTHROUGH
#endif

#endif	// BASE64_ENV_H
|
@@ -0,0 +1,164 @@
|
|
1
|
+
#include <stdint.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
#ifdef _OPENMP
|
4
|
+
#include <omp.h>
|
5
|
+
#endif
|
6
|
+
|
7
|
+
#include "../include/libbase64.h"
|
8
|
+
#include "tables/tables.h"
|
9
|
+
#include "codecs.h"
|
10
|
+
#include "env.h"
|
11
|
+
|
12
|
+
// These static function pointers are initialized once when the library is
|
13
|
+
// first used, and remain in use for the remaining lifetime of the program.
|
14
|
+
// The idea being that CPU features don't change at runtime.
|
15
|
+
static struct codec codec = { NULL, NULL };
|
16
|
+
|
17
|
+
void
|
18
|
+
base64_stream_encode_init (struct base64_state *state, int flags)
|
19
|
+
{
|
20
|
+
// If any of the codec flags are set, redo choice:
|
21
|
+
if (codec.enc == NULL || flags & 0xFF) {
|
22
|
+
codec_choose(&codec, flags);
|
23
|
+
}
|
24
|
+
state->eof = 0;
|
25
|
+
state->bytes = 0;
|
26
|
+
state->carry = 0;
|
27
|
+
state->flags = flags;
|
28
|
+
}
|
29
|
+
|
30
|
+
void
|
31
|
+
base64_stream_encode
|
32
|
+
( struct base64_state *state
|
33
|
+
, const char *src
|
34
|
+
, size_t srclen
|
35
|
+
, char *out
|
36
|
+
, size_t *outlen
|
37
|
+
)
|
38
|
+
{
|
39
|
+
codec.enc(state, src, srclen, out, outlen);
|
40
|
+
}
|
41
|
+
|
42
|
+
void
|
43
|
+
base64_stream_encode_final
|
44
|
+
( struct base64_state *state
|
45
|
+
, char *out
|
46
|
+
, size_t *outlen
|
47
|
+
)
|
48
|
+
{
|
49
|
+
uint8_t *o = (uint8_t *)out;
|
50
|
+
|
51
|
+
if (state->bytes == 1) {
|
52
|
+
*o++ = base64_table_enc_6bit[state->carry];
|
53
|
+
*o++ = '=';
|
54
|
+
*o++ = '=';
|
55
|
+
*outlen = 3;
|
56
|
+
return;
|
57
|
+
}
|
58
|
+
if (state->bytes == 2) {
|
59
|
+
*o++ = base64_table_enc_6bit[state->carry];
|
60
|
+
*o++ = '=';
|
61
|
+
*outlen = 2;
|
62
|
+
return;
|
63
|
+
}
|
64
|
+
*outlen = 0;
|
65
|
+
}
|
66
|
+
|
67
|
+
void
|
68
|
+
base64_stream_decode_init (struct base64_state *state, int flags)
|
69
|
+
{
|
70
|
+
// If any of the codec flags are set, redo choice:
|
71
|
+
if (codec.dec == NULL || flags & 0xFF) {
|
72
|
+
codec_choose(&codec, flags);
|
73
|
+
}
|
74
|
+
state->eof = 0;
|
75
|
+
state->bytes = 0;
|
76
|
+
state->carry = 0;
|
77
|
+
state->flags = flags;
|
78
|
+
}
|
79
|
+
|
80
|
+
int
|
81
|
+
base64_stream_decode
|
82
|
+
( struct base64_state *state
|
83
|
+
, const char *src
|
84
|
+
, size_t srclen
|
85
|
+
, char *out
|
86
|
+
, size_t *outlen
|
87
|
+
)
|
88
|
+
{
|
89
|
+
return codec.dec(state, src, srclen, out, outlen);
|
90
|
+
}
|
91
|
+
|
92
|
+
#ifdef _OPENMP
|
93
|
+
|
94
|
+
// Due to the overhead of initializing OpenMP and creating a team of
|
95
|
+
// threads, we require the data length to be larger than a threshold:
|
96
|
+
#define OMP_THRESHOLD 20000
|
97
|
+
|
98
|
+
// Conditionally include OpenMP-accelerated codec implementations:
|
99
|
+
#include "lib_openmp.c"
|
100
|
+
#endif
|
101
|
+
|
102
|
+
void
|
103
|
+
base64_encode
|
104
|
+
( const char *src
|
105
|
+
, size_t srclen
|
106
|
+
, char *out
|
107
|
+
, size_t *outlen
|
108
|
+
, int flags
|
109
|
+
)
|
110
|
+
{
|
111
|
+
size_t s;
|
112
|
+
size_t t;
|
113
|
+
struct base64_state state;
|
114
|
+
|
115
|
+
#ifdef _OPENMP
|
116
|
+
if (srclen >= OMP_THRESHOLD) {
|
117
|
+
base64_encode_openmp(src, srclen, out, outlen, flags);
|
118
|
+
return;
|
119
|
+
}
|
120
|
+
#endif
|
121
|
+
|
122
|
+
// Init the stream reader:
|
123
|
+
base64_stream_encode_init(&state, flags);
|
124
|
+
|
125
|
+
// Feed the whole string to the stream reader:
|
126
|
+
base64_stream_encode(&state, src, srclen, out, &s);
|
127
|
+
|
128
|
+
// Finalize the stream by writing trailer if any:
|
129
|
+
base64_stream_encode_final(&state, out + s, &t);
|
130
|
+
|
131
|
+
// Final output length is stream length plus tail:
|
132
|
+
*outlen = s + t;
|
133
|
+
}
|
134
|
+
|
135
|
+
int
|
136
|
+
base64_decode
|
137
|
+
( const char *src
|
138
|
+
, size_t srclen
|
139
|
+
, char *out
|
140
|
+
, size_t *outlen
|
141
|
+
, int flags
|
142
|
+
)
|
143
|
+
{
|
144
|
+
int ret;
|
145
|
+
struct base64_state state;
|
146
|
+
|
147
|
+
#ifdef _OPENMP
|
148
|
+
if (srclen >= OMP_THRESHOLD) {
|
149
|
+
return base64_decode_openmp(src, srclen, out, outlen, flags);
|
150
|
+
}
|
151
|
+
#endif
|
152
|
+
|
153
|
+
// Init the stream reader:
|
154
|
+
base64_stream_decode_init(&state, flags);
|
155
|
+
|
156
|
+
// Feed the whole string to the stream reader:
|
157
|
+
ret = base64_stream_decode(&state, src, srclen, out, outlen);
|
158
|
+
|
159
|
+
// If when decoding a whole block, we're still waiting for input then fail:
|
160
|
+
if (ret && (state.bytes == 0)) {
|
161
|
+
return ret;
|
162
|
+
}
|
163
|
+
return 0;
|
164
|
+
}
|
@@ -0,0 +1,149 @@
|
|
1
|
+
// This code makes some assumptions on the implementation of
|
2
|
+
// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
|
3
|
+
// Basically these assumptions boil down to that when breaking the src into
|
4
|
+
// parts, out parts can be written without side effects.
|
5
|
+
// This is met when:
|
6
|
+
// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
|
7
|
+
// 2) the shared variables src and out are not read or written outside of the
|
8
|
+
// bounds of their parts, i.e. when base64_stream_encode() reads a multiple
|
9
|
+
//    of 3 bytes, it must write no more than a multiple of 4 bytes, not even
|
10
|
+
// temporarily;
|
11
|
+
// 3) the state flag can be discarded after base64_stream_encode() and
|
12
|
+
// base64_stream_decode() on the parts.
|
13
|
+
|
14
|
+
static inline void
|
15
|
+
base64_encode_openmp
|
16
|
+
( const char *src
|
17
|
+
, size_t srclen
|
18
|
+
, char *out
|
19
|
+
, size_t *outlen
|
20
|
+
, int flags
|
21
|
+
)
|
22
|
+
{
|
23
|
+
size_t s;
|
24
|
+
size_t t;
|
25
|
+
size_t sum = 0, len, last_len;
|
26
|
+
struct base64_state state, initial_state;
|
27
|
+
int num_threads, i;
|
28
|
+
|
29
|
+
// Request a number of threads but not necessarily get them:
|
30
|
+
#pragma omp parallel
|
31
|
+
{
|
32
|
+
// Get the number of threads used from one thread only,
|
33
|
+
// as num_threads is a shared var:
|
34
|
+
#pragma omp single
|
35
|
+
{
|
36
|
+
num_threads = omp_get_num_threads();
|
37
|
+
|
38
|
+
// Split the input string into num_threads parts, each
|
39
|
+
// part a multiple of 3 bytes. The remaining bytes will
|
40
|
+
// be done later:
|
41
|
+
len = srclen / (num_threads * 3);
|
42
|
+
len *= 3;
|
43
|
+
last_len = srclen - num_threads * len;
|
44
|
+
|
45
|
+
// Init the stream reader:
|
46
|
+
base64_stream_encode_init(&state, flags);
|
47
|
+
initial_state = state;
|
48
|
+
}
|
49
|
+
|
50
|
+
// Single has an implicit barrier for all threads to wait here
|
51
|
+
// for the above to complete:
|
52
|
+
#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
|
53
|
+
for (i = 0; i < num_threads; i++)
|
54
|
+
{
|
55
|
+
// Feed each part of the string to the stream reader:
|
56
|
+
base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
|
57
|
+
sum += s;
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
// As encoding should never fail and we encode an exact multiple
|
62
|
+
// of 3 bytes, we can discard state:
|
63
|
+
state = initial_state;
|
64
|
+
|
65
|
+
// Encode the remaining bytes:
|
66
|
+
base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
|
67
|
+
|
68
|
+
// Finalize the stream by writing trailer if any:
|
69
|
+
base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
|
70
|
+
|
71
|
+
// Final output length is stream length plus tail:
|
72
|
+
sum += s + t;
|
73
|
+
*outlen = sum;
|
74
|
+
}
|
75
|
+
|
76
|
+
static inline int
|
77
|
+
base64_decode_openmp
|
78
|
+
( const char *src
|
79
|
+
, size_t srclen
|
80
|
+
, char *out
|
81
|
+
, size_t *outlen
|
82
|
+
, int flags
|
83
|
+
)
|
84
|
+
{
|
85
|
+
int num_threads, result = 0, i;
|
86
|
+
size_t sum = 0, len, last_len, s;
|
87
|
+
struct base64_state state, initial_state;
|
88
|
+
|
89
|
+
// Request a number of threads but not necessarily get them:
|
90
|
+
#pragma omp parallel
|
91
|
+
{
|
92
|
+
// Get the number of threads used from one thread only,
|
93
|
+
// as num_threads is a shared var:
|
94
|
+
#pragma omp single
|
95
|
+
{
|
96
|
+
num_threads = omp_get_num_threads();
|
97
|
+
|
98
|
+
// Split the input string into num_threads parts, each
|
99
|
+
// part a multiple of 4 bytes. The remaining bytes will
|
100
|
+
// be done later:
|
101
|
+
len = srclen / (num_threads * 4);
|
102
|
+
len *= 4;
|
103
|
+
last_len = srclen - num_threads * len;
|
104
|
+
|
105
|
+
// Init the stream reader:
|
106
|
+
base64_stream_decode_init(&state, flags);
|
107
|
+
|
108
|
+
initial_state = state;
|
109
|
+
}
|
110
|
+
|
111
|
+
// Single has an implicit barrier to wait here for the above to
|
112
|
+
// complete:
|
113
|
+
#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
|
114
|
+
for (i = 0; i < num_threads; i++)
|
115
|
+
{
|
116
|
+
int this_result;
|
117
|
+
|
118
|
+
// Feed each part of the string to the stream reader:
|
119
|
+
this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
|
120
|
+
sum += s;
|
121
|
+
result += this_result;
|
122
|
+
}
|
123
|
+
}
|
124
|
+
|
125
|
+
// If `result' equals `-num_threads', then all threads returned -1,
|
126
|
+
// indicating that the requested codec is not available:
|
127
|
+
if (result == -num_threads) {
|
128
|
+
return -1;
|
129
|
+
}
|
130
|
+
|
131
|
+
// If `result' does not equal `num_threads', then at least one of the
|
132
|
+
// threads hit a decode error:
|
133
|
+
if (result != num_threads) {
|
134
|
+
return 0;
|
135
|
+
}
|
136
|
+
|
137
|
+
// So far so good, now decode whatever remains in the buffer. Reuse the
|
138
|
+
// initial state, since we are at a 4-byte boundary:
|
139
|
+
state = initial_state;
|
140
|
+
result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
|
141
|
+
sum += s;
|
142
|
+
*outlen = sum;
|
143
|
+
|
144
|
+
// If when decoding a whole block, we're still waiting for input then fail:
|
145
|
+
if (result && (state.bytes == 0)) {
|
146
|
+
return result;
|
147
|
+
}
|
148
|
+
return 0;
|
149
|
+
}
|
@@ -0,0 +1 @@
|
|
1
|
+
table_generator
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Regenerates the lookup-table headers used by the library.
#
#   all    build both table headers and the table_generator helper
#   clean  remove everything listed in TARGETS

.PHONY: all clean

# The headers are written through shell redirection, so a generator that
# fails halfway would leave a truncated file that make considers up to date.
# Have make delete the target of any recipe that fails:
.DELETE_ON_ERROR:

TARGETS := table_dec_32bit.h table_enc_12bit.h table_generator

all: $(TARGETS)

clean:
	$(RM) $(TARGETS)

# The decoder table is printed by the compiled generator:
table_dec_32bit.h: table_generator
	./$< > $@

# The encoder table is printed by the Python script, which is run directly:
table_enc_12bit.h: table_enc_12bit.py
	./$< > $@

table_generator: table_generator.c
	$(CC) $(CFLAGS) -o $@ $^
|