brotli 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/brotli/brotli.cc +114 -24
- data/ext/brotli/brotli.h +0 -1
- data/ext/brotli/extconf.rb +30 -23
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +1 -1
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +3 -3
- data/vendor/brotli/dec/bit_reader.h +25 -27
- data/vendor/brotli/dec/context.h +4 -4
- data/vendor/brotli/dec/decode.c +410 -486
- data/vendor/brotli/dec/decode.h +101 -105
- data/vendor/brotli/dec/dictionary.c +1 -1
- data/vendor/brotli/dec/dictionary.h +7 -8
- data/vendor/brotli/dec/huffman.c +103 -105
- data/vendor/brotli/dec/huffman.h +18 -18
- data/vendor/brotli/dec/port.h +52 -40
- data/vendor/brotli/dec/prefix.h +2 -0
- data/vendor/brotli/dec/state.c +13 -19
- data/vendor/brotli/dec/state.h +25 -39
- data/vendor/brotli/dec/transform.h +38 -44
- data/vendor/brotli/dec/types.h +2 -2
- data/vendor/brotli/enc/Makefile +1 -1
- data/vendor/brotli/enc/backward_references.cc +455 -359
- data/vendor/brotli/enc/backward_references.h +79 -3
- data/vendor/brotli/enc/bit_cost.h +54 -32
- data/vendor/brotli/enc/block_splitter.cc +285 -193
- data/vendor/brotli/enc/block_splitter.h +4 -12
- data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
- data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
- data/vendor/brotli/enc/cluster.h +161 -120
- data/vendor/brotli/enc/command.h +60 -37
- data/vendor/brotli/enc/compress_fragment.cc +701 -0
- data/vendor/brotli/enc/compress_fragment.h +47 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
- data/vendor/brotli/enc/compressor.h +15 -0
- data/vendor/brotli/enc/context.h +1 -1
- data/vendor/brotli/enc/dictionary.h +2 -2
- data/vendor/brotli/enc/encode.cc +819 -286
- data/vendor/brotli/enc/encode.h +38 -15
- data/vendor/brotli/enc/encode_parallel.cc +40 -42
- data/vendor/brotli/enc/entropy_encode.cc +144 -147
- data/vendor/brotli/enc/entropy_encode.h +32 -8
- data/vendor/brotli/enc/entropy_encode_static.h +572 -0
- data/vendor/brotli/enc/fast_log.h +7 -40
- data/vendor/brotli/enc/find_match_length.h +9 -9
- data/vendor/brotli/enc/hash.h +462 -154
- data/vendor/brotli/enc/histogram.cc +6 -6
- data/vendor/brotli/enc/histogram.h +13 -13
- data/vendor/brotli/enc/literal_cost.cc +45 -45
- data/vendor/brotli/enc/metablock.cc +92 -89
- data/vendor/brotli/enc/metablock.h +12 -12
- data/vendor/brotli/enc/port.h +7 -16
- data/vendor/brotli/enc/prefix.h +23 -22
- data/vendor/brotli/enc/ringbuffer.h +75 -29
- data/vendor/brotli/enc/static_dict.cc +56 -48
- data/vendor/brotli/enc/static_dict.h +5 -5
- data/vendor/brotli/enc/streams.cc +1 -1
- data/vendor/brotli/enc/streams.h +5 -5
- data/vendor/brotli/enc/transform.h +40 -35
- data/vendor/brotli/enc/types.h +2 -0
- data/vendor/brotli/enc/utf8_util.cc +3 -2
- data/vendor/brotli/enc/write_bits.h +6 -6
- metadata +9 -5
- data/vendor/brotli/dec/streams.c +0 -102
- data/vendor/brotli/dec/streams.h +0 -95
@@ -9,6 +9,7 @@
|
|
9
9
|
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
10
10
|
#define BROTLI_ENC_RINGBUFFER_H_
|
11
11
|
|
12
|
+
#include <cstdlib> /* free, realloc */
|
12
13
|
|
13
14
|
#include "./port.h"
|
14
15
|
#include "./types.h"
|
@@ -16,30 +17,66 @@
|
|
16
17
|
namespace brotli {
|
17
18
|
|
18
19
|
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
|
19
|
-
// data in a circular manner: writing a byte writes it to
|
20
|
-
//
|
21
|
-
// contains another copy of the
|
22
|
-
//
|
20
|
+
// data in a circular manner: writing a byte writes it to:
|
21
|
+
// `position() % (1 << window_bits)'.
|
22
|
+
// For convenience, the RingBuffer array contains another copy of the
|
23
|
+
// first `1 << tail_bits' bytes:
|
24
|
+
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
|
25
|
+
// and another copy of the last two bytes:
|
26
|
+
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
|
27
|
+
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
|
23
28
|
class RingBuffer {
|
24
29
|
public:
|
25
30
|
RingBuffer(int window_bits, int tail_bits)
|
26
|
-
: size_(
|
27
|
-
mask_((
|
28
|
-
tail_size_(
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
: size_(1u << window_bits),
|
32
|
+
mask_((1u << window_bits) - 1),
|
33
|
+
tail_size_(1u << tail_bits),
|
34
|
+
total_size_(size_ + tail_size_),
|
35
|
+
cur_size_(0),
|
36
|
+
pos_(0),
|
37
|
+
data_(0),
|
38
|
+
buffer_(0) {}
|
39
|
+
|
40
|
+
~RingBuffer(void) {
|
41
|
+
free(data_);
|
36
42
|
}
|
37
|
-
|
38
|
-
|
43
|
+
|
44
|
+
// Allocates or re-allocates data_ to the given length + plus some slack
|
45
|
+
// region before and after. Fills the slack regions with zeros.
|
46
|
+
inline void InitBuffer(const uint32_t buflen) {
|
47
|
+
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
48
|
+
cur_size_ = buflen;
|
49
|
+
data_ = static_cast<uint8_t*>(realloc(
|
50
|
+
data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
|
51
|
+
buffer_ = data_ + 2;
|
52
|
+
buffer_[-2] = buffer_[-1] = 0;
|
53
|
+
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
54
|
+
buffer_[cur_size_ + i] = 0;
|
55
|
+
}
|
39
56
|
}
|
40
57
|
|
41
58
|
// Push bytes into the ring buffer.
|
42
59
|
void Write(const uint8_t *bytes, size_t n) {
|
60
|
+
if (pos_ == 0 && n < tail_size_) {
|
61
|
+
// Special case for the first write: to process the first block, we don't
|
62
|
+
// need to allocate the whole ringbuffer and we don't need the tail
|
63
|
+
// either. However, we do this memory usage optimization only if the
|
64
|
+
// first write is less than the tail size, which is also the input block
|
65
|
+
// size, otherwise it is likely that other blocks will follow and we
|
66
|
+
// will need to reallocate to the full size anyway.
|
67
|
+
pos_ = static_cast<uint32_t>(n);
|
68
|
+
InitBuffer(pos_);
|
69
|
+
memcpy(buffer_, bytes, n);
|
70
|
+
return;
|
71
|
+
}
|
72
|
+
if (cur_size_ < total_size_) {
|
73
|
+
// Lazily allocate the full buffer.
|
74
|
+
InitBuffer(total_size_);
|
75
|
+
// Initialize the last two bytes to zero, so that we don't have to worry
|
76
|
+
// later when we copy the last two bytes to the first two positions.
|
77
|
+
buffer_[size_ - 2] = 0;
|
78
|
+
buffer_[size_ - 1] = 0;
|
79
|
+
}
|
43
80
|
const size_t masked_pos = pos_ & mask_;
|
44
81
|
// The length of the writes is limited so that we do not need to worry
|
45
82
|
// about a write
|
@@ -51,26 +88,31 @@ class RingBuffer {
|
|
51
88
|
// Split into two writes.
|
52
89
|
// Copy into the end of the buffer, including the tail buffer.
|
53
90
|
memcpy(&buffer_[masked_pos], bytes,
|
54
|
-
std::min(n,
|
91
|
+
std::min(n, total_size_ - masked_pos));
|
55
92
|
// Copy into the beginning of the buffer
|
56
93
|
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
|
57
94
|
n - (size_ - masked_pos));
|
58
95
|
}
|
59
|
-
|
96
|
+
buffer_[-2] = buffer_[size_ - 2];
|
97
|
+
buffer_[-1] = buffer_[size_ - 1];
|
98
|
+
pos_ += static_cast<uint32_t>(n);
|
99
|
+
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
|
100
|
+
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
|
101
|
+
}
|
60
102
|
}
|
61
103
|
|
62
|
-
void Reset() {
|
104
|
+
void Reset(void) {
|
63
105
|
pos_ = 0;
|
64
106
|
}
|
65
107
|
|
66
108
|
// Logical cursor position in the ring buffer.
|
67
|
-
|
109
|
+
uint32_t position(void) const { return pos_; }
|
68
110
|
|
69
111
|
// Bit mask for getting the physical position for a logical position.
|
70
|
-
|
112
|
+
uint32_t mask(void) const { return mask_; }
|
71
113
|
|
72
|
-
uint8_t *start() { return &buffer_[0]; }
|
73
|
-
const uint8_t *start() const { return &buffer_[0]; }
|
114
|
+
uint8_t *start(void) { return &buffer_[0]; }
|
115
|
+
const uint8_t *start(void) const { return &buffer_[0]; }
|
74
116
|
|
75
117
|
private:
|
76
118
|
void WriteTail(const uint8_t *bytes, size_t n) {
|
@@ -83,14 +125,18 @@ class RingBuffer {
|
|
83
125
|
}
|
84
126
|
|
85
127
|
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
|
86
|
-
const
|
87
|
-
const
|
88
|
-
const
|
128
|
+
const uint32_t size_;
|
129
|
+
const uint32_t mask_;
|
130
|
+
const uint32_t tail_size_;
|
131
|
+
const uint32_t total_size_;
|
89
132
|
|
133
|
+
uint32_t cur_size_;
|
90
134
|
// Position to write in the ring buffer.
|
91
|
-
|
92
|
-
// The actual ring buffer containing the
|
93
|
-
// as a tail.
|
135
|
+
uint32_t pos_;
|
136
|
+
// The actual ring buffer containing the copy of the last two bytes, the data,
|
137
|
+
// and the copy of the beginning as a tail.
|
138
|
+
uint8_t *data_;
|
139
|
+
// The start of the ringbuffer.
|
94
140
|
uint8_t *buffer_;
|
95
141
|
};
|
96
142
|
|
@@ -22,19 +22,24 @@ inline uint32_t Hash(const uint8_t *data) {
|
|
22
22
|
return h >> (32 - kDictNumBits);
|
23
23
|
}
|
24
24
|
|
25
|
-
inline void AddMatch(
|
26
|
-
|
25
|
+
inline void AddMatch(size_t distance, size_t len, size_t len_code,
|
26
|
+
uint32_t* matches) {
|
27
|
+
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
|
28
|
+
matches[len] = std::min(matches[len], match);
|
27
29
|
}
|
28
30
|
|
29
|
-
inline
|
30
|
-
|
31
|
+
inline size_t DictMatchLength(const uint8_t* data,
|
32
|
+
size_t id,
|
33
|
+
size_t len,
|
34
|
+
size_t maxlen) {
|
35
|
+
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
31
36
|
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
32
37
|
std::min(len, maxlen));
|
33
38
|
}
|
34
39
|
|
35
|
-
inline bool IsMatch(DictWord w, const uint8_t* data,
|
40
|
+
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
|
36
41
|
if (w.len > max_length) return false;
|
37
|
-
const
|
42
|
+
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
|
38
43
|
const uint8_t* dict = &kBrotliDictionary[offset];
|
39
44
|
if (w.transform == 0) {
|
40
45
|
// Match against base dictionary word.
|
@@ -44,12 +49,12 @@ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
|
|
44
49
|
// Note that there are only ASCII uppercase words in the lookup table.
|
45
50
|
return (dict[0] >= 'a' && dict[0] <= 'z' &&
|
46
51
|
(dict[0] ^ 32) == data[0] &&
|
47
|
-
FindMatchLengthWithLimit(&dict[1], &data[1], w.len -
|
48
|
-
w.len -
|
52
|
+
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
|
53
|
+
w.len - 1u);
|
49
54
|
} else {
|
50
55
|
// Match against uppercase all transform.
|
51
56
|
// Note that there are only ASCII uppercase words in the lookup table.
|
52
|
-
for (
|
57
|
+
for (size_t i = 0; i < w.len; ++i) {
|
53
58
|
if (dict[i] >= 'a' && dict[i] <= 'z') {
|
54
59
|
if ((dict[i] ^ 32) != data[i]) return false;
|
55
60
|
} else {
|
@@ -61,22 +66,22 @@ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
|
|
61
66
|
}
|
62
67
|
|
63
68
|
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
64
|
-
|
65
|
-
|
66
|
-
|
69
|
+
size_t min_length,
|
70
|
+
size_t max_length,
|
71
|
+
uint32_t* matches) {
|
67
72
|
bool found_match = false;
|
68
|
-
|
69
|
-
|
73
|
+
size_t key = Hash(data);
|
74
|
+
size_t bucket = kStaticDictionaryBuckets[key];
|
70
75
|
if (bucket != 0) {
|
71
|
-
|
72
|
-
|
73
|
-
for (
|
76
|
+
size_t num = bucket & 0xff;
|
77
|
+
size_t offset = bucket >> 8;
|
78
|
+
for (size_t i = 0; i < num; ++i) {
|
74
79
|
const DictWord w = kStaticDictionaryWords[offset + i];
|
75
|
-
const
|
76
|
-
const
|
77
|
-
const
|
80
|
+
const size_t l = w.len;
|
81
|
+
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
82
|
+
const size_t id = w.idx;
|
78
83
|
if (w.transform == 0) {
|
79
|
-
const
|
84
|
+
const size_t matchlen = DictMatchLength(data, id, l, max_length);
|
80
85
|
// Transform "" + kIdentity + ""
|
81
86
|
if (matchlen == l) {
|
82
87
|
AddMatch(id, l, l, matches);
|
@@ -93,9 +98,10 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
93
98
|
found_match = true;
|
94
99
|
}
|
95
100
|
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
|
96
|
-
|
97
|
-
|
98
|
-
|
101
|
+
size_t minlen = min_length;
|
102
|
+
if (l > 9) minlen = std::max(minlen, l - 9);
|
103
|
+
size_t maxlen = std::min(matchlen, l - 2);
|
104
|
+
for (size_t len = minlen; len <= maxlen; ++len) {
|
99
105
|
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
100
106
|
found_match = true;
|
101
107
|
}
|
@@ -250,8 +256,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
250
256
|
}
|
251
257
|
}
|
252
258
|
} else {
|
253
|
-
// Set t=
|
254
|
-
|
259
|
+
// Set t=false for kUppercaseFirst and
|
260
|
+
// t=true otherwise (kUppercaseAll) transform.
|
261
|
+
const bool t = w.transform != kUppercaseFirst;
|
255
262
|
if (!IsMatch(w, data, max_length)) {
|
256
263
|
continue;
|
257
264
|
}
|
@@ -299,13 +306,13 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
299
306
|
bool is_space = (data[0] == ' ');
|
300
307
|
key = Hash(&data[1]);
|
301
308
|
bucket = kStaticDictionaryBuckets[key];
|
302
|
-
|
303
|
-
|
304
|
-
for (
|
309
|
+
size_t num = bucket & 0xff;
|
310
|
+
size_t offset = bucket >> 8;
|
311
|
+
for (size_t i = 0; i < num; ++i) {
|
305
312
|
const DictWord w = kStaticDictionaryWords[offset + i];
|
306
|
-
const
|
307
|
-
const
|
308
|
-
const
|
313
|
+
const size_t l = w.len;
|
314
|
+
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
315
|
+
const size_t id = w.idx;
|
309
316
|
if (w.transform == 0) {
|
310
317
|
if (!IsMatch(w, &data[1], max_length - 1)) {
|
311
318
|
continue;
|
@@ -342,8 +349,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
342
349
|
}
|
343
350
|
}
|
344
351
|
} else if (is_space) {
|
345
|
-
// Set t=
|
346
|
-
|
352
|
+
// Set t=false for kUppercaseFirst and
|
353
|
+
// t=true otherwise (kUppercaseAll) transform.
|
354
|
+
const bool t = w.transform != kUppercaseFirst;
|
347
355
|
if (!IsMatch(w, &data[1], max_length - 1)) {
|
348
356
|
continue;
|
349
357
|
}
|
@@ -358,7 +366,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
358
366
|
if (s[0] == ' ') {
|
359
367
|
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
|
360
368
|
} else if (s[0] == ',') {
|
361
|
-
if (t
|
369
|
+
if (!t) {
|
362
370
|
AddMatch(id + 109 * n, l + 2, l, matches);
|
363
371
|
}
|
364
372
|
if (s[1] == ' ') {
|
@@ -386,19 +394,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
386
394
|
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
387
395
|
key = Hash(&data[2]);
|
388
396
|
bucket = kStaticDictionaryBuckets[key];
|
389
|
-
|
390
|
-
|
391
|
-
for (
|
397
|
+
size_t num = bucket & 0xff;
|
398
|
+
size_t offset = bucket >> 8;
|
399
|
+
for (size_t i = 0; i < num; ++i) {
|
392
400
|
const DictWord w = kStaticDictionaryWords[offset + i];
|
393
|
-
const
|
394
|
-
const
|
395
|
-
const
|
401
|
+
const size_t l = w.len;
|
402
|
+
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
403
|
+
const size_t id = w.idx;
|
396
404
|
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
397
405
|
if (data[0] == 0xc2) {
|
398
406
|
AddMatch(id + 102 * n, l + 2, l, matches);
|
399
407
|
found_match = true;
|
400
408
|
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
401
|
-
|
409
|
+
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
402
410
|
AddMatch(id + t * n, l + 3, l, matches);
|
403
411
|
found_match = true;
|
404
412
|
}
|
@@ -414,13 +422,13 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
414
422
|
data[3] == 'm' && data[4] == '/')) {
|
415
423
|
key = Hash(&data[5]);
|
416
424
|
bucket = kStaticDictionaryBuckets[key];
|
417
|
-
|
418
|
-
|
419
|
-
for (
|
425
|
+
size_t num = bucket & 0xff;
|
426
|
+
size_t offset = bucket >> 8;
|
427
|
+
for (size_t i = 0; i < num; ++i) {
|
420
428
|
const DictWord w = kStaticDictionaryWords[offset + i];
|
421
|
-
const
|
422
|
-
const
|
423
|
-
const
|
429
|
+
const size_t l = w.len;
|
430
|
+
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
431
|
+
const size_t id = w.idx;
|
424
432
|
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
425
433
|
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
426
434
|
found_match = true;
|
@@ -13,8 +13,8 @@
|
|
13
13
|
|
14
14
|
namespace brotli {
|
15
15
|
|
16
|
-
static const
|
17
|
-
static const
|
16
|
+
static const size_t kMaxDictionaryMatchLen = 37;
|
17
|
+
static const uint32_t kInvalidMatch = 0xfffffff;
|
18
18
|
|
19
19
|
// Matches data against static dictionary words, and for each length l,
|
20
20
|
// for which a match is found, updates matches[l] to be the minimum possible
|
@@ -23,9 +23,9 @@ static const int kInvalidMatch = 0xfffffff;
|
|
23
23
|
// matches array is at least kMaxDictionaryMatchLen + 1 long
|
24
24
|
// all elements are initialized to kInvalidMatch
|
25
25
|
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
size_t min_length,
|
27
|
+
size_t max_length,
|
28
|
+
uint32_t* matches);
|
29
29
|
|
30
30
|
} // namespace brotli
|
31
31
|
|
data/vendor/brotli/enc/streams.h
CHANGED
@@ -19,7 +19,7 @@ namespace brotli {
|
|
19
19
|
// Input interface for the compression routines.
|
20
20
|
class BrotliIn {
|
21
21
|
public:
|
22
|
-
virtual ~BrotliIn() {}
|
22
|
+
virtual ~BrotliIn(void) {}
|
23
23
|
|
24
24
|
// Return a pointer to the next block of input of at most n bytes.
|
25
25
|
// Return the actual length in *nread.
|
@@ -32,7 +32,7 @@ class BrotliIn {
|
|
32
32
|
// Output interface for the compression routines.
|
33
33
|
class BrotliOut {
|
34
34
|
public:
|
35
|
-
virtual ~BrotliOut() {}
|
35
|
+
virtual ~BrotliOut(void) {}
|
36
36
|
|
37
37
|
// Write n bytes of data from buf.
|
38
38
|
// Return true if all written, false otherwise.
|
@@ -47,7 +47,7 @@ class BrotliMemIn : public BrotliIn {
|
|
47
47
|
void Reset(const void* buf, size_t len);
|
48
48
|
|
49
49
|
// returns the amount of data consumed
|
50
|
-
size_t position() const { return pos_; }
|
50
|
+
size_t position(void) const { return pos_; }
|
51
51
|
|
52
52
|
const void* Read(size_t n, size_t* OUTPUT);
|
53
53
|
|
@@ -65,7 +65,7 @@ class BrotliMemOut : public BrotliOut {
|
|
65
65
|
void Reset(void* buf, size_t len);
|
66
66
|
|
67
67
|
// returns the amount of data written
|
68
|
-
size_t position() const { return pos_; }
|
68
|
+
size_t position(void) const { return pos_; }
|
69
69
|
|
70
70
|
bool Write(const void* buf, size_t n);
|
71
71
|
|
@@ -96,7 +96,7 @@ class BrotliStringOut : public BrotliOut {
|
|
96
96
|
class BrotliFileIn : public BrotliIn {
|
97
97
|
public:
|
98
98
|
BrotliFileIn(FILE* f, size_t max_read_size);
|
99
|
-
~BrotliFileIn();
|
99
|
+
~BrotliFileIn(void);
|
100
100
|
|
101
101
|
const void* Read(size_t n, size_t* bytes_read);
|
102
102
|
|
@@ -172,15 +172,11 @@ static const Transform kTransforms[] = {
|
|
172
172
|
static const size_t kNumTransforms =
|
173
173
|
sizeof(kTransforms) / sizeof(kTransforms[0]);
|
174
174
|
|
175
|
-
static const
|
176
|
-
0, 3, 11, 26, 34, 39, 40, 55, 0, 54
|
177
|
-
};
|
178
|
-
|
179
|
-
static const int kOmitLastNTransforms[10] = {
|
175
|
+
static const size_t kOmitLastNTransforms[10] = {
|
180
176
|
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
181
177
|
};
|
182
178
|
|
183
|
-
static
|
179
|
+
static size_t ToUpperCase(uint8_t *p, size_t len) {
|
184
180
|
if (len == 1 || p[0] < 0xc0) {
|
185
181
|
if (p[0] >= 'a' && p[0] <= 'z') {
|
186
182
|
p[0] ^= 32;
|
@@ -198,41 +194,50 @@ static int ToUpperCase(uint8_t *p, int len) {
|
|
198
194
|
return 3;
|
199
195
|
}
|
200
196
|
|
201
|
-
inline std::string
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
197
|
+
inline std::string TransformWord(
|
198
|
+
WordTransformType transform_type, const uint8_t* word, size_t len) {
|
199
|
+
if (transform_type <= kOmitLast9) {
|
200
|
+
if (len <= static_cast<size_t>(transform_type)) {
|
201
|
+
return std::string();
|
202
|
+
}
|
203
|
+
return std::string(word, word + len - transform_type);
|
206
204
|
}
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
205
|
+
|
206
|
+
if (transform_type >= kOmitFirst1) {
|
207
|
+
const size_t skip = transform_type - (kOmitFirst1 - 1);
|
208
|
+
if (len <= skip) {
|
209
|
+
return std::string();
|
210
|
+
}
|
211
|
+
return std::string(word + skip, word + len);
|
212
|
+
}
|
213
|
+
|
214
|
+
std::string ret = std::string(word, word + len);
|
215
|
+
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
|
216
|
+
if (transform_type == kUppercaseFirst) {
|
217
|
+
ToUpperCase(uppercase, len);
|
218
|
+
} else if (transform_type == kUppercaseAll) {
|
219
|
+
size_t position = 0;
|
220
|
+
while (position < len) {
|
221
|
+
size_t step = ToUpperCase(uppercase, len - position);
|
222
|
+
uppercase += step;
|
223
|
+
position += step;
|
225
224
|
}
|
226
225
|
}
|
227
|
-
ret += std::string(t.suffix);
|
228
226
|
return ret;
|
229
227
|
}
|
230
228
|
|
231
|
-
inline std::string
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
229
|
+
inline std::string ApplyTransform(
|
230
|
+
const Transform& t, const uint8_t* word, size_t len) {
|
231
|
+
return std::string(t.prefix) +
|
232
|
+
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
|
233
|
+
}
|
234
|
+
|
235
|
+
inline std::string GetTransformedDictionaryWord(size_t len_code,
|
236
|
+
size_t word_id) {
|
237
|
+
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
|
238
|
+
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
|
239
|
+
size_t t = word_id / num_words;
|
240
|
+
size_t word_idx = word_id % num_words;
|
236
241
|
offset += len_code * word_idx;
|
237
242
|
const uint8_t* word = &kBrotliDictionary[offset];
|
238
243
|
return ApplyTransform(kTransforms[t], word, len_code);
|
data/vendor/brotli/enc/types.h
CHANGED
@@ -14,7 +14,7 @@ namespace brotli {
|
|
14
14
|
|
15
15
|
namespace {
|
16
16
|
|
17
|
-
|
17
|
+
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
|
18
18
|
// ASCII
|
19
19
|
if ((input[0] & 0x80) == 0) {
|
20
20
|
*symbol = input[0];
|
@@ -72,7 +72,8 @@ bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
|
|
72
72
|
size_t i = 0;
|
73
73
|
while (i < length) {
|
74
74
|
int symbol;
|
75
|
-
|
75
|
+
size_t bytes_read = ParseAsUTF8(
|
76
|
+
&symbol, &data[(pos + i) & mask], length - i);
|
76
77
|
i += bytes_read;
|
77
78
|
if (symbol < 0x110000) size_utf8 += bytes_read;
|
78
79
|
}
|
@@ -34,9 +34,9 @@ namespace brotli {
|
|
34
34
|
//
|
35
35
|
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
|
36
36
|
// and locate the rest in BYTE+1, BYTE+2, etc.
|
37
|
-
inline void WriteBits(
|
37
|
+
inline void WriteBits(size_t n_bits,
|
38
38
|
uint64_t bits,
|
39
|
-
|
39
|
+
size_t * __restrict pos,
|
40
40
|
uint8_t * __restrict array) {
|
41
41
|
#ifdef BIT_WRITER_DEBUG
|
42
42
|
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
|
@@ -57,11 +57,11 @@ inline void WriteBits(int n_bits,
|
|
57
57
|
#else
|
58
58
|
// implicit & 0xff is assumed for uint8_t arithmetics
|
59
59
|
uint8_t *array_pos = &array[*pos >> 3];
|
60
|
-
const
|
60
|
+
const size_t bits_reserved_in_first_byte = (*pos & 7);
|
61
61
|
bits <<= bits_reserved_in_first_byte;
|
62
62
|
*array_pos++ |= static_cast<uint8_t>(bits);
|
63
|
-
for (
|
64
|
-
bits_left_to_write >=
|
63
|
+
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
|
64
|
+
bits_left_to_write >= 9;
|
65
65
|
bits_left_to_write -= 8) {
|
66
66
|
bits >>= 8;
|
67
67
|
*array_pos++ = static_cast<uint8_t>(bits);
|
@@ -71,7 +71,7 @@ inline void WriteBits(int n_bits,
|
|
71
71
|
#endif
|
72
72
|
}
|
73
73
|
|
74
|
-
inline void WriteBitsPrepareStorage(
|
74
|
+
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
|
75
75
|
#ifdef BIT_WRITER_DEBUG
|
76
76
|
printf("WriteBitsPrepareStorage %10d\n", pos);
|
77
77
|
#endif
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: brotli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- miyucy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -104,8 +104,6 @@ files:
|
|
104
104
|
- vendor/brotli/dec/prefix.h
|
105
105
|
- vendor/brotli/dec/state.c
|
106
106
|
- vendor/brotli/dec/state.h
|
107
|
-
- vendor/brotli/dec/streams.c
|
108
|
-
- vendor/brotli/dec/streams.h
|
109
107
|
- vendor/brotli/dec/transform.h
|
110
108
|
- vendor/brotli/dec/types.h
|
111
109
|
- vendor/brotli/enc/Makefile
|
@@ -118,6 +116,11 @@ files:
|
|
118
116
|
- vendor/brotli/enc/brotli_bit_stream.h
|
119
117
|
- vendor/brotli/enc/cluster.h
|
120
118
|
- vendor/brotli/enc/command.h
|
119
|
+
- vendor/brotli/enc/compress_fragment.cc
|
120
|
+
- vendor/brotli/enc/compress_fragment.h
|
121
|
+
- vendor/brotli/enc/compress_fragment_two_pass.cc
|
122
|
+
- vendor/brotli/enc/compress_fragment_two_pass.h
|
123
|
+
- vendor/brotli/enc/compressor.h
|
121
124
|
- vendor/brotli/enc/context.h
|
122
125
|
- vendor/brotli/enc/dictionary.cc
|
123
126
|
- vendor/brotli/enc/dictionary.h
|
@@ -128,6 +131,7 @@ files:
|
|
128
131
|
- vendor/brotli/enc/encode_parallel.h
|
129
132
|
- vendor/brotli/enc/entropy_encode.cc
|
130
133
|
- vendor/brotli/enc/entropy_encode.h
|
134
|
+
- vendor/brotli/enc/entropy_encode_static.h
|
131
135
|
- vendor/brotli/enc/fast_log.h
|
132
136
|
- vendor/brotli/enc/find_match_length.h
|
133
137
|
- vendor/brotli/enc/hash.h
|
@@ -169,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
173
|
version: '0'
|
170
174
|
requirements: []
|
171
175
|
rubyforge_project:
|
172
|
-
rubygems_version: 2.4
|
176
|
+
rubygems_version: 2.6.4
|
173
177
|
signing_key:
|
174
178
|
specification_version: 4
|
175
179
|
summary: Brotli compressor/decompressor
|