isomorfeus-ferret 0.12.4 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +612 -612
- data/README.md +77 -48
- data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
- data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
- data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
- data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
- data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
- data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
- data/ext/isomorfeus_ferret_ext/test.c +7 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +125 -5
- data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,167 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Sliding window over the input data. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
10
|
+
#define BROTLI_ENC_RINGBUFFER_H_
|
11
|
+
|
12
|
+
#include <string.h> /* memcpy */
|
13
|
+
|
14
|
+
#include "brotli_common_platform.h"
|
15
|
+
#include "brotli_types.h"
|
16
|
+
#include "brotli_enc_memory.h"
|
17
|
+
#include "brotli_enc_quality.h"
|
18
|
+
|
19
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
20
|
+
extern "C" {
|
21
|
+
#endif
|
22
|
+
|
23
|
+
/* Sliding-window storage parameterized by window_bits and tail_bits.

   The window holds `1 << window_bits' bytes and is filled circularly: the
   byte at logical position `pos' is stored at index `pos & mask_'.
   For convenience the array carries two redundant regions around the window:
     - a tail mirroring the first `1 << tail_bits' bytes of the window:
         buffer_[i + (1 << window_bits)] == buffer_[i], if i < (1 << tail_bits)
     - a two-byte prefix mirroring the last two bytes of the window:
         buffer_[-1] == buffer_[(1 << window_bits) - 1] and
         buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
typedef struct RingBuffer {
  /* Window size in bytes: 1 << window_bits. */
  const uint32_t size_;
  /* size_ - 1; reduces a position to an index inside the window. */
  const uint32_t mask_;
  /* Length of the mirrored tail: 1 << tail_bits. */
  const uint32_t tail_size_;
  /* size_ + tail_size_: window and tail together. */
  const uint32_t total_size_;

  /* Length most recently handed to RingBufferInitBuffer; 0 before any
     allocation has happened. */
  uint32_t cur_size_;
  /* Position to write in the ring buffer. */
  uint32_t pos_;
  /* Start of the allocation: the two-byte copy of the window end, then the
     data, then the copy of the beginning as a tail. */
  uint8_t* data_;
  /* Start of the window proper, i.e. data_ + 2. */
  uint8_t* buffer_;
} RingBuffer;
|
48
|
+
|
49
|
+
/* Puts the mutable part of |rb| into its empty state: no storage attached,
   nothing written yet. The const geometry fields are not touched here; they
   are assigned by RingBufferSetup. */
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
  rb->data_ = 0;
  rb->buffer_ = 0;
  rb->pos_ = 0;
  rb->cur_size_ = 0;
}
|
55
|
+
|
56
|
+
static BROTLI_INLINE void RingBufferSetup(
|
57
|
+
const BrotliEncoderParams* params, RingBuffer* rb) {
|
58
|
+
int window_bits = ComputeRbBits(params);
|
59
|
+
int tail_bits = params->lgblock;
|
60
|
+
*(uint32_t*)&rb->size_ = 1u << window_bits;
|
61
|
+
*(uint32_t*)&rb->mask_ = (1u << window_bits) - 1;
|
62
|
+
*(uint32_t*)&rb->tail_size_ = 1u << tail_bits;
|
63
|
+
*(uint32_t*)&rb->total_size_ = rb->size_ + rb->tail_size_;
|
64
|
+
}
|
65
|
+
|
66
|
+
/* Releases the storage owned by |rb| through memory manager |m|.
   buffer_ is an interior pointer into that storage (RingBufferInitBuffer sets
   it to data_ + 2), so it is cleared as well; otherwise it would be left
   pointing at freed memory. */
static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
  BROTLI_FREE(m, rb->data_);
  rb->buffer_ = 0;
}
|
69
|
+
|
70
|
+
/* Allocates or re-allocates data_ to the given length + plus some slack
|
71
|
+
region before and after. Fills the slack regions with zeros. */
|
72
|
+
static BROTLI_INLINE void RingBufferInitBuffer(
|
73
|
+
MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
|
74
|
+
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
75
|
+
uint8_t* new_data = BROTLI_ALLOC(
|
76
|
+
m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
|
77
|
+
size_t i;
|
78
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_data)) return;
|
79
|
+
if (rb->data_) {
|
80
|
+
memcpy(new_data, rb->data_,
|
81
|
+
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
|
82
|
+
BROTLI_FREE(m, rb->data_);
|
83
|
+
}
|
84
|
+
rb->data_ = new_data;
|
85
|
+
rb->cur_size_ = buflen;
|
86
|
+
rb->buffer_ = rb->data_ + 2;
|
87
|
+
rb->buffer_[-2] = rb->buffer_[-1] = 0;
|
88
|
+
for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
89
|
+
rb->buffer_[rb->cur_size_ + i] = 0;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
/* Mirrors into the tail region the leading part of |bytes| that falls inside
   the first tail_size_ bytes of the window. Nothing happens when the current
   masked write position is already at or past tail_size_.
   pos_ is not advanced here. */
static BROTLI_INLINE void RingBufferWriteTail(
    const uint8_t* bytes, size_t n, RingBuffer* rb) {
  const size_t masked_pos = rb->pos_ & rb->mask_;
  size_t room;

  if (BROTLI_PREDICT_TRUE(masked_pos >= rb->tail_size_)) return;

  /* Bytes left in the tail starting from the mirrored position. */
  room = rb->tail_size_ - masked_pos;
  memcpy(&rb->buffer_[rb->size_ + masked_pos], bytes,
         BROTLI_MIN(size_t, n, room));
}
|
103
|
+
|
104
|
+
/* Push bytes into the ring buffer. */
|
105
|
+
static BROTLI_INLINE void RingBufferWrite(
|
106
|
+
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
107
|
+
if (rb->pos_ == 0 && n < rb->tail_size_) {
|
108
|
+
/* Special case for the first write: to process the first block, we don't
|
109
|
+
need to allocate the whole ring-buffer and we don't need the tail
|
110
|
+
either. However, we do this memory usage optimization only if the
|
111
|
+
first write is less than the tail size, which is also the input block
|
112
|
+
size, otherwise it is likely that other blocks will follow and we
|
113
|
+
will need to reallocate to the full size anyway. */
|
114
|
+
rb->pos_ = (uint32_t)n;
|
115
|
+
RingBufferInitBuffer(m, rb->pos_, rb);
|
116
|
+
if (BROTLI_IS_OOM(m)) return;
|
117
|
+
memcpy(rb->buffer_, bytes, n);
|
118
|
+
return;
|
119
|
+
}
|
120
|
+
if (rb->cur_size_ < rb->total_size_) {
|
121
|
+
/* Lazily allocate the full buffer. */
|
122
|
+
RingBufferInitBuffer(m, rb->total_size_, rb);
|
123
|
+
if (BROTLI_IS_OOM(m)) return;
|
124
|
+
/* Initialize the last two bytes to zero, so that we don't have to worry
|
125
|
+
later when we copy the last two bytes to the first two positions. */
|
126
|
+
rb->buffer_[rb->size_ - 2] = 0;
|
127
|
+
rb->buffer_[rb->size_ - 1] = 0;
|
128
|
+
/* Initialize tail; might be touched by "best_len++" optimization when
|
129
|
+
ring buffer is "full". */
|
130
|
+
rb->buffer_[rb->size_] = 241;
|
131
|
+
}
|
132
|
+
{
|
133
|
+
const size_t masked_pos = rb->pos_ & rb->mask_;
|
134
|
+
/* The length of the writes is limited so that we do not need to worry
|
135
|
+
about a write */
|
136
|
+
RingBufferWriteTail(bytes, n, rb);
|
137
|
+
if (BROTLI_PREDICT_TRUE(masked_pos + n <= rb->size_)) {
|
138
|
+
/* A single write fits. */
|
139
|
+
memcpy(&rb->buffer_[masked_pos], bytes, n);
|
140
|
+
} else {
|
141
|
+
/* Split into two writes.
|
142
|
+
Copy into the end of the buffer, including the tail buffer. */
|
143
|
+
memcpy(&rb->buffer_[masked_pos], bytes,
|
144
|
+
BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
|
145
|
+
/* Copy into the beginning of the buffer */
|
146
|
+
memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
|
147
|
+
n - (rb->size_ - masked_pos));
|
148
|
+
}
|
149
|
+
}
|
150
|
+
{
|
151
|
+
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
|
152
|
+
uint32_t rb_pos_mask = (1u << 31) - 1;
|
153
|
+
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
|
154
|
+
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
|
155
|
+
rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
|
156
|
+
if (not_first_lap) {
|
157
|
+
/* Wrap, but preserve not-a-first-lap feature. */
|
158
|
+
rb->pos_ |= 1u << 31;
|
159
|
+
}
|
160
|
+
}
|
161
|
+
}
|
162
|
+
|
163
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
164
|
+
} /* extern "C" */
|
165
|
+
#endif
|
166
|
+
|
167
|
+
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
|
@@ -0,0 +1,486 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include "brotli_enc_static_dict.h"
|
8
|
+
|
9
|
+
#include "brotli_common_dictionary.h"
|
10
|
+
#include "brotli_common_platform.h"
|
11
|
+
#include "brotli_common_transform.h"
|
12
|
+
#include "brotli_enc_encoder_dict.h"
|
13
|
+
#include "brotli_enc_find_match_length.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* Hashes the 32-bit value loaded from |data| (via BROTLI_UNALIGNED_LOAD32LE)
   into a kDictNumBits-bit bucket index. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
  const uint32_t word = BROTLI_UNALIGNED_LOAD32LE(data);
  const uint32_t mixed = word * kDictHashMul32;
  /* The multiplication mixes best into the high bits, so keep only those. */
  return mixed >> (32 - kDictNumBits);
}
|
25
|
+
|
26
|
+
/* Records a candidate match of length |len|. The candidate is packed as
   (distance << 5) + len_code, and matches[len] keeps the smallest packed
   value seen so far. */
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
                                   uint32_t* matches) {
  const uint32_t candidate = (uint32_t)((distance << 5) + len_code);
  if (candidate < matches[len]) {
    matches[len] = candidate;
  }
}
|
31
|
+
|
32
|
+
/* Returns how many leading bytes of |data| agree with dictionary word number
   |id| among the words of length |len|; the comparison stops after
   min(len, maxlen) bytes. */
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
                                            const uint8_t* data,
                                            size_t id,
                                            size_t len,
                                            size_t maxlen) {
  /* Words of equal length are stored back to back, |len| bytes each. */
  const uint8_t* word =
      &dictionary->data[dictionary->offsets_by_length[len] + len * id];
  const size_t limit = BROTLI_MIN(size_t, len, maxlen);
  return FindMatchLengthWithLimit(word, data, limit);
}
|
41
|
+
|
42
|
+
/* Tells whether |data| starts with dictionary word |w| after applying the
   transform stored in w.transform:
     0         - the word as is;
     10        - the word with its first letter uppercased;
     otherwise - the word with every letter uppercased.
   Only ASCII 'a'..'z' are case-folded. Words longer than |max_length| never
   match. */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
    DictWord w, const uint8_t* data, size_t max_length) {
  const uint8_t* word;
  size_t i;

  if (w.len > max_length) return BROTLI_FALSE;

  word = &dictionary->data[dictionary->offsets_by_length[w.len] +
                           (size_t)w.len * (size_t)w.idx];

  if (w.transform == 0) {
    /* Match against base dictionary word. */
    return TO_BROTLI_BOOL(
        FindMatchLengthWithLimit(word, data, w.len) == w.len);
  }

  if (w.transform == 10) {
    /* Match against uppercase first transform.
       Note that there are only ASCII uppercase words in the lookup table. */
    return TO_BROTLI_BOOL(word[0] >= 'a' && word[0] <= 'z' &&
        (word[0] ^ 32) == data[0] &&
        FindMatchLengthWithLimit(&word[1], &data[1], w.len - 1u) ==
        w.len - 1u);
  }

  /* Match against uppercase all transform.
     Note that there are only ASCII uppercase words in the lookup table. */
  for (i = 0; i < w.len; ++i) {
    /* Flipping bit 5 turns an ASCII lowercase letter into its uppercase. */
    const uint8_t expected = (word[i] >= 'a' && word[i] <= 'z') ?
        (uint8_t)(word[i] ^ 32) : word[i];
    if (expected != data[i]) return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
|
76
|
+
|
77
|
+
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
78
|
+
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
|
79
|
+
size_t min_length, size_t max_length, uint32_t* matches) {
|
80
|
+
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
81
|
+
{
|
82
|
+
size_t offset = dictionary->buckets[Hash(data)];
|
83
|
+
BROTLI_BOOL end = !offset;
|
84
|
+
while (!end) {
|
85
|
+
DictWord w = dictionary->dict_words[offset++];
|
86
|
+
const size_t l = w.len & 0x1F;
|
87
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
88
|
+
const size_t id = w.idx;
|
89
|
+
end = !!(w.len & 0x80);
|
90
|
+
w.len = (uint8_t)l;
|
91
|
+
if (w.transform == 0) {
|
92
|
+
const size_t matchlen =
|
93
|
+
DictMatchLength(dictionary->words, data, id, l, max_length);
|
94
|
+
const uint8_t* s;
|
95
|
+
size_t minlen;
|
96
|
+
size_t maxlen;
|
97
|
+
size_t len;
|
98
|
+
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
|
99
|
+
if (matchlen == l) {
|
100
|
+
AddMatch(id, l, l, matches);
|
101
|
+
has_found_match = BROTLI_TRUE;
|
102
|
+
}
|
103
|
+
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
|
104
|
+
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
|
105
|
+
if (matchlen >= l - 1) {
|
106
|
+
AddMatch(id + 12 * n, l - 1, l, matches);
|
107
|
+
if (l + 2 < max_length &&
|
108
|
+
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
|
109
|
+
data[l + 2] == ' ') {
|
110
|
+
AddMatch(id + 49 * n, l + 3, l, matches);
|
111
|
+
}
|
112
|
+
has_found_match = BROTLI_TRUE;
|
113
|
+
}
|
114
|
+
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
|
115
|
+
minlen = min_length;
|
116
|
+
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
117
|
+
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
118
|
+
for (len = minlen; len <= maxlen; ++len) {
|
119
|
+
size_t cut = l - len;
|
120
|
+
size_t transform_id = (cut << 2) +
|
121
|
+
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
|
122
|
+
AddMatch(id + transform_id * n, len, l, matches);
|
123
|
+
has_found_match = BROTLI_TRUE;
|
124
|
+
}
|
125
|
+
if (matchlen < l || l + 6 >= max_length) {
|
126
|
+
continue;
|
127
|
+
}
|
128
|
+
s = &data[l];
|
129
|
+
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
|
130
|
+
if (s[0] == ' ') {
|
131
|
+
AddMatch(id + n, l + 1, l, matches);
|
132
|
+
if (s[1] == 'a') {
|
133
|
+
if (s[2] == ' ') {
|
134
|
+
AddMatch(id + 28 * n, l + 3, l, matches);
|
135
|
+
} else if (s[2] == 's') {
|
136
|
+
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
|
137
|
+
} else if (s[2] == 't') {
|
138
|
+
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
|
139
|
+
} else if (s[2] == 'n') {
|
140
|
+
if (s[3] == 'd' && s[4] == ' ') {
|
141
|
+
AddMatch(id + 10 * n, l + 5, l, matches);
|
142
|
+
}
|
143
|
+
}
|
144
|
+
} else if (s[1] == 'b') {
|
145
|
+
if (s[2] == 'y' && s[3] == ' ') {
|
146
|
+
AddMatch(id + 38 * n, l + 4, l, matches);
|
147
|
+
}
|
148
|
+
} else if (s[1] == 'i') {
|
149
|
+
if (s[2] == 'n') {
|
150
|
+
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
151
|
+
} else if (s[2] == 's') {
|
152
|
+
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
|
153
|
+
}
|
154
|
+
} else if (s[1] == 'f') {
|
155
|
+
if (s[2] == 'o') {
|
156
|
+
if (s[3] == 'r' && s[4] == ' ') {
|
157
|
+
AddMatch(id + 25 * n, l + 5, l, matches);
|
158
|
+
}
|
159
|
+
} else if (s[2] == 'r') {
|
160
|
+
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
|
161
|
+
AddMatch(id + 37 * n, l + 6, l, matches);
|
162
|
+
}
|
163
|
+
}
|
164
|
+
} else if (s[1] == 'o') {
|
165
|
+
if (s[2] == 'f') {
|
166
|
+
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
|
167
|
+
} else if (s[2] == 'n') {
|
168
|
+
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
|
169
|
+
}
|
170
|
+
} else if (s[1] == 'n') {
|
171
|
+
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
|
172
|
+
AddMatch(id + 80 * n, l + 5, l, matches);
|
173
|
+
}
|
174
|
+
} else if (s[1] == 't') {
|
175
|
+
if (s[2] == 'h') {
|
176
|
+
if (s[3] == 'e') {
|
177
|
+
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
|
178
|
+
} else if (s[3] == 'a') {
|
179
|
+
if (s[4] == 't' && s[5] == ' ') {
|
180
|
+
AddMatch(id + 29 * n, l + 6, l, matches);
|
181
|
+
}
|
182
|
+
}
|
183
|
+
} else if (s[2] == 'o') {
|
184
|
+
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
|
185
|
+
}
|
186
|
+
} else if (s[1] == 'w') {
|
187
|
+
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
|
188
|
+
AddMatch(id + 35 * n, l + 6, l, matches);
|
189
|
+
}
|
190
|
+
}
|
191
|
+
} else if (s[0] == '"') {
|
192
|
+
AddMatch(id + 19 * n, l + 1, l, matches);
|
193
|
+
if (s[1] == '>') {
|
194
|
+
AddMatch(id + 21 * n, l + 2, l, matches);
|
195
|
+
}
|
196
|
+
} else if (s[0] == '.') {
|
197
|
+
AddMatch(id + 20 * n, l + 1, l, matches);
|
198
|
+
if (s[1] == ' ') {
|
199
|
+
AddMatch(id + 31 * n, l + 2, l, matches);
|
200
|
+
if (s[2] == 'T' && s[3] == 'h') {
|
201
|
+
if (s[4] == 'e') {
|
202
|
+
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
|
203
|
+
} else if (s[4] == 'i') {
|
204
|
+
if (s[5] == 's' && s[6] == ' ') {
|
205
|
+
AddMatch(id + 75 * n, l + 7, l, matches);
|
206
|
+
}
|
207
|
+
}
|
208
|
+
}
|
209
|
+
}
|
210
|
+
} else if (s[0] == ',') {
|
211
|
+
AddMatch(id + 76 * n, l + 1, l, matches);
|
212
|
+
if (s[1] == ' ') {
|
213
|
+
AddMatch(id + 14 * n, l + 2, l, matches);
|
214
|
+
}
|
215
|
+
} else if (s[0] == '\n') {
|
216
|
+
AddMatch(id + 22 * n, l + 1, l, matches);
|
217
|
+
if (s[1] == '\t') {
|
218
|
+
AddMatch(id + 50 * n, l + 2, l, matches);
|
219
|
+
}
|
220
|
+
} else if (s[0] == ']') {
|
221
|
+
AddMatch(id + 24 * n, l + 1, l, matches);
|
222
|
+
} else if (s[0] == '\'') {
|
223
|
+
AddMatch(id + 36 * n, l + 1, l, matches);
|
224
|
+
} else if (s[0] == ':') {
|
225
|
+
AddMatch(id + 51 * n, l + 1, l, matches);
|
226
|
+
} else if (s[0] == '(') {
|
227
|
+
AddMatch(id + 57 * n, l + 1, l, matches);
|
228
|
+
} else if (s[0] == '=') {
|
229
|
+
if (s[1] == '"') {
|
230
|
+
AddMatch(id + 70 * n, l + 2, l, matches);
|
231
|
+
} else if (s[1] == '\'') {
|
232
|
+
AddMatch(id + 86 * n, l + 2, l, matches);
|
233
|
+
}
|
234
|
+
} else if (s[0] == 'a') {
|
235
|
+
if (s[1] == 'l' && s[2] == ' ') {
|
236
|
+
AddMatch(id + 84 * n, l + 3, l, matches);
|
237
|
+
}
|
238
|
+
} else if (s[0] == 'e') {
|
239
|
+
if (s[1] == 'd') {
|
240
|
+
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
|
241
|
+
} else if (s[1] == 'r') {
|
242
|
+
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
|
243
|
+
} else if (s[1] == 's') {
|
244
|
+
if (s[2] == 't' && s[3] == ' ') {
|
245
|
+
AddMatch(id + 95 * n, l + 4, l, matches);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
} else if (s[0] == 'f') {
|
249
|
+
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
|
250
|
+
AddMatch(id + 90 * n, l + 4, l, matches);
|
251
|
+
}
|
252
|
+
} else if (s[0] == 'i') {
|
253
|
+
if (s[1] == 'v') {
|
254
|
+
if (s[2] == 'e' && s[3] == ' ') {
|
255
|
+
AddMatch(id + 92 * n, l + 4, l, matches);
|
256
|
+
}
|
257
|
+
} else if (s[1] == 'z') {
|
258
|
+
if (s[2] == 'e' && s[3] == ' ') {
|
259
|
+
AddMatch(id + 100 * n, l + 4, l, matches);
|
260
|
+
}
|
261
|
+
}
|
262
|
+
} else if (s[0] == 'l') {
|
263
|
+
if (s[1] == 'e') {
|
264
|
+
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
|
265
|
+
AddMatch(id + 93 * n, l + 5, l, matches);
|
266
|
+
}
|
267
|
+
} else if (s[1] == 'y') {
|
268
|
+
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
|
269
|
+
}
|
270
|
+
} else if (s[0] == 'o') {
|
271
|
+
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
|
272
|
+
AddMatch(id + 106 * n, l + 4, l, matches);
|
273
|
+
}
|
274
|
+
}
|
275
|
+
} else {
|
276
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
277
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
278
|
+
transform. */
|
279
|
+
const BROTLI_BOOL is_all_caps =
|
280
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
281
|
+
const uint8_t* s;
|
282
|
+
if (!IsMatch(dictionary->words, w, data, max_length)) {
|
283
|
+
continue;
|
284
|
+
}
|
285
|
+
/* Transform "" + kUppercase{First,All} + "" */
|
286
|
+
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
|
287
|
+
has_found_match = BROTLI_TRUE;
|
288
|
+
if (l + 1 >= max_length) {
|
289
|
+
continue;
|
290
|
+
}
|
291
|
+
/* Transforms "" + kUppercase{First,All} + <suffix> */
|
292
|
+
s = &data[l];
|
293
|
+
if (s[0] == ' ') {
|
294
|
+
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
|
295
|
+
} else if (s[0] == '"') {
|
296
|
+
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
|
297
|
+
if (s[1] == '>') {
|
298
|
+
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
|
299
|
+
}
|
300
|
+
} else if (s[0] == '.') {
|
301
|
+
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
|
302
|
+
if (s[1] == ' ') {
|
303
|
+
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
|
304
|
+
}
|
305
|
+
} else if (s[0] == ',') {
|
306
|
+
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
|
307
|
+
if (s[1] == ' ') {
|
308
|
+
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
|
309
|
+
}
|
310
|
+
} else if (s[0] == '\'') {
|
311
|
+
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
|
312
|
+
} else if (s[0] == '(') {
|
313
|
+
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
|
314
|
+
} else if (s[0] == '=') {
|
315
|
+
if (s[1] == '"') {
|
316
|
+
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
|
317
|
+
} else if (s[1] == '\'') {
|
318
|
+
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
|
319
|
+
}
|
320
|
+
}
|
321
|
+
}
|
322
|
+
}
|
323
|
+
}
|
324
|
+
/* Transforms with prefixes " " and "." */
|
325
|
+
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
326
|
+
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
327
|
+
size_t offset = dictionary->buckets[Hash(&data[1])];
|
328
|
+
BROTLI_BOOL end = !offset;
|
329
|
+
while (!end) {
|
330
|
+
DictWord w = dictionary->dict_words[offset++];
|
331
|
+
const size_t l = w.len & 0x1F;
|
332
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
333
|
+
const size_t id = w.idx;
|
334
|
+
end = !!(w.len & 0x80);
|
335
|
+
w.len = (uint8_t)l;
|
336
|
+
if (w.transform == 0) {
|
337
|
+
const uint8_t* s;
|
338
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
339
|
+
continue;
|
340
|
+
}
|
341
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
|
342
|
+
"." + BROTLI_TRANSFORM_IDENTITY + "" */
|
343
|
+
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
344
|
+
has_found_match = BROTLI_TRUE;
|
345
|
+
if (l + 2 >= max_length) {
|
346
|
+
continue;
|
347
|
+
}
|
348
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
|
349
|
+
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
|
350
|
+
*/
|
351
|
+
s = &data[l + 1];
|
352
|
+
if (s[0] == ' ') {
|
353
|
+
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
354
|
+
} else if (s[0] == '(') {
|
355
|
+
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
|
356
|
+
} else if (is_space) {
|
357
|
+
if (s[0] == ',') {
|
358
|
+
AddMatch(id + 103 * n, l + 2, l, matches);
|
359
|
+
if (s[1] == ' ') {
|
360
|
+
AddMatch(id + 33 * n, l + 3, l, matches);
|
361
|
+
}
|
362
|
+
} else if (s[0] == '.') {
|
363
|
+
AddMatch(id + 71 * n, l + 2, l, matches);
|
364
|
+
if (s[1] == ' ') {
|
365
|
+
AddMatch(id + 52 * n, l + 3, l, matches);
|
366
|
+
}
|
367
|
+
} else if (s[0] == '=') {
|
368
|
+
if (s[1] == '"') {
|
369
|
+
AddMatch(id + 81 * n, l + 3, l, matches);
|
370
|
+
} else if (s[1] == '\'') {
|
371
|
+
AddMatch(id + 98 * n, l + 3, l, matches);
|
372
|
+
}
|
373
|
+
}
|
374
|
+
}
|
375
|
+
} else if (is_space) {
|
376
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
377
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
378
|
+
transform. */
|
379
|
+
const BROTLI_BOOL is_all_caps =
|
380
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
381
|
+
const uint8_t* s;
|
382
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
383
|
+
continue;
|
384
|
+
}
|
385
|
+
/* Transforms " " + kUppercase{First,All} + "" */
|
386
|
+
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
|
387
|
+
has_found_match = BROTLI_TRUE;
|
388
|
+
if (l + 2 >= max_length) {
|
389
|
+
continue;
|
390
|
+
}
|
391
|
+
/* Transforms " " + kUppercase{First,All} + <suffix> */
|
392
|
+
s = &data[l + 1];
|
393
|
+
if (s[0] == ' ') {
|
394
|
+
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
|
395
|
+
} else if (s[0] == ',') {
|
396
|
+
if (!is_all_caps) {
|
397
|
+
AddMatch(id + 109 * n, l + 2, l, matches);
|
398
|
+
}
|
399
|
+
if (s[1] == ' ') {
|
400
|
+
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
|
401
|
+
}
|
402
|
+
} else if (s[0] == '.') {
|
403
|
+
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
|
404
|
+
if (s[1] == ' ') {
|
405
|
+
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
|
406
|
+
}
|
407
|
+
} else if (s[0] == '=') {
|
408
|
+
if (s[1] == '"') {
|
409
|
+
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
|
410
|
+
} else if (s[1] == '\'') {
|
411
|
+
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
|
412
|
+
}
|
413
|
+
}
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
if (max_length >= 6) {
|
418
|
+
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
|
419
|
+
if ((data[1] == ' ' &&
|
420
|
+
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
421
|
+
(data[0] == 0xC2 && data[1] == 0xA0)) {
|
422
|
+
size_t offset = dictionary->buckets[Hash(&data[2])];
|
423
|
+
BROTLI_BOOL end = !offset;
|
424
|
+
while (!end) {
|
425
|
+
DictWord w = dictionary->dict_words[offset++];
|
426
|
+
const size_t l = w.len & 0x1F;
|
427
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
428
|
+
const size_t id = w.idx;
|
429
|
+
end = !!(w.len & 0x80);
|
430
|
+
w.len = (uint8_t)l;
|
431
|
+
if (w.transform == 0 &&
|
432
|
+
IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
|
433
|
+
if (data[0] == 0xC2) {
|
434
|
+
AddMatch(id + 102 * n, l + 2, l, matches);
|
435
|
+
has_found_match = BROTLI_TRUE;
|
436
|
+
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
437
|
+
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
438
|
+
AddMatch(id + t * n, l + 3, l, matches);
|
439
|
+
has_found_match = BROTLI_TRUE;
|
440
|
+
}
|
441
|
+
}
|
442
|
+
}
|
443
|
+
}
|
444
|
+
}
|
445
|
+
if (max_length >= 9) {
|
446
|
+
/* Transforms with prefixes " the " and ".com/" */
|
447
|
+
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
448
|
+
data[3] == 'e' && data[4] == ' ') ||
|
449
|
+
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
450
|
+
data[3] == 'm' && data[4] == '/')) {
|
451
|
+
size_t offset = dictionary->buckets[Hash(&data[5])];
|
452
|
+
BROTLI_BOOL end = !offset;
|
453
|
+
while (!end) {
|
454
|
+
DictWord w = dictionary->dict_words[offset++];
|
455
|
+
const size_t l = w.len & 0x1F;
|
456
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
457
|
+
const size_t id = w.idx;
|
458
|
+
end = !!(w.len & 0x80);
|
459
|
+
w.len = (uint8_t)l;
|
460
|
+
if (w.transform == 0 &&
|
461
|
+
IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
|
462
|
+
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
463
|
+
has_found_match = BROTLI_TRUE;
|
464
|
+
if (l + 5 < max_length) {
|
465
|
+
const uint8_t* s = &data[l + 5];
|
466
|
+
if (data[0] == ' ') {
|
467
|
+
if (l + 8 < max_length &&
|
468
|
+
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
|
469
|
+
AddMatch(id + 62 * n, l + 9, l, matches);
|
470
|
+
if (l + 12 < max_length &&
|
471
|
+
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
|
472
|
+
AddMatch(id + 73 * n, l + 13, l, matches);
|
473
|
+
}
|
474
|
+
}
|
475
|
+
}
|
476
|
+
}
|
477
|
+
}
|
478
|
+
}
|
479
|
+
}
|
480
|
+
}
|
481
|
+
return has_found_match;
|
482
|
+
}
|
483
|
+
|
484
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
485
|
+
} /* extern "C" */
|
486
|
+
#endif
|
@@ -0,0 +1,40 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Class to model the static dictionary. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_STATIC_DICT_H_
|
10
|
+
#define BROTLI_ENC_STATIC_DICT_H_
|
11
|
+
|
12
|
+
#include "brotli_common_dictionary.h"
|
13
|
+
#include "brotli_common_platform.h"
|
14
|
+
#include "brotli_types.h"
|
15
|
+
#include "brotli_enc_encoder_dict.h"
|
16
|
+
|
17
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37  /* the matches[] array handed to BrotliFindAllStaticDictionaryMatches must hold at least this + 1 entries */
|
22
|
+
static const uint32_t kInvalidMatch = 0xFFFFFFF;  /* value every matches[] slot must be initialized to; note: seven hex digits (0x0FFFFFFF), not UINT32_MAX */
|
23
|
+
|
24
|
+
/* Matches data against static dictionary words, and for each length l,
|
25
|
+
for which a match is found, updates matches[l] to be the minimum possible
|
26
|
+
(distance << 5) + len_code.
|
27
|
+
Returns 1 if matches have been found, otherwise 0.
|
28
|
+
Prerequisites:
|
29
|
+
- the matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long;
|
30
|
+
- all of its elements are initialized to kInvalidMatch.
Parameters:
- dictionary: encoder dictionary; the implementation consults its buckets,
  dict_words and words members.
- data: input bytes to match. Besides matching at data[0], the implementation
  also looks for dictionary words after the prefixes " " and "." (tried only
  when max_length >= 5), "e ", "s ", ", " and "\xC2\xA0" (max_length >= 6),
  and " the " and ".com/" (max_length >= 9).
- min_length: NOTE(review): presumably the shortest match length of interest;
  its use is not visible from this part of the file -- confirm against the
  implementation.
- max_length: bound handed to the word comparison (reduced by the prefix
  length for prefixed transforms).
- matches: in/out array indexed by match length, see above. */
|
31
|
+
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
32
|
+
const BrotliEncoderDictionary* dictionary,
|
33
|
+
const uint8_t* data, size_t min_length, size_t max_length,
|
34
|
+
uint32_t* matches);
|
35
|
+
|
36
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
37
|
+
} /* extern "C" */
|
38
|
+
#endif
|
39
|
+
|
40
|
+
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
|