isomorfeus-ferret 0.12.4 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +612 -612
- data/README.md +77 -48
- data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
- data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
- data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
- data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
- data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
- data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
- data/ext/isomorfeus_ferret_ext/test.c +7 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +125 -5
- data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
data/README.md
CHANGED
@@ -1,48 +1,77 @@
|
|
1
|
-
<h1 align="center">
|
2
|
-
<img src="https://github.com/isomorfeus/isomorfeus-ferret/blob/master/Logo.png?raw=true" align="center" width="216" height="234" />
|
3
|
-
<br/>
|
4
|
-
Isomorfeus Ferret<br/>
|
5
|
-
</h1>
|
6
|
-
|
7
|
-
Convenient and well performing document store, indexing and search.
|
8
|
-
|
9
|
-
### Community and Support
|
10
|
-
At the [Isomorfeus Framework Project](
|
11
|
-
|
12
|
-
## About this project
|
13
|
-
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
15
|
-
During revival many things have been fixed; now all tests pass, there are no crashes, and it
|
16
|
-
successfully compiles and runs with Rubies > 3. It's no longer a goal to have
|
17
|
-
a c library available, but instead the usage is meant as ruby gem with a c extension only.
|
18
|
-
|
19
|
-
It should work on *nixes and *nuxes and also works on Windows.
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
https://github.com/isomorfeus/isomorfeus-ferret/blob/master/
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
https://github.com/isomorfeus/isomorfeus-ferret/blob/master/
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
1
|
+
<h1 align="center">
|
2
|
+
<img src="https://github.com/isomorfeus/isomorfeus-ferret/blob/master/Logo.png?raw=true" align="center" width="216" height="234" />
|
3
|
+
<br/>
|
4
|
+
Isomorfeus Ferret<br/>
|
5
|
+
</h1>
|
6
|
+
|
7
|
+
Convenient and well performing document store, indexing and search.
|
8
|
+
|
9
|
+
### Community and Support
|
10
|
+
At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
11
|
+
|
12
|
+
## About this project
|
13
|
+
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
15
|
+
During revival many things have been fixed; now all tests pass, there are no crashes, and it
|
16
|
+
successfully compiles and runs with Rubies > 3. It's no longer a goal to have
|
17
|
+
a C library available; instead it is meant to be used as a Ruby gem with a C extension only.
|
18
|
+
|
19
|
+
It should work on *nixes and *nuxes and also works on Windows.
|
20
|
+
|
21
|
+
## Documentation
|
22
|
+
|
23
|
+
The documentation is currently scattered throughout the repo.
|
24
|
+
|
25
|
+
For a quick start it's best to read:
|
26
|
+
https://github.com/isomorfeus/isomorfeus-ferret/blob/master/TUTORIAL.md
|
27
|
+
|
28
|
+
Further:
|
29
|
+
https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferret/index/index.rb
|
30
|
+
https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferret/document.rb
|
31
|
+
|
32
|
+
The query language and parser are documented here:
|
33
|
+
https://github.com/isomorfeus/isomorfeus-ferret/blob/master/ext/isomorfeus_ferret_ext/frb_qparser.c
|
34
|
+
|
35
|
+
Examples can be found in the 'test' directory or in 'misc/ferret_vs_lucene'.
|
36
|
+
|
37
|
+
## Running Specs
|
38
|
+
|
39
|
+
- clone repo
|
40
|
+
- bundle install
|
41
|
+
- rake
|
42
|
+
|
43
|
+
Ensure your locale is set to C.UTF-8, because the internal c tests don't know how to handle localized output.
|
44
|
+
|
45
|
+
## Benchmarks
|
46
|
+
|
47
|
+
- clone repo
|
48
|
+
- bundle install
|
49
|
+
- rake ferret_vs_lucene
|
50
|
+
|
51
|
+
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
52
|
+
|
53
|
+
Results on Linux:
|
54
|
+
```
|
55
|
+
Ferret:
|
56
|
+
Indexing Secs: 7.36 Docs: 19043, 2587 docs/s
|
57
|
+
Searching took: 0.3366296s for 8000 queries
|
58
|
+
thats 23765 q/s
|
59
|
+
|
60
|
+
Lucene:
|
61
|
+
Indexing Secs: 4.22 Docs: 19043, 4516 docs/s
|
62
|
+
Searching took: 1.48s for 8000 queries
|
63
|
+
thats 5420 q/s
|
64
|
+
---------------------------------------------------
|
65
|
+
Lucene 9.0.0 0b18b3b965cedaf5eb129aa41243a44c83ca826d - jpountz - 2021-12-01 14:23:49
|
66
|
+
JVM 17.0.1 (Private Build)
|
67
|
+
```
|
68
|
+
|
69
|
+
## Future
|
70
|
+
|
71
|
+
Lots of things to do:
|
72
|
+
- Bring documentation in order in a docs directory
|
73
|
+
- Review code (especially for memory/stack issues, typical c issues)
|
74
|
+
- Take care of ruby GVL and threading
|
75
|
+
- See todo directory: https://github.com/isomorfeus/isomorfeus-ferret/tree/master/misc/todo
|
76
|
+
|
77
|
+
Any help or support is much appreciated!
|
@@ -7,20 +7,21 @@
|
|
7
7
|
static void ferret_hash()
|
8
8
|
{
|
9
9
|
int i;
|
10
|
+
void *res = NULL;
|
10
11
|
for (i = 0; i < N; i++) {
|
11
12
|
FrtHash *h = frt_h_new_str(NULL, NULL);
|
12
13
|
const char **word;
|
13
14
|
char buf[100];
|
14
|
-
long res;
|
15
15
|
for (word = WORD_LIST; *word; word++) {
|
16
16
|
frt_h_set(h, *word, (void *)1);
|
17
17
|
}
|
18
18
|
for (word = WORD_LIST; *word; word++) {
|
19
19
|
strcpy(buf, *word);
|
20
|
-
res =
|
20
|
+
res = frt_h_get(h, buf);
|
21
21
|
}
|
22
22
|
frt_h_destroy(h);
|
23
23
|
}
|
24
|
+
(void)res;
|
24
25
|
}
|
25
26
|
|
26
27
|
BENCH(hash_implementations)
|
@@ -31,18 +32,19 @@ BENCH(hash_implementations)
|
|
31
32
|
static void standard_hash()
|
32
33
|
{
|
33
34
|
int i;
|
35
|
+
void *res = NULL;
|
34
36
|
for (i = 0; i < N; i++) {
|
35
37
|
FrtHash *h = frt_h_new_str(NULL, NULL);
|
36
38
|
const char **word;
|
37
39
|
char buf[100];
|
38
|
-
long res;
|
39
40
|
for (word = WORD_LIST; *word; word++) {
|
40
41
|
frt_h_set(h, *word, (void *)1);
|
41
42
|
strcpy(buf, *word);
|
42
|
-
res =
|
43
|
+
res = frt_h_get(h, buf);
|
43
44
|
}
|
44
45
|
frt_h_destroy(h);
|
45
46
|
}
|
47
|
+
(void)res;
|
46
48
|
}
|
47
49
|
|
48
50
|
#define PERTURB_SHIFT 5
|
@@ -96,19 +98,20 @@ static FrtHashEntry *h_lookup_str(FrtHash *ht, register const void *key)
|
|
96
98
|
static void string_hash()
|
97
99
|
{
|
98
100
|
int i;
|
101
|
+
void *res = NULL;
|
99
102
|
for (i = 0; i < N; i++) {
|
100
103
|
FrtHash *h = frt_h_new_str(NULL, NULL);
|
101
104
|
const char **word;
|
102
105
|
char buf[100];
|
103
|
-
long res;
|
104
106
|
h->lookup_i = &h_lookup_str;
|
105
107
|
for (word = WORD_LIST; *word; word++) {
|
106
108
|
frt_h_set(h, *word, (void *)1);
|
107
109
|
strcpy(buf, *word);
|
108
|
-
res =
|
110
|
+
res = frt_h_get(h, buf);
|
109
111
|
}
|
110
112
|
frt_h_destroy(h);
|
111
113
|
}
|
114
|
+
(void)res;
|
112
115
|
}
|
113
116
|
|
114
117
|
BENCH(specialized_string_hash)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
static void do_strcmp()
|
7
7
|
{
|
8
|
-
char **word;
|
8
|
+
const char **word;
|
9
9
|
char buf[100];
|
10
10
|
int res, i;
|
11
11
|
|
@@ -15,11 +15,12 @@ static void do_strcmp()
|
|
15
15
|
memcpy(buf, *word, len+1);
|
16
16
|
res = strcmp(buf, *word);
|
17
17
|
}
|
18
|
+
(void)res;
|
18
19
|
}
|
19
20
|
|
20
21
|
static void do_strncmp()
|
21
22
|
{
|
22
|
-
char **word;
|
23
|
+
const char **word;
|
23
24
|
char buf[100];
|
24
25
|
int res, i;
|
25
26
|
|
@@ -29,6 +30,7 @@ static void do_strncmp()
|
|
29
30
|
memcpy(buf, *word, len+1);
|
30
31
|
res = strncmp(buf, *word, len + 1);
|
31
32
|
}
|
33
|
+
(void)res;
|
32
34
|
}
|
33
35
|
|
34
36
|
BENCH(strcmp_when_length_is_known)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include "brotli_common_constants.h"
|
8
|
+
|
9
|
+
const BrotliPrefixCodeRange
|
10
|
+
_kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
|
11
|
+
{1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3},
|
12
|
+
{33, 3}, {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4},
|
13
|
+
{113, 5}, {145, 5}, {177, 5}, {209, 5}, {241, 6}, {305, 6},
|
14
|
+
{369, 7}, {497, 8}, {753, 9}, {1265, 10}, {2289, 11}, {4337, 12},
|
15
|
+
{8433, 13}, {16625, 24}};
|
@@ -0,0 +1,200 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/**
|
8
|
+
* @file
|
9
|
+
* Common constants used in decoder and encoder API.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#ifndef BROTLI_COMMON_CONSTANTS_H_
|
13
|
+
#define BROTLI_COMMON_CONSTANTS_H_
|
14
|
+
|
15
|
+
#include "brotli_common_platform.h"
|
16
|
+
#include "brotli_port.h"
|
17
|
+
#include "brotli_types.h"
|
18
|
+
|
19
|
+
/* Specification: 7.3. Encoding of the context map */
|
20
|
+
#define BROTLI_CONTEXT_MAP_MAX_RLE 16
|
21
|
+
|
22
|
+
/* Specification: 2. Compressed representation overview */
|
23
|
+
#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
|
24
|
+
|
25
|
+
/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
|
26
|
+
#define BROTLI_NUM_LITERAL_SYMBOLS 256
|
27
|
+
#define BROTLI_NUM_COMMAND_SYMBOLS 704
|
28
|
+
#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
|
29
|
+
#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
|
30
|
+
BROTLI_CONTEXT_MAP_MAX_RLE)
|
31
|
+
#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
|
32
|
+
|
33
|
+
/* Specification: 3.5. Complex prefix codes */
|
34
|
+
#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
|
35
|
+
#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
|
36
|
+
#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
|
37
|
+
/* "code length of 8 is repeated" */
|
38
|
+
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
|
39
|
+
|
40
|
+
/* "Large Window Brotli" */
|
41
|
+
|
42
|
+
/**
|
43
|
+
* The theoretical maximum number of distance bits specified for large window
|
44
|
+
* brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
|
45
|
+
* encoders and decoders only support up to 30 max distance bits, the value is
|
46
|
+
* set to 62 because it affects the large window brotli file format.
|
47
|
+
* Specifically, it affects the encoding of simple huffman tree for distances,
|
48
|
+
* see Specification RFC 7932 chapter 3.4.
|
49
|
+
*/
|
50
|
+
#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
|
51
|
+
#define BROTLI_LARGE_MIN_WBITS 10
|
52
|
+
/**
|
53
|
+
* The maximum supported large brotli window bits by the encoder and decoder.
|
54
|
+
* Large window brotli allows up to 62 bits, however the current encoder and
|
55
|
+
* decoder, designed for 32-bit integers, only support up to 30 bits maximum.
|
56
|
+
*/
|
57
|
+
#define BROTLI_LARGE_MAX_WBITS 30
|
58
|
+
|
59
|
+
/* Specification: 4. Encoding of distances */
|
60
|
+
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
|
61
|
+
/**
|
62
|
+
* Maximal number of "postfix" bits.
|
63
|
+
*
|
64
|
+
* Number of "postfix" bits is stored as 2 bits in meta-block header.
|
65
|
+
*/
|
66
|
+
#define BROTLI_MAX_NPOSTFIX 3
|
67
|
+
#define BROTLI_MAX_NDIRECT 120
|
68
|
+
#define BROTLI_MAX_DISTANCE_BITS 24U
|
69
|
+
#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
|
70
|
+
BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \
|
71
|
+
((MAXNBITS) << ((NPOSTFIX) + 1)))
|
72
|
+
/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */
|
73
|
+
#define BROTLI_NUM_DISTANCE_SYMBOLS \
|
74
|
+
BROTLI_DISTANCE_ALPHABET_SIZE( \
|
75
|
+
BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
|
76
|
+
|
77
|
+
/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
|
78
|
+
brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
|
79
|
+
NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
|
80
|
+
#define BROTLI_MAX_DISTANCE 0x3FFFFFC
|
81
|
+
|
82
|
+
/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
|
83
|
+
allows safe distance calculation without overflows, given the distance
|
84
|
+
alphabet size is limited to corresponding size
|
85
|
+
(see kLargeWindowDistanceCodeLimits). */
|
86
|
+
#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
|
87
|
+
|
88
|
+
|
89
|
+
/* Specification: 4. Encoding of Literal Insertion Lengths and Copy Lengths */
|
90
|
+
#define BROTLI_NUM_INS_COPY_CODES 24
|
91
|
+
|
92
|
+
/* 7.1. Context modes and context ID lookup for literals */
|
93
|
+
/* "context IDs for literals are in the range of 0..63" */
|
94
|
+
#define BROTLI_LITERAL_CONTEXT_BITS 6
|
95
|
+
|
96
|
+
/* 7.2. Context ID for distances */
|
97
|
+
#define BROTLI_DISTANCE_CONTEXT_BITS 2
|
98
|
+
|
99
|
+
/* 9.1. Format of the Stream Header */
|
100
|
+
/* Number of slack bytes for window size. Don't confuse
|
101
|
+
with BROTLI_NUM_DISTANCE_SHORT_CODES. */
|
102
|
+
#define BROTLI_WINDOW_GAP 16
|
103
|
+
#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
|
104
|
+
|
105
|
+
typedef struct BrotliDistanceCodeLimit {
|
106
|
+
uint32_t max_alphabet_size;
|
107
|
+
uint32_t max_distance;
|
108
|
+
} BrotliDistanceCodeLimit;
|
109
|
+
|
110
|
+
/* This function calculates maximal size of distance alphabet, such that the
|
111
|
+
distances greater than the given values can not be represented.
|
112
|
+
|
113
|
+
These limits are designed to support fast and safe 32-bit decoders.
|
114
|
+
"32-bit" means that signed integer values up to ((1 << 31) - 1) could be
|
115
|
+
safely expressed.
|
116
|
+
|
117
|
+
Brotli distance alphabet symbols do not represent consecutive distance
|
118
|
+
ranges. Each distance alphabet symbol (excluding direct distances and short
|
119
|
+
codes) represents an interleaved (for NPOSTFIX > 0) range of distances.
|
120
|
+
A "group" of consecutive (1 << NPOSTFIX) symbols represents a non-interleaved
|
121
|
+
range. Two consecutive groups require the same amount of "extra bits".
|
122
|
+
|
123
|
+
It is important that distance alphabet represents complete "groups".
|
124
|
+
To avoid complex logic on encoder side about interleaved ranges
|
125
|
+
it was decided to restrict both sides to complete distance code "groups".
|
126
|
+
*/
|
127
|
+
BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
|
128
|
+
uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
|
129
|
+
BrotliDistanceCodeLimit result;
|
130
|
+
/* Marking this function as unused, because not all files
|
131
|
+
including "constants.h" use it -> compiler warns about that. */
|
132
|
+
BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
|
133
|
+
if (max_distance <= ndirect) {
|
134
|
+
/* This case never happens / exists only for the sake of completeness. */
|
135
|
+
result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
|
136
|
+
result.max_distance = max_distance;
|
137
|
+
return result;
|
138
|
+
} else {
|
139
|
+
/* The first prohibited value. */
|
140
|
+
uint32_t forbidden_distance = max_distance + 1;
|
141
|
+
/* Subtract "directly" encoded region. */
|
142
|
+
uint32_t offset = forbidden_distance - ndirect - 1;
|
143
|
+
uint32_t ndistbits = 0;
|
144
|
+
uint32_t tmp;
|
145
|
+
uint32_t half;
|
146
|
+
uint32_t group;
|
147
|
+
/* Postfix for the last dcode in the group. */
|
148
|
+
uint32_t postfix = (1u << npostfix) - 1;
|
149
|
+
uint32_t extra;
|
150
|
+
uint32_t start;
|
151
|
+
/* Remove postfix and "head-start". */
|
152
|
+
offset = (offset >> npostfix) + 4;
|
153
|
+
/* Calculate the number of distance bits. */
|
154
|
+
tmp = offset / 2;
|
155
|
+
/* Poor-man's log2floor, to avoid extra dependencies. */
|
156
|
+
while (tmp != 0) {ndistbits++; tmp = tmp >> 1;}
|
157
|
+
/* One bit is covered with subrange addressing ("half"). */
|
158
|
+
ndistbits--;
|
159
|
+
/* Find subrange. */
|
160
|
+
half = (offset >> ndistbits) & 1;
|
161
|
+
/* Calculate the "group" part of dcode. */
|
162
|
+
group = ((ndistbits - 1) << 1) | half;
|
163
|
+
/* Calculated "group" covers the prohibited distance value. */
|
164
|
+
if (group == 0) {
|
165
|
+
/* This case is added for correctness; does not occur for limit > 128. */
|
166
|
+
result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
|
167
|
+
result.max_distance = ndirect;
|
168
|
+
return result;
|
169
|
+
}
|
170
|
+
/* Decrement "group", so it is the last permitted "group". */
|
171
|
+
group--;
|
172
|
+
/* After group was decremented, ndistbits and half must be recalculated. */
|
173
|
+
ndistbits = (group >> 1) + 1;
|
174
|
+
/* The last available distance in the subrange has all extra bits set. */
|
175
|
+
extra = (1u << ndistbits) - 1;
|
176
|
+
/* Calculate region start. NB: ndistbits >= 1. */
|
177
|
+
start = (1u << (ndistbits + 1)) - 4;
|
178
|
+
/* Move to subregion. */
|
179
|
+
start += (group & 1) << ndistbits;
|
180
|
+
/* Calculate the alphabet size. */
|
181
|
+
result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect +
|
182
|
+
BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
|
183
|
+
/* Calculate the maximal distance representable by alphabet. */
|
184
|
+
result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1;
|
185
|
+
return result;
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
/* Represents the range of values belonging to a prefix code:
|
190
|
+
[offset, offset + 2^nbits) */
|
191
|
+
typedef struct {
|
192
|
+
uint16_t offset;
|
193
|
+
uint8_t nbits;
|
194
|
+
} BrotliPrefixCodeRange;
|
195
|
+
|
196
|
+
/* "Soft-private", it is exported, but not "advertised" as API. */
|
197
|
+
BROTLI_COMMON_API extern const BrotliPrefixCodeRange
|
198
|
+
_kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
|
199
|
+
|
200
|
+
#endif /* BROTLI_COMMON_CONSTANTS_H_ */
|
@@ -0,0 +1,156 @@
|
|
1
|
+
#include "brotli_common_context.h"
|
2
|
+
|
3
|
+
#include "brotli_types.h"
|
4
|
+
|
5
|
+
/* Common context lookup table for all context modes. */
|
6
|
+
const uint8_t _kBrotliContextLookupTable[2048] = {
|
7
|
+
/* CONTEXT_LSB6, last byte. */
|
8
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
9
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
10
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
11
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
12
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
13
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
14
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
15
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
16
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
17
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
18
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
19
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
20
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
21
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
22
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
23
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
24
|
+
|
25
|
+
/* CONTEXT_LSB6, second last byte. */
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
31
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
32
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
38
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
39
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
41
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
42
|
+
|
43
|
+
/* CONTEXT_MSB6, last byte. */
|
44
|
+
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
45
|
+
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
46
|
+
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
|
47
|
+
12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
|
48
|
+
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
49
|
+
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
50
|
+
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
51
|
+
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
52
|
+
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
53
|
+
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
54
|
+
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
55
|
+
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
56
|
+
48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
|
57
|
+
52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
|
58
|
+
56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
|
59
|
+
60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
60
|
+
|
61
|
+
/* CONTEXT_MSB6, second last byte. */
|
62
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
63
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
64
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
65
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
66
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
67
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
68
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
69
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
70
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
71
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
72
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
73
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
74
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
75
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
76
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
77
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
78
|
+
|
79
|
+
/* CONTEXT_UTF8, last byte. */
|
80
|
+
/* ASCII range. */
|
81
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
82
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
83
|
+
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
|
84
|
+
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
|
85
|
+
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
|
86
|
+
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
|
87
|
+
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
|
88
|
+
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
|
89
|
+
/* UTF8 continuation byte range. */
|
90
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
91
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
92
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
93
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
94
|
+
/* UTF8 lead byte range. */
|
95
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
96
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
97
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
98
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
99
|
+
|
100
|
+
/* CONTEXT_UTF8, second last byte. */
|
101
|
+
/* ASCII range. */
|
102
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
103
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
104
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
105
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
106
|
+
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
107
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
|
108
|
+
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
109
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
|
110
|
+
/* UTF8 continuation byte range. */
|
111
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
112
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
113
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
114
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
115
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
116
|
+
/* UTF8 lead byte range. */
|
117
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
118
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
119
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
120
|
+
|
121
|
+
/* CONTEXT_SIGNED, last byte: the second-last-byte values below, shifted left by 3 bits. */
|
122
|
+
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
123
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
124
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
125
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
126
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
127
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
128
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
129
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
130
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
131
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
132
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
133
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
134
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
135
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
136
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
137
|
+
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
|
138
|
+
|
139
|
+
/* CONTEXT_SIGNED, second last byte. */
|
140
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
141
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
142
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
143
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
144
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
145
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
146
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
147
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
148
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
149
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
150
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
151
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
152
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
153
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
154
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
155
|
+
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
156
|
+
};
|