isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
data/README.md CHANGED
@@ -1,48 +1,77 @@
1
- <h1 align="center">
2
- <img src="https://github.com/isomorfeus/isomorfeus-ferret/blob/master/Logo.png?raw=true" align="center" width="216" height="234" />
3
- <br/>
4
- &nbsp;&nbsp;&nbsp;Isomorfeus Ferret<br/>
5
- </h1>
6
-
7
- Convenient and well performing document store, indexing and search.
8
-
9
- ### Community and Support
10
- At the [Isomorfeus Framework Project](http://isomorfeus.com)
11
-
12
- ## About this project
13
-
14
- Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
15
- During revival many things havbe been fixed, now all tests pass, no crashes and it
16
- successfully compiles and runs with rubys >3. Its no longer a goal to have
17
- a c library available, but instead the usage is meant as ruby gem with a c extension only.
18
-
19
- It should work on *nixes and *nuxes and also works on Windows.
20
-
21
- However, the revival is still fresh and although it appears to be working, issues have to be expected.
22
-
23
-
24
- ## Documentation
25
-
26
- The documentations is currently scattered throughout the repo.
27
-
28
- For a quick start its best to read:
29
- https://github.com/isomorfeus/isomorfeus-ferret/blob/master/TUTORIAL.md
30
-
31
- Further:
32
- https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferret/index/index.rb
33
- https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferret/document.rb
34
-
35
- The query language and parser are documented here:
36
- https://github.com/isomorfeus/isomorfeus-ferret/blob/master/ext/isomorfeus_ferret_ext/frb_qparser.c
37
-
38
- Examples can be found in the 'test' directory or in 'misc/ferret_vs_lucene'.
39
- ## Future
40
-
41
- Lots of things to do:
42
- - Bring documentation in order in a docs directory
43
- - Review code (especially for memory/stack issues, typical c issues)
44
- - Take care of ruby GVL and threading
45
- - Check locking (thread and filesystem)
46
- - See todo directory: https://github.com/isomorfeus/isomorfeus-ferret/tree/master/misc/todo
47
-
48
- Any help, support much appreciated!
1
+ <h1 align="center">
2
+ <img src="https://github.com/isomorfeus/isomorfeus-ferret/blob/master/Logo.png?raw=true" align="center" width="216" height="234" />
3
+ <br/>
4
+ &nbsp;&nbsp;&nbsp;Isomorfeus Ferret<br/>
5
+ </h1>
6
+
7
+ Convenient and well performing document store, indexing and search.
8
+
9
+ ### Community and Support
10
+ At the [Isomorfeus Framework Project](https://isomorfeus.com)
11
+
12
+ ## About this project
13
+
14
+ Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
15
+ During revival many things have been fixed: all tests now pass, there are no crashes, and it
16
+ successfully compiles and runs with Ruby versions >3. It's no longer a goal to have
17
+ a c library available, but instead the usage is meant as ruby gem with a c extension only.
18
+
19
+ It should work on *nixes and *nuxes and also works on Windows.
20
+
21
+ ## Documentation
22
+
23
+ The documentation is currently scattered throughout the repo.
24
+
25
+ For a quick start it's best to read:
26
+ https://github.com/isomorfeus/isomorfeus-ferret/blob/master/TUTORIAL.md
27
+
28
+ Further:
29
+ https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferret/index/index.rb
30
+ https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferret/document.rb
31
+
32
+ The query language and parser are documented here:
33
+ https://github.com/isomorfeus/isomorfeus-ferret/blob/master/ext/isomorfeus_ferret_ext/frb_qparser.c
34
+
35
+ Examples can be found in the 'test' directory or in 'misc/ferret_vs_lucene'.
36
+
37
+ ## Running Specs
38
+
39
+ - clone repo
40
+ - bundle install
41
+ - rake
42
+
43
+ Ensure your locale is set to C.UTF-8, because the internal c tests don't know how to handle localized output.
44
+
45
+ ## Benchmarks
46
+
47
+ - clone repo
48
+ - bundle install
49
+ - rake ferret_vs_lucene
50
+
51
+ A recent Java JDK must be installed to compile and run lucene benchmarks.
52
+
53
+ Results on Linux:
54
+ ```
55
+ Ferret:
56
+ Indexing Secs: 7.36 Docs: 19043, 2587 docs/s
57
+ Searching took: 0.3366296s for 8000 queries
58
+ thats 23765 q/s
59
+
60
+ Lucene:
61
+ Indexing Secs: 4.22 Docs: 19043, 4516 docs/s
62
+ Searching took: 1.48s for 8000 queries
63
+ thats 5420 q/s
64
+ ---------------------------------------------------
65
+ Lucene 9.0.0 0b18b3b965cedaf5eb129aa41243a44c83ca826d - jpountz - 2021-12-01 14:23:49
66
+ JVM 17.0.1 (Private Build)
67
+ ```
68
+
69
+ ## Future
70
+
71
+ Lots of things to do:
72
+ - Bring documentation in order in a docs directory
73
+ - Review code (especially for memory/stack issues, typical c issues)
74
+ - Take care of ruby GVL and threading
75
+ - See todo directory: https://github.com/isomorfeus/isomorfeus-ferret/tree/master/misc/todo
76
+
77
+ Any help or support is much appreciated!
@@ -7,20 +7,21 @@
7
7
  static void ferret_hash()
8
8
  {
9
9
  int i;
10
+ void *res = NULL;
10
11
  for (i = 0; i < N; i++) {
11
12
  FrtHash *h = frt_h_new_str(NULL, NULL);
12
13
  const char **word;
13
14
  char buf[100];
14
- long res;
15
15
  for (word = WORD_LIST; *word; word++) {
16
16
  frt_h_set(h, *word, (void *)1);
17
17
  }
18
18
  for (word = WORD_LIST; *word; word++) {
19
19
  strcpy(buf, *word);
20
- res = (long)frt_h_get(h, buf);
20
+ res = frt_h_get(h, buf);
21
21
  }
22
22
  frt_h_destroy(h);
23
23
  }
24
+ (void)res;
24
25
  }
25
26
 
26
27
  BENCH(hash_implementations)
@@ -31,18 +32,19 @@ BENCH(hash_implementations)
31
32
  static void standard_hash()
32
33
  {
33
34
  int i;
35
+ void *res = NULL;
34
36
  for (i = 0; i < N; i++) {
35
37
  FrtHash *h = frt_h_new_str(NULL, NULL);
36
38
  const char **word;
37
39
  char buf[100];
38
- long res;
39
40
  for (word = WORD_LIST; *word; word++) {
40
41
  frt_h_set(h, *word, (void *)1);
41
42
  strcpy(buf, *word);
42
- res = (long)frt_h_get(h, buf);
43
+ res = frt_h_get(h, buf);
43
44
  }
44
45
  frt_h_destroy(h);
45
46
  }
47
+ (void)res;
46
48
  }
47
49
 
48
50
  #define PERTURB_SHIFT 5
@@ -96,19 +98,20 @@ static FrtHashEntry *h_lookup_str(FrtHash *ht, register const void *key)
96
98
  static void string_hash()
97
99
  {
98
100
  int i;
101
+ void *res = NULL;
99
102
  for (i = 0; i < N; i++) {
100
103
  FrtHash *h = frt_h_new_str(NULL, NULL);
101
104
  const char **word;
102
105
  char buf[100];
103
- long res;
104
106
  h->lookup_i = &h_lookup_str;
105
107
  for (word = WORD_LIST; *word; word++) {
106
108
  frt_h_set(h, *word, (void *)1);
107
109
  strcpy(buf, *word);
108
- res = (long)frt_h_get(h, buf);
110
+ res = frt_h_get(h, buf);
109
111
  }
110
112
  frt_h_destroy(h);
111
113
  }
114
+ (void)res;
112
115
  }
113
116
 
114
117
  BENCH(specialized_string_hash)
@@ -5,7 +5,7 @@
5
5
 
6
6
  static void do_strcmp()
7
7
  {
8
- char **word;
8
+ const char **word;
9
9
  char buf[100];
10
10
  int res, i;
11
11
 
@@ -15,11 +15,12 @@ static void do_strcmp()
15
15
  memcpy(buf, *word, len+1);
16
16
  res = strcmp(buf, *word);
17
17
  }
18
+ (void)res;
18
19
  }
19
20
 
20
21
  static void do_strncmp()
21
22
  {
22
- char **word;
23
+ const char **word;
23
24
  char buf[100];
24
25
  int res, i;
25
26
 
@@ -29,6 +30,7 @@ static void do_strncmp()
29
30
  memcpy(buf, *word, len+1);
30
31
  res = strncmp(buf, *word, len + 1);
31
32
  }
33
+ (void)res;
32
34
  }
33
35
 
34
36
  BENCH(strcmp_when_length_is_known)
@@ -0,0 +1,15 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ #include "brotli_common_constants.h"
8
+
9
+ const BrotliPrefixCodeRange
10
+ _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
11
+ {1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3},
12
+ {33, 3}, {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4},
13
+ {113, 5}, {145, 5}, {177, 5}, {209, 5}, {241, 6}, {305, 6},
14
+ {369, 7}, {497, 8}, {753, 9}, {1265, 10}, {2289, 11}, {4337, 12},
15
+ {8433, 13}, {16625, 24}};
@@ -0,0 +1,200 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /**
8
+ * @file
9
+ * Common constants used in decoder and encoder API.
10
+ */
11
+
12
+ #ifndef BROTLI_COMMON_CONSTANTS_H_
13
+ #define BROTLI_COMMON_CONSTANTS_H_
14
+
15
+ #include "brotli_common_platform.h"
16
+ #include "brotli_port.h"
17
+ #include "brotli_types.h"
18
+
19
+ /* Specification: 7.3. Encoding of the context map */
20
+ #define BROTLI_CONTEXT_MAP_MAX_RLE 16
21
+
22
+ /* Specification: 2. Compressed representation overview */
23
+ #define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
24
+
25
+ /* Specification: 3.3. Alphabet sizes: insert-and-copy length */
26
+ #define BROTLI_NUM_LITERAL_SYMBOLS 256
27
+ #define BROTLI_NUM_COMMAND_SYMBOLS 704
28
+ #define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
29
+ #define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
30
+ BROTLI_CONTEXT_MAP_MAX_RLE)
31
+ #define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
32
+
33
+ /* Specification: 3.5. Complex prefix codes */
34
+ #define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
35
+ #define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
36
+ #define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
37
+ /* "code length of 8 is repeated" */
38
+ #define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
39
+
40
+ /* "Large Window Brotli" */
41
+
42
+ /**
43
+ * The theoretical maximum number of distance bits specified for large window
44
+ * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
45
+ * encoders and decoders only support up to 30 max distance bits, the value is
46
+ * set to 62 because it affects the large window brotli file format.
47
+ * Specifically, it affects the encoding of simple huffman tree for distances,
48
+ * see Specification RFC 7932 chapter 3.4.
49
+ */
50
+ #define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
51
+ #define BROTLI_LARGE_MIN_WBITS 10
52
+ /**
53
+ * The maximum supported large brotli window bits by the encoder and decoder.
54
+ * Large window brotli allows up to 62 bits, however the current encoder and
55
+ * decoder, designed for 32-bit integers, only support up to 30 bits maximum.
56
+ */
57
+ #define BROTLI_LARGE_MAX_WBITS 30
58
+
59
+ /* Specification: 4. Encoding of distances */
60
+ #define BROTLI_NUM_DISTANCE_SHORT_CODES 16
61
+ /**
62
+ * Maximal number of "postfix" bits.
63
+ *
64
+ * Number of "postfix" bits is stored as 2 bits in meta-block header.
65
+ */
66
+ #define BROTLI_MAX_NPOSTFIX 3
67
+ #define BROTLI_MAX_NDIRECT 120
68
+ #define BROTLI_MAX_DISTANCE_BITS 24U
69
+ #define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
70
+ BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \
71
+ ((MAXNBITS) << ((NPOSTFIX) + 1)))
72
+ /* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */
73
+ #define BROTLI_NUM_DISTANCE_SYMBOLS \
74
+ BROTLI_DISTANCE_ALPHABET_SIZE( \
75
+ BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
76
+
77
+ /* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
78
+ brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
79
+ NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
80
+ #define BROTLI_MAX_DISTANCE 0x3FFFFFC
81
+
82
+ /* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
83
+ allows safe distance calculation without overflows, given the distance
84
+ alphabet size is limited to corresponding size
85
+ (see kLargeWindowDistanceCodeLimits). */
86
+ #define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
87
+
88
+
89
+ /* Specification: 4. Encoding of Literal Insertion Lengths and Copy Lengths */
90
+ #define BROTLI_NUM_INS_COPY_CODES 24
91
+
92
+ /* 7.1. Context modes and context ID lookup for literals */
93
+ /* "context IDs for literals are in the range of 0..63" */
94
+ #define BROTLI_LITERAL_CONTEXT_BITS 6
95
+
96
+ /* 7.2. Context ID for distances */
97
+ #define BROTLI_DISTANCE_CONTEXT_BITS 2
98
+
99
+ /* 9.1. Format of the Stream Header */
100
+ /* Number of slack bytes for window size. Don't confuse
101
+ with BROTLI_NUM_DISTANCE_SHORT_CODES. */
102
+ #define BROTLI_WINDOW_GAP 16
103
+ #define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
104
+
105
+ typedef struct BrotliDistanceCodeLimit {
106
+ uint32_t max_alphabet_size;
107
+ uint32_t max_distance;
108
+ } BrotliDistanceCodeLimit;
109
+
110
+ /* This function calculates maximal size of distance alphabet, such that the
111
+ distances greater than the given values can not be represented.
112
+
113
+ These limits are designed to support fast and safe 32-bit decoders.
114
+ "32-bit" means that signed integer values up to ((1 << 31) - 1) could be
115
+ safely expressed.
116
+
117
+ Brotli distance alphabet symbols do not represent consecutive distance
118
+ ranges. Each distance alphabet symbol (excluding direct distances and short
119
+ codes), represents an interleaved (for NPOSTFIX > 0) range of distances.
120
+ A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved
121
+ range. Two consecutive groups require the same amount of "extra bits".
122
+
123
+ It is important that distance alphabet represents complete "groups".
124
+ To avoid complex logic on encoder side about interleaved ranges
125
+ it was decided to restrict both sides to complete distance code "groups".
126
+ */
127
+ BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
128
+ uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
129
+ BrotliDistanceCodeLimit result;
130
+ /* Marking this function as unused, because not all files
131
+ including "constants.h" use it -> compiler warns about that. */
132
+ BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
133
+ if (max_distance <= ndirect) {
134
+ /* This case never happens / exists only for the sake of completeness. */
135
+ result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
136
+ result.max_distance = max_distance;
137
+ return result;
138
+ } else {
139
+ /* The first prohibited value. */
140
+ uint32_t forbidden_distance = max_distance + 1;
141
+ /* Subtract "directly" encoded region. */
142
+ uint32_t offset = forbidden_distance - ndirect - 1;
143
+ uint32_t ndistbits = 0;
144
+ uint32_t tmp;
145
+ uint32_t half;
146
+ uint32_t group;
147
+ /* Postfix for the last dcode in the group. */
148
+ uint32_t postfix = (1u << npostfix) - 1;
149
+ uint32_t extra;
150
+ uint32_t start;
151
+ /* Remove postfix and "head-start". */
152
+ offset = (offset >> npostfix) + 4;
153
+ /* Calculate the number of distance bits. */
154
+ tmp = offset / 2;
155
+ /* Poor-man's log2floor, to avoid extra dependencies. */
156
+ while (tmp != 0) {ndistbits++; tmp = tmp >> 1;}
157
+ /* One bit is covered with subrange addressing ("half"). */
158
+ ndistbits--;
159
+ /* Find subrange. */
160
+ half = (offset >> ndistbits) & 1;
161
+ /* Calculate the "group" part of dcode. */
162
+ group = ((ndistbits - 1) << 1) | half;
163
+ /* Calculated "group" covers the prohibited distance value. */
164
+ if (group == 0) {
165
+ /* This case is added for correctness; does not occur for limit > 128. */
166
+ result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
167
+ result.max_distance = ndirect;
168
+ return result;
169
+ }
170
+ /* Decrement "group", so it is the last permitted "group". */
171
+ group--;
172
+ /* After group was decremented, ndistbits and half must be recalculated. */
173
+ ndistbits = (group >> 1) + 1;
174
+ /* The last available distance in the subrange has all extra bits set. */
175
+ extra = (1u << ndistbits) - 1;
176
+ /* Calculate region start. NB: ndistbits >= 1. */
177
+ start = (1u << (ndistbits + 1)) - 4;
178
+ /* Move to subregion. */
179
+ start += (group & 1) << ndistbits;
180
+ /* Calculate the alphabet size. */
181
+ result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect +
182
+ BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
183
+ /* Calculate the maximal distance representable by alphabet. */
184
+ result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1;
185
+ return result;
186
+ }
187
+ }
188
+
189
+ /* Represents the range of values belonging to a prefix code:
190
+ [offset, offset + 2^nbits) */
191
+ typedef struct {
192
+ uint16_t offset;
193
+ uint8_t nbits;
194
+ } BrotliPrefixCodeRange;
195
+
196
+ /* "Soft-private", it is exported, but not "advertised" as API. */
197
+ BROTLI_COMMON_API extern const BrotliPrefixCodeRange
198
+ _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
199
+
200
+ #endif /* BROTLI_COMMON_CONSTANTS_H_ */
@@ -0,0 +1,156 @@
1
+ #include "brotli_common_context.h"
2
+
3
+ #include "brotli_types.h"
4
+
5
+ /* Common context lookup table for all context modes: eight consecutive 256-entry sections (LSB6, MSB6, UTF8, SIGNED; for each mode a "last byte" section followed by a "second last byte" section), 2048 entries in total. */
6
+ const uint8_t _kBrotliContextLookupTable[2048] = {
7
+ /* CONTEXT_LSB6, last byte: the values 0..63 repeated four times. */
8
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
9
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
10
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
11
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
12
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
14
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
15
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
16
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
18
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
19
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
20
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
21
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
22
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
23
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
24
+
25
+ /* CONTEXT_LSB6, second last byte (all zeros). */
26
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42
+
43
+ /* CONTEXT_MSB6, last byte: each value 0..63 repeated four times in a row. */
44
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
45
+ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
46
+ 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
47
+ 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
48
+ 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
49
+ 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
50
+ 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
51
+ 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
52
+ 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
53
+ 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
54
+ 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
55
+ 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
56
+ 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
57
+ 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
58
+ 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
59
+ 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
60
+
61
+ /* CONTEXT_MSB6, second last byte (all zeros). */
62
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
74
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78
+
79
+ /* CONTEXT_UTF8, last byte. */
80
+ /* ASCII range (first 128 entries). */
81
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
82
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83
+ 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
84
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
85
+ 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
86
+ 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
87
+ 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
88
+ 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
89
+ /* UTF8 continuation byte range (next 64 entries). */
90
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
91
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
92
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
93
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
94
+ /* UTF8 lead byte range (last 64 entries). */
95
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
96
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
97
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
98
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
99
+
100
+ /* CONTEXT_UTF8, second last byte. */
101
+ /* ASCII range (first 128 entries). */
102
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
103
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
106
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
108
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
110
+ /* UTF8 continuation byte range. */
111
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
112
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
113
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
115
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
116
+ /* UTF8 lead byte range. NOTE(review): going by the 4+4 row split used in the CONTEXT_UTF8 last-byte section, the row of zeros just above appears to belong to this range as well - confirm. */
117
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
118
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120
+
121
+ /* CONTEXT_SIGNED, last byte: same as the CONTEXT_SIGNED second-last-byte values below, shifted left by 3 bits. */
122
+ 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
123
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
124
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
125
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
126
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
127
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
128
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
129
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
130
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
131
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
132
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
133
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
134
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
135
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
136
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
137
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
138
+
139
+ /* CONTEXT_SIGNED, second last byte: values 0..7 in runs of unequal length. */
140
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
145
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
146
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
148
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
149
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
151
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
152
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
156
+ };