rroonga 7.1.1-x86-mingw32 → 9.0.2-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (980) hide show
  1. checksums.yaml +5 -5
  2. data/Rakefile +3 -3
  3. data/doc/text/news.md +22 -0
  4. data/ext/groonga/extconf.rb +29 -26
  5. data/ext/groonga/rb-grn.h +3 -3
  6. data/lib/2.2/groonga.so +0 -0
  7. data/lib/2.3/groonga.so +0 -0
  8. data/lib/2.4/groonga.so +0 -0
  9. data/lib/2.5/groonga.so +0 -0
  10. data/lib/groonga/expression-builder.rb +1 -1
  11. data/lib/groonga/schema.rb +13 -0
  12. data/rroonga-build.rb +4 -11
  13. data/test/test-expression-builder.rb +8 -0
  14. data/vendor/local/bin/cv2pdb.exe +0 -0
  15. data/vendor/local/bin/generate-pdb.bat +38 -36
  16. data/vendor/local/bin/grndb.exe +0 -0
  17. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  18. data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
  19. data/vendor/local/bin/groonga.exe +0 -0
  20. data/vendor/local/bin/libgroonga-0.dll +0 -0
  21. data/vendor/local/bin/libmecab-2.dll +0 -0
  22. data/vendor/local/bin/libmsgpackc.dll +0 -0
  23. data/vendor/local/bin/libonigmo-6.dll +0 -0
  24. data/vendor/local/bin/libpcre-1.dll +0 -0
  25. data/vendor/local/bin/libpcrecpp-0.dll +0 -0
  26. data/vendor/local/bin/libpcreposix-0.dll +0 -0
  27. data/vendor/local/bin/lz4.exe +0 -0
  28. data/vendor/local/bin/lz4c.exe +0 -0
  29. data/vendor/local/bin/{lz4cat → lz4cat.exe} +0 -0
  30. data/vendor/local/bin/mecab.exe +0 -0
  31. data/vendor/local/bin/pcre-config +1 -1
  32. data/vendor/local/bin/pcregrep.exe +0 -0
  33. data/vendor/local/bin/pcretest.exe +0 -0
  34. data/vendor/local/bin/unlz4.exe +0 -0
  35. data/vendor/local/bin/zlib1.dll +0 -0
  36. data/vendor/local/include/groonga/groonga.h +16 -1
  37. data/vendor/local/include/groonga/groonga/accessor.h +5 -1
  38. data/vendor/local/include/groonga/groonga/column.h +4 -0
  39. data/vendor/local/include/groonga/groonga/db.h +3 -1
  40. data/vendor/local/include/groonga/groonga/expr.h +5 -0
  41. data/vendor/local/include/groonga/groonga/groonga.h +124 -171
  42. data/vendor/local/include/groonga/groonga/highlighter.h +57 -0
  43. data/vendor/local/include/groonga/groonga/ii.h +2 -0
  44. data/vendor/local/include/groonga/groonga/index_column.h +31 -0
  45. data/vendor/local/include/groonga/groonga/memory.h +29 -0
  46. data/vendor/local/include/groonga/groonga/msgpack.h +50 -0
  47. data/vendor/local/include/groonga/groonga/obj.h +22 -1
  48. data/vendor/local/include/groonga/groonga/option.h +61 -0
  49. data/vendor/local/include/groonga/groonga/output.h +57 -2
  50. data/vendor/local/include/groonga/groonga/output_columns.h +38 -0
  51. data/vendor/local/include/groonga/groonga/plugin.h +5 -0
  52. data/vendor/local/include/groonga/groonga/raw_string.h +60 -0
  53. data/vendor/local/include/groonga/groonga/string.h +113 -0
  54. data/vendor/local/include/groonga/groonga/table.h +89 -1
  55. data/vendor/local/include/groonga/groonga/thread.h +15 -0
  56. data/vendor/local/include/groonga/groonga/time.h +1 -0
  57. data/vendor/local/include/groonga/groonga/token.h +60 -10
  58. data/vendor/local/include/groonga/groonga/token_cursor.h +59 -0
  59. data/vendor/local/include/groonga/groonga/token_filter.h +24 -0
  60. data/vendor/local/include/groonga/groonga/token_metadata.h +49 -0
  61. data/vendor/local/include/groonga/groonga/tokenizer.h +99 -25
  62. data/vendor/local/include/groonga/groonga/tokenizer_query_deprecated.h +50 -0
  63. data/vendor/local/include/groonga/groonga/vector.h +80 -0
  64. data/vendor/local/include/groonga/groonga/version.h +32 -0
  65. data/vendor/local/include/groonga/groonga/window_function.h +18 -8
  66. data/vendor/local/include/groonga/groonga/window_function_executor.h +68 -0
  67. data/vendor/local/include/lz4.h +504 -212
  68. data/vendor/local/include/lz4frame.h +433 -153
  69. data/vendor/local/include/lz4frame_static.h +47 -0
  70. data/vendor/local/include/lz4hc.h +281 -108
  71. data/vendor/local/include/msgpack.hpp +4 -0
  72. data/vendor/local/include/msgpack/adaptor/adaptor_base.hpp +1 -0
  73. data/vendor/local/include/msgpack/adaptor/adaptor_base_decl.hpp +1 -0
  74. data/vendor/local/include/msgpack/adaptor/array_ref_decl.hpp +1 -0
  75. data/vendor/local/include/msgpack/adaptor/boost/msgpack_variant_decl.hpp +1 -0
  76. data/vendor/local/include/msgpack/adaptor/boost/string_view.hpp +15 -0
  77. data/vendor/local/include/msgpack/adaptor/check_container_size_decl.hpp +1 -0
  78. data/vendor/local/include/msgpack/adaptor/cpp17/optional.hpp +16 -0
  79. data/vendor/local/include/msgpack/adaptor/cpp17/string_view.hpp +16 -0
  80. data/vendor/local/include/msgpack/adaptor/define_decl.hpp +2 -0
  81. data/vendor/local/include/msgpack/adaptor/ext_decl.hpp +1 -0
  82. data/vendor/local/include/msgpack/adaptor/fixint_decl.hpp +1 -0
  83. data/vendor/local/include/msgpack/adaptor/int_decl.hpp +1 -0
  84. data/vendor/local/include/msgpack/adaptor/map_decl.hpp +1 -0
  85. data/vendor/local/include/msgpack/adaptor/msgpack_tuple_decl.hpp +1 -0
  86. data/vendor/local/include/msgpack/adaptor/nil_decl.hpp +1 -0
  87. data/vendor/local/include/msgpack/adaptor/raw_decl.hpp +1 -0
  88. data/vendor/local/include/msgpack/adaptor/size_equal_only_decl.hpp +1 -0
  89. data/vendor/local/include/msgpack/adaptor/tr1/unordered_map.hpp +2 -2
  90. data/vendor/local/include/msgpack/adaptor/tr1/unordered_set.hpp +2 -2
  91. data/vendor/local/include/msgpack/adaptor/v4raw_decl.hpp +1 -0
  92. data/vendor/local/include/msgpack/cpp_config_decl.hpp +1 -0
  93. data/vendor/local/include/msgpack/create_object_visitor.hpp +17 -0
  94. data/vendor/local/include/msgpack/create_object_visitor_decl.hpp +16 -0
  95. data/vendor/local/include/msgpack/fbuffer.h +1 -1
  96. data/vendor/local/include/msgpack/fbuffer_decl.hpp +1 -0
  97. data/vendor/local/include/msgpack/gcc_atomic.hpp +0 -2
  98. data/vendor/local/include/msgpack/iterator_decl.hpp +2 -1
  99. data/vendor/local/include/msgpack/meta_decl.hpp +1 -0
  100. data/vendor/local/include/msgpack/null_visitor.hpp +17 -0
  101. data/vendor/local/include/msgpack/null_visitor_decl.hpp +16 -0
  102. data/vendor/local/include/msgpack/object.h +5 -0
  103. data/vendor/local/include/msgpack/object_decl.hpp +1 -0
  104. data/vendor/local/include/msgpack/object_fwd.hpp +1 -0
  105. data/vendor/local/include/msgpack/object_fwd_decl.hpp +1 -0
  106. data/vendor/local/include/msgpack/pack.h +1 -0
  107. data/vendor/local/include/msgpack/pack_decl.hpp +1 -0
  108. data/vendor/local/include/msgpack/parse.hpp +18 -0
  109. data/vendor/local/include/msgpack/parse_decl.hpp +16 -0
  110. data/vendor/local/include/msgpack/parse_return.hpp +17 -0
  111. data/vendor/local/include/msgpack/sbuffer_decl.hpp +1 -0
  112. data/vendor/local/include/msgpack/sysdep.h +34 -26
  113. data/vendor/local/include/msgpack/type.hpp +9 -0
  114. data/vendor/local/include/msgpack/unpack.h +12 -1
  115. data/vendor/local/include/msgpack/unpack.hpp +1 -0
  116. data/vendor/local/include/msgpack/unpack_decl.hpp +1 -0
  117. data/vendor/local/include/msgpack/unpack_exception.hpp +15 -0
  118. data/vendor/local/include/msgpack/unpack_template.h +22 -30
  119. data/vendor/local/include/msgpack/v1/adaptor/array_ref.hpp +6 -6
  120. data/vendor/local/include/msgpack/v1/adaptor/boost/fusion.hpp +49 -6
  121. data/vendor/local/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp +6 -4
  122. data/vendor/local/include/msgpack/v1/adaptor/boost/string_view.hpp +87 -0
  123. data/vendor/local/include/msgpack/v1/adaptor/carray.hpp +11 -11
  124. data/vendor/local/include/msgpack/v1/adaptor/char_ptr.hpp +1 -1
  125. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array.hpp +1 -1
  126. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_char.hpp +8 -1
  127. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_unsigned_char.hpp +8 -1
  128. data/vendor/local/include/msgpack/v1/adaptor/cpp11/forward_list.hpp +1 -1
  129. data/vendor/local/include/msgpack/v1/adaptor/cpp11/tuple.hpp +2 -2
  130. data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_map.hpp +4 -4
  131. data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_set.hpp +2 -2
  132. data/vendor/local/include/msgpack/v1/adaptor/cpp17/optional.hpp +90 -0
  133. data/vendor/local/include/msgpack/v1/adaptor/cpp17/string_view.hpp +86 -0
  134. data/vendor/local/include/msgpack/v1/adaptor/deque.hpp +1 -1
  135. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_array.hpp +1088 -32
  136. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_map.hpp +32 -16
  137. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_msgpack_tuple.hpp +32 -32
  138. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_convert_helper.hpp +45 -0
  139. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_array.hpp +4 -3
  140. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_map.hpp +4 -2
  141. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_msgpack_tuple.hpp +2 -2
  142. data/vendor/local/include/msgpack/v1/adaptor/ext.hpp +1 -1
  143. data/vendor/local/include/msgpack/v1/adaptor/fixint.hpp +40 -24
  144. data/vendor/local/include/msgpack/v1/adaptor/float.hpp +4 -4
  145. data/vendor/local/include/msgpack/v1/adaptor/int.hpp +55 -33
  146. data/vendor/local/include/msgpack/v1/adaptor/list.hpp +1 -1
  147. data/vendor/local/include/msgpack/v1/adaptor/map.hpp +10 -10
  148. data/vendor/local/include/msgpack/v1/adaptor/pair.hpp +2 -2
  149. data/vendor/local/include/msgpack/v1/adaptor/set.hpp +2 -2
  150. data/vendor/local/include/msgpack/v1/adaptor/string.hpp +1 -1
  151. data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_map.hpp +2 -2
  152. data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_set.hpp +2 -2
  153. data/vendor/local/include/msgpack/v1/adaptor/vector.hpp +5 -5
  154. data/vendor/local/include/msgpack/v1/adaptor/vector_bool.hpp +1 -1
  155. data/vendor/local/include/msgpack/v1/adaptor/vector_char.hpp +9 -9
  156. data/vendor/local/include/msgpack/v1/adaptor/vector_unsigned_char.hpp +9 -9
  157. data/vendor/local/include/msgpack/v1/cpp_config.hpp +6 -0
  158. data/vendor/local/include/msgpack/v1/cpp_config_decl.hpp +6 -0
  159. data/vendor/local/include/msgpack/v1/detail/cpp03_zone.hpp +41 -34
  160. data/vendor/local/include/msgpack/v1/detail/cpp03_zone_decl.hpp +8 -0
  161. data/vendor/local/include/msgpack/v1/detail/cpp11_zone.hpp +25 -19
  162. data/vendor/local/include/msgpack/v1/detail/cpp11_zone_decl.hpp +8 -0
  163. data/vendor/local/include/msgpack/v1/meta.hpp +6 -0
  164. data/vendor/local/include/msgpack/v1/meta_decl.hpp +5 -0
  165. data/vendor/local/include/msgpack/v1/object.hpp +768 -393
  166. data/vendor/local/include/msgpack/v1/object_decl.hpp +11 -1
  167. data/vendor/local/include/msgpack/v1/object_fwd.hpp +4 -1
  168. data/vendor/local/include/msgpack/v1/object_fwd_decl.hpp +3 -1
  169. data/vendor/local/include/msgpack/v1/parse_return.hpp +36 -0
  170. data/vendor/local/include/msgpack/v1/unpack.hpp +39 -120
  171. data/vendor/local/include/msgpack/v1/unpack_decl.hpp +2 -9
  172. data/vendor/local/include/msgpack/v1/unpack_exception.hpp +122 -0
  173. data/vendor/local/include/msgpack/v1/vrefbuffer.hpp +2 -2
  174. data/vendor/local/include/msgpack/v2/create_object_visitor.hpp +250 -0
  175. data/vendor/local/include/msgpack/v2/create_object_visitor_decl.hpp +33 -0
  176. data/vendor/local/include/msgpack/v2/meta_decl.hpp +4 -0
  177. data/vendor/local/include/msgpack/v2/null_visitor.hpp +96 -0
  178. data/vendor/local/include/msgpack/v2/null_visitor_decl.hpp +29 -0
  179. data/vendor/local/include/msgpack/v2/object_decl.hpp +4 -0
  180. data/vendor/local/include/msgpack/v2/object_fwd.hpp +1 -1
  181. data/vendor/local/include/msgpack/v2/object_fwd_decl.hpp +2 -0
  182. data/vendor/local/include/msgpack/v2/pack_decl.hpp +1 -0
  183. data/vendor/local/include/msgpack/v2/parse.hpp +1072 -0
  184. data/vendor/local/include/msgpack/v2/parse_decl.hpp +79 -0
  185. data/vendor/local/include/msgpack/v2/parse_return.hpp +37 -0
  186. data/vendor/local/include/msgpack/v2/unpack.hpp +21 -1298
  187. data/vendor/local/include/msgpack/v2/unpack_decl.hpp +9 -45
  188. data/vendor/local/include/msgpack/v2/x3_parse.hpp +875 -0
  189. data/vendor/local/include/msgpack/v2/x3_parse_decl.hpp +36 -0
  190. data/vendor/local/include/msgpack/v2/x3_unpack.hpp +120 -0
  191. data/vendor/local/include/msgpack/v2/x3_unpack_decl.hpp +71 -0
  192. data/vendor/local/include/msgpack/v3/adaptor/adaptor_base.hpp +58 -0
  193. data/vendor/local/include/msgpack/v3/adaptor/adaptor_base_decl.hpp +52 -0
  194. data/vendor/local/include/msgpack/v3/adaptor/array_ref_decl.hpp +36 -0
  195. data/vendor/local/include/msgpack/v3/adaptor/boost/msgpack_variant_decl.hpp +42 -0
  196. data/vendor/local/include/msgpack/v3/adaptor/check_container_size_decl.hpp +39 -0
  197. data/vendor/local/include/msgpack/v3/adaptor/define_decl.hpp +23 -0
  198. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_array_decl.hpp +31 -0
  199. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_map_decl.hpp +31 -0
  200. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_msgpack_tuple_decl.hpp +43 -0
  201. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_array_decl.hpp +32 -0
  202. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_map_decl.hpp +31 -0
  203. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_msgpack_tuple_decl.hpp +59 -0
  204. data/vendor/local/include/msgpack/v3/adaptor/ext_decl.hpp +34 -0
  205. data/vendor/local/include/msgpack/v3/adaptor/fixint_decl.hpp +43 -0
  206. data/vendor/local/include/msgpack/v3/adaptor/int_decl.hpp +54 -0
  207. data/vendor/local/include/msgpack/v3/adaptor/map_decl.hpp +33 -0
  208. data/vendor/local/include/msgpack/v3/adaptor/msgpack_tuple_decl.hpp +21 -0
  209. data/vendor/local/include/msgpack/v3/adaptor/nil_decl.hpp +42 -0
  210. data/vendor/local/include/msgpack/v3/adaptor/raw_decl.hpp +33 -0
  211. data/vendor/local/include/msgpack/v3/adaptor/size_equal_only_decl.hpp +35 -0
  212. data/vendor/local/include/msgpack/v3/adaptor/v4raw_decl.hpp +34 -0
  213. data/vendor/local/include/msgpack/v3/cpp_config_decl.hpp +84 -0
  214. data/vendor/local/include/msgpack/v3/create_object_visitor_decl.hpp +33 -0
  215. data/vendor/local/include/msgpack/v3/detail/cpp03_zone_decl.hpp +31 -0
  216. data/vendor/local/include/msgpack/v3/detail/cpp11_zone_decl.hpp +31 -0
  217. data/vendor/local/include/msgpack/v3/fbuffer_decl.hpp +32 -0
  218. data/vendor/local/include/msgpack/v3/iterator_decl.hpp +33 -0
  219. data/vendor/local/include/msgpack/v3/meta_decl.hpp +50 -0
  220. data/vendor/local/include/msgpack/v3/null_visitor_decl.hpp +29 -0
  221. data/vendor/local/include/msgpack/v3/object_decl.hpp +53 -0
  222. data/vendor/local/include/msgpack/v3/object_fwd.hpp +70 -0
  223. data/vendor/local/include/msgpack/v3/object_fwd_decl.hpp +75 -0
  224. data/vendor/local/include/msgpack/v3/pack_decl.hpp +55 -0
  225. data/vendor/local/include/msgpack/v3/parse.hpp +677 -0
  226. data/vendor/local/include/msgpack/v3/parse_decl.hpp +49 -0
  227. data/vendor/local/include/msgpack/v3/parse_return.hpp +35 -0
  228. data/vendor/local/include/msgpack/v3/sbuffer_decl.hpp +33 -0
  229. data/vendor/local/include/msgpack/v3/unpack.hpp +192 -0
  230. data/vendor/local/include/msgpack/v3/unpack_decl.hpp +304 -0
  231. data/vendor/local/include/msgpack/v3/vrefbuffer_decl.hpp +29 -0
  232. data/vendor/local/include/msgpack/v3/x3_parse_decl.hpp +34 -0
  233. data/vendor/local/include/msgpack/v3/x3_unpack.hpp +97 -0
  234. data/vendor/local/include/msgpack/v3/x3_unpack_decl.hpp +65 -0
  235. data/vendor/local/include/msgpack/v3/zbuffer_decl.hpp +29 -0
  236. data/vendor/local/include/msgpack/v3/zone_decl.hpp +21 -0
  237. data/vendor/local/include/msgpack/version_master.h +2 -2
  238. data/vendor/local/include/msgpack/versioning.hpp +5 -3
  239. data/vendor/local/include/msgpack/vrefbuffer.h +1 -2
  240. data/vendor/local/include/msgpack/vrefbuffer_decl.hpp +1 -0
  241. data/vendor/local/include/msgpack/x3_parse.hpp +15 -0
  242. data/vendor/local/include/msgpack/x3_parse_decl.hpp +16 -0
  243. data/vendor/local/include/msgpack/x3_unpack.hpp +16 -0
  244. data/vendor/local/include/msgpack/x3_unpack_decl.hpp +16 -0
  245. data/vendor/local/include/msgpack/zbuffer_decl.hpp +1 -0
  246. data/vendor/local/include/msgpack/zone_decl.hpp +1 -0
  247. data/vendor/local/include/pcre.h +6 -6
  248. data/vendor/local/lib/cmake/msgpack/msgpack-config-version.cmake +46 -0
  249. data/vendor/local/lib/cmake/msgpack/msgpack-config.cmake +47 -0
  250. data/vendor/local/lib/cmake/msgpack/msgpack-targets-noconfig.cmake +29 -0
  251. data/vendor/local/lib/cmake/msgpack/msgpack-targets.cmake +101 -0
  252. data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
  253. data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
  254. data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
  255. data/vendor/local/lib/groonga/plugins/functions/index_column.la +1 -1
  256. data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
  257. data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
  258. data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
  259. data/vendor/local/lib/groonga/plugins/functions/math.la +1 -1
  260. data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
  261. data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
  262. data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
  263. data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
  264. data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
  265. data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
  266. data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
  267. data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
  268. data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
  269. data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
  270. data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
  271. data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
  272. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  273. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  274. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  275. data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
  276. data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
  277. data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
  278. data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
  279. data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +2 -2
  280. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  281. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  282. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  283. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
  284. data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +150 -19
  285. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +123 -65
  286. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +528 -113
  287. data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +142 -40
  288. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  289. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  290. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  291. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
  292. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  293. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  294. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  295. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
  296. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  297. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  298. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  299. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
  300. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +1 -1
  301. data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +64 -35
  302. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +3 -1
  303. data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters.rb +15 -21
  304. data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters/optimizer.rb +274 -0
  305. data/vendor/local/lib/groonga/scripts/ruby/expression_tree.rb +8 -2
  306. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign.rb +22 -0
  307. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign_binary_operation.rb +24 -0
  308. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/binary_operation.rb +206 -8
  309. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/constant.rb +16 -1
  310. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +30 -1
  311. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/logical_operation.rb +6 -0
  312. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/member.rb +18 -0
  313. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/null.rb +17 -0
  314. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/reference.rb +18 -0
  315. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/table.rb +14 -0
  316. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/unary_operation.rb +26 -0
  317. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/variable.rb +4 -0
  318. data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +78 -8
  319. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +10 -0
  320. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +2 -0
  321. data/vendor/local/lib/groonga/scripts/ruby/locale_output.rb +28 -0
  322. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +36 -4
  323. data/vendor/local/lib/groonga/scripts/ruby/record.rb +1 -1
  324. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +0 -3
  325. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +46 -5
  326. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data_size_estimator.rb +5 -136
  327. data/vendor/local/lib/groonga/scripts/ruby/table.rb +2 -2
  328. data/vendor/local/lib/libgroonga.a +0 -0
  329. data/vendor/local/lib/libgroonga.dll.a +0 -0
  330. data/vendor/local/lib/libgroonga.la +1 -1
  331. data/vendor/local/lib/liblz4.a +0 -0
  332. data/vendor/local/lib/liblz4.dll +0 -0
  333. data/vendor/local/lib/liblz4.dll.1 +0 -0
  334. data/vendor/local/lib/{liblz4.dll.1.5.0 → liblz4.dll.1.8.2} +0 -0
  335. data/vendor/local/lib/libmecab.dll.a +0 -0
  336. data/vendor/local/lib/libmsgpackc.a +0 -0
  337. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  338. data/vendor/local/lib/libonigmo.a +0 -0
  339. data/vendor/local/lib/libonigmo.dll.a +0 -0
  340. data/vendor/local/lib/libpcre.a +0 -0
  341. data/vendor/local/lib/libpcre.dll.a +0 -0
  342. data/vendor/local/lib/libpcre.la +2 -2
  343. data/vendor/local/lib/libpcrecpp.dll.a +0 -0
  344. data/vendor/local/lib/libpcrecpp.la +1 -1
  345. data/vendor/local/lib/libpcreposix.a +0 -0
  346. data/vendor/local/lib/libpcreposix.dll.a +0 -0
  347. data/vendor/local/lib/libpcreposix.la +2 -2
  348. data/vendor/local/lib/libz.dll.a +0 -0
  349. data/vendor/local/lib/pkgconfig/groonga-normalizer-mysql.pc +1 -1
  350. data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
  351. data/vendor/local/lib/pkgconfig/liblz4.pc +3 -3
  352. data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
  353. data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
  354. data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
  355. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  356. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  357. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  358. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  359. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  360. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  361. data/vendor/local/share/doc/groonga-normalizer-mysql/README.md +14 -22
  362. data/vendor/local/share/doc/groonga-normalizer-mysql/news.md +22 -2
  363. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  364. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +113 -4
  365. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +46 -19
  366. data/vendor/local/share/doc/groonga/en/html/_static/documentation_options.js +10 -0
  367. data/vendor/local/share/doc/groonga/en/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
  368. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
  369. data/vendor/local/share/doc/groonga/en/html/_static/language_data.js +297 -0
  370. data/vendor/local/share/doc/groonga/en/html/_static/pygments.css +4 -0
  371. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +69 -322
  372. data/vendor/local/share/doc/groonga/en/html/characteristic.html +16 -24
  373. data/vendor/local/share/doc/groonga/en/html/client.html +15 -23
  374. data/vendor/local/share/doc/groonga/en/html/community.html +30 -38
  375. data/vendor/local/share/doc/groonga/en/html/contribution.html +23 -31
  376. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +15 -23
  377. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +15 -23
  378. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +58 -66
  379. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +51 -56
  380. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +52 -56
  381. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +27 -35
  382. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +19 -27
  383. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +26 -34
  384. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +167 -167
  385. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +16 -24
  386. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +28 -36
  387. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +15 -23
  388. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +15 -23
  389. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +59 -67
  390. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +31 -39
  391. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +18 -26
  392. data/vendor/local/share/doc/groonga/en/html/development.html +15 -23
  393. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +38 -43
  394. data/vendor/local/share/doc/groonga/en/html/genindex.html +50 -28
  395. data/vendor/local/share/doc/groonga/en/html/index.html +248 -234
  396. data/vendor/local/share/doc/groonga/en/html/install.html +43 -47
  397. data/vendor/local/share/doc/groonga/en/html/install/centos.html +43 -51
  398. data/vendor/local/share/doc/groonga/en/html/install/debian.html +52 -131
  399. data/vendor/local/share/doc/groonga/en/html/install/docker.html +155 -0
  400. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +41 -49
  401. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +29 -37
  402. data/vendor/local/share/doc/groonga/en/html/install/others.html +142 -150
  403. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +30 -38
  404. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +43 -51
  405. data/vendor/local/share/doc/groonga/en/html/install/windows.html +33 -41
  406. data/vendor/local/share/doc/groonga/en/html/limitations.html +36 -42
  407. data/vendor/local/share/doc/groonga/en/html/news.html +1586 -598
  408. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +83 -83
  409. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +147 -155
  410. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +26 -34
  411. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +225 -233
  412. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +48 -56
  413. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +378 -386
  414. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +320 -328
  415. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +442 -448
  416. data/vendor/local/share/doc/groonga/en/html/news/5.x.html +742 -860
  417. data/vendor/local/share/doc/groonga/en/html/news/6.x.html +544 -621
  418. data/vendor/local/share/doc/groonga/en/html/news/senna.html +32 -40
  419. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  420. data/vendor/local/share/doc/groonga/en/html/reference.html +208 -198
  421. data/vendor/local/share/doc/groonga/en/html/reference/alias.html +85 -93
  422. data/vendor/local/share/doc/groonga/en/html/reference/api.html +50 -57
  423. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +62 -77
  424. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +117 -149
  425. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +140 -176
  426. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +43 -55
  427. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +48 -56
  428. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +194 -254
  429. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +106 -138
  430. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +62 -82
  431. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +117 -137
  432. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +74 -98
  433. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +79 -103
  434. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +40 -48
  435. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +57 -73
  436. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +75 -99
  437. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_inspect.html +495 -0
  438. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +52 -68
  439. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +291 -357
  440. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +69 -89
  441. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +47 -59
  442. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +226 -306
  443. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +120 -160
  444. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +80 -103
  445. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +46 -58
  446. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +40 -52
  447. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +52 -66
  448. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +98 -122
  449. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +40 -26
  450. data/vendor/local/share/doc/groonga/en/html/reference/column.html +16 -24
  451. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +16 -24
  452. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +30 -34
  453. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +16 -24
  454. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +92 -100
  455. data/vendor/local/share/doc/groonga/en/html/reference/command.html +76 -84
  456. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +26 -34
  457. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +64 -72
  458. data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +21 -29
  459. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +25 -33
  460. data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +32 -40
  461. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +105 -113
  462. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -50
  463. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +85 -73
  464. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +31 -37
  465. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +131 -139
  466. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +370 -326
  467. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +115 -117
  468. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +38 -44
  469. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +47 -53
  470. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +40 -48
  471. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +42 -50
  472. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +41 -49
  473. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +37 -45
  474. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +71 -63
  475. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +31 -37
  476. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +49 -51
  477. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +64 -71
  478. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +335 -138
  479. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +233 -87
  480. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +45 -53
  481. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +42 -48
  482. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +43 -51
  483. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +58 -64
  484. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +33 -38
  485. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +31 -38
  486. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +295 -218
  487. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +56 -64
  488. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +532 -214
  489. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +797 -388
  490. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +35 -43
  491. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +188 -196
  492. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +83 -90
  493. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +41 -48
  494. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +41 -49
  495. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +401 -403
  496. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +253 -261
  497. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +60 -68
  498. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +36 -44
  499. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +35 -43
  500. data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +21 -29
  501. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +22 -30
  502. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +21 -29
  503. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +39 -47
  504. data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +47 -53
  505. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  506. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +38 -45
  507. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +38 -45
  508. data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +330 -338
  509. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +1545 -1194
  510. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +57 -65
  511. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +83 -91
  512. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +119 -133
  513. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +30 -38
  514. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +165 -174
  515. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +50 -50
  516. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +104 -112
  517. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +42 -50
  518. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +49 -57
  519. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +46 -54
  520. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +110 -117
  521. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +41 -48
  522. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +40 -46
  523. data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +37 -45
  524. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -27
  525. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +134 -114
  526. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +25 -31
  527. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +66 -66
  528. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +174 -182
  529. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +25 -33
  530. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +27 -35
  531. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +191 -199
  532. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +32 -40
  533. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +189 -163
  534. data/vendor/local/share/doc/groonga/en/html/reference/function.html +59 -64
  535. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +71 -79
  536. data/vendor/local/share/doc/groonga/en/html/reference/functions/cast_loose.html +210 -0
  537. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +49 -55
  538. data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +38 -46
  539. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +133 -142
  540. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +67 -73
  541. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +56 -62
  542. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +80 -88
  543. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +70 -78
  544. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +56 -64
  545. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +87 -94
  546. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +54 -62
  547. data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +55 -63
  548. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -48
  549. data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +36 -44
  550. data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +74 -82
  551. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +152 -160
  552. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +45 -52
  553. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +76 -84
  554. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +37 -45
  555. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +39 -47
  556. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +76 -84
  557. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +37 -45
  558. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day_of_week.html +278 -0
  559. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +37 -45
  560. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +36 -44
  561. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +36 -44
  562. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +36 -44
  563. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +36 -44
  564. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +37 -45
  565. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_find.html +368 -0
  566. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +40 -48
  567. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +54 -62
  568. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +40 -47
  569. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +44 -52
  570. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +307 -316
  571. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +486 -492
  572. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +44 -52
  573. data/vendor/local/share/doc/groonga/en/html/reference/log.html +128 -147
  574. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +43 -92
  575. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_auto.html +179 -0
  576. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc100.html +897 -0
  577. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc51.html +162 -0
  578. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +26 -34
  579. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +48 -56
  580. data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +47 -55
  581. data/vendor/local/share/doc/groonga/en/html/reference/output.html +47 -55
  582. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +20 -28
  583. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +93 -101
  584. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +228 -225
  585. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +59 -67
  586. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +50 -58
  587. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +57 -65
  588. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +76 -86
  589. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +43 -51
  590. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +159 -167
  591. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +93 -101
  592. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +85 -93
  593. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +88 -96
  594. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +142 -150
  595. data/vendor/local/share/doc/groonga/en/html/reference/token_filter/summary.html +147 -0
  596. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +31 -223
  597. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_nfkc100.html +626 -0
  598. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stem.html +291 -0
  599. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stop_word.html +287 -0
  600. data/vendor/local/share/doc/groonga/en/html/reference/tokenizer/summary.html +259 -0
  601. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +42 -1455
  602. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram.html +368 -0
  603. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank.html +221 -0
  604. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +240 -0
  605. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +270 -0
  606. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +292 -0
  607. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
  608. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +200 -0
  609. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +212 -0
  610. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit.html +357 -0
  611. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit_null.html +162 -0
  612. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_mecab.html +783 -0
  613. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_regexp.html +289 -0
  614. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_trigram.html +194 -0
  615. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_unigram.html +194 -0
  616. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +71 -79
  617. data/vendor/local/share/doc/groonga/en/html/reference/types.html +64 -72
  618. data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +29 -37
  619. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +38 -46
  620. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +38 -46
  621. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +38 -46
  622. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +38 -46
  623. data/vendor/local/share/doc/groonga/en/html/search.html +13 -24
  624. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  625. data/vendor/local/share/doc/groonga/en/html/server.html +15 -23
  626. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +27 -35
  627. data/vendor/local/share/doc/groonga/en/html/server/http.html +18 -26
  628. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +94 -102
  629. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +15 -23
  630. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +15 -23
  631. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +18 -26
  632. data/vendor/local/share/doc/groonga/en/html/server/package.html +101 -109
  633. data/vendor/local/share/doc/groonga/en/html/spec.html +19 -27
  634. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +207 -215
  635. data/vendor/local/share/doc/groonga/en/html/spec/search.html +39 -39
  636. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +15 -23
  637. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +46 -50
  638. data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +27 -35
  639. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +26 -31
  640. data/vendor/local/share/doc/groonga/en/html/tutorial.html +17 -25
  641. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +46 -54
  642. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +63 -71
  643. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +30 -38
  644. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +88 -97
  645. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +19 -27
  646. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +61 -69
  647. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +108 -116
  648. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +24 -32
  649. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +19 -27
  650. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +32 -40
  651. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +52 -60
  652. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  653. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +113 -4
  654. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +46 -19
  655. data/vendor/local/share/doc/groonga/ja/html/_static/documentation_options.js +10 -0
  656. data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
  657. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
  658. data/vendor/local/share/doc/groonga/ja/html/_static/language_data.js +124 -0
  659. data/vendor/local/share/doc/groonga/ja/html/_static/pygments.css +4 -0
  660. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +70 -150
  661. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +15 -23
  662. data/vendor/local/share/doc/groonga/ja/html/client.html +15 -23
  663. data/vendor/local/share/doc/groonga/ja/html/community.html +29 -37
  664. data/vendor/local/share/doc/groonga/ja/html/contribution.html +23 -31
  665. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +15 -23
  666. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +15 -23
  667. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +50 -58
  668. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +43 -48
  669. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +47 -51
  670. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +26 -34
  671. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +18 -26
  672. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +23 -31
  673. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +162 -162
  674. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +16 -24
  675. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +26 -34
  676. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +15 -23
  677. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +15 -23
  678. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +50 -58
  679. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +28 -36
  680. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -25
  681. data/vendor/local/share/doc/groonga/ja/html/development.html +15 -23
  682. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +32 -37
  683. data/vendor/local/share/doc/groonga/ja/html/genindex.html +50 -28
  684. data/vendor/local/share/doc/groonga/ja/html/index.html +247 -233
  685. data/vendor/local/share/doc/groonga/ja/html/install.html +41 -45
  686. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +44 -52
  687. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +52 -121
  688. data/vendor/local/share/doc/groonga/ja/html/install/docker.html +155 -0
  689. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +40 -48
  690. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +28 -36
  691. data/vendor/local/share/doc/groonga/ja/html/install/others.html +116 -124
  692. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +28 -36
  693. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +43 -51
  694. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -37
  695. data/vendor/local/share/doc/groonga/ja/html/limitations.html +30 -36
  696. data/vendor/local/share/doc/groonga/ja/html/news.html +1234 -384
  697. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +82 -82
  698. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +146 -154
  699. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +25 -33
  700. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +191 -199
  701. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +41 -49
  702. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +283 -291
  703. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +229 -237
  704. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +274 -280
  705. data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +475 -593
  706. data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +313 -390
  707. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +31 -39
  708. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  709. data/vendor/local/share/doc/groonga/ja/html/reference.html +208 -198
  710. data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +70 -78
  711. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +50 -57
  712. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +57 -72
  713. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +107 -139
  714. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +137 -173
  715. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +40 -52
  716. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +46 -54
  717. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +184 -244
  718. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +99 -131
  719. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +57 -77
  720. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +100 -120
  721. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +71 -95
  722. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +75 -99
  723. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +37 -45
  724. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +54 -70
  725. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +71 -95
  726. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_inspect.html +487 -0
  727. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +49 -65
  728. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +286 -352
  729. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +64 -84
  730. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +44 -56
  731. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +219 -299
  732. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +116 -156
  733. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +70 -93
  734. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +42 -54
  735. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +36 -48
  736. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -62
  737. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +94 -118
  738. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +39 -25
  739. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +15 -23
  740. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +15 -23
  741. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -32
  742. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +15 -23
  743. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +76 -84
  744. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +76 -84
  745. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +25 -33
  746. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +51 -59
  747. data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +20 -28
  748. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +21 -29
  749. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +27 -35
  750. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +101 -109
  751. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +39 -45
  752. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +84 -72
  753. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +30 -36
  754. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +104 -112
  755. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +271 -237
  756. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +100 -102
  757. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +37 -43
  758. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +41 -47
  759. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +38 -46
  760. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +39 -47
  761. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +39 -47
  762. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +34 -42
  763. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +70 -62
  764. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +30 -36
  765. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +42 -44
  766. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +59 -68
  767. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +300 -126
  768. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +212 -80
  769. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +42 -50
  770. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +40 -46
  771. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +41 -49
  772. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +57 -63
  773. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +32 -37
  774. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +30 -37
  775. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +246 -178
  776. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +51 -59
  777. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +479 -175
  778. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +718 -326
  779. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +34 -42
  780. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +145 -153
  781. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +78 -85
  782. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +40 -47
  783. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +36 -44
  784. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +360 -362
  785. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +221 -229
  786. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +47 -55
  787. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +32 -40
  788. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +33 -41
  789. data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +20 -28
  790. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +21 -29
  791. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +20 -28
  792. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +35 -43
  793. data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +42 -48
  794. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +57 -57
  795. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +36 -43
  796. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +36 -43
  797. data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +317 -325
  798. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +1246 -917
  799. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +50 -58
  800. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +77 -85
  801. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +109 -123
  802. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +29 -37
  803. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +131 -140
  804. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -49
  805. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +87 -95
  806. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +36 -44
  807. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +44 -52
  808. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +38 -46
  809. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +93 -100
  810. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +40 -47
  811. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +39 -45
  812. data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +36 -44
  813. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -27
  814. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +125 -107
  815. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +23 -29
  816. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +62 -62
  817. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +132 -140
  818. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +23 -31
  819. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +25 -33
  820. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +166 -174
  821. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +31 -39
  822. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +189 -165
  823. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +59 -64
  824. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +69 -77
  825. data/vendor/local/share/doc/groonga/ja/html/reference/functions/cast_loose.html +208 -0
  826. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +48 -54
  827. data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +37 -45
  828. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +115 -124
  829. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +66 -72
  830. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +55 -61
  831. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +69 -77
  832. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +60 -68
  833. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +54 -62
  834. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +85 -93
  835. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +54 -62
  836. data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +54 -62
  837. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +39 -47
  838. data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +35 -43
  839. data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +67 -75
  840. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +130 -138
  841. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +44 -51
  842. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +61 -69
  843. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +36 -44
  844. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +38 -46
  845. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +63 -71
  846. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +36 -44
  847. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day_of_week.html +276 -0
  848. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +36 -44
  849. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +35 -43
  850. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +35 -43
  851. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +35 -43
  852. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +35 -43
  853. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +36 -44
  854. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_find.html +353 -0
  855. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +39 -47
  856. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +52 -61
  857. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +38 -46
  858. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +38 -46
  859. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +200 -208
  860. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +375 -382
  861. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +39 -47
  862. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +125 -144
  863. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +36 -70
  864. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_auto.html +168 -0
  865. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc100.html +887 -0
  866. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc51.html +160 -0
  867. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +26 -34
  868. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +38 -46
  869. data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +41 -49
  870. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +42 -50
  871. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +20 -28
  872. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +68 -76
  873. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +178 -184
  874. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +38 -46
  875. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +38 -46
  876. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +39 -47
  877. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +63 -73
  878. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +43 -51
  879. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +130 -138
  880. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +72 -80
  881. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +68 -76
  882. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +76 -86
  883. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +129 -137
  884. data/vendor/local/share/doc/groonga/ja/html/reference/token_filter/summary.html +145 -0
  885. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +31 -215
  886. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_nfkc100.html +617 -0
  887. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stem.html +289 -0
  888. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stop_word.html +284 -0
  889. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizer/summary.html +233 -0
  890. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -1349
  891. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram.html +344 -0
  892. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank.html +219 -0
  893. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +237 -0
  894. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +267 -0
  895. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +287 -0
  896. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
  897. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +199 -0
  898. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +209 -0
  899. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit.html +344 -0
  900. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit_null.html +160 -0
  901. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_mecab.html +764 -0
  902. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_regexp.html +284 -0
  903. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_trigram.html +191 -0
  904. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_unigram.html +191 -0
  905. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +65 -73
  906. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +48 -56
  907. data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +29 -37
  908. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +37 -45
  909. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +37 -45
  910. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +37 -45
  911. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +37 -45
  912. data/vendor/local/share/doc/groonga/ja/html/search.html +13 -24
  913. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  914. data/vendor/local/share/doc/groonga/ja/html/server.html +15 -23
  915. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +22 -30
  916. data/vendor/local/share/doc/groonga/ja/html/server/http.html +17 -25
  917. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +82 -90
  918. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +15 -23
  919. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +15 -23
  920. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +16 -24
  921. data/vendor/local/share/doc/groonga/ja/html/server/package.html +99 -107
  922. data/vendor/local/share/doc/groonga/ja/html/spec.html +19 -27
  923. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +201 -209
  924. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +36 -36
  925. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +15 -23
  926. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +44 -48
  927. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +21 -29
  928. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +24 -29
  929. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +16 -24
  930. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +32 -40
  931. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +62 -70
  932. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -30
  933. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +77 -86
  934. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +15 -23
  935. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +56 -64
  936. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +84 -92
  937. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +20 -28
  938. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -26
  939. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +21 -29
  940. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +43 -51
  941. data/vendor/local/share/doc/pcre/AUTHORS +3 -3
  942. data/vendor/local/share/doc/pcre/ChangeLog +53 -0
  943. data/vendor/local/share/doc/pcre/LICENCE +3 -3
  944. data/vendor/local/share/doc/pcre/NEWS +6 -0
  945. data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +8 -7
  946. data/vendor/local/share/groonga/mruby/LEGAL +35 -35
  947. data/vendor/local/share/license/cv2pdb/{README → README.MD} +28 -10
  948. data/vendor/local/share/license/groonga-normalizer-mysql/README.md +14 -22
  949. data/vendor/local/share/license/lz4/LICENSE +2 -2
  950. data/vendor/local/share/license/mruby/AUTHORS +3 -0
  951. data/vendor/local/share/license/mruby/MITL +1 -1
  952. data/vendor/local/share/license/mruby/README.md +1 -1
  953. data/vendor/local/share/license/msgpack/README.md +5 -34
  954. data/vendor/local/share/license/pcre/LICENCE +3 -3
  955. data/vendor/local/share/man/man1/lz4.1 +221 -86
  956. data/vendor/local/share/man/man1/lz4c.1 +222 -32
  957. data/vendor/local/share/man/man1/lz4cat.1 +221 -30
  958. data/vendor/local/share/man/man1/unlz4.1 +223 -0
  959. metadata +231 -87
  960. data/lib/2.1/groonga.so +0 -0
  961. data/vendor/local/lib/groonga/plugins/expression_rewriters/optimizer.rb +0 -147
  962. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/options.rb +0 -14
  963. data/vendor/local/share/doc/groonga/en/html/_static/ajax-loader.gif +0 -0
  964. data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
  965. data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
  966. data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
  967. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  968. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  969. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  970. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  971. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +0 -808
  972. data/vendor/local/share/doc/groonga/ja/html/_static/ajax-loader.gif +0 -0
  973. data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
  974. data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
  975. data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
  976. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  977. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  978. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  979. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  980. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +0 -808
@@ -0,0 +1,259 @@
1
+
2
+
3
+ <!DOCTYPE html>
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
6
+ <head>
7
+ <meta charset="utf-8" />
8
+ <title>7.8.1. Summary &#8212; Groonga v9.0.2 documentation</title>
9
+ <link rel="stylesheet" href="../../_static/groonga.css" type="text/css" />
10
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
11
+
12
+ <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
13
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
14
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
15
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
16
+ <script type="text/javascript" src="../../_static/language_data.js"></script>
17
+
18
+ <link rel="shortcut icon" href="../../_static/favicon.ico"/>
19
+ <link rel="index" title="Index" href="../../genindex.html" />
20
+ <link rel="search" title="Search" href="../../search.html" />
21
+ <link rel="next" title="7.8.2. TokenBigram" href="../tokenizers/token_bigram.html" />
22
+ <link rel="prev" title="7.8. Tokenizers" href="../tokenizers.html" />
23
+ </head><body>
24
+ <div class="header">
25
+ <h1 class="title">
26
+ <a id="top-link" href="../../index.html">
27
+ <span class="project">groonga</span>
28
+ <span class="separator">-</span>
29
+ <span class="description">An open-source fulltext search engine and column store.</span>
30
+ </a>
31
+ </h1>
32
+
33
+ <div class="other-language-links">
34
+ <ul>
35
+ <li><a href="../../../../ja/html/reference/tokenizer/summary.html">日本語</a></li>
36
+ </ul>
37
+ </div>
38
+ </div>
39
+
40
+
41
+ <div class="related" role="navigation" aria-label="related navigation">
42
+ <h3>Navigation</h3>
43
+ <ul>
44
+ <li class="right" style="margin-right: 10px">
45
+ <a href="../../genindex.html" title="General Index"
46
+ accesskey="I">index</a></li>
47
+ <li class="right" >
48
+ <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
49
+ accesskey="N">next</a> |</li>
50
+ <li class="right" >
51
+ <a href="../tokenizers.html" title="7.8. Tokenizers"
52
+ accesskey="P">previous</a> |</li>
53
+ <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
54
+ <li class="nav-item nav-item-1"><a href="../../reference.html" >7. Reference manual</a> &#187;</li>
55
+ <li class="nav-item nav-item-2"><a href="../tokenizers.html" accesskey="U">7.8. Tokenizers</a> &#187;</li>
56
+ </ul>
57
+ </div>
58
+
59
+ <div class="document">
60
+ <div class="documentwrapper">
61
+ <div class="bodywrapper">
62
+ <div class="body" role="main">
63
+
64
+ <div class="section" id="summary">
65
+ <h1>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h1>
66
+ <p>Groonga has a tokenizer module that tokenizes text. It is used in
67
+ the following cases:</p>
68
+ <blockquote>
69
+ <div><ul>
70
+ <li><p>Indexing text</p>
71
+ <div class="figure align-center" id="id1">
72
+ <a class="reference internal image-reference" href="../../_images/used-when-indexing.png"><img alt="../../_images/used-when-indexing.png" src="../../_images/used-when-indexing.png" style="width: 80%;" /></a>
73
+ <p class="caption"><span class="caption-text">Tokenizer is used when indexing text.</span><a class="headerlink" href="#id1" title="Permalink to this image">¶</a></p>
74
+ </div>
75
+ </li>
76
+ <li><p>Searching by query</p>
77
+ <div class="figure align-center" id="id2">
78
+ <a class="reference internal image-reference" href="../../_images/used-when-searching.png"><img alt="../../_images/used-when-searching.png" src="../../_images/used-when-searching.png" style="width: 80%;" /></a>
79
+ <p class="caption"><span class="caption-text">Tokenizer is used when searching by query.</span><a class="headerlink" href="#id2" title="Permalink to this image">¶</a></p>
80
+ </div>
81
+ </li>
82
+ </ul>
83
+ </div></blockquote>
84
+ <p>Tokenizer is an important module for full-text search. You can change
85
+ trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
86
+ tokenizer.</p>
87
+ <p>Normally, <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> is a suitable tokenizer. If you don’t
88
+ know much about tokenizer, it’s recommended that you choose
89
+ <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a>.</p>
90
+ <p>You can try a tokenizer by <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> and
91
+ <a class="reference internal" href="../commands/table_tokenize.html"><span class="doc">table_tokenize</span></a>. Here is an example to
92
+ try <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> tokenizer by
93
+ <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a>:</p>
94
+ <p>Execution example:</p>
95
+ <div class="highlight-none notranslate"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
96
+ # [
97
+ # [
98
+ # 0,
99
+ # 1337566253.89858,
100
+ # 0.000355720520019531
101
+ # ],
102
+ # [
103
+ # {
104
+ # &quot;position&quot;: 0,
105
+ # &quot;force_prefix&quot;: false,
106
+ # &quot;value&quot;: &quot;He&quot;
107
+ # },
108
+ # {
109
+ # &quot;position&quot;: 1,
110
+ # &quot;force_prefix&quot;: false,
111
+ # &quot;value&quot;: &quot;el&quot;
112
+ # },
113
+ # {
114
+ # &quot;position&quot;: 2,
115
+ # &quot;force_prefix&quot;: false,
116
+ # &quot;value&quot;: &quot;ll&quot;
117
+ # },
118
+ # {
119
+ # &quot;position&quot;: 3,
120
+ # &quot;force_prefix&quot;: false,
121
+ # &quot;value&quot;: &quot;lo&quot;
122
+ # },
123
+ # {
124
+ # &quot;position&quot;: 4,
125
+ # &quot;force_prefix&quot;: false,
126
+ # &quot;value&quot;: &quot;o &quot;
127
+ # },
128
+ # {
129
+ # &quot;position&quot;: 5,
130
+ # &quot;force_prefix&quot;: false,
131
+ # &quot;value&quot;: &quot; W&quot;
132
+ # },
133
+ # {
134
+ # &quot;position&quot;: 6,
135
+ # &quot;force_prefix&quot;: false,
136
+ # &quot;value&quot;: &quot;Wo&quot;
137
+ # },
138
+ # {
139
+ # &quot;position&quot;: 7,
140
+ # &quot;force_prefix&quot;: false,
141
+ # &quot;value&quot;: &quot;or&quot;
142
+ # },
143
+ # {
144
+ # &quot;position&quot;: 8,
145
+ # &quot;force_prefix&quot;: false,
146
+ # &quot;value&quot;: &quot;rl&quot;
147
+ # },
148
+ # {
149
+ # &quot;position&quot;: 9,
150
+ # &quot;force_prefix&quot;: false,
151
+ # &quot;value&quot;: &quot;ld&quot;
152
+ # },
153
+ # {
154
+ # &quot;position&quot;: 10,
155
+ # &quot;force_prefix&quot;: false,
156
+ # &quot;value&quot;: &quot;d&quot;
157
+ # }
158
+ # ]
159
+ # ]
160
+ </pre></div>
161
+ </div>
162
+ <p>“tokenize” is the process that extracts zero or more tokens from a
163
+ text. There are some “tokenize” methods.</p>
164
+ <p>For example, <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
165
+ bigram tokenize method:</p>
166
+ <blockquote>
167
+ <div><ul class="simple">
168
+ <li><p><code class="docutils literal notranslate"><span class="pre">He</span></code></p></li>
169
+ <li><p><code class="docutils literal notranslate"><span class="pre">el</span></code></p></li>
170
+ <li><p><code class="docutils literal notranslate"><span class="pre">ll</span></code></p></li>
171
+ <li><p><code class="docutils literal notranslate"><span class="pre">lo</span></code></p></li>
172
+ <li><p><code class="docutils literal notranslate"><span class="pre">o_</span></code> (<code class="docutils literal notranslate"><span class="pre">_</span></code> means a white-space)</p></li>
173
+ <li><p><code class="docutils literal notranslate"><span class="pre">_W</span></code> (<code class="docutils literal notranslate"><span class="pre">_</span></code> means a white-space)</p></li>
174
+ <li><p><code class="docutils literal notranslate"><span class="pre">Wo</span></code></p></li>
175
+ <li><p><code class="docutils literal notranslate"><span class="pre">or</span></code></p></li>
176
+ <li><p><code class="docutils literal notranslate"><span class="pre">rl</span></code></p></li>
177
+ <li><p><code class="docutils literal notranslate"><span class="pre">ld</span></code></p></li>
178
+ </ul>
179
+ </div></blockquote>
180
+ <p>In the above example, 10 tokens are extracted from one text <code class="docutils literal notranslate"><span class="pre">Hello</span>
181
+ <span class="pre">World</span></code>.</p>
182
+ <p>For example, <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
183
+ white-space-separate tokenize method:</p>
184
+ <blockquote>
185
+ <div><ul class="simple">
186
+ <li><p><code class="docutils literal notranslate"><span class="pre">Hello</span></code></p></li>
187
+ <li><p><code class="docutils literal notranslate"><span class="pre">World</span></code></p></li>
188
+ </ul>
189
+ </div></blockquote>
190
+ <p>In the above example, 2 tokens are extracted from one text <code class="docutils literal notranslate"><span class="pre">Hello</span>
191
+ <span class="pre">World</span></code>.</p>
192
+ <p>A token is used as a search key. You can find indexed documents only by
193
+ tokens that are extracted by the tokenize method in use. For example, you
194
+ can find <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal notranslate"><span class="pre">ll</span></code> with bigram tokenize method but you
195
+ can’t find <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal notranslate"><span class="pre">ll</span></code> with white-space-separate tokenize
196
+ method, because the white-space-separate tokenize method doesn’t extract the
197
+ <code class="docutils literal notranslate"><span class="pre">ll</span></code> token. It just extracts <code class="docutils literal notranslate"><span class="pre">Hello</span></code> and <code class="docutils literal notranslate"><span class="pre">World</span></code> tokens.</p>
198
+ <p>In general, tokenize method that generates small tokens increases
199
+ recall but decreases precision. Tokenize method that generates large
200
+ tokens increases precision but decreases recall.</p>
201
+ <p>For example, we can find <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> and <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal notranslate"><span class="pre">or</span></code> with
202
+ bigram tokenize method. <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> is a noise for people who
203
+ want to search for “logical or”. It means that precision is
204
+ decreased. But recall is increased.</p>
205
+ <p>We can find only <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal notranslate"><span class="pre">or</span></code> with white-space-separate
206
+ tokenize method. Because <code class="docutils literal notranslate"><span class="pre">World</span></code> is tokenized to one token <code class="docutils literal notranslate"><span class="pre">World</span></code>
207
+ with white-space-separate tokenize method. It means that precision is
208
+ increased for people who want to search for “logical or”. But recall is
209
+ decreased because <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> that contains <code class="docutils literal notranslate"><span class="pre">or</span></code> isn’t found.</p>
210
+ </div>
211
+
212
+
213
+ </div>
214
+ </div>
215
+ </div>
216
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
217
+ <div class="sphinxsidebarwrapper">
218
+ <h4>Previous topic</h4>
219
+ <p class="topless"><a href="../tokenizers.html"
220
+ title="previous chapter">7.8. Tokenizers</a></p>
221
+ <h4>Next topic</h4>
222
+ <p class="topless"><a href="../tokenizers/token_bigram.html"
223
+ title="next chapter">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></p>
224
+ <div id="searchbox" style="display: none" role="search">
225
+ <h3>Quick search</h3>
226
+ <div class="searchformwrapper">
227
+ <form class="search" action="../../search.html" method="get">
228
+ <input type="text" name="q" />
229
+ <input type="submit" value="Go" />
230
+ </form>
231
+ </div>
232
+ </div>
233
+ <script type="text/javascript">$('#searchbox').show(0);</script>
234
+ </div>
235
+ </div>
236
+ <div class="clearer"></div>
237
+ </div>
238
+ <div class="related" role="navigation" aria-label="related navigation">
239
+ <h3>Navigation</h3>
240
+ <ul>
241
+ <li class="right" style="margin-right: 10px">
242
+ <a href="../../genindex.html" title="General Index"
243
+ >index</a></li>
244
+ <li class="right" >
245
+ <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
246
+ >next</a> |</li>
247
+ <li class="right" >
248
+ <a href="../tokenizers.html" title="7.8. Tokenizers"
249
+ >previous</a> |</li>
250
+ <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
251
+ <li class="nav-item nav-item-1"><a href="../../reference.html" >7. Reference manual</a> &#187;</li>
252
+ <li class="nav-item nav-item-2"><a href="../tokenizers.html" >7.8. Tokenizers</a> &#187;</li>
253
+ </ul>
254
+ </div>
255
+ <div class="footer" role="contentinfo">
256
+ &#169; Copyright 2009-2019, Brazil, Inc.
257
+ </div>
258
+ </body>
259
+ </html>
@@ -1,34 +1,26 @@
1
1
 
2
2
 
3
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <!DOCTYPE html>
5
4
 
6
5
  <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
7
6
  <head>
8
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
- <title>7.8. Tokenizers &#8212; Groonga v7.1.0-73-g6d02cfa documentation</title>
7
+ <meta charset="utf-8" />
8
+ <title>7.8. Tokenizers &#8212; Groonga v9.0.2 documentation</title>
10
9
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
11
10
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
12
- <script type="text/javascript">
13
- var DOCUMENTATION_OPTIONS = {
14
- URL_ROOT: '../',
15
- VERSION: '7.1.0-73-g6d02cfa',
16
- COLLAPSE_INDEX: false,
17
- FILE_SUFFIX: '.html',
18
- HAS_SOURCE: false,
19
- SOURCELINK_SUFFIX: '.txt'
20
- };
21
- </script>
11
+
12
+ <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
22
13
  <script type="text/javascript" src="../_static/jquery.js"></script>
23
14
  <script type="text/javascript" src="../_static/underscore.js"></script>
24
15
  <script type="text/javascript" src="../_static/doctools.js"></script>
16
+ <script type="text/javascript" src="../_static/language_data.js"></script>
17
+
25
18
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
26
19
  <link rel="index" title="Index" href="../genindex.html" />
27
20
  <link rel="search" title="Search" href="../search.html" />
28
- <link rel="next" title="7.9. Token filters" href="token_filters.html" />
29
- <link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
30
- </head>
31
- <body>
21
+ <link rel="next" title="7.8.1. Summary" href="tokenizer/summary.html" />
22
+ <link rel="prev" title="7.7.2.3. NormalizerNFKC51" href="normalizers/normalizer_nfkc51.html" />
23
+ </head><body>
32
24
  <div class="header">
33
25
  <h1 class="title">
34
26
  <a id="top-link" href="../index.html">
@@ -53,12 +45,12 @@
53
45
  <a href="../genindex.html" title="General Index"
54
46
  accesskey="I">index</a></li>
55
47
  <li class="right" >
56
- <a href="token_filters.html" title="7.9. Token filters"
48
+ <a href="tokenizer/summary.html" title="7.8.1. Summary"
57
49
  accesskey="N">next</a> |</li>
58
50
  <li class="right" >
59
- <a href="normalizers.html" title="7.7. Normalizers"
51
+ <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
60
52
  accesskey="P">previous</a> |</li>
61
- <li class="nav-item nav-item-0"><a href="../index.html">Groonga v7.1.0-73-g6d02cfa documentation</a> &#187;</li>
53
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
62
54
  <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. Reference manual</a> &#187;</li>
63
55
  </ul>
64
56
  </div>
@@ -70,1403 +62,24 @@
70
62
 
71
63
  <div class="section" id="tokenizers">
72
64
  <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
73
- <div class="section" id="summary">
74
- <h2>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h2>
75
- <p>Groonga has a tokenizer module that tokenizes text. It is used in
76
- the following cases:</p>
77
- <blockquote>
78
- <div><ul>
79
- <li><p class="first">Indexing text</p>
80
- <div class="figure align-center" id="id1">
81
- <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
82
- <p class="caption"><span class="caption-text">Tokenizer is used when indexing text.</span></p>
83
- </div>
84
- </li>
85
- <li><p class="first">Searching by query</p>
86
- <div class="figure align-center" id="id2">
87
- <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
88
- <p class="caption"><span class="caption-text">Tokenizer is used when searching by query.</span></p>
89
- </div>
90
- </li>
91
- </ul>
92
- </div></blockquote>
93
- <p>Tokenizer is an important module for full-text search. You can change
94
- trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
95
- tokenizer.</p>
96
- <p>Normally, <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> is a suitable tokenizer. If you don't
97
- know much about tokenizer, it's recommended that you choose
98
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>.</p>
99
- <p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> and
100
- <a class="reference internal" href="commands/table_tokenize.html"><span class="doc">table_tokenize</span></a>. Here is an example to
101
- try <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> tokenizer by
102
- <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a>:</p>
103
- <p>Execution example:</p>
104
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
105
- # [
106
- # [
107
- # 0,
108
- # 1337566253.89858,
109
- # 0.000355720520019531
110
- # ],
111
- # [
112
- # {
113
- # &quot;position&quot;: 0,
114
- # &quot;force_prefix&quot;: false,
115
- # &quot;value&quot;: &quot;He&quot;
116
- # },
117
- # {
118
- # &quot;position&quot;: 1,
119
- # &quot;force_prefix&quot;: false,
120
- # &quot;value&quot;: &quot;el&quot;
121
- # },
122
- # {
123
- # &quot;position&quot;: 2,
124
- # &quot;force_prefix&quot;: false,
125
- # &quot;value&quot;: &quot;ll&quot;
126
- # },
127
- # {
128
- # &quot;position&quot;: 3,
129
- # &quot;force_prefix&quot;: false,
130
- # &quot;value&quot;: &quot;lo&quot;
131
- # },
132
- # {
133
- # &quot;position&quot;: 4,
134
- # &quot;force_prefix&quot;: false,
135
- # &quot;value&quot;: &quot;o &quot;
136
- # },
137
- # {
138
- # &quot;position&quot;: 5,
139
- # &quot;force_prefix&quot;: false,
140
- # &quot;value&quot;: &quot; W&quot;
141
- # },
142
- # {
143
- # &quot;position&quot;: 6,
144
- # &quot;force_prefix&quot;: false,
145
- # &quot;value&quot;: &quot;Wo&quot;
146
- # },
147
- # {
148
- # &quot;position&quot;: 7,
149
- # &quot;force_prefix&quot;: false,
150
- # &quot;value&quot;: &quot;or&quot;
151
- # },
152
- # {
153
- # &quot;position&quot;: 8,
154
- # &quot;force_prefix&quot;: false,
155
- # &quot;value&quot;: &quot;rl&quot;
156
- # },
157
- # {
158
- # &quot;position&quot;: 9,
159
- # &quot;force_prefix&quot;: false,
160
- # &quot;value&quot;: &quot;ld&quot;
161
- # },
162
- # {
163
- # &quot;position&quot;: 10,
164
- # &quot;force_prefix&quot;: false,
165
- # &quot;value&quot;: &quot;d&quot;
166
- # }
167
- # ]
168
- # ]
169
- </pre></div>
170
- </div>
171
- </div>
172
- <div class="section" id="what-is-tokenize">
173
- <h2>7.8.2. What is &quot;tokenize&quot;?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
174
- <p>&quot;tokenize&quot; is the process that extracts zero or more tokens from a
175
- text. There are some &quot;tokenize&quot; methods.</p>
176
- <p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
177
- bigram tokenize method:</p>
178
- <blockquote>
179
- <div><ul class="simple">
180
- <li><code class="docutils literal"><span class="pre">He</span></code></li>
181
- <li><code class="docutils literal"><span class="pre">el</span></code></li>
182
- <li><code class="docutils literal"><span class="pre">ll</span></code></li>
183
- <li><code class="docutils literal"><span class="pre">lo</span></code></li>
184
- <li><code class="docutils literal"><span class="pre">o_</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
185
- <li><code class="docutils literal"><span class="pre">_W</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
186
- <li><code class="docutils literal"><span class="pre">Wo</span></code></li>
187
- <li><code class="docutils literal"><span class="pre">or</span></code></li>
188
- <li><code class="docutils literal"><span class="pre">rl</span></code></li>
189
- <li><code class="docutils literal"><span class="pre">ld</span></code></li>
190
- </ul>
191
- </div></blockquote>
192
- <p>In the above example, 10 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
193
- <span class="pre">World</span></code>.</p>
194
- <p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
195
- white-space-separate tokenize method:</p>
196
- <blockquote>
197
- <div><ul class="simple">
198
- <li><code class="docutils literal"><span class="pre">Hello</span></code></li>
199
- <li><code class="docutils literal"><span class="pre">World</span></code></li>
200
- </ul>
201
- </div></blockquote>
202
- <p>In the above example, 2 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
203
- <span class="pre">World</span></code>.</p>
204
- <p>Token is used as search key. You can find indexed documents only by
205
- tokens that are extracted by used tokenize method. For example, you
206
- can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with bigram tokenize method but you
207
- can't find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with white-space-separate tokenize
208
- method. Because white-space-separate tokenize method doesn't extract
209
- <code class="docutils literal"><span class="pre">ll</span></code> token. It just extracts <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code> tokens.</p>
210
- <p>In general, tokenize method that generates small tokens increases
211
- recall but decreases precision. Tokenize method that generates large
212
- tokens increases precision but decreases recall.</p>
213
- <p>For example, we can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> and <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with
214
- bigram tokenize method. <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is a noise for people who
215
- want to search &quot;logical and&quot;. It means that precision is
216
- decreased. But recall is increased.</p>
217
- <p>We can find only <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with white-space-separate
218
- tokenize method. Because <code class="docutils literal"><span class="pre">World</span></code> is tokenized to one token <code class="docutils literal"><span class="pre">World</span></code>
219
- with white-space-separate tokenize method. It means that precision is
220
- increased for people who want to search &quot;logical and&quot;. But recall is
221
- decreased because <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> that contains <code class="docutils literal"><span class="pre">or</span></code> isn't found.</p>
222
- </div>
223
- <div class="section" id="built-in-tokenizsers">
224
- <h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
225
- <p>Here is a list of built-in tokenizers:</p>
226
- <blockquote>
227
- <div><ul class="simple">
228
- <li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
229
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
230
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
231
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
232
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
233
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
234
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></li>
235
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></li>
236
- <li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
237
- <li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
238
- <li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
239
- <li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
240
- <li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
241
- <li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
242
- </ul>
243
- </div></blockquote>
244
- <div class="section" id="tokenbigram">
245
- <span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
246
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> is a bigram based tokenizer. It's recommended to use
247
- this tokenizer for most cases.</p>
248
- <p>Bigram tokenize method tokenizes a text to two adjacent characters
249
- tokens. For example, <code class="docutils literal"><span class="pre">Hello</span></code> is tokenized to the following tokens:</p>
250
- <blockquote>
251
- <div><ul class="simple">
252
- <li><code class="docutils literal"><span class="pre">He</span></code></li>
253
- <li><code class="docutils literal"><span class="pre">el</span></code></li>
254
- <li><code class="docutils literal"><span class="pre">ll</span></code></li>
255
- <li><code class="docutils literal"><span class="pre">lo</span></code></li>
256
- </ul>
257
- </div></blockquote>
258
- <p>Bigram tokenize method is good for recall because you can find all
259
- texts by a query that consists of two or more characters.</p>
260
- <p>In general, you can't find all texts by query consists of one
261
- character because one character token doesn't exist. But you can find
262
- all texts by query consists of one character in Groonga. Because
263
- Groonga finds tokens that start with the query by predictive search. For
264
- example, Groonga can find <code class="docutils literal"><span class="pre">ll</span></code> and <code class="docutils literal"><span class="pre">lo</span></code> tokens by <code class="docutils literal"><span class="pre">l</span></code> query.</p>
265
- <p>Bigram tokenize method isn't good for precision because you can find
266
- texts that include the query within a word. For example, you can find <code class="docutils literal"><span class="pre">world</span></code>
267
- by <code class="docutils literal"><span class="pre">or</span></code>. This is more sensitive for ASCII only languages rather than
268
- non-ASCII languages. <code class="docutils literal"><span class="pre">TokenBigram</span></code> has solution for this problem
269
- described in the below.</p>
270
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior is different when it works with any
271
- <a class="reference internal" href="normalizers.html"><span class="doc">Normalizers</span></a>.</p>
272
- <p>If no normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses pure bigram (all tokens
273
- except the last token have two characters) tokenize method:</p>
274
- <p>Execution example:</p>
275
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
276
- # [
277
- # [
278
- # 0,
279
- # 1337566253.89858,
280
- # 0.000355720520019531
281
- # ],
282
- # [
283
- # {
284
- # &quot;position&quot;: 0,
285
- # &quot;force_prefix&quot;: false,
286
- # &quot;value&quot;: &quot;He&quot;
287
- # },
288
- # {
289
- # &quot;position&quot;: 1,
290
- # &quot;force_prefix&quot;: false,
291
- # &quot;value&quot;: &quot;el&quot;
292
- # },
293
- # {
294
- # &quot;position&quot;: 2,
295
- # &quot;force_prefix&quot;: false,
296
- # &quot;value&quot;: &quot;ll&quot;
297
- # },
298
- # {
299
- # &quot;position&quot;: 3,
300
- # &quot;force_prefix&quot;: false,
301
- # &quot;value&quot;: &quot;lo&quot;
302
- # },
303
- # {
304
- # &quot;position&quot;: 4,
305
- # &quot;force_prefix&quot;: false,
306
- # &quot;value&quot;: &quot;o &quot;
307
- # },
308
- # {
309
- # &quot;position&quot;: 5,
310
- # &quot;force_prefix&quot;: false,
311
- # &quot;value&quot;: &quot; W&quot;
312
- # },
313
- # {
314
- # &quot;position&quot;: 6,
315
- # &quot;force_prefix&quot;: false,
316
- # &quot;value&quot;: &quot;Wo&quot;
317
- # },
318
- # {
319
- # &quot;position&quot;: 7,
320
- # &quot;force_prefix&quot;: false,
321
- # &quot;value&quot;: &quot;or&quot;
322
- # },
323
- # {
324
- # &quot;position&quot;: 8,
325
- # &quot;force_prefix&quot;: false,
326
- # &quot;value&quot;: &quot;rl&quot;
327
- # },
328
- # {
329
- # &quot;position&quot;: 9,
330
- # &quot;force_prefix&quot;: false,
331
- # &quot;value&quot;: &quot;ld&quot;
332
- # },
333
- # {
334
- # &quot;position&quot;: 10,
335
- # &quot;force_prefix&quot;: false,
336
- # &quot;value&quot;: &quot;d&quot;
337
- # }
338
- # ]
339
- # ]
340
- </pre></div>
341
- </div>
342
- <p>If normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses white-space-separate like
343
- tokenize method for ASCII characters. <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram
344
- tokenize method for non-ASCII characters.</p>
345
- <p>You may be confused with this combined behavior. But it's reasonable
346
- for most use cases such as English text (only ASCII characters) and
347
- Japanese text (ASCII and non-ASCII characters are mixed).</p>
348
- <p>Most languages that consist of only ASCII characters use white-space for
349
- word separator. White-space-separate tokenize method is suitable for
350
- the case.</p>
351
- <p>Languages that consist of non-ASCII characters don't use white-space for
352
- word separator. Bigram tokenize method is suitable for the case.</p>
353
- <p>Mixed tokenize method is suitable for mixed language case.</p>
354
- <p>If you want to use bigram tokenize method for ASCII character, see
355
- <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> type tokenizers such as
356
- <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span class="std std-ref">TokenBigramSplitSymbolAlpha</span></a>.</p>
357
- <p>Let's confirm <code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior by example.</p>
358
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses one or more white-spaces as token delimiter for
359
- ASCII characters:</p>
360
- <p>Execution example:</p>
361
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
362
- # [
363
- # [
364
- # 0,
365
- # 1337566253.89858,
366
- # 0.000355720520019531
367
- # ],
368
- # [
369
- # {
370
- # &quot;position&quot;: 0,
371
- # &quot;force_prefix&quot;: false,
372
- # &quot;value&quot;: &quot;hello&quot;
373
- # },
374
- # {
375
- # &quot;position&quot;: 1,
376
- # &quot;force_prefix&quot;: false,
377
- # &quot;value&quot;: &quot;world&quot;
378
- # }
379
- # ]
380
- # ]
381
- </pre></div>
382
- </div>
383
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses character type change as token delimiter for
384
- ASCII characters. Character type is one of the following:</p>
385
- <blockquote>
386
- <div><ul class="simple">
387
- <li>Alphabet</li>
388
- <li>Digit</li>
389
- <li>Symbol (such as <code class="docutils literal"><span class="pre">(</span></code>, <code class="docutils literal"><span class="pre">)</span></code> and <code class="docutils literal"><span class="pre">!</span></code>)</li>
390
- <li>Hiragana</li>
391
- <li>Katakana</li>
392
- <li>Kanji</li>
393
- <li>Others</li>
394
- </ul>
395
- </div></blockquote>
396
- <p>The following example shows two token delimiters:</p>
397
- <blockquote>
398
- <div><ul class="simple">
399
- <li>at between <code class="docutils literal"><span class="pre">100</span></code> (digits) and <code class="docutils literal"><span class="pre">cents</span></code> (alphabets)</li>
400
- <li>at between <code class="docutils literal"><span class="pre">cents</span></code> (alphabets) and <code class="docutils literal"><span class="pre">!!!</span></code> (symbols)</li>
401
- </ul>
402
- </div></blockquote>
403
- <p>Execution example:</p>
404
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;100cents!!!&quot; NormalizerAuto
405
- # [
406
- # [
407
- # 0,
408
- # 1337566253.89858,
409
- # 0.000355720520019531
410
- # ],
411
- # [
412
- # {
413
- # &quot;position&quot;: 0,
414
- # &quot;force_prefix&quot;: false,
415
- # &quot;value&quot;: &quot;100&quot;
416
- # },
417
- # {
418
- # &quot;position&quot;: 1,
419
- # &quot;force_prefix&quot;: false,
420
- # &quot;value&quot;: &quot;cents&quot;
421
- # },
422
- # {
423
- # &quot;position&quot;: 2,
424
- # &quot;force_prefix&quot;: false,
425
- # &quot;value&quot;: &quot;!!!&quot;
426
- # }
427
- # ]
428
- # ]
429
- </pre></div>
430
- </div>
431
- <p>Here is an example that <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram tokenize method
432
- for non-ASCII characters.</p>
433
- <p>Execution example:</p>
434
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
435
- # [
436
- # [
437
- # 0,
438
- # 1337566253.89858,
439
- # 0.000355720520019531
440
- # ],
441
- # [
442
- # {
443
- # &quot;position&quot;: 0,
444
- # &quot;force_prefix&quot;: false,
445
- # &quot;value&quot;: &quot;日本&quot;
446
- # },
447
- # {
448
- # &quot;position&quot;: 1,
449
- # &quot;force_prefix&quot;: false,
450
- # &quot;value&quot;: &quot;本語&quot;
451
- # },
452
- # {
453
- # &quot;position&quot;: 2,
454
- # &quot;force_prefix&quot;: false,
455
- # &quot;value&quot;: &quot;語の&quot;
456
- # },
457
- # {
458
- # &quot;position&quot;: 3,
459
- # &quot;force_prefix&quot;: false,
460
- # &quot;value&quot;: &quot;の勉&quot;
461
- # },
462
- # {
463
- # &quot;position&quot;: 4,
464
- # &quot;force_prefix&quot;: false,
465
- # &quot;value&quot;: &quot;勉強&quot;
466
- # },
467
- # {
468
- # &quot;position&quot;: 5,
469
- # &quot;force_prefix&quot;: false,
470
- # &quot;value&quot;: &quot;強&quot;
471
- # }
472
- # ]
473
- # ]
474
- </pre></div>
475
- </div>
476
- </div>
477
- <div class="section" id="tokenbigramsplitsymbol">
478
- <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
479
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The
480
- difference between them is symbol handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code>
481
- tokenizes symbols by bigram tokenize method:</p>
482
- <p>Execution example:</p>
483
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
484
- # [
485
- # [
486
- # 0,
487
- # 1337566253.89858,
488
- # 0.000355720520019531
489
- # ],
490
- # [
491
- # {
492
- # &quot;position&quot;: 0,
493
- # &quot;force_prefix&quot;: false,
494
- # &quot;value&quot;: &quot;100&quot;
495
- # },
496
- # {
497
- # &quot;position&quot;: 1,
498
- # &quot;force_prefix&quot;: false,
499
- # &quot;value&quot;: &quot;cents&quot;
500
- # },
501
- # {
502
- # &quot;position&quot;: 2,
503
- # &quot;force_prefix&quot;: false,
504
- # &quot;value&quot;: &quot;!!&quot;
505
- # },
506
- # {
507
- # &quot;position&quot;: 3,
508
- # &quot;force_prefix&quot;: false,
509
- # &quot;value&quot;: &quot;!!&quot;
510
- # },
511
- # {
512
- # &quot;position&quot;: 4,
513
- # &quot;force_prefix&quot;: false,
514
- # &quot;value&quot;: &quot;!&quot;
515
- # }
516
- # ]
517
- # ]
518
- </pre></div>
519
- </div>
520
- </div>
521
- <div class="section" id="tokenbigramsplitsymbolalpha">
522
- <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
523
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The
524
- difference between them is symbol and alphabet
525
- handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> tokenizes symbols and
526
- alphabets by bigram tokenize method:</p>
527
- <p>Execution example:</p>
528
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
529
- # [
530
- # [
531
- # 0,
532
- # 1337566253.89858,
533
- # 0.000355720520019531
534
- # ],
535
- # [
536
- # {
537
- # &quot;position&quot;: 0,
538
- # &quot;force_prefix&quot;: false,
539
- # &quot;value&quot;: &quot;100&quot;
540
- # },
541
- # {
542
- # &quot;position&quot;: 1,
543
- # &quot;force_prefix&quot;: false,
544
- # &quot;value&quot;: &quot;ce&quot;
545
- # },
546
- # {
547
- # &quot;position&quot;: 2,
548
- # &quot;force_prefix&quot;: false,
549
- # &quot;value&quot;: &quot;en&quot;
550
- # },
551
- # {
552
- # &quot;position&quot;: 3,
553
- # &quot;force_prefix&quot;: false,
554
- # &quot;value&quot;: &quot;nt&quot;
555
- # },
556
- # {
557
- # &quot;position&quot;: 4,
558
- # &quot;force_prefix&quot;: false,
559
- # &quot;value&quot;: &quot;ts&quot;
560
- # },
561
- # {
562
- # &quot;position&quot;: 5,
563
- # &quot;force_prefix&quot;: false,
564
- # &quot;value&quot;: &quot;s!&quot;
565
- # },
566
- # {
567
- # &quot;position&quot;: 6,
568
- # &quot;force_prefix&quot;: false,
569
- # &quot;value&quot;: &quot;!!&quot;
570
- # },
571
- # {
572
- # &quot;position&quot;: 7,
573
- # &quot;force_prefix&quot;: false,
574
- # &quot;value&quot;: &quot;!!&quot;
575
- # },
576
- # {
577
- # &quot;position&quot;: 8,
578
- # &quot;force_prefix&quot;: false,
579
- # &quot;value&quot;: &quot;!&quot;
580
- # }
581
- # ]
582
- # ]
583
- </pre></div>
584
- </div>
585
- </div>
586
- <div class="section" id="tokenbigramsplitsymbolalphadigit">
587
- <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
588
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> is similar to
589
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The difference between them is symbol, alphabet
590
- and digit handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> tokenizes
591
- symbols, alphabets and digits by bigram tokenize method. It means that
592
- all characters are tokenized by bigram tokenize method:</p>
593
- <p>Execution example:</p>
594
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
595
- # [
596
- # [
597
- # 0,
598
- # 1337566253.89858,
599
- # 0.000355720520019531
600
- # ],
601
- # [
602
- # {
603
- # &quot;position&quot;: 0,
604
- # &quot;force_prefix&quot;: false,
605
- # &quot;value&quot;: &quot;10&quot;
606
- # },
607
- # {
608
- # &quot;position&quot;: 1,
609
- # &quot;force_prefix&quot;: false,
610
- # &quot;value&quot;: &quot;00&quot;
611
- # },
612
- # {
613
- # &quot;position&quot;: 2,
614
- # &quot;force_prefix&quot;: false,
615
- # &quot;value&quot;: &quot;0c&quot;
616
- # },
617
- # {
618
- # &quot;position&quot;: 3,
619
- # &quot;force_prefix&quot;: false,
620
- # &quot;value&quot;: &quot;ce&quot;
621
- # },
622
- # {
623
- # &quot;position&quot;: 4,
624
- # &quot;force_prefix&quot;: false,
625
- # &quot;value&quot;: &quot;en&quot;
626
- # },
627
- # {
628
- # &quot;position&quot;: 5,
629
- # &quot;force_prefix&quot;: false,
630
- # &quot;value&quot;: &quot;nt&quot;
631
- # },
632
- # {
633
- # &quot;position&quot;: 6,
634
- # &quot;force_prefix&quot;: false,
635
- # &quot;value&quot;: &quot;ts&quot;
636
- # },
637
- # {
638
- # &quot;position&quot;: 7,
639
- # &quot;force_prefix&quot;: false,
640
- # &quot;value&quot;: &quot;s!&quot;
641
- # },
642
- # {
643
- # &quot;position&quot;: 8,
644
- # &quot;force_prefix&quot;: false,
645
- # &quot;value&quot;: &quot;!!&quot;
646
- # },
647
- # {
648
- # &quot;position&quot;: 9,
649
- # &quot;force_prefix&quot;: false,
650
- # &quot;value&quot;: &quot;!!&quot;
651
- # },
652
- # {
653
- # &quot;position&quot;: 10,
654
- # &quot;force_prefix&quot;: false,
655
- # &quot;value&quot;: &quot;!&quot;
656
- # }
657
- # ]
658
- # ]
659
- </pre></div>
660
- </div>
661
- </div>
662
- <div class="section" id="tokenbigramignoreblank">
663
- <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
664
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The
665
- difference between them is blank handling. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>
666
- ignores white-spaces in continuous symbols and non-ASCII characters.</p>
667
- <p>You can find difference of them by <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
668
- has symbols and non-ASCII characters.</p>
669
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
670
- <p>Execution example:</p>
671
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
672
- # [
673
- # [
674
- # 0,
675
- # 1337566253.89858,
676
- # 0.000355720520019531
677
- # ],
678
- # [
679
- # {
680
- # &quot;position&quot;: 0,
681
- # &quot;force_prefix&quot;: false,
682
- # &quot;value&quot;: &quot;日&quot;
683
- # },
684
- # {
685
- # &quot;position&quot;: 1,
686
- # &quot;force_prefix&quot;: false,
687
- # &quot;value&quot;: &quot;本&quot;
688
- # },
689
- # {
690
- # &quot;position&quot;: 2,
691
- # &quot;force_prefix&quot;: false,
692
- # &quot;value&quot;: &quot;語&quot;
693
- # },
694
- # {
695
- # &quot;position&quot;: 3,
696
- # &quot;force_prefix&quot;: false,
697
- # &quot;value&quot;: &quot;!&quot;
698
- # },
699
- # {
700
- # &quot;position&quot;: 4,
701
- # &quot;force_prefix&quot;: false,
702
- # &quot;value&quot;: &quot;!&quot;
703
- # },
704
- # {
705
- # &quot;position&quot;: 5,
706
- # &quot;force_prefix&quot;: false,
707
- # &quot;value&quot;: &quot;!&quot;
708
- # }
709
- # ]
710
- # ]
711
- </pre></div>
712
- </div>
713
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>:</p>
714
- <p>Execution example:</p>
715
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
716
- # [
717
- # [
718
- # 0,
719
- # 1337566253.89858,
720
- # 0.000355720520019531
721
- # ],
722
- # [
723
- # {
724
- # &quot;position&quot;: 0,
725
- # &quot;force_prefix&quot;: false,
726
- # &quot;value&quot;: &quot;日本&quot;
727
- # },
728
- # {
729
- # &quot;position&quot;: 1,
730
- # &quot;force_prefix&quot;: false,
731
- # &quot;value&quot;: &quot;本語&quot;
732
- # },
733
- # {
734
- # &quot;position&quot;: 2,
735
- # &quot;force_prefix&quot;: false,
736
- # &quot;value&quot;: &quot;語&quot;
737
- # },
738
- # {
739
- # &quot;position&quot;: 3,
740
- # &quot;force_prefix&quot;: false,
741
- # &quot;value&quot;: &quot;!!!&quot;
742
- # }
743
- # ]
744
- # ]
745
- </pre></div>
746
- </div>
747
- </div>
748
- <div class="section" id="tokenbigramignoreblanksplitsymbol">
749
- <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
750
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> is similar to
751
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The differences between them are the following:</p>
752
- <blockquote>
753
- <div><ul class="simple">
754
- <li>Blank handling</li>
755
- <li>Symbol handling</li>
756
- </ul>
757
- </div></blockquote>
758
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> ignores white-spaces in
759
- continuous symbols and non-ASCII characters.</p>
760
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> tokenizes symbols by bigram
761
- tokenize method.</p>
762
- <p>You can see the difference between them with the <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
763
- has symbols and non-ASCII characters.</p>
764
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
765
- <p>Execution example:</p>
766
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
767
- # [
768
- # [
769
- # 0,
770
- # 1337566253.89858,
771
- # 0.000355720520019531
772
- # ],
773
- # [
774
- # {
775
- # &quot;position&quot;: 0,
776
- # &quot;force_prefix&quot;: false,
777
- # &quot;value&quot;: &quot;日&quot;
778
- # },
779
- # {
780
- # &quot;position&quot;: 1,
781
- # &quot;force_prefix&quot;: false,
782
- # &quot;value&quot;: &quot;本&quot;
783
- # },
784
- # {
785
- # &quot;position&quot;: 2,
786
- # &quot;force_prefix&quot;: false,
787
- # &quot;value&quot;: &quot;語&quot;
788
- # },
789
- # {
790
- # &quot;position&quot;: 3,
791
- # &quot;force_prefix&quot;: false,
792
- # &quot;value&quot;: &quot;!&quot;
793
- # },
794
- # {
795
- # &quot;position&quot;: 4,
796
- # &quot;force_prefix&quot;: false,
797
- # &quot;value&quot;: &quot;!&quot;
798
- # },
799
- # {
800
- # &quot;position&quot;: 5,
801
- # &quot;force_prefix&quot;: false,
802
- # &quot;value&quot;: &quot;!&quot;
803
- # }
804
- # ]
805
- # ]
806
- </pre></div>
807
- </div>
808
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code>:</p>
809
- <p>Execution example:</p>
810
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
811
- # [
812
- # [
813
- # 0,
814
- # 1337566253.89858,
815
- # 0.000355720520019531
816
- # ],
817
- # [
818
- # {
819
- # &quot;position&quot;: 0,
820
- # &quot;force_prefix&quot;: false,
821
- # &quot;value&quot;: &quot;日本&quot;
822
- # },
823
- # {
824
- # &quot;position&quot;: 1,
825
- # &quot;force_prefix&quot;: false,
826
- # &quot;value&quot;: &quot;本語&quot;
827
- # },
828
- # {
829
- # &quot;position&quot;: 2,
830
- # &quot;force_prefix&quot;: false,
831
- # &quot;value&quot;: &quot;語!&quot;
832
- # },
833
- # {
834
- # &quot;position&quot;: 3,
835
- # &quot;force_prefix&quot;: false,
836
- # &quot;value&quot;: &quot;!!&quot;
837
- # },
838
- # {
839
- # &quot;position&quot;: 4,
840
- # &quot;force_prefix&quot;: false,
841
- # &quot;value&quot;: &quot;!!&quot;
842
- # },
843
- # {
844
- # &quot;position&quot;: 5,
845
- # &quot;force_prefix&quot;: false,
846
- # &quot;value&quot;: &quot;!&quot;
847
- # }
848
- # ]
849
- # ]
850
- </pre></div>
851
- </div>
852
- </div>
853
- <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
854
- <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
855
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> is similar to
856
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The differences between them are the following:</p>
857
- <blockquote>
858
- <div><ul class="simple">
859
- <li>Blank handling</li>
860
- <li>Symbol and alphabet handling</li>
861
- </ul>
862
- </div></blockquote>
863
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> ignores white-spaces in
864
- continuous symbols and non-ASCII characters.</p>
865
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> tokenizes symbols and
866
- alphabets by bigram tokenize method.</p>
867
- <p>You can see the difference between them with the <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
868
- has symbols and non-ASCII characters with white spaces and alphabets.</p>
869
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
870
- <p>Execution example:</p>
871
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
872
- # [
873
- # [
874
- # 0,
875
- # 1337566253.89858,
876
- # 0.000355720520019531
877
- # ],
878
- # [
879
- # {
880
- # &quot;position&quot;: 0,
881
- # &quot;force_prefix&quot;: false,
882
- # &quot;value&quot;: &quot;hello&quot;
883
- # },
884
- # {
885
- # &quot;position&quot;: 1,
886
- # &quot;force_prefix&quot;: false,
887
- # &quot;value&quot;: &quot;日&quot;
888
- # },
889
- # {
890
- # &quot;position&quot;: 2,
891
- # &quot;force_prefix&quot;: false,
892
- # &quot;value&quot;: &quot;本&quot;
893
- # },
894
- # {
895
- # &quot;position&quot;: 3,
896
- # &quot;force_prefix&quot;: false,
897
- # &quot;value&quot;: &quot;語&quot;
898
- # },
899
- # {
900
- # &quot;position&quot;: 4,
901
- # &quot;force_prefix&quot;: false,
902
- # &quot;value&quot;: &quot;!&quot;
903
- # },
904
- # {
905
- # &quot;position&quot;: 5,
906
- # &quot;force_prefix&quot;: false,
907
- # &quot;value&quot;: &quot;!&quot;
908
- # },
909
- # {
910
- # &quot;position&quot;: 6,
911
- # &quot;force_prefix&quot;: false,
912
- # &quot;value&quot;: &quot;!&quot;
913
- # }
914
- # ]
915
- # ]
916
- </pre></div>
917
- </div>
918
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code>:</p>
919
- <p>Execution example:</p>
920
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
921
- # [
922
- # [
923
- # 0,
924
- # 1337566253.89858,
925
- # 0.000355720520019531
926
- # ],
927
- # [
928
- # {
929
- # &quot;position&quot;: 0,
930
- # &quot;force_prefix&quot;: false,
931
- # &quot;value&quot;: &quot;he&quot;
932
- # },
933
- # {
934
- # &quot;position&quot;: 1,
935
- # &quot;force_prefix&quot;: false,
936
- # &quot;value&quot;: &quot;el&quot;
937
- # },
938
- # {
939
- # &quot;position&quot;: 2,
940
- # &quot;force_prefix&quot;: false,
941
- # &quot;value&quot;: &quot;ll&quot;
942
- # },
943
- # {
944
- # &quot;position&quot;: 3,
945
- # &quot;force_prefix&quot;: false,
946
- # &quot;value&quot;: &quot;lo&quot;
947
- # },
948
- # {
949
- # &quot;position&quot;: 4,
950
- # &quot;force_prefix&quot;: false,
951
- # &quot;value&quot;: &quot;o日&quot;
952
- # },
953
- # {
954
- # &quot;position&quot;: 5,
955
- # &quot;force_prefix&quot;: false,
956
- # &quot;value&quot;: &quot;日本&quot;
957
- # },
958
- # {
959
- # &quot;position&quot;: 6,
960
- # &quot;force_prefix&quot;: false,
961
- # &quot;value&quot;: &quot;本語&quot;
962
- # },
963
- # {
964
- # &quot;position&quot;: 7,
965
- # &quot;force_prefix&quot;: false,
966
- # &quot;value&quot;: &quot;語!&quot;
967
- # },
968
- # {
969
- # &quot;position&quot;: 8,
970
- # &quot;force_prefix&quot;: false,
971
- # &quot;value&quot;: &quot;!!&quot;
972
- # },
973
- # {
974
- # &quot;position&quot;: 9,
975
- # &quot;force_prefix&quot;: false,
976
- # &quot;value&quot;: &quot;!!&quot;
977
- # },
978
- # {
979
- # &quot;position&quot;: 10,
980
- # &quot;force_prefix&quot;: false,
981
- # &quot;value&quot;: &quot;!&quot;
982
- # }
983
- # ]
984
- # ]
985
- </pre></div>
986
- </div>
987
- </div>
988
- <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
989
- <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
990
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> is similar to
991
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The differences between them are the following:</p>
992
- <blockquote>
993
- <div><ul class="simple">
994
- <li>Blank handling</li>
995
- <li>Symbol, alphabet and digit handling</li>
65
+ <div class="toctree-wrapper compound">
66
+ <ul>
67
+ <li class="toctree-l1"><a class="reference internal" href="tokenizer/summary.html">7.8.1. Summary</a></li>
68
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram.html">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></li>
69
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank.html">7.8.3. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
70
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol.html">7.8.4. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
71
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html">7.8.5. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
72
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html">7.8.6. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
73
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol.html">7.8.7. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
74
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha.html">7.8.8. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
75
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha_digit.html">7.8.9. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
76
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit.html">7.8.10. <code class="docutils literal notranslate"><span class="pre">TokenDelimit</span></code></a></li>
77
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit_null.html">7.8.11. <code class="docutils literal notranslate"><span class="pre">TokenDelimitNull</span></code></a></li>
78
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_mecab.html">7.8.12. <code class="docutils literal notranslate"><span class="pre">TokenMecab</span></code></a></li>
79
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_regexp.html">7.8.13. <code class="docutils literal notranslate"><span class="pre">TokenRegexp</span></code></a></li>
80
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_trigram.html">7.8.14. <code class="docutils literal notranslate"><span class="pre">TokenTrigram</span></code></a></li>
81
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_unigram.html">7.8.15. <code class="docutils literal notranslate"><span class="pre">TokenUnigram</span></code></a></li>
996
82
  </ul>
997
- </div></blockquote>
998
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> ignores white-spaces
999
- in continuous symbols and non-ASCII characters.</p>
1000
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> tokenizes symbols,
1001
- alphabets and digits by bigram tokenize method. It means that all
1002
- characters are tokenized by bigram tokenize method.</p>
1003
- <p>You can see the difference between them with the <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> text
1004
- because it has symbols and non-ASCII characters with white spaces,
1005
- alphabets and digits.</p>
1006
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
1007
- <p>Execution example:</p>
1008
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
1009
- # [
1010
- # [
1011
- # 0,
1012
- # 1337566253.89858,
1013
- # 0.000355720520019531
1014
- # ],
1015
- # [
1016
- # {
1017
- # &quot;position&quot;: 0,
1018
- # &quot;force_prefix&quot;: false,
1019
- # &quot;value&quot;: &quot;hello&quot;
1020
- # },
1021
- # {
1022
- # &quot;position&quot;: 1,
1023
- # &quot;force_prefix&quot;: false,
1024
- # &quot;value&quot;: &quot;日&quot;
1025
- # },
1026
- # {
1027
- # &quot;position&quot;: 2,
1028
- # &quot;force_prefix&quot;: false,
1029
- # &quot;value&quot;: &quot;本&quot;
1030
- # },
1031
- # {
1032
- # &quot;position&quot;: 3,
1033
- # &quot;force_prefix&quot;: false,
1034
- # &quot;value&quot;: &quot;語&quot;
1035
- # },
1036
- # {
1037
- # &quot;position&quot;: 4,
1038
- # &quot;force_prefix&quot;: false,
1039
- # &quot;value&quot;: &quot;!&quot;
1040
- # },
1041
- # {
1042
- # &quot;position&quot;: 5,
1043
- # &quot;force_prefix&quot;: false,
1044
- # &quot;value&quot;: &quot;!&quot;
1045
- # },
1046
- # {
1047
- # &quot;position&quot;: 6,
1048
- # &quot;force_prefix&quot;: false,
1049
- # &quot;value&quot;: &quot;!&quot;
1050
- # },
1051
- # {
1052
- # &quot;position&quot;: 7,
1053
- # &quot;force_prefix&quot;: false,
1054
- # &quot;value&quot;: &quot;777&quot;
1055
- # }
1056
- # ]
1057
- # ]
1058
- </pre></div>
1059
- </div>
1060
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code>:</p>
1061
- <p>Execution example:</p>
1062
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
1063
- # [
1064
- # [
1065
- # 0,
1066
- # 1337566253.89858,
1067
- # 0.000355720520019531
1068
- # ],
1069
- # [
1070
- # {
1071
- # &quot;position&quot;: 0,
1072
- # &quot;force_prefix&quot;: false,
1073
- # &quot;value&quot;: &quot;he&quot;
1074
- # },
1075
- # {
1076
- # &quot;position&quot;: 1,
1077
- # &quot;force_prefix&quot;: false,
1078
- # &quot;value&quot;: &quot;el&quot;
1079
- # },
1080
- # {
1081
- # &quot;position&quot;: 2,
1082
- # &quot;force_prefix&quot;: false,
1083
- # &quot;value&quot;: &quot;ll&quot;
1084
- # },
1085
- # {
1086
- # &quot;position&quot;: 3,
1087
- # &quot;force_prefix&quot;: false,
1088
- # &quot;value&quot;: &quot;lo&quot;
1089
- # },
1090
- # {
1091
- # &quot;position&quot;: 4,
1092
- # &quot;force_prefix&quot;: false,
1093
- # &quot;value&quot;: &quot;o日&quot;
1094
- # },
1095
- # {
1096
- # &quot;position&quot;: 5,
1097
- # &quot;force_prefix&quot;: false,
1098
- # &quot;value&quot;: &quot;日本&quot;
1099
- # },
1100
- # {
1101
- # &quot;position&quot;: 6,
1102
- # &quot;force_prefix&quot;: false,
1103
- # &quot;value&quot;: &quot;本語&quot;
1104
- # },
1105
- # {
1106
- # &quot;position&quot;: 7,
1107
- # &quot;force_prefix&quot;: false,
1108
- # &quot;value&quot;: &quot;語!&quot;
1109
- # },
1110
- # {
1111
- # &quot;position&quot;: 8,
1112
- # &quot;force_prefix&quot;: false,
1113
- # &quot;value&quot;: &quot;!!&quot;
1114
- # },
1115
- # {
1116
- # &quot;position&quot;: 9,
1117
- # &quot;force_prefix&quot;: false,
1118
- # &quot;value&quot;: &quot;!!&quot;
1119
- # },
1120
- # {
1121
- # &quot;position&quot;: 10,
1122
- # &quot;force_prefix&quot;: false,
1123
- # &quot;value&quot;: &quot;!7&quot;
1124
- # },
1125
- # {
1126
- # &quot;position&quot;: 11,
1127
- # &quot;force_prefix&quot;: false,
1128
- # &quot;value&quot;: &quot;77&quot;
1129
- # },
1130
- # {
1131
- # &quot;position&quot;: 12,
1132
- # &quot;force_prefix&quot;: false,
1133
- # &quot;value&quot;: &quot;77&quot;
1134
- # },
1135
- # {
1136
- # &quot;position&quot;: 13,
1137
- # &quot;force_prefix&quot;: false,
1138
- # &quot;value&quot;: &quot;7&quot;
1139
- # }
1140
- # ]
1141
- # ]
1142
- </pre></div>
1143
- </div>
1144
- </div>
1145
- <div class="section" id="tokenunigram">
1146
- <span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
1147
- <p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The difference
1147
- between them is the token unit. <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> uses 2 characters per
1149
- token. <code class="docutils literal"><span class="pre">TokenUnigram</span></code> uses 1 character per token.</p>
1150
- <p>Execution example:</p>
1151
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
1152
- # [
1153
- # [
1154
- # 0,
1155
- # 1337566253.89858,
1156
- # 0.000355720520019531
1157
- # ],
1158
- # [
1159
- # {
1160
- # &quot;position&quot;: 0,
1161
- # &quot;force_prefix&quot;: false,
1162
- # &quot;value&quot;: &quot;100&quot;
1163
- # },
1164
- # {
1165
- # &quot;position&quot;: 1,
1166
- # &quot;force_prefix&quot;: false,
1167
- # &quot;value&quot;: &quot;cents&quot;
1168
- # },
1169
- # {
1170
- # &quot;position&quot;: 2,
1171
- # &quot;force_prefix&quot;: false,
1172
- # &quot;value&quot;: &quot;!!!&quot;
1173
- # }
1174
- # ]
1175
- # ]
1176
- </pre></div>
1177
- </div>
1178
- </div>
1179
- <div class="section" id="tokentrigram">
1180
- <span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
1181
- <p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The difference
1181
- between them is the token unit. <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> uses 2 characters per
1183
- token. <code class="docutils literal"><span class="pre">TokenTrigram</span></code> uses 3 characters per token.</p>
1184
- <p>Execution example:</p>
1185
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1186
- # [
1187
- # [
1188
- # 0,
1189
- # 1337566253.89858,
1190
- # 0.000355720520019531
1191
- # ],
1192
- # [
1193
- # {
1194
- # &quot;position&quot;: 0,
1195
- # &quot;force_prefix&quot;: false,
1196
- # &quot;value&quot;: &quot;10000&quot;
1197
- # },
1198
- # {
1199
- # &quot;position&quot;: 1,
1200
- # &quot;force_prefix&quot;: false,
1201
- # &quot;value&quot;: &quot;cents&quot;
1202
- # },
1203
- # {
1204
- # &quot;position&quot;: 2,
1205
- # &quot;force_prefix&quot;: false,
1206
- # &quot;value&quot;: &quot;!!!!!&quot;
1207
- # }
1208
- # ]
1209
- # ]
1210
- </pre></div>
1211
- </div>
1212
- </div>
1213
- <div class="section" id="tokendelimit">
1214
- <span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
1215
- <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> extracts tokens by splitting on one or more space
1216
- characters (<code class="docutils literal"><span class="pre">U+0020</span></code>). For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to
1217
- <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code>.</p>
1218
- <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> is suitable for tag text. You can extract <code class="docutils literal"><span class="pre">groonga</span></code>
1219
- and <code class="docutils literal"><span class="pre">full-text-search</span></code> and <code class="docutils literal"><span class="pre">http</span></code> as tags from <code class="docutils literal"><span class="pre">groonga</span>
1220
- <span class="pre">full-text-search</span> <span class="pre">http</span></code>.</p>
1221
- <p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimit</span></code>:</p>
1222
- <p>Execution example:</p>
1223
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1224
- # [
1225
- # [
1226
- # 0,
1227
- # 1337566253.89858,
1228
- # 0.000355720520019531
1229
- # ],
1230
- # [
1231
- # {
1232
- # &quot;position&quot;: 0,
1233
- # &quot;force_prefix&quot;: false,
1234
- # &quot;value&quot;: &quot;groonga&quot;
1235
- # },
1236
- # {
1237
- # &quot;position&quot;: 1,
1238
- # &quot;force_prefix&quot;: false,
1239
- # &quot;value&quot;: &quot;full-text-search&quot;
1240
- # },
1241
- # {
1242
- # &quot;position&quot;: 2,
1243
- # &quot;force_prefix&quot;: false,
1244
- # &quot;value&quot;: &quot;http&quot;
1245
- # }
1246
- # ]
1247
- # ]
1248
- </pre></div>
1249
- </div>
1250
- </div>
1251
- <div class="section" id="tokendelimitnull">
1252
- <span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
1253
- <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is similar to <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a>. The
1254
- difference between them is separator character. <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a>
1255
- uses space character (<code class="docutils literal"><span class="pre">U+0020</span></code>) but <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> uses NUL
1256
- character (<code class="docutils literal"><span class="pre">U+0000</span></code>).</p>
1257
- <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is also suitable for tag text.</p>
1258
- <p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code>:</p>
1259
- <p>Execution example:</p>
1260
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1261
- # [
1262
- # [
1263
- # 0,
1264
- # 1337566253.89858,
1265
- # 0.000355720520019531
1266
- # ],
1267
- # [
1268
- # {
1269
- # &quot;position&quot;: 0,
1270
- # &quot;force_prefix&quot;: false,
1271
- # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1272
- # }
1273
- # ]
1274
- # ]
1275
- </pre></div>
1276
- </div>
1277
- </div>
1278
- <div class="section" id="tokenmecab">
1279
- <span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
1280
- <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is a tokenizer based on <a class="reference external" href="https://taku910.github.io/mecab/">MeCab</a> part-of-speech and
1281
- morphological analyzer.</p>
1282
- <p>MeCab doesn't depend on Japanese. You can use MeCab for other
1283
- languages by creating dictionary for the languages. You can use <a class="reference external" href="http://osdn.jp/projects/naist-jdic/">NAIST
1284
- Japanese Dictionary</a>
1285
- for Japanese.</p>
1286
- <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is good for precision rather than recall. You can find
1287
- <code class="docutils literal"><span class="pre">東京都</span></code> and <code class="docutils literal"><span class="pre">京都</span></code> texts by <code class="docutils literal"><span class="pre">京都</span></code> query with
1288
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> but <code class="docutils literal"><span class="pre">東京都</span></code> isn't expected. You can find only
1289
- <code class="docutils literal"><span class="pre">京都</span></code> text by <code class="docutils literal"><span class="pre">京都</span></code> query with <code class="docutils literal"><span class="pre">TokenMecab</span></code>.</p>
1290
- <p>If you want to support neologisms, you need to keep updating your
1291
- MeCab dictionary. This incurs a maintenance cost. (<a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> doesn't
1292
- require dictionary maintenance because <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> doesn't use
1293
- dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
1294
- <p>Here is an example of <code class="docutils literal"><span class="pre">TokenMecab</span></code>. <code class="docutils literal"><span class="pre">東京都</span></code> is tokenized to <code class="docutils literal"><span class="pre">東京</span></code>
1295
- and <code class="docutils literal"><span class="pre">都</span></code>. They don't include <code class="docutils literal"><span class="pre">京都</span></code>:</p>
1296
- <p>Execution example:</p>
1297
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenMecab &quot;東京都&quot;
1298
- # [
1299
- # [
1300
- # -22,
1301
- # 1337566253.89858,
1302
- # 0.000355720520019531,
1303
- # &quot;[tokenize] nonexistent tokenizer: &lt;TokenMecab&gt;&quot;,
1304
- # [
1305
- # [
1306
- # &quot;create_lexicon_for_tokenize&quot;,
1307
- # &quot;proc_tokenize.c&quot;,
1308
- # 139
1309
- # ]
1310
- # ]
1311
- # ]
1312
- # ]
1313
- </pre></div>
1314
- </div>
1315
- </div>
1316
- <div class="section" id="tokenregexp">
1317
- <span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
1318
- <div class="versionadded">
1319
- <p><span class="versionmodified">New in version 5.0.1.</span></p>
1320
- </div>
1321
- <div class="admonition caution">
1322
- <p class="first admonition-title">Caution</p>
1323
- <p class="last">This tokenizer is experimental. Specification may be changed.</p>
1324
- </div>
1325
- <div class="admonition caution">
1326
- <p class="first admonition-title">Caution</p>
1327
- <p class="last">This tokenizer can be used only with UTF-8. You can't use this
1328
- tokenizer with EUC-JP, Shift_JIS and so on.</p>
1329
- </div>
1330
- <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is a tokenizer for supporting regular expression
1331
- search by index.</p>
1332
- <p>In general, regular expression search is evaluated as sequential
1333
- search. But the following cases can be evaluated as index search:</p>
1334
- <blockquote>
1335
- <div><ul class="simple">
1336
- <li>Literal only case such as <code class="docutils literal"><span class="pre">hello</span></code></li>
1337
- <li>The beginning of text and literal case such as <code class="docutils literal"><span class="pre">\A/home/alice</span></code></li>
1338
- <li>The end of text and literal case such as <code class="docutils literal"><span class="pre">\.txt\z</span></code></li>
1339
- </ul>
1340
- </div></blockquote>
1341
- <p>In most cases, index search is faster than sequential search.</p>
1342
- <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is based on bigram tokenize method. <code class="docutils literal"><span class="pre">TokenRegexp</span></code>
1343
- adds the beginning of text mark (<code class="docutils literal"><span class="pre">U+FFEF</span></code>) at the beginning of text
1344
- and the end of text mark (<code class="docutils literal"><span class="pre">U+FFF0</span></code>) to the end of text when you
1345
- index text:</p>
1346
- <p>Execution example:</p>
1347
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1348
- # [
1349
- # [
1350
- # 0,
1351
- # 1337566253.89858,
1352
- # 0.000355720520019531
1353
- # ],
1354
- # [
1355
- # {
1356
- # &quot;position&quot;: 0,
1357
- # &quot;force_prefix&quot;: false,
1358
- # &quot;value&quot;: &quot;￯&quot;
1359
- # },
1360
- # {
1361
- # &quot;position&quot;: 1,
1362
- # &quot;force_prefix&quot;: false,
1363
- # &quot;value&quot;: &quot;/h&quot;
1364
- # },
1365
- # {
1366
- # &quot;position&quot;: 2,
1367
- # &quot;force_prefix&quot;: false,
1368
- # &quot;value&quot;: &quot;ho&quot;
1369
- # },
1370
- # {
1371
- # &quot;position&quot;: 3,
1372
- # &quot;force_prefix&quot;: false,
1373
- # &quot;value&quot;: &quot;om&quot;
1374
- # },
1375
- # {
1376
- # &quot;position&quot;: 4,
1377
- # &quot;force_prefix&quot;: false,
1378
- # &quot;value&quot;: &quot;me&quot;
1379
- # },
1380
- # {
1381
- # &quot;position&quot;: 5,
1382
- # &quot;force_prefix&quot;: false,
1383
- # &quot;value&quot;: &quot;e/&quot;
1384
- # },
1385
- # {
1386
- # &quot;position&quot;: 6,
1387
- # &quot;force_prefix&quot;: false,
1388
- # &quot;value&quot;: &quot;/a&quot;
1389
- # },
1390
- # {
1391
- # &quot;position&quot;: 7,
1392
- # &quot;force_prefix&quot;: false,
1393
- # &quot;value&quot;: &quot;al&quot;
1394
- # },
1395
- # {
1396
- # &quot;position&quot;: 8,
1397
- # &quot;force_prefix&quot;: false,
1398
- # &quot;value&quot;: &quot;li&quot;
1399
- # },
1400
- # {
1401
- # &quot;position&quot;: 9,
1402
- # &quot;force_prefix&quot;: false,
1403
- # &quot;value&quot;: &quot;ic&quot;
1404
- # },
1405
- # {
1406
- # &quot;position&quot;: 10,
1407
- # &quot;force_prefix&quot;: false,
1408
- # &quot;value&quot;: &quot;ce&quot;
1409
- # },
1410
- # {
1411
- # &quot;position&quot;: 11,
1412
- # &quot;force_prefix&quot;: false,
1413
- # &quot;value&quot;: &quot;e/&quot;
1414
- # },
1415
- # {
1416
- # &quot;position&quot;: 12,
1417
- # &quot;force_prefix&quot;: false,
1418
- # &quot;value&quot;: &quot;/t&quot;
1419
- # },
1420
- # {
1421
- # &quot;position&quot;: 13,
1422
- # &quot;force_prefix&quot;: false,
1423
- # &quot;value&quot;: &quot;te&quot;
1424
- # },
1425
- # {
1426
- # &quot;position&quot;: 14,
1427
- # &quot;force_prefix&quot;: false,
1428
- # &quot;value&quot;: &quot;es&quot;
1429
- # },
1430
- # {
1431
- # &quot;position&quot;: 15,
1432
- # &quot;force_prefix&quot;: false,
1433
- # &quot;value&quot;: &quot;st&quot;
1434
- # },
1435
- # {
1436
- # &quot;position&quot;: 16,
1437
- # &quot;force_prefix&quot;: false,
1438
- # &quot;value&quot;: &quot;t.&quot;
1439
- # },
1440
- # {
1441
- # &quot;position&quot;: 17,
1442
- # &quot;force_prefix&quot;: false,
1443
- # &quot;value&quot;: &quot;.t&quot;
1444
- # },
1445
- # {
1446
- # &quot;position&quot;: 18,
1447
- # &quot;force_prefix&quot;: false,
1448
- # &quot;value&quot;: &quot;tx&quot;
1449
- # },
1450
- # {
1451
- # &quot;position&quot;: 19,
1452
- # &quot;force_prefix&quot;: false,
1453
- # &quot;value&quot;: &quot;xt&quot;
1454
- # },
1455
- # {
1456
- # &quot;position&quot;: 20,
1457
- # &quot;force_prefix&quot;: false,
1458
- # &quot;value&quot;: &quot;t&quot;
1459
- # },
1460
- # {
1461
- # &quot;position&quot;: 21,
1462
- # &quot;force_prefix&quot;: false,
1463
- # &quot;value&quot;: &quot;￰&quot;
1464
- # }
1465
- # ]
1466
- # ]
1467
- </pre></div>
1468
- </div>
1469
- </div>
1470
83
  </div>
1471
84
  </div>
1472
85
 
@@ -1476,46 +89,20 @@ index text:</p>
1476
89
  </div>
1477
90
  <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1478
91
  <div class="sphinxsidebarwrapper">
1479
- <h3><a href="../index.html">Table Of Contents</a></h3>
1480
- <ul>
1481
- <li><a class="reference internal" href="#">7.8. Tokenizers</a><ul>
1482
- <li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
1483
- <li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is &quot;tokenize&quot;?</a></li>
1484
- <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizsers</a><ul>
1485
- <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
1486
- <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
1487
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
1488
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
1489
- <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
1490
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
1491
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
1492
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
1493
- <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
1494
- <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
1495
- <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
1496
- <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
1497
- <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
1498
- <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
1499
- </ul>
1500
- </li>
1501
- </ul>
1502
- </li>
1503
- </ul>
1504
-
1505
92
  <h4>Previous topic</h4>
1506
- <p class="topless"><a href="normalizers.html"
1507
- title="previous chapter">7.7. Normalizers</a></p>
93
+ <p class="topless"><a href="normalizers/normalizer_nfkc51.html"
94
+ title="previous chapter">7.7.2.3. <code class="docutils literal notranslate"><span class="pre">NormalizerNFKC51</span></code></a></p>
1508
95
  <h4>Next topic</h4>
1509
- <p class="topless"><a href="token_filters.html"
1510
- title="next chapter">7.9. Token filters</a></p>
96
+ <p class="topless"><a href="tokenizer/summary.html"
97
+ title="next chapter">7.8.1. Summary</a></p>
1511
98
  <div id="searchbox" style="display: none" role="search">
1512
99
  <h3>Quick search</h3>
100
+ <div class="searchformwrapper">
1513
101
  <form class="search" action="../search.html" method="get">
1514
- <div><input type="text" name="q" /></div>
1515
- <div><input type="submit" value="Go" /></div>
1516
- <input type="hidden" name="check_keywords" value="yes" />
1517
- <input type="hidden" name="area" value="default" />
102
+ <input type="text" name="q" />
103
+ <input type="submit" value="Go" />
1518
104
  </form>
105
+ </div>
1519
106
  </div>
1520
107
  <script type="text/javascript">$('#searchbox').show(0);</script>
1521
108
  </div>
@@ -1529,17 +116,17 @@ index text:</p>
1529
116
  <a href="../genindex.html" title="General Index"
1530
117
  >index</a></li>
1531
118
  <li class="right" >
1532
- <a href="token_filters.html" title="7.9. Token filters"
119
+ <a href="tokenizer/summary.html" title="7.8.1. Summary"
1533
120
  >next</a> |</li>
1534
121
  <li class="right" >
1535
- <a href="normalizers.html" title="7.7. Normalizers"
122
+ <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
1536
123
  >previous</a> |</li>
1537
- <li class="nav-item nav-item-0"><a href="../index.html">Groonga v7.1.0-73-g6d02cfa documentation</a> &#187;</li>
124
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
1538
125
  <li class="nav-item nav-item-1"><a href="../reference.html" >7. Reference manual</a> &#187;</li>
1539
126
  </ul>
1540
127
  </div>
1541
128
  <div class="footer" role="contentinfo">
1542
- &#169; Copyright 2009-2018, Brazil, Inc.
129
+ &#169; Copyright 2009-2019, Brazil, Inc.
1543
130
  </div>
1544
131
  </body>
1545
132
  </html>