rroonga 7.1.1-x64-mingw32 → 9.0.2-x64-mingw32

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (980)
  1. checksums.yaml +5 -5
  2. data/Rakefile +3 -3
  3. data/doc/text/news.md +22 -0
  4. data/ext/groonga/extconf.rb +29 -26
  5. data/ext/groonga/rb-grn.h +3 -3
  6. data/lib/2.2/groonga.so +0 -0
  7. data/lib/2.3/groonga.so +0 -0
  8. data/lib/2.4/groonga.so +0 -0
  9. data/lib/2.5/groonga.so +0 -0
  10. data/lib/groonga/expression-builder.rb +1 -1
  11. data/lib/groonga/schema.rb +13 -0
  12. data/rroonga-build.rb +4 -11
  13. data/test/test-expression-builder.rb +8 -0
  14. data/vendor/local/bin/cv2pdb.exe +0 -0
  15. data/vendor/local/bin/generate-pdb.bat +38 -36
  16. data/vendor/local/bin/grndb.exe +0 -0
  17. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  18. data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
  19. data/vendor/local/bin/groonga.exe +0 -0
  20. data/vendor/local/bin/libgroonga-0.dll +0 -0
  21. data/vendor/local/bin/libmecab-2.dll +0 -0
  22. data/vendor/local/bin/libmsgpackc.dll +0 -0
  23. data/vendor/local/bin/libonigmo-6.dll +0 -0
  24. data/vendor/local/bin/libpcre-1.dll +0 -0
  25. data/vendor/local/bin/libpcrecpp-0.dll +0 -0
  26. data/vendor/local/bin/libpcreposix-0.dll +0 -0
  27. data/vendor/local/bin/lz4.exe +0 -0
  28. data/vendor/local/bin/lz4c.exe +0 -0
  29. data/vendor/local/bin/{lz4cat → lz4cat.exe} +0 -0
  30. data/vendor/local/bin/mecab.exe +0 -0
  31. data/vendor/local/bin/pcre-config +1 -1
  32. data/vendor/local/bin/pcregrep.exe +0 -0
  33. data/vendor/local/bin/pcretest.exe +0 -0
  34. data/vendor/local/bin/unlz4.exe +0 -0
  35. data/vendor/local/bin/zlib1.dll +0 -0
  36. data/vendor/local/include/groonga/groonga.h +16 -1
  37. data/vendor/local/include/groonga/groonga/accessor.h +5 -1
  38. data/vendor/local/include/groonga/groonga/column.h +4 -0
  39. data/vendor/local/include/groonga/groonga/db.h +3 -1
  40. data/vendor/local/include/groonga/groonga/expr.h +5 -0
  41. data/vendor/local/include/groonga/groonga/groonga.h +124 -171
  42. data/vendor/local/include/groonga/groonga/highlighter.h +57 -0
  43. data/vendor/local/include/groonga/groonga/ii.h +2 -0
  44. data/vendor/local/include/groonga/groonga/index_column.h +31 -0
  45. data/vendor/local/include/groonga/groonga/memory.h +29 -0
  46. data/vendor/local/include/groonga/groonga/msgpack.h +50 -0
  47. data/vendor/local/include/groonga/groonga/obj.h +22 -1
  48. data/vendor/local/include/groonga/groonga/option.h +61 -0
  49. data/vendor/local/include/groonga/groonga/output.h +57 -2
  50. data/vendor/local/include/groonga/groonga/output_columns.h +38 -0
  51. data/vendor/local/include/groonga/groonga/plugin.h +5 -0
  52. data/vendor/local/include/groonga/groonga/raw_string.h +60 -0
  53. data/vendor/local/include/groonga/groonga/string.h +113 -0
  54. data/vendor/local/include/groonga/groonga/table.h +89 -1
  55. data/vendor/local/include/groonga/groonga/thread.h +15 -0
  56. data/vendor/local/include/groonga/groonga/time.h +1 -0
  57. data/vendor/local/include/groonga/groonga/token.h +60 -10
  58. data/vendor/local/include/groonga/groonga/token_cursor.h +59 -0
  59. data/vendor/local/include/groonga/groonga/token_filter.h +24 -0
  60. data/vendor/local/include/groonga/groonga/token_metadata.h +49 -0
  61. data/vendor/local/include/groonga/groonga/tokenizer.h +99 -25
  62. data/vendor/local/include/groonga/groonga/tokenizer_query_deprecated.h +50 -0
  63. data/vendor/local/include/groonga/groonga/vector.h +80 -0
  64. data/vendor/local/include/groonga/groonga/version.h +32 -0
  65. data/vendor/local/include/groonga/groonga/window_function.h +18 -8
  66. data/vendor/local/include/groonga/groonga/window_function_executor.h +68 -0
  67. data/vendor/local/include/lz4.h +504 -212
  68. data/vendor/local/include/lz4frame.h +433 -153
  69. data/vendor/local/include/lz4frame_static.h +47 -0
  70. data/vendor/local/include/lz4hc.h +281 -108
  71. data/vendor/local/include/msgpack.hpp +4 -0
  72. data/vendor/local/include/msgpack/adaptor/adaptor_base.hpp +1 -0
  73. data/vendor/local/include/msgpack/adaptor/adaptor_base_decl.hpp +1 -0
  74. data/vendor/local/include/msgpack/adaptor/array_ref_decl.hpp +1 -0
  75. data/vendor/local/include/msgpack/adaptor/boost/msgpack_variant_decl.hpp +1 -0
  76. data/vendor/local/include/msgpack/adaptor/boost/string_view.hpp +15 -0
  77. data/vendor/local/include/msgpack/adaptor/check_container_size_decl.hpp +1 -0
  78. data/vendor/local/include/msgpack/adaptor/cpp17/optional.hpp +16 -0
  79. data/vendor/local/include/msgpack/adaptor/cpp17/string_view.hpp +16 -0
  80. data/vendor/local/include/msgpack/adaptor/define_decl.hpp +2 -0
  81. data/vendor/local/include/msgpack/adaptor/ext_decl.hpp +1 -0
  82. data/vendor/local/include/msgpack/adaptor/fixint_decl.hpp +1 -0
  83. data/vendor/local/include/msgpack/adaptor/int_decl.hpp +1 -0
  84. data/vendor/local/include/msgpack/adaptor/map_decl.hpp +1 -0
  85. data/vendor/local/include/msgpack/adaptor/msgpack_tuple_decl.hpp +1 -0
  86. data/vendor/local/include/msgpack/adaptor/nil_decl.hpp +1 -0
  87. data/vendor/local/include/msgpack/adaptor/raw_decl.hpp +1 -0
  88. data/vendor/local/include/msgpack/adaptor/size_equal_only_decl.hpp +1 -0
  89. data/vendor/local/include/msgpack/adaptor/tr1/unordered_map.hpp +2 -2
  90. data/vendor/local/include/msgpack/adaptor/tr1/unordered_set.hpp +2 -2
  91. data/vendor/local/include/msgpack/adaptor/v4raw_decl.hpp +1 -0
  92. data/vendor/local/include/msgpack/cpp_config_decl.hpp +1 -0
  93. data/vendor/local/include/msgpack/create_object_visitor.hpp +17 -0
  94. data/vendor/local/include/msgpack/create_object_visitor_decl.hpp +16 -0
  95. data/vendor/local/include/msgpack/fbuffer.h +1 -1
  96. data/vendor/local/include/msgpack/fbuffer_decl.hpp +1 -0
  97. data/vendor/local/include/msgpack/gcc_atomic.hpp +0 -2
  98. data/vendor/local/include/msgpack/iterator_decl.hpp +2 -1
  99. data/vendor/local/include/msgpack/meta_decl.hpp +1 -0
  100. data/vendor/local/include/msgpack/null_visitor.hpp +17 -0
  101. data/vendor/local/include/msgpack/null_visitor_decl.hpp +16 -0
  102. data/vendor/local/include/msgpack/object.h +5 -0
  103. data/vendor/local/include/msgpack/object_decl.hpp +1 -0
  104. data/vendor/local/include/msgpack/object_fwd.hpp +1 -0
  105. data/vendor/local/include/msgpack/object_fwd_decl.hpp +1 -0
  106. data/vendor/local/include/msgpack/pack.h +1 -0
  107. data/vendor/local/include/msgpack/pack_decl.hpp +1 -0
  108. data/vendor/local/include/msgpack/parse.hpp +18 -0
  109. data/vendor/local/include/msgpack/parse_decl.hpp +16 -0
  110. data/vendor/local/include/msgpack/parse_return.hpp +17 -0
  111. data/vendor/local/include/msgpack/sbuffer_decl.hpp +1 -0
  112. data/vendor/local/include/msgpack/sysdep.h +34 -26
  113. data/vendor/local/include/msgpack/type.hpp +9 -0
  114. data/vendor/local/include/msgpack/unpack.h +12 -1
  115. data/vendor/local/include/msgpack/unpack.hpp +1 -0
  116. data/vendor/local/include/msgpack/unpack_decl.hpp +1 -0
  117. data/vendor/local/include/msgpack/unpack_exception.hpp +15 -0
  118. data/vendor/local/include/msgpack/unpack_template.h +22 -30
  119. data/vendor/local/include/msgpack/v1/adaptor/array_ref.hpp +6 -6
  120. data/vendor/local/include/msgpack/v1/adaptor/boost/fusion.hpp +49 -6
  121. data/vendor/local/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp +6 -4
  122. data/vendor/local/include/msgpack/v1/adaptor/boost/string_view.hpp +87 -0
  123. data/vendor/local/include/msgpack/v1/adaptor/carray.hpp +11 -11
  124. data/vendor/local/include/msgpack/v1/adaptor/char_ptr.hpp +1 -1
  125. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array.hpp +1 -1
  126. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_char.hpp +8 -1
  127. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_unsigned_char.hpp +8 -1
  128. data/vendor/local/include/msgpack/v1/adaptor/cpp11/forward_list.hpp +1 -1
  129. data/vendor/local/include/msgpack/v1/adaptor/cpp11/tuple.hpp +2 -2
  130. data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_map.hpp +4 -4
  131. data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_set.hpp +2 -2
  132. data/vendor/local/include/msgpack/v1/adaptor/cpp17/optional.hpp +90 -0
  133. data/vendor/local/include/msgpack/v1/adaptor/cpp17/string_view.hpp +86 -0
  134. data/vendor/local/include/msgpack/v1/adaptor/deque.hpp +1 -1
  135. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_array.hpp +1088 -32
  136. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_map.hpp +32 -16
  137. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_msgpack_tuple.hpp +32 -32
  138. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_convert_helper.hpp +45 -0
  139. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_array.hpp +4 -3
  140. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_map.hpp +4 -2
  141. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_msgpack_tuple.hpp +2 -2
  142. data/vendor/local/include/msgpack/v1/adaptor/ext.hpp +1 -1
  143. data/vendor/local/include/msgpack/v1/adaptor/fixint.hpp +40 -24
  144. data/vendor/local/include/msgpack/v1/adaptor/float.hpp +4 -4
  145. data/vendor/local/include/msgpack/v1/adaptor/int.hpp +55 -33
  146. data/vendor/local/include/msgpack/v1/adaptor/list.hpp +1 -1
  147. data/vendor/local/include/msgpack/v1/adaptor/map.hpp +10 -10
  148. data/vendor/local/include/msgpack/v1/adaptor/pair.hpp +2 -2
  149. data/vendor/local/include/msgpack/v1/adaptor/set.hpp +2 -2
  150. data/vendor/local/include/msgpack/v1/adaptor/string.hpp +1 -1
  151. data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_map.hpp +2 -2
  152. data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_set.hpp +2 -2
  153. data/vendor/local/include/msgpack/v1/adaptor/vector.hpp +5 -5
  154. data/vendor/local/include/msgpack/v1/adaptor/vector_bool.hpp +1 -1
  155. data/vendor/local/include/msgpack/v1/adaptor/vector_char.hpp +9 -9
  156. data/vendor/local/include/msgpack/v1/adaptor/vector_unsigned_char.hpp +9 -9
  157. data/vendor/local/include/msgpack/v1/cpp_config.hpp +6 -0
  158. data/vendor/local/include/msgpack/v1/cpp_config_decl.hpp +6 -0
  159. data/vendor/local/include/msgpack/v1/detail/cpp03_zone.hpp +41 -34
  160. data/vendor/local/include/msgpack/v1/detail/cpp03_zone_decl.hpp +8 -0
  161. data/vendor/local/include/msgpack/v1/detail/cpp11_zone.hpp +25 -19
  162. data/vendor/local/include/msgpack/v1/detail/cpp11_zone_decl.hpp +8 -0
  163. data/vendor/local/include/msgpack/v1/meta.hpp +6 -0
  164. data/vendor/local/include/msgpack/v1/meta_decl.hpp +5 -0
  165. data/vendor/local/include/msgpack/v1/object.hpp +768 -393
  166. data/vendor/local/include/msgpack/v1/object_decl.hpp +11 -1
  167. data/vendor/local/include/msgpack/v1/object_fwd.hpp +4 -1
  168. data/vendor/local/include/msgpack/v1/object_fwd_decl.hpp +3 -1
  169. data/vendor/local/include/msgpack/v1/parse_return.hpp +36 -0
  170. data/vendor/local/include/msgpack/v1/unpack.hpp +39 -120
  171. data/vendor/local/include/msgpack/v1/unpack_decl.hpp +2 -9
  172. data/vendor/local/include/msgpack/v1/unpack_exception.hpp +122 -0
  173. data/vendor/local/include/msgpack/v1/vrefbuffer.hpp +2 -2
  174. data/vendor/local/include/msgpack/v2/create_object_visitor.hpp +250 -0
  175. data/vendor/local/include/msgpack/v2/create_object_visitor_decl.hpp +33 -0
  176. data/vendor/local/include/msgpack/v2/meta_decl.hpp +4 -0
  177. data/vendor/local/include/msgpack/v2/null_visitor.hpp +96 -0
  178. data/vendor/local/include/msgpack/v2/null_visitor_decl.hpp +29 -0
  179. data/vendor/local/include/msgpack/v2/object_decl.hpp +4 -0
  180. data/vendor/local/include/msgpack/v2/object_fwd.hpp +1 -1
  181. data/vendor/local/include/msgpack/v2/object_fwd_decl.hpp +2 -0
  182. data/vendor/local/include/msgpack/v2/pack_decl.hpp +1 -0
  183. data/vendor/local/include/msgpack/v2/parse.hpp +1072 -0
  184. data/vendor/local/include/msgpack/v2/parse_decl.hpp +79 -0
  185. data/vendor/local/include/msgpack/v2/parse_return.hpp +37 -0
  186. data/vendor/local/include/msgpack/v2/unpack.hpp +21 -1298
  187. data/vendor/local/include/msgpack/v2/unpack_decl.hpp +9 -45
  188. data/vendor/local/include/msgpack/v2/x3_parse.hpp +875 -0
  189. data/vendor/local/include/msgpack/v2/x3_parse_decl.hpp +36 -0
  190. data/vendor/local/include/msgpack/v2/x3_unpack.hpp +120 -0
  191. data/vendor/local/include/msgpack/v2/x3_unpack_decl.hpp +71 -0
  192. data/vendor/local/include/msgpack/v3/adaptor/adaptor_base.hpp +58 -0
  193. data/vendor/local/include/msgpack/v3/adaptor/adaptor_base_decl.hpp +52 -0
  194. data/vendor/local/include/msgpack/v3/adaptor/array_ref_decl.hpp +36 -0
  195. data/vendor/local/include/msgpack/v3/adaptor/boost/msgpack_variant_decl.hpp +42 -0
  196. data/vendor/local/include/msgpack/v3/adaptor/check_container_size_decl.hpp +39 -0
  197. data/vendor/local/include/msgpack/v3/adaptor/define_decl.hpp +23 -0
  198. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_array_decl.hpp +31 -0
  199. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_map_decl.hpp +31 -0
  200. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_msgpack_tuple_decl.hpp +43 -0
  201. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_array_decl.hpp +32 -0
  202. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_map_decl.hpp +31 -0
  203. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_msgpack_tuple_decl.hpp +59 -0
  204. data/vendor/local/include/msgpack/v3/adaptor/ext_decl.hpp +34 -0
  205. data/vendor/local/include/msgpack/v3/adaptor/fixint_decl.hpp +43 -0
  206. data/vendor/local/include/msgpack/v3/adaptor/int_decl.hpp +54 -0
  207. data/vendor/local/include/msgpack/v3/adaptor/map_decl.hpp +33 -0
  208. data/vendor/local/include/msgpack/v3/adaptor/msgpack_tuple_decl.hpp +21 -0
  209. data/vendor/local/include/msgpack/v3/adaptor/nil_decl.hpp +42 -0
  210. data/vendor/local/include/msgpack/v3/adaptor/raw_decl.hpp +33 -0
  211. data/vendor/local/include/msgpack/v3/adaptor/size_equal_only_decl.hpp +35 -0
  212. data/vendor/local/include/msgpack/v3/adaptor/v4raw_decl.hpp +34 -0
  213. data/vendor/local/include/msgpack/v3/cpp_config_decl.hpp +84 -0
  214. data/vendor/local/include/msgpack/v3/create_object_visitor_decl.hpp +33 -0
  215. data/vendor/local/include/msgpack/v3/detail/cpp03_zone_decl.hpp +31 -0
  216. data/vendor/local/include/msgpack/v3/detail/cpp11_zone_decl.hpp +31 -0
  217. data/vendor/local/include/msgpack/v3/fbuffer_decl.hpp +32 -0
  218. data/vendor/local/include/msgpack/v3/iterator_decl.hpp +33 -0
  219. data/vendor/local/include/msgpack/v3/meta_decl.hpp +50 -0
  220. data/vendor/local/include/msgpack/v3/null_visitor_decl.hpp +29 -0
  221. data/vendor/local/include/msgpack/v3/object_decl.hpp +53 -0
  222. data/vendor/local/include/msgpack/v3/object_fwd.hpp +70 -0
  223. data/vendor/local/include/msgpack/v3/object_fwd_decl.hpp +75 -0
  224. data/vendor/local/include/msgpack/v3/pack_decl.hpp +55 -0
  225. data/vendor/local/include/msgpack/v3/parse.hpp +677 -0
  226. data/vendor/local/include/msgpack/v3/parse_decl.hpp +49 -0
  227. data/vendor/local/include/msgpack/v3/parse_return.hpp +35 -0
  228. data/vendor/local/include/msgpack/v3/sbuffer_decl.hpp +33 -0
  229. data/vendor/local/include/msgpack/v3/unpack.hpp +192 -0
  230. data/vendor/local/include/msgpack/v3/unpack_decl.hpp +304 -0
  231. data/vendor/local/include/msgpack/v3/vrefbuffer_decl.hpp +29 -0
  232. data/vendor/local/include/msgpack/v3/x3_parse_decl.hpp +34 -0
  233. data/vendor/local/include/msgpack/v3/x3_unpack.hpp +97 -0
  234. data/vendor/local/include/msgpack/v3/x3_unpack_decl.hpp +65 -0
  235. data/vendor/local/include/msgpack/v3/zbuffer_decl.hpp +29 -0
  236. data/vendor/local/include/msgpack/v3/zone_decl.hpp +21 -0
  237. data/vendor/local/include/msgpack/version_master.h +2 -2
  238. data/vendor/local/include/msgpack/versioning.hpp +5 -3
  239. data/vendor/local/include/msgpack/vrefbuffer.h +1 -2
  240. data/vendor/local/include/msgpack/vrefbuffer_decl.hpp +1 -0
  241. data/vendor/local/include/msgpack/x3_parse.hpp +15 -0
  242. data/vendor/local/include/msgpack/x3_parse_decl.hpp +16 -0
  243. data/vendor/local/include/msgpack/x3_unpack.hpp +16 -0
  244. data/vendor/local/include/msgpack/x3_unpack_decl.hpp +16 -0
  245. data/vendor/local/include/msgpack/zbuffer_decl.hpp +1 -0
  246. data/vendor/local/include/msgpack/zone_decl.hpp +1 -0
  247. data/vendor/local/include/pcre.h +6 -6
  248. data/vendor/local/lib/cmake/msgpack/msgpack-config-version.cmake +46 -0
  249. data/vendor/local/lib/cmake/msgpack/msgpack-config.cmake +47 -0
  250. data/vendor/local/lib/cmake/msgpack/msgpack-targets-noconfig.cmake +29 -0
  251. data/vendor/local/lib/cmake/msgpack/msgpack-targets.cmake +101 -0
  252. data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
  253. data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
  254. data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
  255. data/vendor/local/lib/groonga/plugins/functions/index_column.la +1 -1
  256. data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
  257. data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
  258. data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
  259. data/vendor/local/lib/groonga/plugins/functions/math.la +1 -1
  260. data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
  261. data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
  262. data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
  263. data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
  264. data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
  265. data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
  266. data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
  267. data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
  268. data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
  269. data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
  270. data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
  271. data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
  272. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  273. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  274. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  275. data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
  276. data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
  277. data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
  278. data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
  279. data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +2 -2
  280. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  281. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  282. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  283. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
  284. data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +150 -19
  285. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +123 -65
  286. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +528 -113
  287. data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +142 -40
  288. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  289. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  290. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  291. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
  292. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  293. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  294. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  295. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
  296. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  297. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  298. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  299. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
  300. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +1 -1
  301. data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +64 -35
  302. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +3 -1
  303. data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters.rb +15 -21
  304. data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters/optimizer.rb +274 -0
  305. data/vendor/local/lib/groonga/scripts/ruby/expression_tree.rb +8 -2
  306. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign.rb +22 -0
  307. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign_binary_operation.rb +24 -0
  308. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/binary_operation.rb +206 -8
  309. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/constant.rb +16 -1
  310. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +30 -1
  311. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/logical_operation.rb +6 -0
  312. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/member.rb +18 -0
  313. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/null.rb +17 -0
  314. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/reference.rb +18 -0
  315. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/table.rb +14 -0
  316. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/unary_operation.rb +26 -0
  317. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/variable.rb +4 -0
  318. data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +78 -8
  319. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +10 -0
  320. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +2 -0
  321. data/vendor/local/lib/groonga/scripts/ruby/locale_output.rb +28 -0
  322. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +36 -4
  323. data/vendor/local/lib/groonga/scripts/ruby/record.rb +1 -1
  324. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +0 -3
  325. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +46 -5
  326. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data_size_estimator.rb +5 -136
  327. data/vendor/local/lib/groonga/scripts/ruby/table.rb +2 -2
  328. data/vendor/local/lib/libgroonga.a +0 -0
  329. data/vendor/local/lib/libgroonga.dll.a +0 -0
  330. data/vendor/local/lib/libgroonga.la +1 -1
  331. data/vendor/local/lib/liblz4.a +0 -0
  332. data/vendor/local/lib/liblz4.dll +0 -0
  333. data/vendor/local/lib/liblz4.dll.1 +0 -0
  334. data/vendor/local/lib/{liblz4.dll.1.5.0 → liblz4.dll.1.8.2} +0 -0
  335. data/vendor/local/lib/libmecab.dll.a +0 -0
  336. data/vendor/local/lib/libmsgpackc.a +0 -0
  337. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  338. data/vendor/local/lib/libonigmo.a +0 -0
  339. data/vendor/local/lib/libonigmo.dll.a +0 -0
  340. data/vendor/local/lib/libpcre.a +0 -0
  341. data/vendor/local/lib/libpcre.dll.a +0 -0
  342. data/vendor/local/lib/libpcre.la +2 -2
  343. data/vendor/local/lib/libpcrecpp.dll.a +0 -0
  344. data/vendor/local/lib/libpcrecpp.la +1 -1
  345. data/vendor/local/lib/libpcreposix.a +0 -0
  346. data/vendor/local/lib/libpcreposix.dll.a +0 -0
  347. data/vendor/local/lib/libpcreposix.la +2 -2
  348. data/vendor/local/lib/libz.dll.a +0 -0
  349. data/vendor/local/lib/pkgconfig/groonga-normalizer-mysql.pc +1 -1
  350. data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
  351. data/vendor/local/lib/pkgconfig/liblz4.pc +3 -3
  352. data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
  353. data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
  354. data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
  355. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  356. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  357. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  358. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  359. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  360. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  361. data/vendor/local/share/doc/groonga-normalizer-mysql/README.md +14 -22
  362. data/vendor/local/share/doc/groonga-normalizer-mysql/news.md +22 -2
  363. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  364. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +113 -4
  365. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +46 -19
  366. data/vendor/local/share/doc/groonga/en/html/_static/documentation_options.js +10 -0
  367. data/vendor/local/share/doc/groonga/en/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
  368. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
  369. data/vendor/local/share/doc/groonga/en/html/_static/language_data.js +297 -0
  370. data/vendor/local/share/doc/groonga/en/html/_static/pygments.css +4 -0
  371. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +69 -322
  372. data/vendor/local/share/doc/groonga/en/html/characteristic.html +16 -24
  373. data/vendor/local/share/doc/groonga/en/html/client.html +15 -23
  374. data/vendor/local/share/doc/groonga/en/html/community.html +30 -38
  375. data/vendor/local/share/doc/groonga/en/html/contribution.html +23 -31
  376. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +15 -23
  377. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +15 -23
  378. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +58 -66
  379. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +51 -56
  380. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +52 -56
  381. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +27 -35
  382. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +19 -27
  383. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +26 -34
  384. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +167 -167
  385. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +16 -24
  386. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +28 -36
  387. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +15 -23
  388. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +15 -23
  389. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +59 -67
  390. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +31 -39
  391. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +18 -26
  392. data/vendor/local/share/doc/groonga/en/html/development.html +15 -23
  393. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +38 -43
  394. data/vendor/local/share/doc/groonga/en/html/genindex.html +50 -28
  395. data/vendor/local/share/doc/groonga/en/html/index.html +248 -234
  396. data/vendor/local/share/doc/groonga/en/html/install.html +43 -47
  397. data/vendor/local/share/doc/groonga/en/html/install/centos.html +43 -51
  398. data/vendor/local/share/doc/groonga/en/html/install/debian.html +52 -131
  399. data/vendor/local/share/doc/groonga/en/html/install/docker.html +155 -0
  400. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +41 -49
  401. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +29 -37
  402. data/vendor/local/share/doc/groonga/en/html/install/others.html +142 -150
  403. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +30 -38
  404. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +43 -51
  405. data/vendor/local/share/doc/groonga/en/html/install/windows.html +33 -41
  406. data/vendor/local/share/doc/groonga/en/html/limitations.html +36 -42
  407. data/vendor/local/share/doc/groonga/en/html/news.html +1586 -598
  408. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +83 -83
  409. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +147 -155
  410. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +26 -34
  411. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +225 -233
  412. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +48 -56
  413. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +378 -386
  414. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +320 -328
  415. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +442 -448
  416. data/vendor/local/share/doc/groonga/en/html/news/5.x.html +742 -860
  417. data/vendor/local/share/doc/groonga/en/html/news/6.x.html +544 -621
  418. data/vendor/local/share/doc/groonga/en/html/news/senna.html +32 -40
  419. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  420. data/vendor/local/share/doc/groonga/en/html/reference.html +208 -198
  421. data/vendor/local/share/doc/groonga/en/html/reference/alias.html +85 -93
  422. data/vendor/local/share/doc/groonga/en/html/reference/api.html +50 -57
  423. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +62 -77
  424. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +117 -149
  425. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +140 -176
  426. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +43 -55
  427. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +48 -56
  428. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +194 -254
  429. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +106 -138
  430. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +62 -82
  431. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +117 -137
  432. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +74 -98
  433. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +79 -103
  434. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +40 -48
  435. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +57 -73
  436. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +75 -99
  437. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_inspect.html +495 -0
  438. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +52 -68
  439. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +291 -357
  440. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +69 -89
  441. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +47 -59
  442. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +226 -306
  443. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +120 -160
  444. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +80 -103
  445. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +46 -58
  446. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +40 -52
  447. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +52 -66
  448. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +98 -122
  449. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +40 -26
  450. data/vendor/local/share/doc/groonga/en/html/reference/column.html +16 -24
  451. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +16 -24
  452. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +30 -34
  453. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +16 -24
  454. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +92 -100
  455. data/vendor/local/share/doc/groonga/en/html/reference/command.html +76 -84
  456. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +26 -34
  457. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +64 -72
  458. data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +21 -29
  459. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +25 -33
  460. data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +32 -40
  461. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +105 -113
  462. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -50
  463. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +85 -73
  464. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +31 -37
  465. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +131 -139
  466. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +370 -326
  467. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +115 -117
  468. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +38 -44
  469. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +47 -53
  470. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +40 -48
  471. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +42 -50
  472. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +41 -49
  473. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +37 -45
  474. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +71 -63
  475. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +31 -37
  476. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +49 -51
  477. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +64 -71
  478. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +335 -138
  479. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +233 -87
  480. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +45 -53
  481. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +42 -48
  482. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +43 -51
  483. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +58 -64
  484. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +33 -38
  485. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +31 -38
  486. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +295 -218
  487. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +56 -64
  488. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +532 -214
  489. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +797 -388
  490. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +35 -43
  491. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +188 -196
  492. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +83 -90
  493. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +41 -48
  494. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +41 -49
  495. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +401 -403
  496. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +253 -261
  497. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +60 -68
  498. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +36 -44
  499. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +35 -43
  500. data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +21 -29
  501. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +22 -30
  502. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +21 -29
  503. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +39 -47
  504. data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +47 -53
  505. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  506. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +38 -45
  507. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +38 -45
  508. data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +330 -338
  509. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +1545 -1194
  510. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +57 -65
  511. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +83 -91
  512. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +119 -133
  513. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +30 -38
  514. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +165 -174
  515. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +50 -50
  516. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +104 -112
  517. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +42 -50
  518. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +49 -57
  519. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +46 -54
  520. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +110 -117
  521. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +41 -48
  522. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +40 -46
  523. data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +37 -45
  524. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -27
  525. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +134 -114
  526. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +25 -31
  527. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +66 -66
  528. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +174 -182
  529. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +25 -33
  530. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +27 -35
  531. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +191 -199
  532. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +32 -40
  533. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +189 -163
  534. data/vendor/local/share/doc/groonga/en/html/reference/function.html +59 -64
  535. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +71 -79
  536. data/vendor/local/share/doc/groonga/en/html/reference/functions/cast_loose.html +210 -0
  537. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +49 -55
  538. data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +38 -46
  539. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +133 -142
  540. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +67 -73
  541. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +56 -62
  542. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +80 -88
  543. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +70 -78
  544. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +56 -64
  545. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +87 -94
  546. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +54 -62
  547. data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +55 -63
  548. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -48
  549. data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +36 -44
  550. data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +74 -82
  551. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +152 -160
  552. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +45 -52
  553. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +76 -84
  554. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +37 -45
  555. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +39 -47
  556. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +76 -84
  557. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +37 -45
  558. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day_of_week.html +278 -0
  559. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +37 -45
  560. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +36 -44
  561. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +36 -44
  562. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +36 -44
  563. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +36 -44
  564. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +37 -45
  565. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_find.html +368 -0
  566. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +40 -48
  567. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +54 -62
  568. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +40 -47
  569. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +44 -52
  570. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +307 -316
  571. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +486 -492
  572. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +44 -52
  573. data/vendor/local/share/doc/groonga/en/html/reference/log.html +128 -147
  574. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +43 -92
  575. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_auto.html +179 -0
  576. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc100.html +897 -0
  577. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc51.html +162 -0
  578. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +26 -34
  579. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +48 -56
  580. data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +47 -55
  581. data/vendor/local/share/doc/groonga/en/html/reference/output.html +47 -55
  582. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +20 -28
  583. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +93 -101
  584. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +228 -225
  585. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +59 -67
  586. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +50 -58
  587. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +57 -65
  588. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +76 -86
  589. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +43 -51
  590. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +159 -167
  591. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +93 -101
  592. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +85 -93
  593. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +88 -96
  594. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +142 -150
  595. data/vendor/local/share/doc/groonga/en/html/reference/token_filter/summary.html +147 -0
  596. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +31 -223
  597. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_nfkc100.html +626 -0
  598. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stem.html +291 -0
  599. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stop_word.html +287 -0
  600. data/vendor/local/share/doc/groonga/en/html/reference/tokenizer/summary.html +259 -0
  601. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +42 -1455
  602. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram.html +368 -0
  603. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank.html +221 -0
  604. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +240 -0
  605. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +270 -0
  606. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +292 -0
  607. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
  608. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +200 -0
  609. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +212 -0
  610. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit.html +357 -0
  611. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit_null.html +162 -0
  612. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_mecab.html +783 -0
  613. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_regexp.html +289 -0
  614. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_trigram.html +194 -0
  615. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_unigram.html +194 -0
  616. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +71 -79
  617. data/vendor/local/share/doc/groonga/en/html/reference/types.html +64 -72
  618. data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +29 -37
  619. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +38 -46
  620. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +38 -46
  621. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +38 -46
  622. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +38 -46
  623. data/vendor/local/share/doc/groonga/en/html/search.html +13 -24
  624. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  625. data/vendor/local/share/doc/groonga/en/html/server.html +15 -23
  626. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +27 -35
  627. data/vendor/local/share/doc/groonga/en/html/server/http.html +18 -26
  628. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +94 -102
  629. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +15 -23
  630. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +15 -23
  631. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +18 -26
  632. data/vendor/local/share/doc/groonga/en/html/server/package.html +101 -109
  633. data/vendor/local/share/doc/groonga/en/html/spec.html +19 -27
  634. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +207 -215
  635. data/vendor/local/share/doc/groonga/en/html/spec/search.html +39 -39
  636. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +15 -23
  637. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +46 -50
  638. data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +27 -35
  639. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +26 -31
  640. data/vendor/local/share/doc/groonga/en/html/tutorial.html +17 -25
  641. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +46 -54
  642. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +63 -71
  643. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +30 -38
  644. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +88 -97
  645. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +19 -27
  646. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +61 -69
  647. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +108 -116
  648. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +24 -32
  649. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +19 -27
  650. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +32 -40
  651. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +52 -60
  652. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  653. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +113 -4
  654. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +46 -19
  655. data/vendor/local/share/doc/groonga/ja/html/_static/documentation_options.js +10 -0
  656. data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
  657. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
  658. data/vendor/local/share/doc/groonga/ja/html/_static/language_data.js +124 -0
  659. data/vendor/local/share/doc/groonga/ja/html/_static/pygments.css +4 -0
  660. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +70 -150
  661. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +15 -23
  662. data/vendor/local/share/doc/groonga/ja/html/client.html +15 -23
  663. data/vendor/local/share/doc/groonga/ja/html/community.html +29 -37
  664. data/vendor/local/share/doc/groonga/ja/html/contribution.html +23 -31
  665. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +15 -23
  666. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +15 -23
  667. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +50 -58
  668. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +43 -48
  669. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +47 -51
  670. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +26 -34
  671. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +18 -26
  672. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +23 -31
  673. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +162 -162
  674. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +16 -24
  675. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +26 -34
  676. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +15 -23
  677. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +15 -23
  678. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +50 -58
  679. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +28 -36
  680. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -25
  681. data/vendor/local/share/doc/groonga/ja/html/development.html +15 -23
  682. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +32 -37
  683. data/vendor/local/share/doc/groonga/ja/html/genindex.html +50 -28
  684. data/vendor/local/share/doc/groonga/ja/html/index.html +247 -233
  685. data/vendor/local/share/doc/groonga/ja/html/install.html +41 -45
  686. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +44 -52
  687. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +52 -121
  688. data/vendor/local/share/doc/groonga/ja/html/install/docker.html +155 -0
  689. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +40 -48
  690. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +28 -36
  691. data/vendor/local/share/doc/groonga/ja/html/install/others.html +116 -124
  692. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +28 -36
  693. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +43 -51
  694. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -37
  695. data/vendor/local/share/doc/groonga/ja/html/limitations.html +30 -36
  696. data/vendor/local/share/doc/groonga/ja/html/news.html +1234 -384
  697. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +82 -82
  698. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +146 -154
  699. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +25 -33
  700. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +191 -199
  701. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +41 -49
  702. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +283 -291
  703. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +229 -237
  704. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +274 -280
  705. data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +475 -593
  706. data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +313 -390
  707. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +31 -39
  708. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  709. data/vendor/local/share/doc/groonga/ja/html/reference.html +208 -198
  710. data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +70 -78
  711. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +50 -57
  712. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +57 -72
  713. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +107 -139
  714. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +137 -173
  715. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +40 -52
  716. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +46 -54
  717. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +184 -244
  718. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +99 -131
  719. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +57 -77
  720. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +100 -120
  721. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +71 -95
  722. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +75 -99
  723. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +37 -45
  724. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +54 -70
  725. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +71 -95
  726. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_inspect.html +487 -0
  727. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +49 -65
  728. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +286 -352
  729. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +64 -84
  730. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +44 -56
  731. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +219 -299
  732. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +116 -156
  733. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +70 -93
  734. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +42 -54
  735. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +36 -48
  736. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -62
  737. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +94 -118
  738. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +39 -25
  739. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +15 -23
  740. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +15 -23
  741. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -32
  742. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +15 -23
  743. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +76 -84
  744. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +76 -84
  745. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +25 -33
  746. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +51 -59
  747. data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +20 -28
  748. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +21 -29
  749. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +27 -35
  750. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +101 -109
  751. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +39 -45
  752. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +84 -72
  753. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +30 -36
  754. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +104 -112
  755. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +271 -237
  756. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +100 -102
  757. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +37 -43
  758. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +41 -47
  759. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +38 -46
  760. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +39 -47
  761. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +39 -47
  762. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +34 -42
  763. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +70 -62
  764. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +30 -36
  765. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +42 -44
  766. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +59 -68
  767. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +300 -126
  768. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +212 -80
  769. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +42 -50
  770. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +40 -46
  771. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +41 -49
  772. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +57 -63
  773. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +32 -37
  774. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +30 -37
  775. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +246 -178
  776. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +51 -59
  777. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +479 -175
  778. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +718 -326
  779. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +34 -42
  780. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +145 -153
  781. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +78 -85
  782. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +40 -47
  783. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +36 -44
  784. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +360 -362
  785. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +221 -229
  786. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +47 -55
  787. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +32 -40
  788. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +33 -41
  789. data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +20 -28
  790. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +21 -29
  791. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +20 -28
  792. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +35 -43
  793. data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +42 -48
  794. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +57 -57
  795. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +36 -43
  796. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +36 -43
  797. data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +317 -325
  798. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +1246 -917
  799. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +50 -58
  800. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +77 -85
  801. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +109 -123
  802. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +29 -37
  803. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +131 -140
  804. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -49
  805. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +87 -95
  806. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +36 -44
  807. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +44 -52
  808. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +38 -46
  809. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +93 -100
  810. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +40 -47
  811. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +39 -45
  812. data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +36 -44
  813. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -27
  814. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +125 -107
  815. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +23 -29
  816. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +62 -62
  817. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +132 -140
  818. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +23 -31
  819. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +25 -33
  820. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +166 -174
  821. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +31 -39
  822. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +189 -165
  823. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +59 -64
  824. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +69 -77
  825. data/vendor/local/share/doc/groonga/ja/html/reference/functions/cast_loose.html +208 -0
  826. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +48 -54
  827. data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +37 -45
  828. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +115 -124
  829. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +66 -72
  830. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +55 -61
  831. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +69 -77
  832. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +60 -68
  833. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +54 -62
  834. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +85 -93
  835. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +54 -62
  836. data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +54 -62
  837. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +39 -47
  838. data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +35 -43
  839. data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +67 -75
  840. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +130 -138
  841. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +44 -51
  842. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +61 -69
  843. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +36 -44
  844. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +38 -46
  845. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +63 -71
  846. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +36 -44
  847. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day_of_week.html +276 -0
  848. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +36 -44
  849. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +35 -43
  850. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +35 -43
  851. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +35 -43
  852. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +35 -43
  853. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +36 -44
  854. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_find.html +353 -0
  855. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +39 -47
  856. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +52 -61
  857. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +38 -46
  858. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +38 -46
  859. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +200 -208
  860. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +375 -382
  861. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +39 -47
  862. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +125 -144
  863. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +36 -70
  864. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_auto.html +168 -0
  865. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc100.html +887 -0
  866. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc51.html +160 -0
  867. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +26 -34
  868. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +38 -46
  869. data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +41 -49
  870. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +42 -50
  871. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +20 -28
  872. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +68 -76
  873. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +178 -184
  874. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +38 -46
  875. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +38 -46
  876. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +39 -47
  877. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +63 -73
  878. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +43 -51
  879. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +130 -138
  880. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +72 -80
  881. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +68 -76
  882. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +76 -86
  883. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +129 -137
  884. data/vendor/local/share/doc/groonga/ja/html/reference/token_filter/summary.html +145 -0
  885. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +31 -215
  886. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_nfkc100.html +617 -0
  887. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stem.html +289 -0
  888. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stop_word.html +284 -0
  889. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizer/summary.html +233 -0
  890. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -1349
  891. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram.html +344 -0
  892. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank.html +219 -0
  893. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +237 -0
  894. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +267 -0
  895. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +287 -0
  896. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
  897. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +199 -0
  898. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +209 -0
  899. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit.html +344 -0
  900. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit_null.html +160 -0
  901. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_mecab.html +764 -0
  902. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_regexp.html +284 -0
  903. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_trigram.html +191 -0
  904. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_unigram.html +191 -0
  905. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +65 -73
  906. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +48 -56
  907. data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +29 -37
  908. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +37 -45
  909. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +37 -45
  910. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +37 -45
  911. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +37 -45
  912. data/vendor/local/share/doc/groonga/ja/html/search.html +13 -24
  913. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  914. data/vendor/local/share/doc/groonga/ja/html/server.html +15 -23
  915. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +22 -30
  916. data/vendor/local/share/doc/groonga/ja/html/server/http.html +17 -25
  917. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +82 -90
  918. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +15 -23
  919. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +15 -23
  920. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +16 -24
  921. data/vendor/local/share/doc/groonga/ja/html/server/package.html +99 -107
  922. data/vendor/local/share/doc/groonga/ja/html/spec.html +19 -27
  923. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +201 -209
  924. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +36 -36
  925. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +15 -23
  926. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +44 -48
  927. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +21 -29
  928. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +24 -29
  929. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +16 -24
  930. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +32 -40
  931. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +62 -70
  932. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -30
  933. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +77 -86
  934. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +15 -23
  935. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +56 -64
  936. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +84 -92
  937. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +20 -28
  938. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -26
  939. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +21 -29
  940. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +43 -51
  941. data/vendor/local/share/doc/pcre/AUTHORS +3 -3
  942. data/vendor/local/share/doc/pcre/ChangeLog +53 -0
  943. data/vendor/local/share/doc/pcre/LICENCE +3 -3
  944. data/vendor/local/share/doc/pcre/NEWS +6 -0
  945. data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +8 -7
  946. data/vendor/local/share/groonga/mruby/LEGAL +35 -35
  947. data/vendor/local/share/license/cv2pdb/{README → README.MD} +28 -10
  948. data/vendor/local/share/license/groonga-normalizer-mysql/README.md +14 -22
  949. data/vendor/local/share/license/lz4/LICENSE +2 -2
  950. data/vendor/local/share/license/mruby/AUTHORS +3 -0
  951. data/vendor/local/share/license/mruby/MITL +1 -1
  952. data/vendor/local/share/license/mruby/README.md +1 -1
  953. data/vendor/local/share/license/msgpack/README.md +5 -34
  954. data/vendor/local/share/license/pcre/LICENCE +3 -3
  955. data/vendor/local/share/man/man1/lz4.1 +221 -86
  956. data/vendor/local/share/man/man1/lz4c.1 +222 -32
  957. data/vendor/local/share/man/man1/lz4cat.1 +221 -30
  958. data/vendor/local/share/man/man1/unlz4.1 +223 -0
  959. metadata +231 -87
  960. data/lib/2.1/groonga.so +0 -0
  961. data/vendor/local/lib/groonga/plugins/expression_rewriters/optimizer.rb +0 -147
  962. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/options.rb +0 -14
  963. data/vendor/local/share/doc/groonga/en/html/_static/ajax-loader.gif +0 -0
  964. data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
  965. data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
  966. data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
  967. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  968. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  969. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  970. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  971. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +0 -808
  972. data/vendor/local/share/doc/groonga/ja/html/_static/ajax-loader.gif +0 -0
  973. data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
  974. data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
  975. data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
  976. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  977. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  978. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  979. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  980. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +0 -808
@@ -0,0 +1,259 @@
1
+
2
+
3
+ <!DOCTYPE html>
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
6
+ <head>
7
+ <meta charset="utf-8" />
8
+ <title>7.8.1. Summary &#8212; Groonga v9.0.2 documentation</title>
9
+ <link rel="stylesheet" href="../../_static/groonga.css" type="text/css" />
10
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
11
+
12
+ <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
13
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
14
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
15
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
16
+ <script type="text/javascript" src="../../_static/language_data.js"></script>
17
+
18
+ <link rel="shortcut icon" href="../../_static/favicon.ico"/>
19
+ <link rel="index" title="Index" href="../../genindex.html" />
20
+ <link rel="search" title="Search" href="../../search.html" />
21
+ <link rel="next" title="7.8.2. TokenBigram" href="../tokenizers/token_bigram.html" />
22
+ <link rel="prev" title="7.8. Tokenizers" href="../tokenizers.html" />
23
+ </head><body>
24
+ <div class="header">
25
+ <h1 class="title">
26
+ <a id="top-link" href="../../index.html">
27
+ <span class="project">groonga</span>
28
+ <span class="separator">-</span>
29
+ <span class="description">An open-source fulltext search engine and column store.</span>
30
+ </a>
31
+ </h1>
32
+
33
+ <div class="other-language-links">
34
+ <ul>
35
+ <li><a href="../../../../ja/html/reference/tokenizer/summary.html">日本語</a></li>
36
+ </ul>
37
+ </div>
38
+ </div>
39
+
40
+
41
+ <div class="related" role="navigation" aria-label="related navigation">
42
+ <h3>Navigation</h3>
43
+ <ul>
44
+ <li class="right" style="margin-right: 10px">
45
+ <a href="../../genindex.html" title="General Index"
46
+ accesskey="I">index</a></li>
47
+ <li class="right" >
48
+ <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
49
+ accesskey="N">next</a> |</li>
50
+ <li class="right" >
51
+ <a href="../tokenizers.html" title="7.8. Tokenizers"
52
+ accesskey="P">previous</a> |</li>
53
+ <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
54
+ <li class="nav-item nav-item-1"><a href="../../reference.html" >7. Reference manual</a> &#187;</li>
55
+ <li class="nav-item nav-item-2"><a href="../tokenizers.html" accesskey="U">7.8. Tokenizers</a> &#187;</li>
56
+ </ul>
57
+ </div>
58
+
59
+ <div class="document">
60
+ <div class="documentwrapper">
61
+ <div class="bodywrapper">
62
+ <div class="body" role="main">
63
+
64
+ <div class="section" id="summary">
65
+ <h1>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h1>
66
+ <p>Groonga has a tokenizer module that tokenizes text. It is used in
67
+ the following cases:</p>
68
+ <blockquote>
69
+ <div><ul>
70
+ <li><p>Indexing text</p>
71
+ <div class="figure align-center" id="id1">
72
+ <a class="reference internal image-reference" href="../../_images/used-when-indexing.png"><img alt="../../_images/used-when-indexing.png" src="../../_images/used-when-indexing.png" style="width: 80%;" /></a>
73
+ <p class="caption"><span class="caption-text">Tokenizer is used when indexing text.</span><a class="headerlink" href="#id1" title="Permalink to this image">¶</a></p>
74
+ </div>
75
+ </li>
76
+ <li><p>Searching by query</p>
77
+ <div class="figure align-center" id="id2">
78
+ <a class="reference internal image-reference" href="../../_images/used-when-searching.png"><img alt="../../_images/used-when-searching.png" src="../../_images/used-when-searching.png" style="width: 80%;" /></a>
79
+ <p class="caption"><span class="caption-text">Tokenizer is used when searching by query.</span><a class="headerlink" href="#id2" title="Permalink to this image">¶</a></p>
80
+ </div>
81
+ </li>
82
+ </ul>
83
+ </div></blockquote>
84
+ <p>The tokenizer is an important module for full-text search. You can change the
85
+ trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
86
+ tokenizer.</p>
87
+ <p>Normally, <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> is a suitable tokenizer. If you don’t
88
+ know much about tokenizers, it’s recommended that you choose
89
+ <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a>.</p>
90
+ <p>You can try a tokenizer by <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> and
91
+ <a class="reference internal" href="../commands/table_tokenize.html"><span class="doc">table_tokenize</span></a>. Here is an example to
92
+ try <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> tokenizer by
93
+ <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a>:</p>
94
+ <p>Execution example:</p>
95
+ <div class="highlight-none notranslate"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
96
+ # [
97
+ # [
98
+ # 0,
99
+ # 1337566253.89858,
100
+ # 0.000355720520019531
101
+ # ],
102
+ # [
103
+ # {
104
+ # &quot;position&quot;: 0,
105
+ # &quot;force_prefix&quot;: false,
106
+ # &quot;value&quot;: &quot;He&quot;
107
+ # },
108
+ # {
109
+ # &quot;position&quot;: 1,
110
+ # &quot;force_prefix&quot;: false,
111
+ # &quot;value&quot;: &quot;el&quot;
112
+ # },
113
+ # {
114
+ # &quot;position&quot;: 2,
115
+ # &quot;force_prefix&quot;: false,
116
+ # &quot;value&quot;: &quot;ll&quot;
117
+ # },
118
+ # {
119
+ # &quot;position&quot;: 3,
120
+ # &quot;force_prefix&quot;: false,
121
+ # &quot;value&quot;: &quot;lo&quot;
122
+ # },
123
+ # {
124
+ # &quot;position&quot;: 4,
125
+ # &quot;force_prefix&quot;: false,
126
+ # &quot;value&quot;: &quot;o &quot;
127
+ # },
128
+ # {
129
+ # &quot;position&quot;: 5,
130
+ # &quot;force_prefix&quot;: false,
131
+ # &quot;value&quot;: &quot; W&quot;
132
+ # },
133
+ # {
134
+ # &quot;position&quot;: 6,
135
+ # &quot;force_prefix&quot;: false,
136
+ # &quot;value&quot;: &quot;Wo&quot;
137
+ # },
138
+ # {
139
+ # &quot;position&quot;: 7,
140
+ # &quot;force_prefix&quot;: false,
141
+ # &quot;value&quot;: &quot;or&quot;
142
+ # },
143
+ # {
144
+ # &quot;position&quot;: 8,
145
+ # &quot;force_prefix&quot;: false,
146
+ # &quot;value&quot;: &quot;rl&quot;
147
+ # },
148
+ # {
149
+ # &quot;position&quot;: 9,
150
+ # &quot;force_prefix&quot;: false,
151
+ # &quot;value&quot;: &quot;ld&quot;
152
+ # },
153
+ # {
154
+ # &quot;position&quot;: 10,
155
+ # &quot;force_prefix&quot;: false,
156
+ # &quot;value&quot;: &quot;d&quot;
157
+ # }
158
+ # ]
159
+ # ]
160
+ </pre></div>
161
+ </div>
162
+ <p>“tokenize” is the process that extracts zero or more tokens from a
163
+ text. There are some “tokenize” methods.</p>
164
+ <p>For example, <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
165
+ bigram tokenize method:</p>
166
+ <blockquote>
167
+ <div><ul class="simple">
168
+ <li><p><code class="docutils literal notranslate"><span class="pre">He</span></code></p></li>
169
+ <li><p><code class="docutils literal notranslate"><span class="pre">el</span></code></p></li>
170
+ <li><p><code class="docutils literal notranslate"><span class="pre">ll</span></code></p></li>
171
+ <li><p><code class="docutils literal notranslate"><span class="pre">lo</span></code></p></li>
172
+ <li><p><code class="docutils literal notranslate"><span class="pre">o_</span></code> (<code class="docutils literal notranslate"><span class="pre">_</span></code> means a white-space)</p></li>
173
+ <li><p><code class="docutils literal notranslate"><span class="pre">_W</span></code> (<code class="docutils literal notranslate"><span class="pre">_</span></code> means a white-space)</p></li>
174
+ <li><p><code class="docutils literal notranslate"><span class="pre">Wo</span></code></p></li>
175
+ <li><p><code class="docutils literal notranslate"><span class="pre">or</span></code></p></li>
176
+ <li><p><code class="docutils literal notranslate"><span class="pre">rl</span></code></p></li>
177
+ <li><p><code class="docutils literal notranslate"><span class="pre">ld</span></code></p></li>
178
+ </ul>
179
+ </div></blockquote>
180
+ <p>In the above example, 10 tokens are extracted from one text <code class="docutils literal notranslate"><span class="pre">Hello</span>
181
+ <span class="pre">World</span></code>.</p>
182
+ <p>For example, <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
183
+ white-space-separate tokenize method:</p>
184
+ <blockquote>
185
+ <div><ul class="simple">
186
+ <li><p><code class="docutils literal notranslate"><span class="pre">Hello</span></code></p></li>
187
+ <li><p><code class="docutils literal notranslate"><span class="pre">World</span></code></p></li>
188
+ </ul>
189
+ </div></blockquote>
190
+ <p>In the above example, 2 tokens are extracted from one text <code class="docutils literal notranslate"><span class="pre">Hello</span>
191
+ <span class="pre">World</span></code>.</p>
192
+ <p>A token is used as a search key. You can find indexed documents only by
193
+ tokens that are extracted by the tokenize method in use. For example, you
194
+ can find <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal notranslate"><span class="pre">ll</span></code> with bigram tokenize method but you
195
+ can’t find <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal notranslate"><span class="pre">ll</span></code> with white-space-separate tokenize
196
+ method, because the white-space-separate tokenize method doesn’t extract
197
+ <code class="docutils literal notranslate"><span class="pre">ll</span></code> token. It just extracts <code class="docutils literal notranslate"><span class="pre">Hello</span></code> and <code class="docutils literal notranslate"><span class="pre">World</span></code> tokens.</p>
198
+ <p>In general, a tokenize method that generates small tokens increases
199
+ recall but decreases precision. A tokenize method that generates large
200
+ tokens increases precision but decreases recall.</p>
201
+ <p>For example, we can find <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> and <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal notranslate"><span class="pre">or</span></code> with
202
+ bigram tokenize method. <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> is a noise for people who
203
+ wants to search “logical and”. It means that precision is
204
+ decreased. But recall is increased.</p>
205
+ <p>We can find only <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal notranslate"><span class="pre">or</span></code> with white-space-separate
206
+ tokenize method, because <code class="docutils literal notranslate"><span class="pre">World</span></code> is tokenized to one token <code class="docutils literal notranslate"><span class="pre">World</span></code>
207
+ with white-space-separate tokenize method. It means that precision is
208
+ increased for people who want to search “logical and”. But recall is
209
+ decreased because <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> that contains <code class="docutils literal notranslate"><span class="pre">or</span></code> isn’t found.</p>
210
+ </div>
211
+
212
+
213
+ </div>
214
+ </div>
215
+ </div>
216
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
217
+ <div class="sphinxsidebarwrapper">
218
+ <h4>Previous topic</h4>
219
+ <p class="topless"><a href="../tokenizers.html"
220
+ title="previous chapter">7.8. Tokenizers</a></p>
221
+ <h4>Next topic</h4>
222
+ <p class="topless"><a href="../tokenizers/token_bigram.html"
223
+ title="next chapter">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></p>
224
+ <div id="searchbox" style="display: none" role="search">
225
+ <h3>Quick search</h3>
226
+ <div class="searchformwrapper">
227
+ <form class="search" action="../../search.html" method="get">
228
+ <input type="text" name="q" />
229
+ <input type="submit" value="Go" />
230
+ </form>
231
+ </div>
232
+ </div>
233
+ <script type="text/javascript">$('#searchbox').show(0);</script>
234
+ </div>
235
+ </div>
236
+ <div class="clearer"></div>
237
+ </div>
238
+ <div class="related" role="navigation" aria-label="related navigation">
239
+ <h3>Navigation</h3>
240
+ <ul>
241
+ <li class="right" style="margin-right: 10px">
242
+ <a href="../../genindex.html" title="General Index"
243
+ >index</a></li>
244
+ <li class="right" >
245
+ <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
246
+ >next</a> |</li>
247
+ <li class="right" >
248
+ <a href="../tokenizers.html" title="7.8. Tokenizers"
249
+ >previous</a> |</li>
250
+ <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
251
+ <li class="nav-item nav-item-1"><a href="../../reference.html" >7. Reference manual</a> &#187;</li>
252
+ <li class="nav-item nav-item-2"><a href="../tokenizers.html" >7.8. Tokenizers</a> &#187;</li>
253
+ </ul>
254
+ </div>
255
+ <div class="footer" role="contentinfo">
256
+ &#169; Copyright 2009-2019, Brazil, Inc.
257
+ </div>
258
+ </body>
259
+ </html>
@@ -1,34 +1,26 @@
1
1
 
2
2
 
3
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <!DOCTYPE html>
5
4
 
6
5
  <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
7
6
  <head>
8
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
- <title>7.8. Tokenizers &#8212; Groonga v7.1.0-73-g6d02cfa documentation</title>
7
+ <meta charset="utf-8" />
8
+ <title>7.8. Tokenizers &#8212; Groonga v9.0.2 documentation</title>
10
9
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
11
10
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
12
- <script type="text/javascript">
13
- var DOCUMENTATION_OPTIONS = {
14
- URL_ROOT: '../',
15
- VERSION: '7.1.0-73-g6d02cfa',
16
- COLLAPSE_INDEX: false,
17
- FILE_SUFFIX: '.html',
18
- HAS_SOURCE: false,
19
- SOURCELINK_SUFFIX: '.txt'
20
- };
21
- </script>
11
+
12
+ <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
22
13
  <script type="text/javascript" src="../_static/jquery.js"></script>
23
14
  <script type="text/javascript" src="../_static/underscore.js"></script>
24
15
  <script type="text/javascript" src="../_static/doctools.js"></script>
16
+ <script type="text/javascript" src="../_static/language_data.js"></script>
17
+
25
18
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
26
19
  <link rel="index" title="Index" href="../genindex.html" />
27
20
  <link rel="search" title="Search" href="../search.html" />
28
- <link rel="next" title="7.9. Token filters" href="token_filters.html" />
29
- <link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
30
- </head>
31
- <body>
21
+ <link rel="next" title="7.8.1. Summary" href="tokenizer/summary.html" />
22
+ <link rel="prev" title="7.7.2.3. NormalizerNFKC51" href="normalizers/normalizer_nfkc51.html" />
23
+ </head><body>
32
24
  <div class="header">
33
25
  <h1 class="title">
34
26
  <a id="top-link" href="../index.html">
@@ -53,12 +45,12 @@
53
45
  <a href="../genindex.html" title="General Index"
54
46
  accesskey="I">index</a></li>
55
47
  <li class="right" >
56
- <a href="token_filters.html" title="7.9. Token filters"
48
+ <a href="tokenizer/summary.html" title="7.8.1. Summary"
57
49
  accesskey="N">next</a> |</li>
58
50
  <li class="right" >
59
- <a href="normalizers.html" title="7.7. Normalizers"
51
+ <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
60
52
  accesskey="P">previous</a> |</li>
61
- <li class="nav-item nav-item-0"><a href="../index.html">Groonga v7.1.0-73-g6d02cfa documentation</a> &#187;</li>
53
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
62
54
  <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. Reference manual</a> &#187;</li>
63
55
  </ul>
64
56
  </div>
@@ -70,1403 +62,24 @@
70
62
 
71
63
  <div class="section" id="tokenizers">
72
64
  <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
73
- <div class="section" id="summary">
74
- <h2>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h2>
75
- <p>Groonga has tokenizer module that tokenizes text. It is used when
76
- the following cases:</p>
77
- <blockquote>
78
- <div><ul>
79
- <li><p class="first">Indexing text</p>
80
- <div class="figure align-center" id="id1">
81
- <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
82
- <p class="caption"><span class="caption-text">Tokenizer is used when indexing text.</span></p>
83
- </div>
84
- </li>
85
- <li><p class="first">Searching by query</p>
86
- <div class="figure align-center" id="id2">
87
- <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
88
- <p class="caption"><span class="caption-text">Tokenizer is used when searching by query.</span></p>
89
- </div>
90
- </li>
91
- </ul>
92
- </div></blockquote>
93
- <p>Tokenizer is an important module for full-text search. You can change
94
- trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
95
- tokenizer.</p>
96
- <p>Normally, <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> is a suitable tokenizer. If you don't
97
- know much about tokenizer, it's recommended that you choose
98
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>.</p>
99
- <p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> and
100
- <a class="reference internal" href="commands/table_tokenize.html"><span class="doc">table_tokenize</span></a>. Here is an example to
101
- try <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> tokenizer by
102
- <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a>:</p>
103
- <p>Execution example:</p>
104
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
105
- # [
106
- # [
107
- # 0,
108
- # 1337566253.89858,
109
- # 0.000355720520019531
110
- # ],
111
- # [
112
- # {
113
- # &quot;position&quot;: 0,
114
- # &quot;force_prefix&quot;: false,
115
- # &quot;value&quot;: &quot;He&quot;
116
- # },
117
- # {
118
- # &quot;position&quot;: 1,
119
- # &quot;force_prefix&quot;: false,
120
- # &quot;value&quot;: &quot;el&quot;
121
- # },
122
- # {
123
- # &quot;position&quot;: 2,
124
- # &quot;force_prefix&quot;: false,
125
- # &quot;value&quot;: &quot;ll&quot;
126
- # },
127
- # {
128
- # &quot;position&quot;: 3,
129
- # &quot;force_prefix&quot;: false,
130
- # &quot;value&quot;: &quot;lo&quot;
131
- # },
132
- # {
133
- # &quot;position&quot;: 4,
134
- # &quot;force_prefix&quot;: false,
135
- # &quot;value&quot;: &quot;o &quot;
136
- # },
137
- # {
138
- # &quot;position&quot;: 5,
139
- # &quot;force_prefix&quot;: false,
140
- # &quot;value&quot;: &quot; W&quot;
141
- # },
142
- # {
143
- # &quot;position&quot;: 6,
144
- # &quot;force_prefix&quot;: false,
145
- # &quot;value&quot;: &quot;Wo&quot;
146
- # },
147
- # {
148
- # &quot;position&quot;: 7,
149
- # &quot;force_prefix&quot;: false,
150
- # &quot;value&quot;: &quot;or&quot;
151
- # },
152
- # {
153
- # &quot;position&quot;: 8,
154
- # &quot;force_prefix&quot;: false,
155
- # &quot;value&quot;: &quot;rl&quot;
156
- # },
157
- # {
158
- # &quot;position&quot;: 9,
159
- # &quot;force_prefix&quot;: false,
160
- # &quot;value&quot;: &quot;ld&quot;
161
- # },
162
- # {
163
- # &quot;position&quot;: 10,
164
- # &quot;force_prefix&quot;: false,
165
- # &quot;value&quot;: &quot;d&quot;
166
- # }
167
- # ]
168
- # ]
169
- </pre></div>
170
- </div>
171
- </div>
172
- <div class="section" id="what-is-tokenize">
173
- <h2>7.8.2. What is &quot;tokenize&quot;?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
174
- <p>&quot;tokenize&quot; is the process that extracts zero or more tokens from a
175
- text. There are some &quot;tokenize&quot; methods.</p>
176
- <p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
177
- bigram tokenize method:</p>
178
- <blockquote>
179
- <div><ul class="simple">
180
- <li><code class="docutils literal"><span class="pre">He</span></code></li>
181
- <li><code class="docutils literal"><span class="pre">el</span></code></li>
182
- <li><code class="docutils literal"><span class="pre">ll</span></code></li>
183
- <li><code class="docutils literal"><span class="pre">lo</span></code></li>
184
- <li><code class="docutils literal"><span class="pre">o_</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
185
- <li><code class="docutils literal"><span class="pre">_W</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
186
- <li><code class="docutils literal"><span class="pre">Wo</span></code></li>
187
- <li><code class="docutils literal"><span class="pre">or</span></code></li>
188
- <li><code class="docutils literal"><span class="pre">rl</span></code></li>
189
- <li><code class="docutils literal"><span class="pre">ld</span></code></li>
190
- </ul>
191
- </div></blockquote>
192
- <p>In the above example, 10 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
193
- <span class="pre">World</span></code>.</p>
194
- <p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
195
- white-space-separate tokenize method:</p>
196
- <blockquote>
197
- <div><ul class="simple">
198
- <li><code class="docutils literal"><span class="pre">Hello</span></code></li>
199
- <li><code class="docutils literal"><span class="pre">World</span></code></li>
200
- </ul>
201
- </div></blockquote>
202
- <p>In the above example, 2 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
203
- <span class="pre">World</span></code>.</p>
204
- <p>Token is used as search key. You can find indexed documents only by
205
- tokens that are extracted by used tokenize method. For example, you
206
- can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with bigram tokenize method but you
207
- can't find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with white-space-separate tokenize
208
- method. Because white-space-separate tokenize method doesn't extract
209
- <code class="docutils literal"><span class="pre">ll</span></code> token. It just extracts <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code> tokens.</p>
210
- <p>In general, tokenize method that generates small tokens increases
211
- recall but decreases precision. Tokenize method that generates large
212
- tokens increases precision but decreases recall.</p>
213
- <p>For example, we can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> and <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with
214
- bigram tokenize method. <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is a noise for people who
215
- wants to search &quot;logical and&quot;. It means that precision is
216
- decreased. But recall is increased.</p>
217
- <p>We can find only <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with white-space-separate
218
- tokenize method. Because <code class="docutils literal"><span class="pre">World</span></code> is tokenized to one token <code class="docutils literal"><span class="pre">World</span></code>
219
- with white-space-separate tokenize method. It means that precision is
220
- increased for people who wants to search &quot;logical and&quot;. But recall is
221
- decreased because <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> that contains <code class="docutils literal"><span class="pre">or</span></code> isn't found.</p>
222
- </div>
223
- <div class="section" id="built-in-tokenizsers">
224
- <h2>7.8.3. Built-in tokenizsers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
225
- <p>Here is a list of built-in tokenizers:</p>
226
- <blockquote>
227
- <div><ul class="simple">
228
- <li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
229
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
230
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
231
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
232
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
233
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
234
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></li>
235
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></li>
236
- <li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
237
- <li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
238
- <li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
239
- <li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
240
- <li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
241
- <li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
242
- </ul>
243
- </div></blockquote>
244
- <div class="section" id="tokenbigram">
245
- <span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
246
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> is a bigram based tokenizer. It's recommended to use
247
- this tokenizer for most cases.</p>
248
- <p>Bigram tokenize method tokenizes a text to two adjacent characters
249
- tokens. For example, <code class="docutils literal"><span class="pre">Hello</span></code> is tokenized to the following tokens:</p>
250
- <blockquote>
251
- <div><ul class="simple">
252
- <li><code class="docutils literal"><span class="pre">He</span></code></li>
253
- <li><code class="docutils literal"><span class="pre">el</span></code></li>
254
- <li><code class="docutils literal"><span class="pre">ll</span></code></li>
255
- <li><code class="docutils literal"><span class="pre">lo</span></code></li>
256
- </ul>
257
- </div></blockquote>
258
- <p>Bigram tokenize method is good for recall because you can find all
259
- texts by query consists of two or more characters.</p>
260
- <p>In general, you can't find all texts by query consists of one
261
- character because one character token doesn't exist. But you can find
262
- all texts by query consists of one character in Groonga. Because
263
- Groonga find tokens that start with query by predictive search. For
264
- example, Groonga can find <code class="docutils literal"><span class="pre">ll</span></code> and <code class="docutils literal"><span class="pre">lo</span></code> tokens by <code class="docutils literal"><span class="pre">l</span></code> query.</p>
265
- <p>Bigram tokenize method isn't good for precision because you can find
266
- texts that includes query in word. For example, you can find <code class="docutils literal"><span class="pre">world</span></code>
267
- by <code class="docutils literal"><span class="pre">or</span></code>. This is more sensitive for ASCII only languages rather than
268
- non-ASCII languages. <code class="docutils literal"><span class="pre">TokenBigram</span></code> has solution for this problem
269
- described in the below.</p>
270
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior is different when it's worked with any
271
- <a class="reference internal" href="normalizers.html"><span class="doc">Normalizers</span></a>.</p>
272
- <p>If no normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses pure bigram (all tokens
273
- except the last token have two characters) tokenize method:</p>
274
- <p>Execution example:</p>
275
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
276
- # [
277
- # [
278
- # 0,
279
- # 1337566253.89858,
280
- # 0.000355720520019531
281
- # ],
282
- # [
283
- # {
284
- # &quot;position&quot;: 0,
285
- # &quot;force_prefix&quot;: false,
286
- # &quot;value&quot;: &quot;He&quot;
287
- # },
288
- # {
289
- # &quot;position&quot;: 1,
290
- # &quot;force_prefix&quot;: false,
291
- # &quot;value&quot;: &quot;el&quot;
292
- # },
293
- # {
294
- # &quot;position&quot;: 2,
295
- # &quot;force_prefix&quot;: false,
296
- # &quot;value&quot;: &quot;ll&quot;
297
- # },
298
- # {
299
- # &quot;position&quot;: 3,
300
- # &quot;force_prefix&quot;: false,
301
- # &quot;value&quot;: &quot;lo&quot;
302
- # },
303
- # {
304
- # &quot;position&quot;: 4,
305
- # &quot;force_prefix&quot;: false,
306
- # &quot;value&quot;: &quot;o &quot;
307
- # },
308
- # {
309
- # &quot;position&quot;: 5,
310
- # &quot;force_prefix&quot;: false,
311
- # &quot;value&quot;: &quot; W&quot;
312
- # },
313
- # {
314
- # &quot;position&quot;: 6,
315
- # &quot;force_prefix&quot;: false,
316
- # &quot;value&quot;: &quot;Wo&quot;
317
- # },
318
- # {
319
- # &quot;position&quot;: 7,
320
- # &quot;force_prefix&quot;: false,
321
- # &quot;value&quot;: &quot;or&quot;
322
- # },
323
- # {
324
- # &quot;position&quot;: 8,
325
- # &quot;force_prefix&quot;: false,
326
- # &quot;value&quot;: &quot;rl&quot;
327
- # },
328
- # {
329
- # &quot;position&quot;: 9,
330
- # &quot;force_prefix&quot;: false,
331
- # &quot;value&quot;: &quot;ld&quot;
332
- # },
333
- # {
334
- # &quot;position&quot;: 10,
335
- # &quot;force_prefix&quot;: false,
336
- # &quot;value&quot;: &quot;d&quot;
337
- # }
338
- # ]
339
- # ]
340
- </pre></div>
341
- </div>
342
- <p>If normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses white-space-separate like
343
- tokenize method for ASCII characters. <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram
344
- tokenize method for non-ASCII characters.</p>
345
- <p>You may be confused with this combined behavior. But it's reasonable
346
- for most use cases such as English text (only ASCII characters) and
347
- Japanese text (ASCII and non-ASCII characters are mixed).</p>
348
- <p>Most languages consists of only ASCII characters use white-space for
349
- word separator. White-space-separate tokenize method is suitable for
350
- the case.</p>
351
- <p>Languages consists of non-ASCII characters don't use white-space for
352
- word separator. Bigram tokenize method is suitable for the case.</p>
353
- <p>Mixed tokenize method is suitable for mixed language case.</p>
354
- <p>If you want to use bigram tokenize method for ASCII character, see
355
- <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> type tokenizers such as
356
- <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span class="std std-ref">TokenBigramSplitSymbolAlpha</span></a>.</p>
357
- <p>Let's confirm <code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior by example.</p>
358
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses one or more white-spaces as token delimiter for
359
- ASCII characters:</p>
360
- <p>Execution example:</p>
361
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
362
- # [
363
- # [
364
- # 0,
365
- # 1337566253.89858,
366
- # 0.000355720520019531
367
- # ],
368
- # [
369
- # {
370
- # &quot;position&quot;: 0,
371
- # &quot;force_prefix&quot;: false,
372
- # &quot;value&quot;: &quot;hello&quot;
373
- # },
374
- # {
375
- # &quot;position&quot;: 1,
376
- # &quot;force_prefix&quot;: false,
377
- # &quot;value&quot;: &quot;world&quot;
378
- # }
379
- # ]
380
- # ]
381
- </pre></div>
382
- </div>
383
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses character type change as token delimiter for
384
- ASCII characters. Character type is one of them:</p>
385
- <blockquote>
386
- <div><ul class="simple">
387
- <li>Alphabet</li>
388
- <li>Digit</li>
389
- <li>Symbol (such as <code class="docutils literal"><span class="pre">(</span></code>, <code class="docutils literal"><span class="pre">)</span></code> and <code class="docutils literal"><span class="pre">!</span></code>)</li>
390
- <li>Hiragana</li>
391
- <li>Katakana</li>
392
- <li>Kanji</li>
393
- <li>Others</li>
394
- </ul>
395
- </div></blockquote>
396
- <p>The following example shows two token delimiters:</p>
397
- <blockquote>
398
- <div><ul class="simple">
399
- <li>at between <code class="docutils literal"><span class="pre">100</span></code> (digits) and <code class="docutils literal"><span class="pre">cents</span></code> (alphabets)</li>
400
- <li>at between <code class="docutils literal"><span class="pre">cents</span></code> (alphabets) and <code class="docutils literal"><span class="pre">!!!</span></code> (symbols)</li>
401
- </ul>
402
- </div></blockquote>
403
- <p>Execution example:</p>
404
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;100cents!!!&quot; NormalizerAuto
405
- # [
406
- # [
407
- # 0,
408
- # 1337566253.89858,
409
- # 0.000355720520019531
410
- # ],
411
- # [
412
- # {
413
- # &quot;position&quot;: 0,
414
- # &quot;force_prefix&quot;: false,
415
- # &quot;value&quot;: &quot;100&quot;
416
- # },
417
- # {
418
- # &quot;position&quot;: 1,
419
- # &quot;force_prefix&quot;: false,
420
- # &quot;value&quot;: &quot;cents&quot;
421
- # },
422
- # {
423
- # &quot;position&quot;: 2,
424
- # &quot;force_prefix&quot;: false,
425
- # &quot;value&quot;: &quot;!!!&quot;
426
- # }
427
- # ]
428
- # ]
429
- </pre></div>
430
- </div>
431
- <p>Here is an example that <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram tokenize method
432
- for non-ASCII characters.</p>
433
- <p>Execution example:</p>
434
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
435
- # [
436
- # [
437
- # 0,
438
- # 1337566253.89858,
439
- # 0.000355720520019531
440
- # ],
441
- # [
442
- # {
443
- # &quot;position&quot;: 0,
444
- # &quot;force_prefix&quot;: false,
445
- # &quot;value&quot;: &quot;日本&quot;
446
- # },
447
- # {
448
- # &quot;position&quot;: 1,
449
- # &quot;force_prefix&quot;: false,
450
- # &quot;value&quot;: &quot;本語&quot;
451
- # },
452
- # {
453
- # &quot;position&quot;: 2,
454
- # &quot;force_prefix&quot;: false,
455
- # &quot;value&quot;: &quot;語の&quot;
456
- # },
457
- # {
458
- # &quot;position&quot;: 3,
459
- # &quot;force_prefix&quot;: false,
460
- # &quot;value&quot;: &quot;の勉&quot;
461
- # },
462
- # {
463
- # &quot;position&quot;: 4,
464
- # &quot;force_prefix&quot;: false,
465
- # &quot;value&quot;: &quot;勉強&quot;
466
- # },
467
- # {
468
- # &quot;position&quot;: 5,
469
- # &quot;force_prefix&quot;: false,
470
- # &quot;value&quot;: &quot;強&quot;
471
- # }
472
- # ]
473
- # ]
474
- </pre></div>
475
- </div>
476
- </div>
477
- <div class="section" id="tokenbigramsplitsymbol">
478
- <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
479
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The
480
- difference between them is symbol handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code>
481
- tokenizes symbols by bigram tokenize method:</p>
482
- <p>Execution example:</p>
483
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
484
- # [
485
- # [
486
- # 0,
487
- # 1337566253.89858,
488
- # 0.000355720520019531
489
- # ],
490
- # [
491
- # {
492
- # &quot;position&quot;: 0,
493
- # &quot;force_prefix&quot;: false,
494
- # &quot;value&quot;: &quot;100&quot;
495
- # },
496
- # {
497
- # &quot;position&quot;: 1,
498
- # &quot;force_prefix&quot;: false,
499
- # &quot;value&quot;: &quot;cents&quot;
500
- # },
501
- # {
502
- # &quot;position&quot;: 2,
503
- # &quot;force_prefix&quot;: false,
504
- # &quot;value&quot;: &quot;!!&quot;
505
- # },
506
- # {
507
- # &quot;position&quot;: 3,
508
- # &quot;force_prefix&quot;: false,
509
- # &quot;value&quot;: &quot;!!&quot;
510
- # },
511
- # {
512
- # &quot;position&quot;: 4,
513
- # &quot;force_prefix&quot;: false,
514
- # &quot;value&quot;: &quot;!&quot;
515
- # }
516
- # ]
517
- # ]
518
- </pre></div>
519
- </div>
520
- </div>
521
- <div class="section" id="tokenbigramsplitsymbolalpha">
522
- <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
523
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The
524
- difference between them is symbol and alphabet
525
- handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> tokenizes symbols and
526
- alphabets by bigram tokenize method:</p>
527
- <p>Execution example:</p>
528
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
529
- # [
530
- # [
531
- # 0,
532
- # 1337566253.89858,
533
- # 0.000355720520019531
534
- # ],
535
- # [
536
- # {
537
- # &quot;position&quot;: 0,
538
- # &quot;force_prefix&quot;: false,
539
- # &quot;value&quot;: &quot;100&quot;
540
- # },
541
- # {
542
- # &quot;position&quot;: 1,
543
- # &quot;force_prefix&quot;: false,
544
- # &quot;value&quot;: &quot;ce&quot;
545
- # },
546
- # {
547
- # &quot;position&quot;: 2,
548
- # &quot;force_prefix&quot;: false,
549
- # &quot;value&quot;: &quot;en&quot;
550
- # },
551
- # {
552
- # &quot;position&quot;: 3,
553
- # &quot;force_prefix&quot;: false,
554
- # &quot;value&quot;: &quot;nt&quot;
555
- # },
556
- # {
557
- # &quot;position&quot;: 4,
558
- # &quot;force_prefix&quot;: false,
559
- # &quot;value&quot;: &quot;ts&quot;
560
- # },
561
- # {
562
- # &quot;position&quot;: 5,
563
- # &quot;force_prefix&quot;: false,
564
- # &quot;value&quot;: &quot;s!&quot;
565
- # },
566
- # {
567
- # &quot;position&quot;: 6,
568
- # &quot;force_prefix&quot;: false,
569
- # &quot;value&quot;: &quot;!!&quot;
570
- # },
571
- # {
572
- # &quot;position&quot;: 7,
573
- # &quot;force_prefix&quot;: false,
574
- # &quot;value&quot;: &quot;!!&quot;
575
- # },
576
- # {
577
- # &quot;position&quot;: 8,
578
- # &quot;force_prefix&quot;: false,
579
- # &quot;value&quot;: &quot;!&quot;
580
- # }
581
- # ]
582
- # ]
583
- </pre></div>
584
- </div>
585
- </div>
586
- <div class="section" id="tokenbigramsplitsymbolalphadigit">
587
- <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
588
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> is similar to
589
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The difference between them is symbol, alphabet
590
- and digit handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> tokenizes
591
- symbols, alphabets and digits by bigram tokenize method. It means that
592
- all characters are tokenized by bigram tokenize method:</p>
593
- <p>Execution example:</p>
594
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
595
- # [
596
- # [
597
- # 0,
598
- # 1337566253.89858,
599
- # 0.000355720520019531
600
- # ],
601
- # [
602
- # {
603
- # &quot;position&quot;: 0,
604
- # &quot;force_prefix&quot;: false,
605
- # &quot;value&quot;: &quot;10&quot;
606
- # },
607
- # {
608
- # &quot;position&quot;: 1,
609
- # &quot;force_prefix&quot;: false,
610
- # &quot;value&quot;: &quot;00&quot;
611
- # },
612
- # {
613
- # &quot;position&quot;: 2,
614
- # &quot;force_prefix&quot;: false,
615
- # &quot;value&quot;: &quot;0c&quot;
616
- # },
617
- # {
618
- # &quot;position&quot;: 3,
619
- # &quot;force_prefix&quot;: false,
620
- # &quot;value&quot;: &quot;ce&quot;
621
- # },
622
- # {
623
- # &quot;position&quot;: 4,
624
- # &quot;force_prefix&quot;: false,
625
- # &quot;value&quot;: &quot;en&quot;
626
- # },
627
- # {
628
- # &quot;position&quot;: 5,
629
- # &quot;force_prefix&quot;: false,
630
- # &quot;value&quot;: &quot;nt&quot;
631
- # },
632
- # {
633
- # &quot;position&quot;: 6,
634
- # &quot;force_prefix&quot;: false,
635
- # &quot;value&quot;: &quot;ts&quot;
636
- # },
637
- # {
638
- # &quot;position&quot;: 7,
639
- # &quot;force_prefix&quot;: false,
640
- # &quot;value&quot;: &quot;s!&quot;
641
- # },
642
- # {
643
- # &quot;position&quot;: 8,
644
- # &quot;force_prefix&quot;: false,
645
- # &quot;value&quot;: &quot;!!&quot;
646
- # },
647
- # {
648
- # &quot;position&quot;: 9,
649
- # &quot;force_prefix&quot;: false,
650
- # &quot;value&quot;: &quot;!!&quot;
651
- # },
652
- # {
653
- # &quot;position&quot;: 10,
654
- # &quot;force_prefix&quot;: false,
655
- # &quot;value&quot;: &quot;!&quot;
656
- # }
657
- # ]
658
- # ]
659
- </pre></div>
660
- </div>
661
- </div>
662
- <div class="section" id="tokenbigramignoreblank">
663
- <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
664
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The
665
- difference between them is blank handling. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>
666
- ignores white-spaces in continuous symbols and non-ASCII characters.</p>
667
- <p>You can find difference of them by <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
668
- has symbols and non-ASCII characters.</p>
669
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
670
- <p>Execution example:</p>
671
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
672
- # [
673
- # [
674
- # 0,
675
- # 1337566253.89858,
676
- # 0.000355720520019531
677
- # ],
678
- # [
679
- # {
680
- # &quot;position&quot;: 0,
681
- # &quot;force_prefix&quot;: false,
682
- # &quot;value&quot;: &quot;日&quot;
683
- # },
684
- # {
685
- # &quot;position&quot;: 1,
686
- # &quot;force_prefix&quot;: false,
687
- # &quot;value&quot;: &quot;本&quot;
688
- # },
689
- # {
690
- # &quot;position&quot;: 2,
691
- # &quot;force_prefix&quot;: false,
692
- # &quot;value&quot;: &quot;語&quot;
693
- # },
694
- # {
695
- # &quot;position&quot;: 3,
696
- # &quot;force_prefix&quot;: false,
697
- # &quot;value&quot;: &quot;!&quot;
698
- # },
699
- # {
700
- # &quot;position&quot;: 4,
701
- # &quot;force_prefix&quot;: false,
702
- # &quot;value&quot;: &quot;!&quot;
703
- # },
704
- # {
705
- # &quot;position&quot;: 5,
706
- # &quot;force_prefix&quot;: false,
707
- # &quot;value&quot;: &quot;!&quot;
708
- # }
709
- # ]
710
- # ]
711
- </pre></div>
712
- </div>
713
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>:</p>
714
- <p>Execution example:</p>
715
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
716
- # [
717
- # [
718
- # 0,
719
- # 1337566253.89858,
720
- # 0.000355720520019531
721
- # ],
722
- # [
723
- # {
724
- # &quot;position&quot;: 0,
725
- # &quot;force_prefix&quot;: false,
726
- # &quot;value&quot;: &quot;日本&quot;
727
- # },
728
- # {
729
- # &quot;position&quot;: 1,
730
- # &quot;force_prefix&quot;: false,
731
- # &quot;value&quot;: &quot;本語&quot;
732
- # },
733
- # {
734
- # &quot;position&quot;: 2,
735
- # &quot;force_prefix&quot;: false,
736
- # &quot;value&quot;: &quot;語&quot;
737
- # },
738
- # {
739
- # &quot;position&quot;: 3,
740
- # &quot;force_prefix&quot;: false,
741
- # &quot;value&quot;: &quot;!!!&quot;
742
- # }
743
- # ]
744
- # ]
745
- </pre></div>
746
- </div>
747
- </div>
748
- <div class="section" id="tokenbigramignoreblanksplitsymbol">
749
- <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
750
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> is similar to
751
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The differences between them are the following:</p>
752
- <blockquote>
753
- <div><ul class="simple">
754
- <li>Blank handling</li>
755
- <li>Symbol handling</li>
756
- </ul>
757
- </div></blockquote>
758
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> ignores white-spaces in
759
- continuous symbols and non-ASCII characters.</p>
760
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> tokenizes symbols by bigram
761
- tokenize method.</p>
762
- <p>You can find difference of them by <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
763
- has symbols and non-ASCII characters.</p>
764
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
765
- <p>Execution example:</p>
766
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
767
- # [
768
- # [
769
- # 0,
770
- # 1337566253.89858,
771
- # 0.000355720520019531
772
- # ],
773
- # [
774
- # {
775
- # &quot;position&quot;: 0,
776
- # &quot;force_prefix&quot;: false,
777
- # &quot;value&quot;: &quot;日&quot;
778
- # },
779
- # {
780
- # &quot;position&quot;: 1,
781
- # &quot;force_prefix&quot;: false,
782
- # &quot;value&quot;: &quot;本&quot;
783
- # },
784
- # {
785
- # &quot;position&quot;: 2,
786
- # &quot;force_prefix&quot;: false,
787
- # &quot;value&quot;: &quot;語&quot;
788
- # },
789
- # {
790
- # &quot;position&quot;: 3,
791
- # &quot;force_prefix&quot;: false,
792
- # &quot;value&quot;: &quot;!&quot;
793
- # },
794
- # {
795
- # &quot;position&quot;: 4,
796
- # &quot;force_prefix&quot;: false,
797
- # &quot;value&quot;: &quot;!&quot;
798
- # },
799
- # {
800
- # &quot;position&quot;: 5,
801
- # &quot;force_prefix&quot;: false,
802
- # &quot;value&quot;: &quot;!&quot;
803
- # }
804
- # ]
805
- # ]
806
- </pre></div>
807
- </div>
808
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code>:</p>
809
- <p>Execution example:</p>
810
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
811
- # [
812
- # [
813
- # 0,
814
- # 1337566253.89858,
815
- # 0.000355720520019531
816
- # ],
817
- # [
818
- # {
819
- # &quot;position&quot;: 0,
820
- # &quot;force_prefix&quot;: false,
821
- # &quot;value&quot;: &quot;日本&quot;
822
- # },
823
- # {
824
- # &quot;position&quot;: 1,
825
- # &quot;force_prefix&quot;: false,
826
- # &quot;value&quot;: &quot;本語&quot;
827
- # },
828
- # {
829
- # &quot;position&quot;: 2,
830
- # &quot;force_prefix&quot;: false,
831
- # &quot;value&quot;: &quot;語!&quot;
832
- # },
833
- # {
834
- # &quot;position&quot;: 3,
835
- # &quot;force_prefix&quot;: false,
836
- # &quot;value&quot;: &quot;!!&quot;
837
- # },
838
- # {
839
- # &quot;position&quot;: 4,
840
- # &quot;force_prefix&quot;: false,
841
- # &quot;value&quot;: &quot;!!&quot;
842
- # },
843
- # {
844
- # &quot;position&quot;: 5,
845
- # &quot;force_prefix&quot;: false,
846
- # &quot;value&quot;: &quot;!&quot;
847
- # }
848
- # ]
849
- # ]
850
- </pre></div>
851
- </div>
852
- </div>
853
- <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
854
- <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
855
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> is similar to
856
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The differences between them are the following:</p>
857
- <blockquote>
858
- <div><ul class="simple">
859
- <li>Blank handling</li>
860
- <li>Symbol and alphabet handling</li>
861
- </ul>
862
- </div></blockquote>
863
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> ignores white-spaces in
864
- continuous symbols and non-ASCII characters.</p>
865
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> tokenizes symbols and
866
- alphabets by bigram tokenize method.</p>
867
- <p>You can find difference of them by <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
868
- has symbols and non-ASCII characters with white spaces and alphabets.</p>
869
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
870
- <p>Execution example:</p>
871
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
872
- # [
873
- # [
874
- # 0,
875
- # 1337566253.89858,
876
- # 0.000355720520019531
877
- # ],
878
- # [
879
- # {
880
- # &quot;position&quot;: 0,
881
- # &quot;force_prefix&quot;: false,
882
- # &quot;value&quot;: &quot;hello&quot;
883
- # },
884
- # {
885
- # &quot;position&quot;: 1,
886
- # &quot;force_prefix&quot;: false,
887
- # &quot;value&quot;: &quot;日&quot;
888
- # },
889
- # {
890
- # &quot;position&quot;: 2,
891
- # &quot;force_prefix&quot;: false,
892
- # &quot;value&quot;: &quot;本&quot;
893
- # },
894
- # {
895
- # &quot;position&quot;: 3,
896
- # &quot;force_prefix&quot;: false,
897
- # &quot;value&quot;: &quot;語&quot;
898
- # },
899
- # {
900
- # &quot;position&quot;: 4,
901
- # &quot;force_prefix&quot;: false,
902
- # &quot;value&quot;: &quot;!&quot;
903
- # },
904
- # {
905
- # &quot;position&quot;: 5,
906
- # &quot;force_prefix&quot;: false,
907
- # &quot;value&quot;: &quot;!&quot;
908
- # },
909
- # {
910
- # &quot;position&quot;: 6,
911
- # &quot;force_prefix&quot;: false,
912
- # &quot;value&quot;: &quot;!&quot;
913
- # }
914
- # ]
915
- # ]
916
- </pre></div>
917
- </div>
918
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code>:</p>
919
- <p>Execution example:</p>
920
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
921
- # [
922
- # [
923
- # 0,
924
- # 1337566253.89858,
925
- # 0.000355720520019531
926
- # ],
927
- # [
928
- # {
929
- # &quot;position&quot;: 0,
930
- # &quot;force_prefix&quot;: false,
931
- # &quot;value&quot;: &quot;he&quot;
932
- # },
933
- # {
934
- # &quot;position&quot;: 1,
935
- # &quot;force_prefix&quot;: false,
936
- # &quot;value&quot;: &quot;el&quot;
937
- # },
938
- # {
939
- # &quot;position&quot;: 2,
940
- # &quot;force_prefix&quot;: false,
941
- # &quot;value&quot;: &quot;ll&quot;
942
- # },
943
- # {
944
- # &quot;position&quot;: 3,
945
- # &quot;force_prefix&quot;: false,
946
- # &quot;value&quot;: &quot;lo&quot;
947
- # },
948
- # {
949
- # &quot;position&quot;: 4,
950
- # &quot;force_prefix&quot;: false,
951
- # &quot;value&quot;: &quot;o日&quot;
952
- # },
953
- # {
954
- # &quot;position&quot;: 5,
955
- # &quot;force_prefix&quot;: false,
956
- # &quot;value&quot;: &quot;日本&quot;
957
- # },
958
- # {
959
- # &quot;position&quot;: 6,
960
- # &quot;force_prefix&quot;: false,
961
- # &quot;value&quot;: &quot;本語&quot;
962
- # },
963
- # {
964
- # &quot;position&quot;: 7,
965
- # &quot;force_prefix&quot;: false,
966
- # &quot;value&quot;: &quot;語!&quot;
967
- # },
968
- # {
969
- # &quot;position&quot;: 8,
970
- # &quot;force_prefix&quot;: false,
971
- # &quot;value&quot;: &quot;!!&quot;
972
- # },
973
- # {
974
- # &quot;position&quot;: 9,
975
- # &quot;force_prefix&quot;: false,
976
- # &quot;value&quot;: &quot;!!&quot;
977
- # },
978
- # {
979
- # &quot;position&quot;: 10,
980
- # &quot;force_prefix&quot;: false,
981
- # &quot;value&quot;: &quot;!&quot;
982
- # }
983
- # ]
984
- # ]
985
- </pre></div>
986
- </div>
987
- </div>
988
- <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
989
- <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
990
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> is similar to
991
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The differences between them are the following:</p>
992
- <blockquote>
993
- <div><ul class="simple">
994
- <li>Blank handling</li>
995
- <li>Symbol, alphabet and digit handling</li>
65
+ <div class="toctree-wrapper compound">
66
+ <ul>
67
+ <li class="toctree-l1"><a class="reference internal" href="tokenizer/summary.html">7.8.1. Summary</a></li>
68
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram.html">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></li>
69
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank.html">7.8.3. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
70
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol.html">7.8.4. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
71
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html">7.8.5. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
72
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html">7.8.6. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
73
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol.html">7.8.7. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
74
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha.html">7.8.8. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
75
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha_digit.html">7.8.9. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
76
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit.html">7.8.10. <code class="docutils literal notranslate"><span class="pre">TokenDelimit</span></code></a></li>
77
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit_null.html">7.8.11. <code class="docutils literal notranslate"><span class="pre">TokenDelimitNull</span></code></a></li>
78
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_mecab.html">7.8.12. <code class="docutils literal notranslate"><span class="pre">TokenMecab</span></code></a></li>
79
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_regexp.html">7.8.13. <code class="docutils literal notranslate"><span class="pre">TokenRegexp</span></code></a></li>
80
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_trigram.html">7.8.14. <code class="docutils literal notranslate"><span class="pre">TokenTrigram</span></code></a></li>
81
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_unigram.html">7.8.15. <code class="docutils literal notranslate"><span class="pre">TokenUnigram</span></code></a></li>
996
82
  </ul>
997
- </div></blockquote>
998
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> ignores white-spaces
999
- in continuous symbols and non-ASCII characters.</p>
1000
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> tokenizes symbols,
1001
- alphabets and digits by bigram tokenize method. It means that all
1002
- characters are tokenized by bigram tokenize method.</p>
1003
- <p>You can find difference of them by <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> text
1004
- because it has symbols and non-ASCII characters with white spaces,
1005
- alphabets and digits.</p>
1006
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> :</p>
1007
- <p>Execution example:</p>
1008
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
1009
- # [
1010
- # [
1011
- # 0,
1012
- # 1337566253.89858,
1013
- # 0.000355720520019531
1014
- # ],
1015
- # [
1016
- # {
1017
- # &quot;position&quot;: 0,
1018
- # &quot;force_prefix&quot;: false,
1019
- # &quot;value&quot;: &quot;hello&quot;
1020
- # },
1021
- # {
1022
- # &quot;position&quot;: 1,
1023
- # &quot;force_prefix&quot;: false,
1024
- # &quot;value&quot;: &quot;日&quot;
1025
- # },
1026
- # {
1027
- # &quot;position&quot;: 2,
1028
- # &quot;force_prefix&quot;: false,
1029
- # &quot;value&quot;: &quot;本&quot;
1030
- # },
1031
- # {
1032
- # &quot;position&quot;: 3,
1033
- # &quot;force_prefix&quot;: false,
1034
- # &quot;value&quot;: &quot;語&quot;
1035
- # },
1036
- # {
1037
- # &quot;position&quot;: 4,
1038
- # &quot;force_prefix&quot;: false,
1039
- # &quot;value&quot;: &quot;!&quot;
1040
- # },
1041
- # {
1042
- # &quot;position&quot;: 5,
1043
- # &quot;force_prefix&quot;: false,
1044
- # &quot;value&quot;: &quot;!&quot;
1045
- # },
1046
- # {
1047
- # &quot;position&quot;: 6,
1048
- # &quot;force_prefix&quot;: false,
1049
- # &quot;value&quot;: &quot;!&quot;
1050
- # },
1051
- # {
1052
- # &quot;position&quot;: 7,
1053
- # &quot;force_prefix&quot;: false,
1054
- # &quot;value&quot;: &quot;777&quot;
1055
- # }
1056
- # ]
1057
- # ]
1058
- </pre></div>
1059
- </div>
1060
- <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code>:</p>
1061
- <p>Execution example:</p>
1062
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
1063
- # [
1064
- # [
1065
- # 0,
1066
- # 1337566253.89858,
1067
- # 0.000355720520019531
1068
- # ],
1069
- # [
1070
- # {
1071
- # &quot;position&quot;: 0,
1072
- # &quot;force_prefix&quot;: false,
1073
- # &quot;value&quot;: &quot;he&quot;
1074
- # },
1075
- # {
1076
- # &quot;position&quot;: 1,
1077
- # &quot;force_prefix&quot;: false,
1078
- # &quot;value&quot;: &quot;el&quot;
1079
- # },
1080
- # {
1081
- # &quot;position&quot;: 2,
1082
- # &quot;force_prefix&quot;: false,
1083
- # &quot;value&quot;: &quot;ll&quot;
1084
- # },
1085
- # {
1086
- # &quot;position&quot;: 3,
1087
- # &quot;force_prefix&quot;: false,
1088
- # &quot;value&quot;: &quot;lo&quot;
1089
- # },
1090
- # {
1091
- # &quot;position&quot;: 4,
1092
- # &quot;force_prefix&quot;: false,
1093
- # &quot;value&quot;: &quot;o日&quot;
1094
- # },
1095
- # {
1096
- # &quot;position&quot;: 5,
1097
- # &quot;force_prefix&quot;: false,
1098
- # &quot;value&quot;: &quot;日本&quot;
1099
- # },
1100
- # {
1101
- # &quot;position&quot;: 6,
1102
- # &quot;force_prefix&quot;: false,
1103
- # &quot;value&quot;: &quot;本語&quot;
1104
- # },
1105
- # {
1106
- # &quot;position&quot;: 7,
1107
- # &quot;force_prefix&quot;: false,
1108
- # &quot;value&quot;: &quot;語!&quot;
1109
- # },
1110
- # {
1111
- # &quot;position&quot;: 8,
1112
- # &quot;force_prefix&quot;: false,
1113
- # &quot;value&quot;: &quot;!!&quot;
1114
- # },
1115
- # {
1116
- # &quot;position&quot;: 9,
1117
- # &quot;force_prefix&quot;: false,
1118
- # &quot;value&quot;: &quot;!!&quot;
1119
- # },
1120
- # {
1121
- # &quot;position&quot;: 10,
1122
- # &quot;force_prefix&quot;: false,
1123
- # &quot;value&quot;: &quot;!7&quot;
1124
- # },
1125
- # {
1126
- # &quot;position&quot;: 11,
1127
- # &quot;force_prefix&quot;: false,
1128
- # &quot;value&quot;: &quot;77&quot;
1129
- # },
1130
- # {
1131
- # &quot;position&quot;: 12,
1132
- # &quot;force_prefix&quot;: false,
1133
- # &quot;value&quot;: &quot;77&quot;
1134
- # },
1135
- # {
1136
- # &quot;position&quot;: 13,
1137
- # &quot;force_prefix&quot;: false,
1138
- # &quot;value&quot;: &quot;7&quot;
1139
- # }
1140
- # ]
1141
- # ]
1142
- </pre></div>
1143
- </div>
1144
- </div>
1145
- <div class="section" id="tokenunigram">
1146
- <span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
1147
- <p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The difference
1148
- between them is the token unit. <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> uses 2 characters per
1149
- token. <code class="docutils literal"><span class="pre">TokenUnigram</span></code> uses 1 character per token.</p>
1150
- <p>Execution example:</p>
1151
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
1152
- # [
1153
- # [
1154
- # 0,
1155
- # 1337566253.89858,
1156
- # 0.000355720520019531
1157
- # ],
1158
- # [
1159
- # {
1160
- # &quot;position&quot;: 0,
1161
- # &quot;force_prefix&quot;: false,
1162
- # &quot;value&quot;: &quot;100&quot;
1163
- # },
1164
- # {
1165
- # &quot;position&quot;: 1,
1166
- # &quot;force_prefix&quot;: false,
1167
- # &quot;value&quot;: &quot;cents&quot;
1168
- # },
1169
- # {
1170
- # &quot;position&quot;: 2,
1171
- # &quot;force_prefix&quot;: false,
1172
- # &quot;value&quot;: &quot;!!!&quot;
1173
- # }
1174
- # ]
1175
- # ]
1176
- </pre></div>
1177
- </div>
1178
- </div>
1179
- <div class="section" id="tokentrigram">
1180
- <span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
1181
- <p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a>. The difference
1182
- between them is the token unit. <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> uses 2 characters per
1183
- token. <code class="docutils literal"><span class="pre">TokenTrigram</span></code> uses 3 characters per token.</p>
1184
- <p>Execution example:</p>
1185
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1186
- # [
1187
- # [
1188
- # 0,
1189
- # 1337566253.89858,
1190
- # 0.000355720520019531
1191
- # ],
1192
- # [
1193
- # {
1194
- # &quot;position&quot;: 0,
1195
- # &quot;force_prefix&quot;: false,
1196
- # &quot;value&quot;: &quot;10000&quot;
1197
- # },
1198
- # {
1199
- # &quot;position&quot;: 1,
1200
- # &quot;force_prefix&quot;: false,
1201
- # &quot;value&quot;: &quot;cents&quot;
1202
- # },
1203
- # {
1204
- # &quot;position&quot;: 2,
1205
- # &quot;force_prefix&quot;: false,
1206
- # &quot;value&quot;: &quot;!!!!!&quot;
1207
- # }
1208
- # ]
1209
- # ]
1210
- </pre></div>
1211
- </div>
1212
- </div>
1213
- <div class="section" id="tokendelimit">
1214
- <span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
1215
- <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> extracts token by splitting one or more space
1216
- characters (<code class="docutils literal"><span class="pre">U+0020</span></code>). For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to
1217
- <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code>.</p>
1218
- <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> is suitable for tag text. You can extract <code class="docutils literal"><span class="pre">groonga</span></code>
1219
- and <code class="docutils literal"><span class="pre">full-text-search</span></code> and <code class="docutils literal"><span class="pre">http</span></code> as tags from <code class="docutils literal"><span class="pre">groonga</span>
1220
- <span class="pre">full-text-search</span> <span class="pre">http</span></code>.</p>
1221
- <p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimit</span></code>:</p>
1222
- <p>Execution example:</p>
1223
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1224
- # [
1225
- # [
1226
- # 0,
1227
- # 1337566253.89858,
1228
- # 0.000355720520019531
1229
- # ],
1230
- # [
1231
- # {
1232
- # &quot;position&quot;: 0,
1233
- # &quot;force_prefix&quot;: false,
1234
- # &quot;value&quot;: &quot;groonga&quot;
1235
- # },
1236
- # {
1237
- # &quot;position&quot;: 1,
1238
- # &quot;force_prefix&quot;: false,
1239
- # &quot;value&quot;: &quot;full-text-search&quot;
1240
- # },
1241
- # {
1242
- # &quot;position&quot;: 2,
1243
- # &quot;force_prefix&quot;: false,
1244
- # &quot;value&quot;: &quot;http&quot;
1245
- # }
1246
- # ]
1247
- # ]
1248
- </pre></div>
1249
- </div>
1250
- </div>
1251
- <div class="section" id="tokendelimitnull">
1252
- <span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
1253
- <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is similar to <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a>. The
1254
- difference between them is separator character. <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a>
1255
- uses space character (<code class="docutils literal"><span class="pre">U+0020</span></code>) but <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> uses NUL
1256
- character (<code class="docutils literal"><span class="pre">U+0000</span></code>).</p>
1257
- <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is also suitable for tag text.</p>
1258
- <p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code>:</p>
1259
- <p>Execution example:</p>
1260
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1261
- # [
1262
- # [
1263
- # 0,
1264
- # 1337566253.89858,
1265
- # 0.000355720520019531
1266
- # ],
1267
- # [
1268
- # {
1269
- # &quot;position&quot;: 0,
1270
- # &quot;force_prefix&quot;: false,
1271
- # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1272
- # }
1273
- # ]
1274
- # ]
1275
- </pre></div>
1276
- </div>
1277
- </div>
1278
- <div class="section" id="tokenmecab">
1279
- <span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
1280
- <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is a tokenizer based on <a class="reference external" href="https://taku910.github.io/mecab/">MeCab</a> part-of-speech and
1281
- morphological analyzer.</p>
1282
- <p>MeCab doesn't depend on Japanese. You can use MeCab for other
1283
- languages by creating dictionary for the languages. You can use <a class="reference external" href="http://osdn.jp/projects/naist-jdic/">NAIST
1284
- Japanese Dictionary</a>
1285
- for Japanese.</p>
1286
- <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is good for precision rather than recall. You can find
1287
- <code class="docutils literal"><span class="pre">東京都</span></code> and <code class="docutils literal"><span class="pre">京都</span></code> texts by <code class="docutils literal"><span class="pre">京都</span></code> query with
1288
- <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> but <code class="docutils literal"><span class="pre">東京都</span></code> isn't expected. You can find only
1289
- <code class="docutils literal"><span class="pre">京都</span></code> text by <code class="docutils literal"><span class="pre">京都</span></code> query with <code class="docutils literal"><span class="pre">TokenMecab</span></code>.</p>
1290
- <p>If you want to support neologisms, you need to keep updating your
1291
- MeCab dictionary. This has a maintenance cost. (<a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> doesn't
1292
- require dictionary maintenance because <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> doesn't use
1293
- dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
1294
- <p>Here is an example of <code class="docutils literal"><span class="pre">TokenMecab</span></code>. <code class="docutils literal"><span class="pre">東京都</span></code> is tokenized to <code class="docutils literal"><span class="pre">東京</span></code>
1295
- and <code class="docutils literal"><span class="pre">都</span></code>. They don't include <code class="docutils literal"><span class="pre">京都</span></code>:</p>
1296
- <p>Execution example:</p>
1297
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenMecab &quot;東京都&quot;
1298
- # [
1299
- # [
1300
- # -22,
1301
- # 1337566253.89858,
1302
- # 0.000355720520019531,
1303
- # &quot;[tokenize] nonexistent tokenizer: &lt;TokenMecab&gt;&quot;,
1304
- # [
1305
- # [
1306
- # &quot;create_lexicon_for_tokenize&quot;,
1307
- # &quot;proc_tokenize.c&quot;,
1308
- # 139
1309
- # ]
1310
- # ]
1311
- # ]
1312
- # ]
1313
- </pre></div>
1314
- </div>
1315
- </div>
1316
- <div class="section" id="tokenregexp">
1317
- <span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
1318
- <div class="versionadded">
1319
- <p><span class="versionmodified">New in version 5.0.1.</span></p>
1320
- </div>
1321
- <div class="admonition caution">
1322
- <p class="first admonition-title">Caution</p>
1323
- <p class="last">This tokenizer is experimental. Specification may be changed.</p>
1324
- </div>
1325
- <div class="admonition caution">
1326
- <p class="first admonition-title">Caution</p>
1327
- <p class="last">This tokenizer can be used only with UTF-8. You can't use this
1328
- tokenizer with EUC-JP, Shift_JIS and so on.</p>
1329
- </div>
1330
- <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is a tokenizer for supporting regular expression
1331
- search by index.</p>
1332
- <p>In general, regular expression search is evaluated as sequential
1333
- search. But the following cases can be evaluated as index search:</p>
1334
- <blockquote>
1335
- <div><ul class="simple">
1336
- <li>Literal only case such as <code class="docutils literal"><span class="pre">hello</span></code></li>
1337
- <li>The beginning of text and literal case such as <code class="docutils literal"><span class="pre">\A/home/alice</span></code></li>
1338
- <li>The end of text and literal case such as <code class="docutils literal"><span class="pre">\.txt\z</span></code></li>
1339
- </ul>
1340
- </div></blockquote>
1341
- <p>In most cases, index search is faster than sequential search.</p>
1342
- <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is based on bigram tokenize method. <code class="docutils literal"><span class="pre">TokenRegexp</span></code>
1343
- adds the beginning of text mark (<code class="docutils literal"><span class="pre">U+FFEF</span></code>) at the beginning of text
1344
- and the end of text mark (<code class="docutils literal"><span class="pre">U+FFF0</span></code>) to the end of text when you
1345
- index text:</p>
1346
- <p>Execution example:</p>
1347
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1348
- # [
1349
- # [
1350
- # 0,
1351
- # 1337566253.89858,
1352
- # 0.000355720520019531
1353
- # ],
1354
- # [
1355
- # {
1356
- # &quot;position&quot;: 0,
1357
- # &quot;force_prefix&quot;: false,
1358
- # &quot;value&quot;: &quot;￯&quot;
1359
- # },
1360
- # {
1361
- # &quot;position&quot;: 1,
1362
- # &quot;force_prefix&quot;: false,
1363
- # &quot;value&quot;: &quot;/h&quot;
1364
- # },
1365
- # {
1366
- # &quot;position&quot;: 2,
1367
- # &quot;force_prefix&quot;: false,
1368
- # &quot;value&quot;: &quot;ho&quot;
1369
- # },
1370
- # {
1371
- # &quot;position&quot;: 3,
1372
- # &quot;force_prefix&quot;: false,
1373
- # &quot;value&quot;: &quot;om&quot;
1374
- # },
1375
- # {
1376
- # &quot;position&quot;: 4,
1377
- # &quot;force_prefix&quot;: false,
1378
- # &quot;value&quot;: &quot;me&quot;
1379
- # },
1380
- # {
1381
- # &quot;position&quot;: 5,
1382
- # &quot;force_prefix&quot;: false,
1383
- # &quot;value&quot;: &quot;e/&quot;
1384
- # },
1385
- # {
1386
- # &quot;position&quot;: 6,
1387
- # &quot;force_prefix&quot;: false,
1388
- # &quot;value&quot;: &quot;/a&quot;
1389
- # },
1390
- # {
1391
- # &quot;position&quot;: 7,
1392
- # &quot;force_prefix&quot;: false,
1393
- # &quot;value&quot;: &quot;al&quot;
1394
- # },
1395
- # {
1396
- # &quot;position&quot;: 8,
1397
- # &quot;force_prefix&quot;: false,
1398
- # &quot;value&quot;: &quot;li&quot;
1399
- # },
1400
- # {
1401
- # &quot;position&quot;: 9,
1402
- # &quot;force_prefix&quot;: false,
1403
- # &quot;value&quot;: &quot;ic&quot;
1404
- # },
1405
- # {
1406
- # &quot;position&quot;: 10,
1407
- # &quot;force_prefix&quot;: false,
1408
- # &quot;value&quot;: &quot;ce&quot;
1409
- # },
1410
- # {
1411
- # &quot;position&quot;: 11,
1412
- # &quot;force_prefix&quot;: false,
1413
- # &quot;value&quot;: &quot;e/&quot;
1414
- # },
1415
- # {
1416
- # &quot;position&quot;: 12,
1417
- # &quot;force_prefix&quot;: false,
1418
- # &quot;value&quot;: &quot;/t&quot;
1419
- # },
1420
- # {
1421
- # &quot;position&quot;: 13,
1422
- # &quot;force_prefix&quot;: false,
1423
- # &quot;value&quot;: &quot;te&quot;
1424
- # },
1425
- # {
1426
- # &quot;position&quot;: 14,
1427
- # &quot;force_prefix&quot;: false,
1428
- # &quot;value&quot;: &quot;es&quot;
1429
- # },
1430
- # {
1431
- # &quot;position&quot;: 15,
1432
- # &quot;force_prefix&quot;: false,
1433
- # &quot;value&quot;: &quot;st&quot;
1434
- # },
1435
- # {
1436
- # &quot;position&quot;: 16,
1437
- # &quot;force_prefix&quot;: false,
1438
- # &quot;value&quot;: &quot;t.&quot;
1439
- # },
1440
- # {
1441
- # &quot;position&quot;: 17,
1442
- # &quot;force_prefix&quot;: false,
1443
- # &quot;value&quot;: &quot;.t&quot;
1444
- # },
1445
- # {
1446
- # &quot;position&quot;: 18,
1447
- # &quot;force_prefix&quot;: false,
1448
- # &quot;value&quot;: &quot;tx&quot;
1449
- # },
1450
- # {
1451
- # &quot;position&quot;: 19,
1452
- # &quot;force_prefix&quot;: false,
1453
- # &quot;value&quot;: &quot;xt&quot;
1454
- # },
1455
- # {
1456
- # &quot;position&quot;: 20,
1457
- # &quot;force_prefix&quot;: false,
1458
- # &quot;value&quot;: &quot;t&quot;
1459
- # },
1460
- # {
1461
- # &quot;position&quot;: 21,
1462
- # &quot;force_prefix&quot;: false,
1463
- # &quot;value&quot;: &quot;￰&quot;
1464
- # }
1465
- # ]
1466
- # ]
1467
- </pre></div>
1468
- </div>
1469
- </div>
1470
83
  </div>
1471
84
  </div>
1472
85
 
@@ -1476,46 +89,20 @@ index text:</p>
1476
89
  </div>
1477
90
  <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1478
91
  <div class="sphinxsidebarwrapper">
1479
- <h3><a href="../index.html">Table Of Contents</a></h3>
1480
- <ul>
1481
- <li><a class="reference internal" href="#">7.8. Tokenizers</a><ul>
1482
- <li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
1483
- <li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is &quot;tokenize&quot;?</a></li>
1484
- <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizsers</a><ul>
1485
- <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
1486
- <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
1487
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
1488
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
1489
- <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
1490
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
1491
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
1492
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
1493
- <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
1494
- <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
1495
- <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
1496
- <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
1497
- <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
1498
- <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
1499
- </ul>
1500
- </li>
1501
- </ul>
1502
- </li>
1503
- </ul>
1504
-
1505
92
  <h4>Previous topic</h4>
1506
- <p class="topless"><a href="normalizers.html"
1507
- title="previous chapter">7.7. Normalizers</a></p>
93
+ <p class="topless"><a href="normalizers/normalizer_nfkc51.html"
94
+ title="previous chapter">7.7.2.3. <code class="docutils literal notranslate"><span class="pre">NormalizerNFKC51</span></code></a></p>
1508
95
  <h4>Next topic</h4>
1509
- <p class="topless"><a href="token_filters.html"
1510
- title="next chapter">7.9. Token filters</a></p>
96
+ <p class="topless"><a href="tokenizer/summary.html"
97
+ title="next chapter">7.8.1. Summary</a></p>
1511
98
  <div id="searchbox" style="display: none" role="search">
1512
99
  <h3>Quick search</h3>
100
+ <div class="searchformwrapper">
1513
101
  <form class="search" action="../search.html" method="get">
1514
- <div><input type="text" name="q" /></div>
1515
- <div><input type="submit" value="Go" /></div>
1516
- <input type="hidden" name="check_keywords" value="yes" />
1517
- <input type="hidden" name="area" value="default" />
102
+ <input type="text" name="q" />
103
+ <input type="submit" value="Go" />
1518
104
  </form>
105
+ </div>
1519
106
  </div>
1520
107
  <script type="text/javascript">$('#searchbox').show(0);</script>
1521
108
  </div>
@@ -1529,17 +116,17 @@ index text:</p>
1529
116
  <a href="../genindex.html" title="General Index"
1530
117
  >index</a></li>
1531
118
  <li class="right" >
1532
- <a href="token_filters.html" title="7.9. Token filters"
119
+ <a href="tokenizer/summary.html" title="7.8.1. Summary"
1533
120
  >next</a> |</li>
1534
121
  <li class="right" >
1535
- <a href="normalizers.html" title="7.7. Normalizers"
122
+ <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
1536
123
  >previous</a> |</li>
1537
- <li class="nav-item nav-item-0"><a href="../index.html">Groonga v7.1.0-73-g6d02cfa documentation</a> &#187;</li>
124
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2 documentation</a> &#187;</li>
1538
125
  <li class="nav-item nav-item-1"><a href="../reference.html" >7. Reference manual</a> &#187;</li>
1539
126
  </ul>
1540
127
  </div>
1541
128
  <div class="footer" role="contentinfo">
1542
- &#169; Copyright 2009-2018, Brazil, Inc.
129
+ &#169; Copyright 2009-2019, Brazil, Inc.
1543
130
  </div>
1544
131
  </body>
1545
132
  </html>