rroonga 7.1.1-x86-mingw32 → 9.0.2-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (980) hide show
  1. checksums.yaml +5 -5
  2. data/Rakefile +3 -3
  3. data/doc/text/news.md +22 -0
  4. data/ext/groonga/extconf.rb +29 -26
  5. data/ext/groonga/rb-grn.h +3 -3
  6. data/lib/2.2/groonga.so +0 -0
  7. data/lib/2.3/groonga.so +0 -0
  8. data/lib/2.4/groonga.so +0 -0
  9. data/lib/2.5/groonga.so +0 -0
  10. data/lib/groonga/expression-builder.rb +1 -1
  11. data/lib/groonga/schema.rb +13 -0
  12. data/rroonga-build.rb +4 -11
  13. data/test/test-expression-builder.rb +8 -0
  14. data/vendor/local/bin/cv2pdb.exe +0 -0
  15. data/vendor/local/bin/generate-pdb.bat +38 -36
  16. data/vendor/local/bin/grndb.exe +0 -0
  17. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  18. data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
  19. data/vendor/local/bin/groonga.exe +0 -0
  20. data/vendor/local/bin/libgroonga-0.dll +0 -0
  21. data/vendor/local/bin/libmecab-2.dll +0 -0
  22. data/vendor/local/bin/libmsgpackc.dll +0 -0
  23. data/vendor/local/bin/libonigmo-6.dll +0 -0
  24. data/vendor/local/bin/libpcre-1.dll +0 -0
  25. data/vendor/local/bin/libpcrecpp-0.dll +0 -0
  26. data/vendor/local/bin/libpcreposix-0.dll +0 -0
  27. data/vendor/local/bin/lz4.exe +0 -0
  28. data/vendor/local/bin/lz4c.exe +0 -0
  29. data/vendor/local/bin/{lz4cat → lz4cat.exe} +0 -0
  30. data/vendor/local/bin/mecab.exe +0 -0
  31. data/vendor/local/bin/pcre-config +1 -1
  32. data/vendor/local/bin/pcregrep.exe +0 -0
  33. data/vendor/local/bin/pcretest.exe +0 -0
  34. data/vendor/local/bin/unlz4.exe +0 -0
  35. data/vendor/local/bin/zlib1.dll +0 -0
  36. data/vendor/local/include/groonga/groonga.h +16 -1
  37. data/vendor/local/include/groonga/groonga/accessor.h +5 -1
  38. data/vendor/local/include/groonga/groonga/column.h +4 -0
  39. data/vendor/local/include/groonga/groonga/db.h +3 -1
  40. data/vendor/local/include/groonga/groonga/expr.h +5 -0
  41. data/vendor/local/include/groonga/groonga/groonga.h +124 -171
  42. data/vendor/local/include/groonga/groonga/highlighter.h +57 -0
  43. data/vendor/local/include/groonga/groonga/ii.h +2 -0
  44. data/vendor/local/include/groonga/groonga/index_column.h +31 -0
  45. data/vendor/local/include/groonga/groonga/memory.h +29 -0
  46. data/vendor/local/include/groonga/groonga/msgpack.h +50 -0
  47. data/vendor/local/include/groonga/groonga/obj.h +22 -1
  48. data/vendor/local/include/groonga/groonga/option.h +61 -0
  49. data/vendor/local/include/groonga/groonga/output.h +57 -2
  50. data/vendor/local/include/groonga/groonga/output_columns.h +38 -0
  51. data/vendor/local/include/groonga/groonga/plugin.h +5 -0
  52. data/vendor/local/include/groonga/groonga/raw_string.h +60 -0
  53. data/vendor/local/include/groonga/groonga/string.h +113 -0
  54. data/vendor/local/include/groonga/groonga/table.h +89 -1
  55. data/vendor/local/include/groonga/groonga/thread.h +15 -0
  56. data/vendor/local/include/groonga/groonga/time.h +1 -0
  57. data/vendor/local/include/groonga/groonga/token.h +60 -10
  58. data/vendor/local/include/groonga/groonga/token_cursor.h +59 -0
  59. data/vendor/local/include/groonga/groonga/token_filter.h +24 -0
  60. data/vendor/local/include/groonga/groonga/token_metadata.h +49 -0
  61. data/vendor/local/include/groonga/groonga/tokenizer.h +99 -25
  62. data/vendor/local/include/groonga/groonga/tokenizer_query_deprecated.h +50 -0
  63. data/vendor/local/include/groonga/groonga/vector.h +80 -0
  64. data/vendor/local/include/groonga/groonga/version.h +32 -0
  65. data/vendor/local/include/groonga/groonga/window_function.h +18 -8
  66. data/vendor/local/include/groonga/groonga/window_function_executor.h +68 -0
  67. data/vendor/local/include/lz4.h +504 -212
  68. data/vendor/local/include/lz4frame.h +433 -153
  69. data/vendor/local/include/lz4frame_static.h +47 -0
  70. data/vendor/local/include/lz4hc.h +281 -108
  71. data/vendor/local/include/msgpack.hpp +4 -0
  72. data/vendor/local/include/msgpack/adaptor/adaptor_base.hpp +1 -0
  73. data/vendor/local/include/msgpack/adaptor/adaptor_base_decl.hpp +1 -0
  74. data/vendor/local/include/msgpack/adaptor/array_ref_decl.hpp +1 -0
  75. data/vendor/local/include/msgpack/adaptor/boost/msgpack_variant_decl.hpp +1 -0
  76. data/vendor/local/include/msgpack/adaptor/boost/string_view.hpp +15 -0
  77. data/vendor/local/include/msgpack/adaptor/check_container_size_decl.hpp +1 -0
  78. data/vendor/local/include/msgpack/adaptor/cpp17/optional.hpp +16 -0
  79. data/vendor/local/include/msgpack/adaptor/cpp17/string_view.hpp +16 -0
  80. data/vendor/local/include/msgpack/adaptor/define_decl.hpp +2 -0
  81. data/vendor/local/include/msgpack/adaptor/ext_decl.hpp +1 -0
  82. data/vendor/local/include/msgpack/adaptor/fixint_decl.hpp +1 -0
  83. data/vendor/local/include/msgpack/adaptor/int_decl.hpp +1 -0
  84. data/vendor/local/include/msgpack/adaptor/map_decl.hpp +1 -0
  85. data/vendor/local/include/msgpack/adaptor/msgpack_tuple_decl.hpp +1 -0
  86. data/vendor/local/include/msgpack/adaptor/nil_decl.hpp +1 -0
  87. data/vendor/local/include/msgpack/adaptor/raw_decl.hpp +1 -0
  88. data/vendor/local/include/msgpack/adaptor/size_equal_only_decl.hpp +1 -0
  89. data/vendor/local/include/msgpack/adaptor/tr1/unordered_map.hpp +2 -2
  90. data/vendor/local/include/msgpack/adaptor/tr1/unordered_set.hpp +2 -2
  91. data/vendor/local/include/msgpack/adaptor/v4raw_decl.hpp +1 -0
  92. data/vendor/local/include/msgpack/cpp_config_decl.hpp +1 -0
  93. data/vendor/local/include/msgpack/create_object_visitor.hpp +17 -0
  94. data/vendor/local/include/msgpack/create_object_visitor_decl.hpp +16 -0
  95. data/vendor/local/include/msgpack/fbuffer.h +1 -1
  96. data/vendor/local/include/msgpack/fbuffer_decl.hpp +1 -0
  97. data/vendor/local/include/msgpack/gcc_atomic.hpp +0 -2
  98. data/vendor/local/include/msgpack/iterator_decl.hpp +2 -1
  99. data/vendor/local/include/msgpack/meta_decl.hpp +1 -0
  100. data/vendor/local/include/msgpack/null_visitor.hpp +17 -0
  101. data/vendor/local/include/msgpack/null_visitor_decl.hpp +16 -0
  102. data/vendor/local/include/msgpack/object.h +5 -0
  103. data/vendor/local/include/msgpack/object_decl.hpp +1 -0
  104. data/vendor/local/include/msgpack/object_fwd.hpp +1 -0
  105. data/vendor/local/include/msgpack/object_fwd_decl.hpp +1 -0
  106. data/vendor/local/include/msgpack/pack.h +1 -0
  107. data/vendor/local/include/msgpack/pack_decl.hpp +1 -0
  108. data/vendor/local/include/msgpack/parse.hpp +18 -0
  109. data/vendor/local/include/msgpack/parse_decl.hpp +16 -0
  110. data/vendor/local/include/msgpack/parse_return.hpp +17 -0
  111. data/vendor/local/include/msgpack/sbuffer_decl.hpp +1 -0
  112. data/vendor/local/include/msgpack/sysdep.h +34 -26
  113. data/vendor/local/include/msgpack/type.hpp +9 -0
  114. data/vendor/local/include/msgpack/unpack.h +12 -1
  115. data/vendor/local/include/msgpack/unpack.hpp +1 -0
  116. data/vendor/local/include/msgpack/unpack_decl.hpp +1 -0
  117. data/vendor/local/include/msgpack/unpack_exception.hpp +15 -0
  118. data/vendor/local/include/msgpack/unpack_template.h +22 -30
  119. data/vendor/local/include/msgpack/v1/adaptor/array_ref.hpp +6 -6
  120. data/vendor/local/include/msgpack/v1/adaptor/boost/fusion.hpp +49 -6
  121. data/vendor/local/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp +6 -4
  122. data/vendor/local/include/msgpack/v1/adaptor/boost/string_view.hpp +87 -0
  123. data/vendor/local/include/msgpack/v1/adaptor/carray.hpp +11 -11
  124. data/vendor/local/include/msgpack/v1/adaptor/char_ptr.hpp +1 -1
  125. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array.hpp +1 -1
  126. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_char.hpp +8 -1
  127. data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_unsigned_char.hpp +8 -1
  128. data/vendor/local/include/msgpack/v1/adaptor/cpp11/forward_list.hpp +1 -1
  129. data/vendor/local/include/msgpack/v1/adaptor/cpp11/tuple.hpp +2 -2
  130. data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_map.hpp +4 -4
  131. data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_set.hpp +2 -2
  132. data/vendor/local/include/msgpack/v1/adaptor/cpp17/optional.hpp +90 -0
  133. data/vendor/local/include/msgpack/v1/adaptor/cpp17/string_view.hpp +86 -0
  134. data/vendor/local/include/msgpack/v1/adaptor/deque.hpp +1 -1
  135. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_array.hpp +1088 -32
  136. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_map.hpp +32 -16
  137. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_msgpack_tuple.hpp +32 -32
  138. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_convert_helper.hpp +45 -0
  139. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_array.hpp +4 -3
  140. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_map.hpp +4 -2
  141. data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_msgpack_tuple.hpp +2 -2
  142. data/vendor/local/include/msgpack/v1/adaptor/ext.hpp +1 -1
  143. data/vendor/local/include/msgpack/v1/adaptor/fixint.hpp +40 -24
  144. data/vendor/local/include/msgpack/v1/adaptor/float.hpp +4 -4
  145. data/vendor/local/include/msgpack/v1/adaptor/int.hpp +55 -33
  146. data/vendor/local/include/msgpack/v1/adaptor/list.hpp +1 -1
  147. data/vendor/local/include/msgpack/v1/adaptor/map.hpp +10 -10
  148. data/vendor/local/include/msgpack/v1/adaptor/pair.hpp +2 -2
  149. data/vendor/local/include/msgpack/v1/adaptor/set.hpp +2 -2
  150. data/vendor/local/include/msgpack/v1/adaptor/string.hpp +1 -1
  151. data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_map.hpp +2 -2
  152. data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_set.hpp +2 -2
  153. data/vendor/local/include/msgpack/v1/adaptor/vector.hpp +5 -5
  154. data/vendor/local/include/msgpack/v1/adaptor/vector_bool.hpp +1 -1
  155. data/vendor/local/include/msgpack/v1/adaptor/vector_char.hpp +9 -9
  156. data/vendor/local/include/msgpack/v1/adaptor/vector_unsigned_char.hpp +9 -9
  157. data/vendor/local/include/msgpack/v1/cpp_config.hpp +6 -0
  158. data/vendor/local/include/msgpack/v1/cpp_config_decl.hpp +6 -0
  159. data/vendor/local/include/msgpack/v1/detail/cpp03_zone.hpp +41 -34
  160. data/vendor/local/include/msgpack/v1/detail/cpp03_zone_decl.hpp +8 -0
  161. data/vendor/local/include/msgpack/v1/detail/cpp11_zone.hpp +25 -19
  162. data/vendor/local/include/msgpack/v1/detail/cpp11_zone_decl.hpp +8 -0
  163. data/vendor/local/include/msgpack/v1/meta.hpp +6 -0
  164. data/vendor/local/include/msgpack/v1/meta_decl.hpp +5 -0
  165. data/vendor/local/include/msgpack/v1/object.hpp +768 -393
  166. data/vendor/local/include/msgpack/v1/object_decl.hpp +11 -1
  167. data/vendor/local/include/msgpack/v1/object_fwd.hpp +4 -1
  168. data/vendor/local/include/msgpack/v1/object_fwd_decl.hpp +3 -1
  169. data/vendor/local/include/msgpack/v1/parse_return.hpp +36 -0
  170. data/vendor/local/include/msgpack/v1/unpack.hpp +39 -120
  171. data/vendor/local/include/msgpack/v1/unpack_decl.hpp +2 -9
  172. data/vendor/local/include/msgpack/v1/unpack_exception.hpp +122 -0
  173. data/vendor/local/include/msgpack/v1/vrefbuffer.hpp +2 -2
  174. data/vendor/local/include/msgpack/v2/create_object_visitor.hpp +250 -0
  175. data/vendor/local/include/msgpack/v2/create_object_visitor_decl.hpp +33 -0
  176. data/vendor/local/include/msgpack/v2/meta_decl.hpp +4 -0
  177. data/vendor/local/include/msgpack/v2/null_visitor.hpp +96 -0
  178. data/vendor/local/include/msgpack/v2/null_visitor_decl.hpp +29 -0
  179. data/vendor/local/include/msgpack/v2/object_decl.hpp +4 -0
  180. data/vendor/local/include/msgpack/v2/object_fwd.hpp +1 -1
  181. data/vendor/local/include/msgpack/v2/object_fwd_decl.hpp +2 -0
  182. data/vendor/local/include/msgpack/v2/pack_decl.hpp +1 -0
  183. data/vendor/local/include/msgpack/v2/parse.hpp +1072 -0
  184. data/vendor/local/include/msgpack/v2/parse_decl.hpp +79 -0
  185. data/vendor/local/include/msgpack/v2/parse_return.hpp +37 -0
  186. data/vendor/local/include/msgpack/v2/unpack.hpp +21 -1298
  187. data/vendor/local/include/msgpack/v2/unpack_decl.hpp +9 -45
  188. data/vendor/local/include/msgpack/v2/x3_parse.hpp +875 -0
  189. data/vendor/local/include/msgpack/v2/x3_parse_decl.hpp +36 -0
  190. data/vendor/local/include/msgpack/v2/x3_unpack.hpp +120 -0
  191. data/vendor/local/include/msgpack/v2/x3_unpack_decl.hpp +71 -0
  192. data/vendor/local/include/msgpack/v3/adaptor/adaptor_base.hpp +58 -0
  193. data/vendor/local/include/msgpack/v3/adaptor/adaptor_base_decl.hpp +52 -0
  194. data/vendor/local/include/msgpack/v3/adaptor/array_ref_decl.hpp +36 -0
  195. data/vendor/local/include/msgpack/v3/adaptor/boost/msgpack_variant_decl.hpp +42 -0
  196. data/vendor/local/include/msgpack/v3/adaptor/check_container_size_decl.hpp +39 -0
  197. data/vendor/local/include/msgpack/v3/adaptor/define_decl.hpp +23 -0
  198. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_array_decl.hpp +31 -0
  199. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_map_decl.hpp +31 -0
  200. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_msgpack_tuple_decl.hpp +43 -0
  201. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_array_decl.hpp +32 -0
  202. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_map_decl.hpp +31 -0
  203. data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_msgpack_tuple_decl.hpp +59 -0
  204. data/vendor/local/include/msgpack/v3/adaptor/ext_decl.hpp +34 -0
  205. data/vendor/local/include/msgpack/v3/adaptor/fixint_decl.hpp +43 -0
  206. data/vendor/local/include/msgpack/v3/adaptor/int_decl.hpp +54 -0
  207. data/vendor/local/include/msgpack/v3/adaptor/map_decl.hpp +33 -0
  208. data/vendor/local/include/msgpack/v3/adaptor/msgpack_tuple_decl.hpp +21 -0
  209. data/vendor/local/include/msgpack/v3/adaptor/nil_decl.hpp +42 -0
  210. data/vendor/local/include/msgpack/v3/adaptor/raw_decl.hpp +33 -0
  211. data/vendor/local/include/msgpack/v3/adaptor/size_equal_only_decl.hpp +35 -0
  212. data/vendor/local/include/msgpack/v3/adaptor/v4raw_decl.hpp +34 -0
  213. data/vendor/local/include/msgpack/v3/cpp_config_decl.hpp +84 -0
  214. data/vendor/local/include/msgpack/v3/create_object_visitor_decl.hpp +33 -0
  215. data/vendor/local/include/msgpack/v3/detail/cpp03_zone_decl.hpp +31 -0
  216. data/vendor/local/include/msgpack/v3/detail/cpp11_zone_decl.hpp +31 -0
  217. data/vendor/local/include/msgpack/v3/fbuffer_decl.hpp +32 -0
  218. data/vendor/local/include/msgpack/v3/iterator_decl.hpp +33 -0
  219. data/vendor/local/include/msgpack/v3/meta_decl.hpp +50 -0
  220. data/vendor/local/include/msgpack/v3/null_visitor_decl.hpp +29 -0
  221. data/vendor/local/include/msgpack/v3/object_decl.hpp +53 -0
  222. data/vendor/local/include/msgpack/v3/object_fwd.hpp +70 -0
  223. data/vendor/local/include/msgpack/v3/object_fwd_decl.hpp +75 -0
  224. data/vendor/local/include/msgpack/v3/pack_decl.hpp +55 -0
  225. data/vendor/local/include/msgpack/v3/parse.hpp +677 -0
  226. data/vendor/local/include/msgpack/v3/parse_decl.hpp +49 -0
  227. data/vendor/local/include/msgpack/v3/parse_return.hpp +35 -0
  228. data/vendor/local/include/msgpack/v3/sbuffer_decl.hpp +33 -0
  229. data/vendor/local/include/msgpack/v3/unpack.hpp +192 -0
  230. data/vendor/local/include/msgpack/v3/unpack_decl.hpp +304 -0
  231. data/vendor/local/include/msgpack/v3/vrefbuffer_decl.hpp +29 -0
  232. data/vendor/local/include/msgpack/v3/x3_parse_decl.hpp +34 -0
  233. data/vendor/local/include/msgpack/v3/x3_unpack.hpp +97 -0
  234. data/vendor/local/include/msgpack/v3/x3_unpack_decl.hpp +65 -0
  235. data/vendor/local/include/msgpack/v3/zbuffer_decl.hpp +29 -0
  236. data/vendor/local/include/msgpack/v3/zone_decl.hpp +21 -0
  237. data/vendor/local/include/msgpack/version_master.h +2 -2
  238. data/vendor/local/include/msgpack/versioning.hpp +5 -3
  239. data/vendor/local/include/msgpack/vrefbuffer.h +1 -2
  240. data/vendor/local/include/msgpack/vrefbuffer_decl.hpp +1 -0
  241. data/vendor/local/include/msgpack/x3_parse.hpp +15 -0
  242. data/vendor/local/include/msgpack/x3_parse_decl.hpp +16 -0
  243. data/vendor/local/include/msgpack/x3_unpack.hpp +16 -0
  244. data/vendor/local/include/msgpack/x3_unpack_decl.hpp +16 -0
  245. data/vendor/local/include/msgpack/zbuffer_decl.hpp +1 -0
  246. data/vendor/local/include/msgpack/zone_decl.hpp +1 -0
  247. data/vendor/local/include/pcre.h +6 -6
  248. data/vendor/local/lib/cmake/msgpack/msgpack-config-version.cmake +46 -0
  249. data/vendor/local/lib/cmake/msgpack/msgpack-config.cmake +47 -0
  250. data/vendor/local/lib/cmake/msgpack/msgpack-targets-noconfig.cmake +29 -0
  251. data/vendor/local/lib/cmake/msgpack/msgpack-targets.cmake +101 -0
  252. data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
  253. data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
  254. data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
  255. data/vendor/local/lib/groonga/plugins/functions/index_column.la +1 -1
  256. data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
  257. data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
  258. data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
  259. data/vendor/local/lib/groonga/plugins/functions/math.la +1 -1
  260. data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
  261. data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
  262. data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
  263. data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
  264. data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
  265. data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
  266. data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
  267. data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
  268. data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
  269. data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
  270. data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
  271. data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
  272. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  273. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  274. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  275. data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
  276. data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
  277. data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
  278. data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
  279. data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +2 -2
  280. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  281. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  282. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  283. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
  284. data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +150 -19
  285. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +123 -65
  286. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +528 -113
  287. data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +142 -40
  288. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  289. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  290. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  291. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
  292. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  293. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  294. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  295. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
  296. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  297. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  298. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  299. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
  300. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +1 -1
  301. data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +64 -35
  302. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +3 -1
  303. data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters.rb +15 -21
  304. data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters/optimizer.rb +274 -0
  305. data/vendor/local/lib/groonga/scripts/ruby/expression_tree.rb +8 -2
  306. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign.rb +22 -0
  307. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign_binary_operation.rb +24 -0
  308. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/binary_operation.rb +206 -8
  309. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/constant.rb +16 -1
  310. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +30 -1
  311. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/logical_operation.rb +6 -0
  312. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/member.rb +18 -0
  313. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/null.rb +17 -0
  314. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/reference.rb +18 -0
  315. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/table.rb +14 -0
  316. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/unary_operation.rb +26 -0
  317. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/variable.rb +4 -0
  318. data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +78 -8
  319. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +10 -0
  320. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +2 -0
  321. data/vendor/local/lib/groonga/scripts/ruby/locale_output.rb +28 -0
  322. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +36 -4
  323. data/vendor/local/lib/groonga/scripts/ruby/record.rb +1 -1
  324. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +0 -3
  325. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +46 -5
  326. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data_size_estimator.rb +5 -136
  327. data/vendor/local/lib/groonga/scripts/ruby/table.rb +2 -2
  328. data/vendor/local/lib/libgroonga.a +0 -0
  329. data/vendor/local/lib/libgroonga.dll.a +0 -0
  330. data/vendor/local/lib/libgroonga.la +1 -1
  331. data/vendor/local/lib/liblz4.a +0 -0
  332. data/vendor/local/lib/liblz4.dll +0 -0
  333. data/vendor/local/lib/liblz4.dll.1 +0 -0
  334. data/vendor/local/lib/{liblz4.dll.1.5.0 → liblz4.dll.1.8.2} +0 -0
  335. data/vendor/local/lib/libmecab.dll.a +0 -0
  336. data/vendor/local/lib/libmsgpackc.a +0 -0
  337. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  338. data/vendor/local/lib/libonigmo.a +0 -0
  339. data/vendor/local/lib/libonigmo.dll.a +0 -0
  340. data/vendor/local/lib/libpcre.a +0 -0
  341. data/vendor/local/lib/libpcre.dll.a +0 -0
  342. data/vendor/local/lib/libpcre.la +2 -2
  343. data/vendor/local/lib/libpcrecpp.dll.a +0 -0
  344. data/vendor/local/lib/libpcrecpp.la +1 -1
  345. data/vendor/local/lib/libpcreposix.a +0 -0
  346. data/vendor/local/lib/libpcreposix.dll.a +0 -0
  347. data/vendor/local/lib/libpcreposix.la +2 -2
  348. data/vendor/local/lib/libz.dll.a +0 -0
  349. data/vendor/local/lib/pkgconfig/groonga-normalizer-mysql.pc +1 -1
  350. data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
  351. data/vendor/local/lib/pkgconfig/liblz4.pc +3 -3
  352. data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
  353. data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
  354. data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
  355. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  356. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  357. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  358. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  359. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  360. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  361. data/vendor/local/share/doc/groonga-normalizer-mysql/README.md +14 -22
  362. data/vendor/local/share/doc/groonga-normalizer-mysql/news.md +22 -2
  363. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  364. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +113 -4
  365. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +46 -19
  366. data/vendor/local/share/doc/groonga/en/html/_static/documentation_options.js +10 -0
  367. data/vendor/local/share/doc/groonga/en/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
  368. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
  369. data/vendor/local/share/doc/groonga/en/html/_static/language_data.js +297 -0
  370. data/vendor/local/share/doc/groonga/en/html/_static/pygments.css +4 -0
  371. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +69 -322
  372. data/vendor/local/share/doc/groonga/en/html/characteristic.html +16 -24
  373. data/vendor/local/share/doc/groonga/en/html/client.html +15 -23
  374. data/vendor/local/share/doc/groonga/en/html/community.html +30 -38
  375. data/vendor/local/share/doc/groonga/en/html/contribution.html +23 -31
  376. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +15 -23
  377. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +15 -23
  378. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +58 -66
  379. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +51 -56
  380. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +52 -56
  381. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +27 -35
  382. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +19 -27
  383. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +26 -34
  384. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +167 -167
  385. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +16 -24
  386. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +28 -36
  387. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +15 -23
  388. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +15 -23
  389. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +59 -67
  390. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +31 -39
  391. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +18 -26
  392. data/vendor/local/share/doc/groonga/en/html/development.html +15 -23
  393. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +38 -43
  394. data/vendor/local/share/doc/groonga/en/html/genindex.html +50 -28
  395. data/vendor/local/share/doc/groonga/en/html/index.html +248 -234
  396. data/vendor/local/share/doc/groonga/en/html/install.html +43 -47
  397. data/vendor/local/share/doc/groonga/en/html/install/centos.html +43 -51
  398. data/vendor/local/share/doc/groonga/en/html/install/debian.html +52 -131
  399. data/vendor/local/share/doc/groonga/en/html/install/docker.html +155 -0
  400. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +41 -49
  401. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +29 -37
  402. data/vendor/local/share/doc/groonga/en/html/install/others.html +142 -150
  403. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +30 -38
  404. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +43 -51
  405. data/vendor/local/share/doc/groonga/en/html/install/windows.html +33 -41
  406. data/vendor/local/share/doc/groonga/en/html/limitations.html +36 -42
  407. data/vendor/local/share/doc/groonga/en/html/news.html +1586 -598
  408. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +83 -83
  409. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +147 -155
  410. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +26 -34
  411. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +225 -233
  412. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +48 -56
  413. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +378 -386
  414. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +320 -328
  415. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +442 -448
  416. data/vendor/local/share/doc/groonga/en/html/news/5.x.html +742 -860
  417. data/vendor/local/share/doc/groonga/en/html/news/6.x.html +544 -621
  418. data/vendor/local/share/doc/groonga/en/html/news/senna.html +32 -40
  419. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  420. data/vendor/local/share/doc/groonga/en/html/reference.html +208 -198
  421. data/vendor/local/share/doc/groonga/en/html/reference/alias.html +85 -93
  422. data/vendor/local/share/doc/groonga/en/html/reference/api.html +50 -57
  423. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +62 -77
  424. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +117 -149
  425. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +140 -176
  426. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +43 -55
  427. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +48 -56
  428. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +194 -254
  429. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +106 -138
  430. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +62 -82
  431. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +117 -137
  432. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +74 -98
  433. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +79 -103
  434. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +40 -48
  435. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +57 -73
  436. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +75 -99
  437. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_inspect.html +495 -0
  438. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +52 -68
  439. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +291 -357
  440. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +69 -89
  441. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +47 -59
  442. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +226 -306
  443. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +120 -160
  444. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +80 -103
  445. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +46 -58
  446. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +40 -52
  447. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +52 -66
  448. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +98 -122
  449. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +40 -26
  450. data/vendor/local/share/doc/groonga/en/html/reference/column.html +16 -24
  451. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +16 -24
  452. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +30 -34
  453. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +16 -24
  454. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +92 -100
  455. data/vendor/local/share/doc/groonga/en/html/reference/command.html +76 -84
  456. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +26 -34
  457. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +64 -72
  458. data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +21 -29
  459. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +25 -33
  460. data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +32 -40
  461. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +105 -113
  462. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -50
  463. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +85 -73
  464. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +31 -37
  465. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +131 -139
  466. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +370 -326
  467. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +115 -117
  468. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +38 -44
  469. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +47 -53
  470. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +40 -48
  471. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +42 -50
  472. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +41 -49
  473. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +37 -45
  474. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +71 -63
  475. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +31 -37
  476. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +49 -51
  477. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +64 -71
  478. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +335 -138
  479. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +233 -87
  480. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +45 -53
  481. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +42 -48
  482. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +43 -51
  483. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +58 -64
  484. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +33 -38
  485. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +31 -38
  486. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +295 -218
  487. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +56 -64
  488. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +532 -214
  489. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +797 -388
  490. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +35 -43
  491. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +188 -196
  492. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +83 -90
  493. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +41 -48
  494. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +41 -49
  495. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +401 -403
  496. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +253 -261
  497. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +60 -68
  498. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +36 -44
  499. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +35 -43
  500. data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +21 -29
  501. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +22 -30
  502. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +21 -29
  503. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +39 -47
  504. data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +47 -53
  505. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  506. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +38 -45
  507. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +38 -45
  508. data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +330 -338
  509. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +1545 -1194
  510. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +57 -65
  511. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +83 -91
  512. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +119 -133
  513. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +30 -38
  514. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +165 -174
  515. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +50 -50
  516. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +104 -112
  517. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +42 -50
  518. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +49 -57
  519. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +46 -54
  520. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +110 -117
  521. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +41 -48
  522. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +40 -46
  523. data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +37 -45
  524. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -27
  525. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +134 -114
  526. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +25 -31
  527. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +66 -66
  528. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +174 -182
  529. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +25 -33
  530. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +27 -35
  531. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +191 -199
  532. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +32 -40
  533. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +189 -163
  534. data/vendor/local/share/doc/groonga/en/html/reference/function.html +59 -64
  535. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +71 -79
  536. data/vendor/local/share/doc/groonga/en/html/reference/functions/cast_loose.html +210 -0
  537. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +49 -55
  538. data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +38 -46
  539. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +133 -142
  540. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +67 -73
  541. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +56 -62
  542. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +80 -88
  543. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +70 -78
  544. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +56 -64
  545. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +87 -94
  546. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +54 -62
  547. data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +55 -63
  548. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -48
  549. data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +36 -44
  550. data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +74 -82
  551. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +152 -160
  552. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +45 -52
  553. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +76 -84
  554. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +37 -45
  555. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +39 -47
  556. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +76 -84
  557. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +37 -45
  558. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day_of_week.html +278 -0
  559. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +37 -45
  560. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +36 -44
  561. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +36 -44
  562. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +36 -44
  563. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +36 -44
  564. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +37 -45
  565. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_find.html +368 -0
  566. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +40 -48
  567. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +54 -62
  568. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +40 -47
  569. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +44 -52
  570. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +307 -316
  571. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +486 -492
  572. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +44 -52
  573. data/vendor/local/share/doc/groonga/en/html/reference/log.html +128 -147
  574. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +43 -92
  575. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_auto.html +179 -0
  576. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc100.html +897 -0
  577. data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc51.html +162 -0
  578. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +26 -34
  579. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +48 -56
  580. data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +47 -55
  581. data/vendor/local/share/doc/groonga/en/html/reference/output.html +47 -55
  582. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +20 -28
  583. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +93 -101
  584. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +228 -225
  585. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +59 -67
  586. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +50 -58
  587. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +57 -65
  588. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +76 -86
  589. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +43 -51
  590. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +159 -167
  591. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +93 -101
  592. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +85 -93
  593. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +88 -96
  594. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +142 -150
  595. data/vendor/local/share/doc/groonga/en/html/reference/token_filter/summary.html +147 -0
  596. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +31 -223
  597. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_nfkc100.html +626 -0
  598. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stem.html +291 -0
  599. data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stop_word.html +287 -0
  600. data/vendor/local/share/doc/groonga/en/html/reference/tokenizer/summary.html +259 -0
  601. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +42 -1455
  602. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram.html +368 -0
  603. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank.html +221 -0
  604. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +240 -0
  605. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +270 -0
  606. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +292 -0
  607. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
  608. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +200 -0
  609. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +212 -0
  610. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit.html +357 -0
  611. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit_null.html +162 -0
  612. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_mecab.html +783 -0
  613. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_regexp.html +289 -0
  614. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_trigram.html +194 -0
  615. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_unigram.html +194 -0
  616. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +71 -79
  617. data/vendor/local/share/doc/groonga/en/html/reference/types.html +64 -72
  618. data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +29 -37
  619. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +38 -46
  620. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +38 -46
  621. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +38 -46
  622. data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +38 -46
  623. data/vendor/local/share/doc/groonga/en/html/search.html +13 -24
  624. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  625. data/vendor/local/share/doc/groonga/en/html/server.html +15 -23
  626. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +27 -35
  627. data/vendor/local/share/doc/groonga/en/html/server/http.html +18 -26
  628. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +94 -102
  629. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +15 -23
  630. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +15 -23
  631. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +18 -26
  632. data/vendor/local/share/doc/groonga/en/html/server/package.html +101 -109
  633. data/vendor/local/share/doc/groonga/en/html/spec.html +19 -27
  634. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +207 -215
  635. data/vendor/local/share/doc/groonga/en/html/spec/search.html +39 -39
  636. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +15 -23
  637. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +46 -50
  638. data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +27 -35
  639. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +26 -31
  640. data/vendor/local/share/doc/groonga/en/html/tutorial.html +17 -25
  641. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +46 -54
  642. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +63 -71
  643. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +30 -38
  644. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +88 -97
  645. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +19 -27
  646. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +61 -69
  647. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +108 -116
  648. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +24 -32
  649. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +19 -27
  650. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +32 -40
  651. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +52 -60
  652. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  653. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +113 -4
  654. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +46 -19
  655. data/vendor/local/share/doc/groonga/ja/html/_static/documentation_options.js +10 -0
  656. data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
  657. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
  658. data/vendor/local/share/doc/groonga/ja/html/_static/language_data.js +124 -0
  659. data/vendor/local/share/doc/groonga/ja/html/_static/pygments.css +4 -0
  660. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +70 -150
  661. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +15 -23
  662. data/vendor/local/share/doc/groonga/ja/html/client.html +15 -23
  663. data/vendor/local/share/doc/groonga/ja/html/community.html +29 -37
  664. data/vendor/local/share/doc/groonga/ja/html/contribution.html +23 -31
  665. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +15 -23
  666. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +15 -23
  667. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +50 -58
  668. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +43 -48
  669. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +47 -51
  670. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +26 -34
  671. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +18 -26
  672. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +23 -31
  673. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +162 -162
  674. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +16 -24
  675. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +26 -34
  676. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +15 -23
  677. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +15 -23
  678. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +50 -58
  679. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +28 -36
  680. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -25
  681. data/vendor/local/share/doc/groonga/ja/html/development.html +15 -23
  682. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +32 -37
  683. data/vendor/local/share/doc/groonga/ja/html/genindex.html +50 -28
  684. data/vendor/local/share/doc/groonga/ja/html/index.html +247 -233
  685. data/vendor/local/share/doc/groonga/ja/html/install.html +41 -45
  686. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +44 -52
  687. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +52 -121
  688. data/vendor/local/share/doc/groonga/ja/html/install/docker.html +155 -0
  689. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +40 -48
  690. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +28 -36
  691. data/vendor/local/share/doc/groonga/ja/html/install/others.html +116 -124
  692. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +28 -36
  693. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +43 -51
  694. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -37
  695. data/vendor/local/share/doc/groonga/ja/html/limitations.html +30 -36
  696. data/vendor/local/share/doc/groonga/ja/html/news.html +1234 -384
  697. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +82 -82
  698. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +146 -154
  699. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +25 -33
  700. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +191 -199
  701. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +41 -49
  702. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +283 -291
  703. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +229 -237
  704. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +274 -280
  705. data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +475 -593
  706. data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +313 -390
  707. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +31 -39
  708. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  709. data/vendor/local/share/doc/groonga/ja/html/reference.html +208 -198
  710. data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +70 -78
  711. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +50 -57
  712. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +57 -72
  713. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +107 -139
  714. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +137 -173
  715. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +40 -52
  716. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +46 -54
  717. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +184 -244
  718. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +99 -131
  719. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +57 -77
  720. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +100 -120
  721. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +71 -95
  722. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +75 -99
  723. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +37 -45
  724. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +54 -70
  725. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +71 -95
  726. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_inspect.html +487 -0
  727. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +49 -65
  728. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +286 -352
  729. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +64 -84
  730. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +44 -56
  731. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +219 -299
  732. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +116 -156
  733. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +70 -93
  734. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +42 -54
  735. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +36 -48
  736. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -62
  737. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +94 -118
  738. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +39 -25
  739. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +15 -23
  740. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +15 -23
  741. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -32
  742. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +15 -23
  743. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +76 -84
  744. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +76 -84
  745. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +25 -33
  746. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +51 -59
  747. data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +20 -28
  748. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +21 -29
  749. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +27 -35
  750. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +101 -109
  751. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +39 -45
  752. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +84 -72
  753. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +30 -36
  754. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +104 -112
  755. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +271 -237
  756. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +100 -102
  757. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +37 -43
  758. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +41 -47
  759. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +38 -46
  760. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +39 -47
  761. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +39 -47
  762. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +34 -42
  763. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +70 -62
  764. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +30 -36
  765. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +42 -44
  766. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +59 -68
  767. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +300 -126
  768. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +212 -80
  769. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +42 -50
  770. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +40 -46
  771. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +41 -49
  772. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +57 -63
  773. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +32 -37
  774. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +30 -37
  775. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +246 -178
  776. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +51 -59
  777. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +479 -175
  778. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +718 -326
  779. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +34 -42
  780. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +145 -153
  781. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +78 -85
  782. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +40 -47
  783. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +36 -44
  784. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +360 -362
  785. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +221 -229
  786. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +47 -55
  787. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +32 -40
  788. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +33 -41
  789. data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +20 -28
  790. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +21 -29
  791. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +20 -28
  792. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +35 -43
  793. data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +42 -48
  794. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +57 -57
  795. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +36 -43
  796. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +36 -43
  797. data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +317 -325
  798. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +1246 -917
  799. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +50 -58
  800. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +77 -85
  801. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +109 -123
  802. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +29 -37
  803. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +131 -140
  804. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -49
  805. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +87 -95
  806. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +36 -44
  807. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +44 -52
  808. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +38 -46
  809. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +93 -100
  810. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +40 -47
  811. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +39 -45
  812. data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +36 -44
  813. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -27
  814. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +125 -107
  815. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +23 -29
  816. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +62 -62
  817. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +132 -140
  818. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +23 -31
  819. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +25 -33
  820. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +166 -174
  821. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +31 -39
  822. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +189 -165
  823. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +59 -64
  824. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +69 -77
  825. data/vendor/local/share/doc/groonga/ja/html/reference/functions/cast_loose.html +208 -0
  826. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +48 -54
  827. data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +37 -45
  828. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +115 -124
  829. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +66 -72
  830. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +55 -61
  831. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +69 -77
  832. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +60 -68
  833. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +54 -62
  834. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +85 -93
  835. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +54 -62
  836. data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +54 -62
  837. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +39 -47
  838. data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +35 -43
  839. data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +67 -75
  840. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +130 -138
  841. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +44 -51
  842. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +61 -69
  843. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +36 -44
  844. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +38 -46
  845. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +63 -71
  846. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +36 -44
  847. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day_of_week.html +276 -0
  848. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +36 -44
  849. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +35 -43
  850. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +35 -43
  851. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +35 -43
  852. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +35 -43
  853. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +36 -44
  854. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_find.html +353 -0
  855. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +39 -47
  856. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +52 -61
  857. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +38 -46
  858. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +38 -46
  859. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +200 -208
  860. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +375 -382
  861. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +39 -47
  862. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +125 -144
  863. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +36 -70
  864. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_auto.html +168 -0
  865. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc100.html +887 -0
  866. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc51.html +160 -0
  867. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +26 -34
  868. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +38 -46
  869. data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +41 -49
  870. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +42 -50
  871. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +20 -28
  872. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +68 -76
  873. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +178 -184
  874. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +38 -46
  875. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +38 -46
  876. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +39 -47
  877. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +63 -73
  878. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +43 -51
  879. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +130 -138
  880. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +72 -80
  881. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +68 -76
  882. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +76 -86
  883. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +129 -137
  884. data/vendor/local/share/doc/groonga/ja/html/reference/token_filter/summary.html +145 -0
  885. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +31 -215
  886. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_nfkc100.html +617 -0
  887. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stem.html +289 -0
  888. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stop_word.html +284 -0
  889. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizer/summary.html +233 -0
  890. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -1349
  891. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram.html +344 -0
  892. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank.html +219 -0
  893. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +237 -0
  894. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +267 -0
  895. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +287 -0
  896. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
  897. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +199 -0
  898. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +209 -0
  899. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit.html +344 -0
  900. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit_null.html +160 -0
  901. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_mecab.html +764 -0
  902. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_regexp.html +284 -0
  903. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_trigram.html +191 -0
  904. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_unigram.html +191 -0
  905. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +65 -73
  906. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +48 -56
  907. data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +29 -37
  908. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +37 -45
  909. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +37 -45
  910. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +37 -45
  911. data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +37 -45
  912. data/vendor/local/share/doc/groonga/ja/html/search.html +13 -24
  913. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  914. data/vendor/local/share/doc/groonga/ja/html/server.html +15 -23
  915. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +22 -30
  916. data/vendor/local/share/doc/groonga/ja/html/server/http.html +17 -25
  917. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +82 -90
  918. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +15 -23
  919. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +15 -23
  920. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +16 -24
  921. data/vendor/local/share/doc/groonga/ja/html/server/package.html +99 -107
  922. data/vendor/local/share/doc/groonga/ja/html/spec.html +19 -27
  923. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +201 -209
  924. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +36 -36
  925. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +15 -23
  926. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +44 -48
  927. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +21 -29
  928. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +24 -29
  929. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +16 -24
  930. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +32 -40
  931. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +62 -70
  932. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -30
  933. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +77 -86
  934. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +15 -23
  935. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +56 -64
  936. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +84 -92
  937. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +20 -28
  938. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -26
  939. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +21 -29
  940. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +43 -51
  941. data/vendor/local/share/doc/pcre/AUTHORS +3 -3
  942. data/vendor/local/share/doc/pcre/ChangeLog +53 -0
  943. data/vendor/local/share/doc/pcre/LICENCE +3 -3
  944. data/vendor/local/share/doc/pcre/NEWS +6 -0
  945. data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +8 -7
  946. data/vendor/local/share/groonga/mruby/LEGAL +35 -35
  947. data/vendor/local/share/license/cv2pdb/{README → README.MD} +28 -10
  948. data/vendor/local/share/license/groonga-normalizer-mysql/README.md +14 -22
  949. data/vendor/local/share/license/lz4/LICENSE +2 -2
  950. data/vendor/local/share/license/mruby/AUTHORS +3 -0
  951. data/vendor/local/share/license/mruby/MITL +1 -1
  952. data/vendor/local/share/license/mruby/README.md +1 -1
  953. data/vendor/local/share/license/msgpack/README.md +5 -34
  954. data/vendor/local/share/license/pcre/LICENCE +3 -3
  955. data/vendor/local/share/man/man1/lz4.1 +221 -86
  956. data/vendor/local/share/man/man1/lz4c.1 +222 -32
  957. data/vendor/local/share/man/man1/lz4cat.1 +221 -30
  958. data/vendor/local/share/man/man1/unlz4.1 +223 -0
  959. metadata +231 -87
  960. data/lib/2.1/groonga.so +0 -0
  961. data/vendor/local/lib/groonga/plugins/expression_rewriters/optimizer.rb +0 -147
  962. data/vendor/local/lib/groonga/scripts/ruby/expression_tree/options.rb +0 -14
  963. data/vendor/local/share/doc/groonga/en/html/_static/ajax-loader.gif +0 -0
  964. data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
  965. data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
  966. data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
  967. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  968. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  969. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  970. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  971. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +0 -808
  972. data/vendor/local/share/doc/groonga/ja/html/_static/ajax-loader.gif +0 -0
  973. data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
  974. data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
  975. data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
  976. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  977. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  978. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  979. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  980. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +0 -808
@@ -0,0 +1,233 @@
1
+
2
+
3
+ <!DOCTYPE html>
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="ja">
6
+ <head>
7
+ <meta charset="utf-8" />
8
+ <title>7.8.1. 概要 &#8212; Groonga v9.0.2ドキュメント</title>
9
+ <link rel="stylesheet" href="../../_static/groonga.css" type="text/css" />
10
+ <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
11
+
12
+ <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
13
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
14
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
15
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
16
+ <script type="text/javascript" src="../../_static/language_data.js"></script>
17
+ <script type="text/javascript" src="../../_static/translations.js"></script>
18
+
19
+ <link rel="shortcut icon" href="../../_static/favicon.ico"/>
20
+ <link rel="index" title="索引" href="../../genindex.html" />
21
+ <link rel="search" title="検索" href="../../search.html" />
22
+ <link rel="next" title="7.8.2. TokenBigram" href="../tokenizers/token_bigram.html" />
23
+ <link rel="prev" title="7.8. トークナイザー" href="../tokenizers.html" />
24
+ </head><body>
25
+ <div class="header">
26
+ <h1 class="title">
27
+ <a id="top-link" href="../../index.html">
28
+ <span class="project">groonga</span>
29
+ <span class="separator">-</span>
30
+ <span class="description">オープンソースのカラムストア機能付き全文検索エンジン</span>
31
+ </a>
32
+ </h1>
33
+
34
+ <div class="other-language-links">
35
+ <ul>
36
+ <li><a href="../../../../en/html/reference/tokenizer/summary.html">English</a></li>
37
+ </ul>
38
+ </div>
39
+ </div>
40
+
41
+
42
+ <div class="related" role="navigation" aria-label="related navigation">
43
+ <h3>ナビゲーション</h3>
44
+ <ul>
45
+ <li class="right" style="margin-right: 10px">
46
+ <a href="../../genindex.html" title="総合索引"
47
+ accesskey="I">索引</a></li>
48
+ <li class="right" >
49
+ <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
50
+ accesskey="N">次へ</a> |</li>
51
+ <li class="right" >
52
+ <a href="../tokenizers.html" title="7.8. トークナイザー"
53
+ accesskey="P">前へ</a> |</li>
54
+ <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2ドキュメント</a> &#187;</li>
55
+ <li class="nav-item nav-item-1"><a href="../../reference.html" >7. リファレンスマニュアル</a> &#187;</li>
56
+ <li class="nav-item nav-item-2"><a href="../tokenizers.html" accesskey="U">7.8. トークナイザー</a> &#187;</li>
57
+ </ul>
58
+ </div>
59
+
60
+ <div class="document">
61
+ <div class="documentwrapper">
62
+ <div class="bodywrapper">
63
+ <div class="body" role="main">
64
+
65
+ <div class="section" id="summary">
66
+ <h1>7.8.1. 概要<a class="headerlink" href="#summary" title="このヘッドラインへのパーマリンク">¶</a></h1>
67
+ <p>Groongaにはテキストをトークナイズするトークナイザーモジュールがあります。次のケースのときにトークナイザーを使います。</p>
68
+ <blockquote>
69
+ <div><ul>
70
+ <li><p>テキストのインデックスを構築するとき</p>
71
+ <div class="figure align-center" id="id1">
72
+ <a class="reference internal image-reference" href="../../_images/used-when-indexing.png"><img alt="../../_images/used-when-indexing.png" src="../../_images/used-when-indexing.png" style="width: 80%;" /></a>
73
+ <p class="caption"><span class="caption-text">テキストのインデックスを構築するときにトークナイザーを使います。</span><a class="headerlink" href="#id1" title="この画像へのパーマリンク">¶</a></p>
74
+ </div>
75
+ </li>
76
+ <li><p>クエリーで検索するとき</p>
77
+ <div class="figure align-center" id="id2">
78
+ <a class="reference internal image-reference" href="../../_images/used-when-searching.png"><img alt="../../_images/used-when-searching.png" src="../../_images/used-when-searching.png" style="width: 80%;" /></a>
79
+ <p class="caption"><span class="caption-text">クエリーで検索するときにトークナイザーを使います。</span><a class="headerlink" href="#id2" title="この画像へのパーマリンク">¶</a></p>
80
+ </div>
81
+ </li>
82
+ </ul>
83
+ </div></blockquote>
84
+ <p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
85
+ <p>一般的に <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> を使うことをオススメします。</p>
86
+ <p><a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドと <a class="reference internal" href="../commands/table_tokenize.html"><span class="doc">table_tokenize</span></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドを使って <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
87
+ <p>実行例:</p>
88
+ <div class="highlight-none notranslate"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
89
+ # [
90
+ # [
91
+ # 0,
92
+ # 1337566253.89858,
93
+ # 0.000355720520019531
94
+ # ],
95
+ # [
96
+ # {
97
+ # &quot;position&quot;: 0,
98
+ # &quot;force_prefix&quot;: false,
99
+ # &quot;value&quot;: &quot;He&quot;
100
+ # },
101
+ # {
102
+ # &quot;position&quot;: 1,
103
+ # &quot;force_prefix&quot;: false,
104
+ # &quot;value&quot;: &quot;el&quot;
105
+ # },
106
+ # {
107
+ # &quot;position&quot;: 2,
108
+ # &quot;force_prefix&quot;: false,
109
+ # &quot;value&quot;: &quot;ll&quot;
110
+ # },
111
+ # {
112
+ # &quot;position&quot;: 3,
113
+ # &quot;force_prefix&quot;: false,
114
+ # &quot;value&quot;: &quot;lo&quot;
115
+ # },
116
+ # {
117
+ # &quot;position&quot;: 4,
118
+ # &quot;force_prefix&quot;: false,
119
+ # &quot;value&quot;: &quot;o &quot;
120
+ # },
121
+ # {
122
+ # &quot;position&quot;: 5,
123
+ # &quot;force_prefix&quot;: false,
124
+ # &quot;value&quot;: &quot; W&quot;
125
+ # },
126
+ # {
127
+ # &quot;position&quot;: 6,
128
+ # &quot;force_prefix&quot;: false,
129
+ # &quot;value&quot;: &quot;Wo&quot;
130
+ # },
131
+ # {
132
+ # &quot;position&quot;: 7,
133
+ # &quot;force_prefix&quot;: false,
134
+ # &quot;value&quot;: &quot;or&quot;
135
+ # },
136
+ # {
137
+ # &quot;position&quot;: 8,
138
+ # &quot;force_prefix&quot;: false,
139
+ # &quot;value&quot;: &quot;rl&quot;
140
+ # },
141
+ # {
142
+ # &quot;position&quot;: 9,
143
+ # &quot;force_prefix&quot;: false,
144
+ # &quot;value&quot;: &quot;ld&quot;
145
+ # },
146
+ # {
147
+ # &quot;position&quot;: 10,
148
+ # &quot;force_prefix&quot;: false,
149
+ # &quot;value&quot;: &quot;d&quot;
150
+ # }
151
+ # ]
152
+ # ]
153
+ </pre></div>
154
+ </div>
155
+ <p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
156
+ <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
157
+ <blockquote>
158
+ <div><ul class="simple">
159
+ <li><p><code class="docutils literal notranslate"><span class="pre">He</span></code></p></li>
160
+ <li><p><code class="docutils literal notranslate"><span class="pre">el</span></code></p></li>
161
+ <li><p><code class="docutils literal notranslate"><span class="pre">ll</span></code></p></li>
162
+ <li><p><code class="docutils literal notranslate"><span class="pre">lo</span></code></p></li>
163
+ <li><p><code class="docutils literal notranslate"><span class="pre">o_</span></code> ( <code class="docutils literal notranslate"><span class="pre">_</span></code> は空白文字という意味)</p></li>
164
+ <li><p><code class="docutils literal notranslate"><span class="pre">_W</span></code> ( <code class="docutils literal notranslate"><span class="pre">_</span></code> は空白文字という意味)</p></li>
165
+ <li><p><code class="docutils literal notranslate"><span class="pre">Wo</span></code></p></li>
166
+ <li><p><code class="docutils literal notranslate"><span class="pre">or</span></code></p></li>
167
+ <li><p><code class="docutils literal notranslate"><span class="pre">rl</span></code></p></li>
168
+ <li><p><code class="docutils literal notranslate"><span class="pre">ld</span></code></p></li>
169
+ </ul>
170
+ </div></blockquote>
171
+ <p>上記の例では、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
172
+ <p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
173
+ <blockquote>
174
+ <div><ul class="simple">
175
+ <li><p><code class="docutils literal notranslate"><span class="pre">Hello</span></code></p></li>
176
+ <li><p><code class="docutils literal notranslate"><span class="pre">World</span></code></p></li>
177
+ </ul>
178
+ </div></blockquote>
179
+ <p>上記の例では、<code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
180
+ <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal notranslate"><span class="pre">ll</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal notranslate"><span class="pre">ll</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal notranslate"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal notranslate"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal notranslate"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
181
+ <p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
182
+ <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">or</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
183
+ <p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal notranslate"><span class="pre">or</span></code> で <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">World</span></code> は <code class="docutils literal notranslate"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal notranslate"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
184
+ </div>
185
+
186
+
187
+ </div>
188
+ </div>
189
+ </div>
190
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
191
+ <div class="sphinxsidebarwrapper">
192
+ <h4>前のトピックへ</h4>
193
+ <p class="topless"><a href="../tokenizers.html"
194
+ title="前の章へ">7.8. トークナイザー</a></p>
195
+ <h4>次のトピックへ</h4>
196
+ <p class="topless"><a href="../tokenizers/token_bigram.html"
197
+ title="次の章へ">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></p>
198
+ <div id="searchbox" style="display: none" role="search">
199
+ <h3>クイック検索</h3>
200
+ <div class="searchformwrapper">
201
+ <form class="search" action="../../search.html" method="get">
202
+ <input type="text" name="q" />
203
+ <input type="submit" value="検索" />
204
+ </form>
205
+ </div>
206
+ </div>
207
+ <script type="text/javascript">$('#searchbox').show(0);</script>
208
+ </div>
209
+ </div>
210
+ <div class="clearer"></div>
211
+ </div>
212
+ <div class="related" role="navigation" aria-label="related navigation">
213
+ <h3>ナビゲーション</h3>
214
+ <ul>
215
+ <li class="right" style="margin-right: 10px">
216
+ <a href="../../genindex.html" title="総合索引"
217
+ >索引</a></li>
218
+ <li class="right" >
219
+ <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
220
+ >次へ</a> |</li>
221
+ <li class="right" >
222
+ <a href="../tokenizers.html" title="7.8. トークナイザー"
223
+ >前へ</a> |</li>
224
+ <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2ドキュメント</a> &#187;</li>
225
+ <li class="nav-item nav-item-1"><a href="../../reference.html" >7. リファレンスマニュアル</a> &#187;</li>
226
+ <li class="nav-item nav-item-2"><a href="../tokenizers.html" >7.8. トークナイザー</a> &#187;</li>
227
+ </ul>
228
+ </div>
229
+ <div class="footer" role="contentinfo">
230
+ &#169; Copyright 2009-2019, Brazil, Inc.
231
+ </div>
232
+ </body>
233
+ </html>
@@ -1,35 +1,27 @@
1
1
 
2
2
 
3
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <!DOCTYPE html>
5
4
 
6
5
  <html xmlns="http://www.w3.org/1999/xhtml" lang="ja">
7
6
  <head>
8
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
- <title>7.8. トークナイザー &#8212; Groonga v7.1.0-73-g6d02cfaドキュメント</title>
7
+ <meta charset="utf-8" />
8
+ <title>7.8. トークナイザー &#8212; Groonga v9.0.2ドキュメント</title>
10
9
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
11
10
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
12
- <script type="text/javascript">
13
- var DOCUMENTATION_OPTIONS = {
14
- URL_ROOT: '../',
15
- VERSION: '7.1.0-73-g6d02cfa',
16
- COLLAPSE_INDEX: false,
17
- FILE_SUFFIX: '.html',
18
- HAS_SOURCE: false,
19
- SOURCELINK_SUFFIX: '.txt'
20
- };
21
- </script>
11
+
12
+ <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
22
13
  <script type="text/javascript" src="../_static/jquery.js"></script>
23
14
  <script type="text/javascript" src="../_static/underscore.js"></script>
24
15
  <script type="text/javascript" src="../_static/doctools.js"></script>
16
+ <script type="text/javascript" src="../_static/language_data.js"></script>
25
17
  <script type="text/javascript" src="../_static/translations.js"></script>
18
+
26
19
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
27
20
  <link rel="index" title="索引" href="../genindex.html" />
28
21
  <link rel="search" title="検索" href="../search.html" />
29
- <link rel="next" title="7.9. トークンフィルター" href="token_filters.html" />
30
- <link rel="prev" title="7.7. ノーマライザー" href="normalizers.html" />
31
- </head>
32
- <body>
22
+ <link rel="next" title="7.8.1. 概要" href="tokenizer/summary.html" />
23
+ <link rel="prev" title="7.7.2.3. NormalizerNFKC51" href="normalizers/normalizer_nfkc51.html" />
24
+ </head><body>
33
25
  <div class="header">
34
26
  <h1 class="title">
35
27
  <a id="top-link" href="../index.html">
@@ -54,12 +46,12 @@
54
46
  <a href="../genindex.html" title="総合索引"
55
47
  accesskey="I">索引</a></li>
56
48
  <li class="right" >
57
- <a href="token_filters.html" title="7.9. トークンフィルター"
49
+ <a href="tokenizer/summary.html" title="7.8.1. 概要"
58
50
  accesskey="N">次へ</a> |</li>
59
51
  <li class="right" >
60
- <a href="normalizers.html" title="7.7. ノーマライザー"
52
+ <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
61
53
  accesskey="P">前へ</a> |</li>
62
- <li class="nav-item nav-item-0"><a href="../index.html">Groonga v7.1.0-73-g6d02cfaドキュメント</a> &#187;</li>
54
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2ドキュメント</a> &#187;</li>
63
55
  <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> &#187;</li>
64
56
  </ul>
65
57
  </div>
@@ -71,1297 +63,24 @@
71
63
 
72
64
  <div class="section" id="tokenizers">
73
65
  <h1>7.8. トークナイザー<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
74
- <div class="section" id="summary">
75
- <h2>7.8.1. 概要<a class="headerlink" href="#summary" title="このヘッドラインへのパーマリンク">¶</a></h2>
76
- <p>Groongaにはテキストをトークナイズするトークナイザーモジュールがあります。次のケースのときにトークナイザーを使います。</p>
77
- <blockquote>
78
- <div><ul>
79
- <li><p class="first">テキストのインデックスを構築するとき</p>
80
- <div class="figure align-center" id="id1">
81
- <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
82
- <p class="caption"><span class="caption-text">テキストのインデックスを構築するときにトークナイザーを使います。</span></p>
83
- </div>
84
- </li>
85
- <li><p class="first">クエリーで検索するとき</p>
86
- <div class="figure align-center" id="id2">
87
- <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
88
- <p class="caption"><span class="caption-text">クエリーで検索するときにトークナイザーを使います。</span></p>
89
- </div>
90
- </li>
91
- </ul>
92
- </div></blockquote>
93
- <p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
94
- <p>一般的に <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> を使うことをオススメします。</p>
95
- <p><a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><span class="doc">table_tokenize</span></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドを使って <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
96
- <p>実行例:</p>
97
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
98
- # [
99
- # [
100
- # 0,
101
- # 1337566253.89858,
102
- # 0.000355720520019531
103
- # ],
104
- # [
105
- # {
106
- # &quot;position&quot;: 0,
107
- # &quot;force_prefix&quot;: false,
108
- # &quot;value&quot;: &quot;He&quot;
109
- # },
110
- # {
111
- # &quot;position&quot;: 1,
112
- # &quot;force_prefix&quot;: false,
113
- # &quot;value&quot;: &quot;el&quot;
114
- # },
115
- # {
116
- # &quot;position&quot;: 2,
117
- # &quot;force_prefix&quot;: false,
118
- # &quot;value&quot;: &quot;ll&quot;
119
- # },
120
- # {
121
- # &quot;position&quot;: 3,
122
- # &quot;force_prefix&quot;: false,
123
- # &quot;value&quot;: &quot;lo&quot;
124
- # },
125
- # {
126
- # &quot;position&quot;: 4,
127
- # &quot;force_prefix&quot;: false,
128
- # &quot;value&quot;: &quot;o &quot;
129
- # },
130
- # {
131
- # &quot;position&quot;: 5,
132
- # &quot;force_prefix&quot;: false,
133
- # &quot;value&quot;: &quot; W&quot;
134
- # },
135
- # {
136
- # &quot;position&quot;: 6,
137
- # &quot;force_prefix&quot;: false,
138
- # &quot;value&quot;: &quot;Wo&quot;
139
- # },
140
- # {
141
- # &quot;position&quot;: 7,
142
- # &quot;force_prefix&quot;: false,
143
- # &quot;value&quot;: &quot;or&quot;
144
- # },
145
- # {
146
- # &quot;position&quot;: 8,
147
- # &quot;force_prefix&quot;: false,
148
- # &quot;value&quot;: &quot;rl&quot;
149
- # },
150
- # {
151
- # &quot;position&quot;: 9,
152
- # &quot;force_prefix&quot;: false,
153
- # &quot;value&quot;: &quot;ld&quot;
154
- # },
155
- # {
156
- # &quot;position&quot;: 10,
157
- # &quot;force_prefix&quot;: false,
158
- # &quot;value&quot;: &quot;d&quot;
159
- # }
160
- # ]
161
- # ]
162
- </pre></div>
163
- </div>
164
- </div>
165
- <div class="section" id="what-is-tokenize">
166
- <h2>7.8.2. 「トークナイズ」とはなにか<a class="headerlink" href="#what-is-tokenize" title="このヘッドラインへのパーマリンク">¶</a></h2>
167
- <p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
168
- <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
169
- <blockquote>
170
- <div><ul class="simple">
171
- <li><code class="docutils literal"><span class="pre">He</span></code></li>
172
- <li><code class="docutils literal"><span class="pre">el</span></code></li>
173
- <li><code class="docutils literal"><span class="pre">ll</span></code></li>
174
- <li><code class="docutils literal"><span class="pre">lo</span></code></li>
175
- <li><code class="docutils literal"><span class="pre">o_</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</li>
176
- <li><code class="docutils literal"><span class="pre">_W</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</li>
177
- <li><code class="docutils literal"><span class="pre">Wo</span></code></li>
178
- <li><code class="docutils literal"><span class="pre">or</span></code></li>
179
- <li><code class="docutils literal"><span class="pre">rl</span></code></li>
180
- <li><code class="docutils literal"><span class="pre">ld</span></code></li>
181
- </ul>
182
- </div></blockquote>
183
- <p>上記の例では、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
184
- <p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
185
- <blockquote>
186
- <div><ul class="simple">
187
- <li><code class="docutils literal"><span class="pre">Hello</span></code></li>
188
- <li><code class="docutils literal"><span class="pre">World</span></code></li>
189
- </ul>
190
- </div></blockquote>
191
- <p>上記の例では、<code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
192
- <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
193
- <p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
194
- <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
195
- <p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
196
- </div>
197
- <div class="section" id="built-in-tokenizsers">
198
- <h2>7.8.3. 組み込みトークナイザー<a class="headerlink" href="#built-in-tokenizsers" title="このヘッドラインへのパーマリンク">¶</a></h2>
199
- <p>以下は組み込みのトークナイザーのリストです。</p>
200
- <blockquote>
201
- <div><ul class="simple">
202
- <li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
203
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
204
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
205
- <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
206
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
207
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
208
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></li>
209
- <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></li>
210
- <li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
211
- <li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
212
- <li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
213
- <li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
214
- <li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
215
- <li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
216
- </ul>
217
- </div></blockquote>
218
- <div class="section" id="tokenbigram">
219
- <span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
220
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
221
- <p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <code class="docutils literal"><span class="pre">Hello</span></code> は次のトークンにトークナイズします。</p>
222
- <blockquote>
223
- <div><ul class="simple">
224
- <li><code class="docutils literal"><span class="pre">He</span></code></li>
225
- <li><code class="docutils literal"><span class="pre">el</span></code></li>
226
- <li><code class="docutils literal"><span class="pre">ll</span></code></li>
227
- <li><code class="docutils literal"><span class="pre">lo</span></code></li>
228
- </ul>
229
- </div></blockquote>
230
- <p>バイグラムというトークナイズ方法は再現率に優れています。なぜなら、2文字以上の文字のクエリーに対してはすべてのテキストを見つけることができるからです。</p>
231
- <p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <code class="docutils literal"><span class="pre">l</span></code> というクエリーから <code class="docutils literal"><span class="pre">ll</span></code> というトークンと <code class="docutils literal"><span class="pre">lo</span></code> というトークンを見つけることができます。</p>
232
- <p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">world</span></code> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <code class="docutils literal"><span class="pre">TokenBigram</span></code> はこの問題を解決しています。</p>
233
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動は <a class="reference internal" href="normalizers.html"><span class="doc">ノーマライザー</span></a> を使うかどうかで変わります。</p>
234
- <p>ノーマライザーを使っていない場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
235
- <p>実行例:</p>
236
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot;
237
- # [
238
- # [
239
- # 0,
240
- # 1337566253.89858,
241
- # 0.000355720520019531
242
- # ],
243
- # [
244
- # {
245
- # &quot;position&quot;: 0,
246
- # &quot;force_prefix&quot;: false,
247
- # &quot;value&quot;: &quot;He&quot;
248
- # },
249
- # {
250
- # &quot;position&quot;: 1,
251
- # &quot;force_prefix&quot;: false,
252
- # &quot;value&quot;: &quot;el&quot;
253
- # },
254
- # {
255
- # &quot;position&quot;: 2,
256
- # &quot;force_prefix&quot;: false,
257
- # &quot;value&quot;: &quot;ll&quot;
258
- # },
259
- # {
260
- # &quot;position&quot;: 3,
261
- # &quot;force_prefix&quot;: false,
262
- # &quot;value&quot;: &quot;lo&quot;
263
- # },
264
- # {
265
- # &quot;position&quot;: 4,
266
- # &quot;force_prefix&quot;: false,
267
- # &quot;value&quot;: &quot;o &quot;
268
- # },
269
- # {
270
- # &quot;position&quot;: 5,
271
- # &quot;force_prefix&quot;: false,
272
- # &quot;value&quot;: &quot; W&quot;
273
- # },
274
- # {
275
- # &quot;position&quot;: 6,
276
- # &quot;force_prefix&quot;: false,
277
- # &quot;value&quot;: &quot;Wo&quot;
278
- # },
279
- # {
280
- # &quot;position&quot;: 7,
281
- # &quot;force_prefix&quot;: false,
282
- # &quot;value&quot;: &quot;or&quot;
283
- # },
284
- # {
285
- # &quot;position&quot;: 8,
286
- # &quot;force_prefix&quot;: false,
287
- # &quot;value&quot;: &quot;rl&quot;
288
- # },
289
- # {
290
- # &quot;position&quot;: 9,
291
- # &quot;force_prefix&quot;: false,
292
- # &quot;value&quot;: &quot;ld&quot;
293
- # },
294
- # {
295
- # &quot;position&quot;: 10,
296
- # &quot;force_prefix&quot;: false,
297
- # &quot;value&quot;: &quot;d&quot;
298
- # }
299
- # ]
300
- # ]
301
- </pre></div>
302
- </div>
303
- <p>ノーマライザーを使っている場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
304
- <p>もしかしたら、複数の方法が混ざったこの挙動はわかりにくいかもしれません。しかし、英語のテキスト(ASCII文字列のみ)や日本語テキスト(ASCII文字列と非ASCII文字列が混ざっている)のような多くのユースケースでは合理的な方法です。</p>
305
- <p>ASCII文字しか使わない多くの言語は単語の区切りに空白文字を使っています。このようなケースに空白区切りのトークナイズ方法は適切です。</p>
306
- <p>非ASCII文字を使う言語では単語の区切りに空白文字を使いません。このケースにはバイグラムなトークナイズ方法は適切です。</p>
307
- <p>複数の言語が混ざっている場合は、複数の方法を組み合わせたトークナイズ方法が適切です。</p>
308
- <p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span class="std std-ref">TokenBigramSplitSymbolAlpha</span></a> のような <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> というトークナイザーを参照してください。</p>
309
- <p>例を使いながら <code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動を確認しましょう。</p>
310
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
311
- <p>実行例:</p>
312
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
313
- # [
314
- # [
315
- # 0,
316
- # 1337566253.89858,
317
- # 0.000355720520019531
318
- # ],
319
- # [
320
- # {
321
- # &quot;position&quot;: 0,
322
- # &quot;force_prefix&quot;: false,
323
- # &quot;value&quot;: &quot;hello&quot;
324
- # },
325
- # {
326
- # &quot;position&quot;: 1,
327
- # &quot;force_prefix&quot;: false,
328
- # &quot;value&quot;: &quot;world&quot;
329
- # }
330
- # ]
331
- # ]
332
- </pre></div>
333
- </div>
334
- <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
335
- <blockquote>
336
- <div><ul class="simple">
337
- <li>アルファベット</li>
338
- <li>数字</li>
339
- <li>記号(たとえば <code class="docutils literal"><span class="pre">(</span></code> 、 <code class="docutils literal"><span class="pre">)</span></code> 、 <code class="docutils literal"><span class="pre">!</span></code> など)</li>
340
- <li>ひらがな</li>
341
- <li>カタカナ</li>
342
- <li>漢字</li>
343
- <li>その他</li>
344
- </ul>
345
- </div></blockquote>
346
- <p>次の例は2つのトークン区切りを示しています。</p>
347
- <blockquote>
348
- <div><ul class="simple">
349
- <li><code class="docutils literal"><span class="pre">100</span></code> (数字)と <code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)の間のところ</li>
350
- <li><code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)と <code class="docutils literal"><span class="pre">!!!</span></code> (記号)の間のところ</li>
351
- </ul>
352
- </div></blockquote>
353
- <p>実行例:</p>
354
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;100cents!!!&quot; NormalizerAuto
355
- # [
356
- # [
357
- # 0,
358
- # 1337566253.89858,
359
- # 0.000355720520019531
360
- # ],
361
- # [
362
- # {
363
- # &quot;position&quot;: 0,
364
- # &quot;force_prefix&quot;: false,
365
- # &quot;value&quot;: &quot;100&quot;
366
- # },
367
- # {
368
- # &quot;position&quot;: 1,
369
- # &quot;force_prefix&quot;: false,
370
- # &quot;value&quot;: &quot;cents&quot;
371
- # },
372
- # {
373
- # &quot;position&quot;: 2,
374
- # &quot;force_prefix&quot;: false,
375
- # &quot;value&quot;: &quot;!!!&quot;
376
- # }
377
- # ]
378
- # ]
379
- </pre></div>
380
- </div>
381
- <p>以下は <code class="docutils literal"><span class="pre">TokenBigram</span></code> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
382
- <p>実行例:</p>
383
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
384
- # [
385
- # [
386
- # 0,
387
- # 1337566253.89858,
388
- # 0.000355720520019531
389
- # ],
390
- # [
391
- # {
392
- # &quot;position&quot;: 0,
393
- # &quot;force_prefix&quot;: false,
394
- # &quot;value&quot;: &quot;日本&quot;
395
- # },
396
- # {
397
- # &quot;position&quot;: 1,
398
- # &quot;force_prefix&quot;: false,
399
- # &quot;value&quot;: &quot;本語&quot;
400
- # },
401
- # {
402
- # &quot;position&quot;: 2,
403
- # &quot;force_prefix&quot;: false,
404
- # &quot;value&quot;: &quot;語の&quot;
405
- # },
406
- # {
407
- # &quot;position&quot;: 3,
408
- # &quot;force_prefix&quot;: false,
409
- # &quot;value&quot;: &quot;の勉&quot;
410
- # },
411
- # {
412
- # &quot;position&quot;: 4,
413
- # &quot;force_prefix&quot;: false,
414
- # &quot;value&quot;: &quot;勉強&quot;
415
- # },
416
- # {
417
- # &quot;position&quot;: 5,
418
- # &quot;force_prefix&quot;: false,
419
- # &quot;value&quot;: &quot;強&quot;
420
- # }
421
- # ]
422
- # ]
423
- </pre></div>
424
- </div>
425
- </div>
426
- <div class="section" id="tokenbigramsplitsymbol">
427
- <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
428
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は記号のトークナイズ方法にバイグラムを使います。</p>
429
- <p>実行例:</p>
430
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
431
- # [
432
- # [
433
- # 0,
434
- # 1337566253.89858,
435
- # 0.000355720520019531
436
- # ],
437
- # [
438
- # {
439
- # &quot;position&quot;: 0,
440
- # &quot;force_prefix&quot;: false,
441
- # &quot;value&quot;: &quot;100&quot;
442
- # },
443
- # {
444
- # &quot;position&quot;: 1,
445
- # &quot;force_prefix&quot;: false,
446
- # &quot;value&quot;: &quot;cents&quot;
447
- # },
448
- # {
449
- # &quot;position&quot;: 2,
450
- # &quot;force_prefix&quot;: false,
451
- # &quot;value&quot;: &quot;!!&quot;
452
- # },
453
- # {
454
- # &quot;position&quot;: 3,
455
- # &quot;force_prefix&quot;: false,
456
- # &quot;value&quot;: &quot;!!&quot;
457
- # },
458
- # {
459
- # &quot;position&quot;: 4,
460
- # &quot;force_prefix&quot;: false,
461
- # &quot;value&quot;: &quot;!&quot;
462
- # }
463
- # ]
464
- # ]
465
- </pre></div>
466
- </div>
467
- </div>
468
- <div class="section" id="tokenbigramsplitsymbolalpha">
469
- <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
470
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号とアルファベットの扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
471
- <p>実行例:</p>
472
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
473
- # [
474
- # [
475
- # 0,
476
- # 1337566253.89858,
477
- # 0.000355720520019531
478
- # ],
479
- # [
480
- # {
481
- # &quot;position&quot;: 0,
482
- # &quot;force_prefix&quot;: false,
483
- # &quot;value&quot;: &quot;100&quot;
484
- # },
485
- # {
486
- # &quot;position&quot;: 1,
487
- # &quot;force_prefix&quot;: false,
488
- # &quot;value&quot;: &quot;ce&quot;
489
- # },
490
- # {
491
- # &quot;position&quot;: 2,
492
- # &quot;force_prefix&quot;: false,
493
- # &quot;value&quot;: &quot;en&quot;
494
- # },
495
- # {
496
- # &quot;position&quot;: 3,
497
- # &quot;force_prefix&quot;: false,
498
- # &quot;value&quot;: &quot;nt&quot;
499
- # },
500
- # {
501
- # &quot;position&quot;: 4,
502
- # &quot;force_prefix&quot;: false,
503
- # &quot;value&quot;: &quot;ts&quot;
504
- # },
505
- # {
506
- # &quot;position&quot;: 5,
507
- # &quot;force_prefix&quot;: false,
508
- # &quot;value&quot;: &quot;s!&quot;
509
- # },
510
- # {
511
- # &quot;position&quot;: 6,
512
- # &quot;force_prefix&quot;: false,
513
- # &quot;value&quot;: &quot;!!&quot;
514
- # },
515
- # {
516
- # &quot;position&quot;: 7,
517
- # &quot;force_prefix&quot;: false,
518
- # &quot;value&quot;: &quot;!!&quot;
519
- # },
520
- # {
521
- # &quot;position&quot;: 8,
522
- # &quot;force_prefix&quot;: false,
523
- # &quot;value&quot;: &quot;!&quot;
524
- # }
525
- # ]
526
- # ]
527
- </pre></div>
528
- </div>
529
- </div>
530
- <div class="section" id="tokenbigramsplitsymbolalphadigit">
531
- <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
532
- <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
533
- <p>実行例:</p>
534
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
535
- # [
536
- # [
537
- # 0,
538
- # 1337566253.89858,
539
- # 0.000355720520019531
540
- # ],
541
- # [
542
- # {
543
- # &quot;position&quot;: 0,
544
- # &quot;force_prefix&quot;: false,
545
- # &quot;value&quot;: &quot;10&quot;
546
- # },
547
- # {
548
- # &quot;position&quot;: 1,
549
- # &quot;force_prefix&quot;: false,
550
- # &quot;value&quot;: &quot;00&quot;
551
- # },
552
- # {
553
- # &quot;position&quot;: 2,
554
- # &quot;force_prefix&quot;: false,
555
- # &quot;value&quot;: &quot;0c&quot;
556
- # },
557
- # {
558
- # &quot;position&quot;: 3,
559
- # &quot;force_prefix&quot;: false,
560
- # &quot;value&quot;: &quot;ce&quot;
561
- # },
562
- # {
563
- # &quot;position&quot;: 4,
564
- # &quot;force_prefix&quot;: false,
565
- # &quot;value&quot;: &quot;en&quot;
566
- # },
567
- # {
568
- # &quot;position&quot;: 5,
569
- # &quot;force_prefix&quot;: false,
570
- # &quot;value&quot;: &quot;nt&quot;
571
- # },
572
- # {
573
- # &quot;position&quot;: 6,
574
- # &quot;force_prefix&quot;: false,
575
- # &quot;value&quot;: &quot;ts&quot;
576
- # },
577
- # {
578
- # &quot;position&quot;: 7,
579
- # &quot;force_prefix&quot;: false,
580
- # &quot;value&quot;: &quot;s!&quot;
581
- # },
582
- # {
583
- # &quot;position&quot;: 8,
584
- # &quot;force_prefix&quot;: false,
585
- # &quot;value&quot;: &quot;!!&quot;
586
- # },
587
- # {
588
- # &quot;position&quot;: 9,
589
- # &quot;force_prefix&quot;: false,
590
- # &quot;value&quot;: &quot;!!&quot;
591
- # },
592
- # {
593
- # &quot;position&quot;: 10,
594
- # &quot;force_prefix&quot;: false,
595
- # &quot;value&quot;: &quot;!&quot;
596
- # }
597
- # ]
598
- # ]
599
- </pre></div>
600
- </div>
601
- </div>
602
- <div class="section" id="tokenbigramignoreblank">
603
- <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
604
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは空白文字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
605
- <p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
606
- <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
607
- <p>実行例:</p>
608
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
609
- # [
610
- # [
611
- # 0,
612
- # 1337566253.89858,
613
- # 0.000355720520019531
614
- # ],
615
- # [
616
- # {
617
- # &quot;position&quot;: 0,
618
- # &quot;force_prefix&quot;: false,
619
- # &quot;value&quot;: &quot;日&quot;
620
- # },
621
- # {
622
- # &quot;position&quot;: 1,
623
- # &quot;force_prefix&quot;: false,
624
- # &quot;value&quot;: &quot;本&quot;
625
- # },
626
- # {
627
- # &quot;position&quot;: 2,
628
- # &quot;force_prefix&quot;: false,
629
- # &quot;value&quot;: &quot;語&quot;
630
- # },
631
- # {
632
- # &quot;position&quot;: 3,
633
- # &quot;force_prefix&quot;: false,
634
- # &quot;value&quot;: &quot;!&quot;
635
- # },
636
- # {
637
- # &quot;position&quot;: 4,
638
- # &quot;force_prefix&quot;: false,
639
- # &quot;value&quot;: &quot;!&quot;
640
- # },
641
- # {
642
- # &quot;position&quot;: 5,
643
- # &quot;force_prefix&quot;: false,
644
- # &quot;value&quot;: &quot;!&quot;
645
- # }
646
- # ]
647
- # ]
648
- </pre></div>
649
- </div>
650
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> での実行結果です。</p>
651
- <p>実行例:</p>
652
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
653
- # [
654
- # [
655
- # 0,
656
- # 1337566253.89858,
657
- # 0.000355720520019531
658
- # ],
659
- # [
660
- # {
661
- # &quot;position&quot;: 0,
662
- # &quot;force_prefix&quot;: false,
663
- # &quot;value&quot;: &quot;日本&quot;
664
- # },
665
- # {
666
- # &quot;position&quot;: 1,
667
- # &quot;force_prefix&quot;: false,
668
- # &quot;value&quot;: &quot;本語&quot;
669
- # },
670
- # {
671
- # &quot;position&quot;: 2,
672
- # &quot;force_prefix&quot;: false,
673
- # &quot;value&quot;: &quot;語&quot;
674
- # },
675
- # {
676
- # &quot;position&quot;: 3,
677
- # &quot;force_prefix&quot;: false,
678
- # &quot;value&quot;: &quot;!!!&quot;
679
- # }
680
- # ]
681
- # ]
682
- </pre></div>
683
- </div>
684
- </div>
685
- <div class="section" id="tokenbigramignoreblanksplitsymbol">
686
- <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
687
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
688
- <blockquote>
689
- <div><ul class="simple">
690
- <li>空白文字の扱い</li>
691
- <li>記号の扱い</li>
692
- </ul>
693
- </div></blockquote>
694
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
695
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は記号をバイグラムでトークナイズします。</p>
696
- <p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
697
- <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
698
- <p>実行例:</p>
699
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
700
- # [
701
- # [
702
- # 0,
703
- # 1337566253.89858,
704
- # 0.000355720520019531
705
- # ],
706
- # [
707
- # {
708
- # &quot;position&quot;: 0,
709
- # &quot;force_prefix&quot;: false,
710
- # &quot;value&quot;: &quot;日&quot;
711
- # },
712
- # {
713
- # &quot;position&quot;: 1,
714
- # &quot;force_prefix&quot;: false,
715
- # &quot;value&quot;: &quot;本&quot;
716
- # },
717
- # {
718
- # &quot;position&quot;: 2,
719
- # &quot;force_prefix&quot;: false,
720
- # &quot;value&quot;: &quot;語&quot;
721
- # },
722
- # {
723
- # &quot;position&quot;: 3,
724
- # &quot;force_prefix&quot;: false,
725
- # &quot;value&quot;: &quot;!&quot;
726
- # },
727
- # {
728
- # &quot;position&quot;: 4,
729
- # &quot;force_prefix&quot;: false,
730
- # &quot;value&quot;: &quot;!&quot;
731
- # },
732
- # {
733
- # &quot;position&quot;: 5,
734
- # &quot;force_prefix&quot;: false,
735
- # &quot;value&quot;: &quot;!&quot;
736
- # }
737
- # ]
738
- # ]
739
- </pre></div>
740
- </div>
741
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> の実行結果です。</p>
742
- <p>実行例:</p>
743
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
744
- # [
745
- # [
746
- # 0,
747
- # 1337566253.89858,
748
- # 0.000355720520019531
749
- # ],
750
- # [
751
- # {
752
- # &quot;position&quot;: 0,
753
- # &quot;force_prefix&quot;: false,
754
- # &quot;value&quot;: &quot;日本&quot;
755
- # },
756
- # {
757
- # &quot;position&quot;: 1,
758
- # &quot;force_prefix&quot;: false,
759
- # &quot;value&quot;: &quot;本語&quot;
760
- # },
761
- # {
762
- # &quot;position&quot;: 2,
763
- # &quot;force_prefix&quot;: false,
764
- # &quot;value&quot;: &quot;語!&quot;
765
- # },
766
- # {
767
- # &quot;position&quot;: 3,
768
- # &quot;force_prefix&quot;: false,
769
- # &quot;value&quot;: &quot;!!&quot;
770
- # },
771
- # {
772
- # &quot;position&quot;: 4,
773
- # &quot;force_prefix&quot;: false,
774
- # &quot;value&quot;: &quot;!!&quot;
775
- # },
776
- # {
777
- # &quot;position&quot;: 5,
778
- # &quot;force_prefix&quot;: false,
779
- # &quot;value&quot;: &quot;!&quot;
780
- # }
781
- # ]
782
- # ]
783
- </pre></div>
784
- </div>
785
- </div>
786
- <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
787
- <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
788
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
789
- <blockquote>
790
- <div><ul class="simple">
791
- <li>空白文字の扱い</li>
792
- <li>記号とアルファベットの扱い</li>
793
- </ul>
794
- </div></blockquote>
795
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
796
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は記号とアルファベットをバイグラムでトークナイズします。</p>
797
- <p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
798
- <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
799
- <p>実行例:</p>
800
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
801
- # [
802
- # [
803
- # 0,
804
- # 1337566253.89858,
805
- # 0.000355720520019531
806
- # ],
807
- # [
808
- # {
809
- # &quot;position&quot;: 0,
810
- # &quot;force_prefix&quot;: false,
811
- # &quot;value&quot;: &quot;hello&quot;
812
- # },
813
- # {
814
- # &quot;position&quot;: 1,
815
- # &quot;force_prefix&quot;: false,
816
- # &quot;value&quot;: &quot;日&quot;
817
- # },
818
- # {
819
- # &quot;position&quot;: 2,
820
- # &quot;force_prefix&quot;: false,
821
- # &quot;value&quot;: &quot;本&quot;
822
- # },
823
- # {
824
- # &quot;position&quot;: 3,
825
- # &quot;force_prefix&quot;: false,
826
- # &quot;value&quot;: &quot;語&quot;
827
- # },
828
- # {
829
- # &quot;position&quot;: 4,
830
- # &quot;force_prefix&quot;: false,
831
- # &quot;value&quot;: &quot;!&quot;
832
- # },
833
- # {
834
- # &quot;position&quot;: 5,
835
- # &quot;force_prefix&quot;: false,
836
- # &quot;value&quot;: &quot;!&quot;
837
- # },
838
- # {
839
- # &quot;position&quot;: 6,
840
- # &quot;force_prefix&quot;: false,
841
- # &quot;value&quot;: &quot;!&quot;
842
- # }
843
- # ]
844
- # ]
845
- </pre></div>
846
- </div>
847
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> の実行結果です。</p>
848
- <p>実行例:</p>
849
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
850
- # [
851
- # [
852
- # 0,
853
- # 1337566253.89858,
854
- # 0.000355720520019531
855
- # ],
856
- # [
857
- # {
858
- # &quot;position&quot;: 0,
859
- # &quot;force_prefix&quot;: false,
860
- # &quot;value&quot;: &quot;he&quot;
861
- # },
862
- # {
863
- # &quot;position&quot;: 1,
864
- # &quot;force_prefix&quot;: false,
865
- # &quot;value&quot;: &quot;el&quot;
866
- # },
867
- # {
868
- # &quot;position&quot;: 2,
869
- # &quot;force_prefix&quot;: false,
870
- # &quot;value&quot;: &quot;ll&quot;
871
- # },
872
- # {
873
- # &quot;position&quot;: 3,
874
- # &quot;force_prefix&quot;: false,
875
- # &quot;value&quot;: &quot;lo&quot;
876
- # },
877
- # {
878
- # &quot;position&quot;: 4,
879
- # &quot;force_prefix&quot;: false,
880
- # &quot;value&quot;: &quot;o日&quot;
881
- # },
882
- # {
883
- # &quot;position&quot;: 5,
884
- # &quot;force_prefix&quot;: false,
885
- # &quot;value&quot;: &quot;日本&quot;
886
- # },
887
- # {
888
- # &quot;position&quot;: 6,
889
- # &quot;force_prefix&quot;: false,
890
- # &quot;value&quot;: &quot;本語&quot;
891
- # },
892
- # {
893
- # &quot;position&quot;: 7,
894
- # &quot;force_prefix&quot;: false,
895
- # &quot;value&quot;: &quot;語!&quot;
896
- # },
897
- # {
898
- # &quot;position&quot;: 8,
899
- # &quot;force_prefix&quot;: false,
900
- # &quot;value&quot;: &quot;!!&quot;
901
- # },
902
- # {
903
- # &quot;position&quot;: 9,
904
- # &quot;force_prefix&quot;: false,
905
- # &quot;value&quot;: &quot;!!&quot;
906
- # },
907
- # {
908
- # &quot;position&quot;: 10,
909
- # &quot;force_prefix&quot;: false,
910
- # &quot;value&quot;: &quot;!&quot;
911
- # }
912
- # ]
913
- # ]
914
- </pre></div>
915
- </div>
916
- </div>
917
- <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
918
- <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
919
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
920
- <blockquote>
921
- <div><ul class="simple">
922
- <li>空白文字の扱い</li>
923
- <li>記号とアルファベットと数字の扱い</li>
66
+ <div class="toctree-wrapper compound">
67
+ <ul>
68
+ <li class="toctree-l1"><a class="reference internal" href="tokenizer/summary.html">7.8.1. 概要</a></li>
69
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram.html">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></li>
70
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank.html">7.8.3. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
71
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol.html">7.8.4. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
72
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html">7.8.5. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
73
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html">7.8.6. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
74
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol.html">7.8.7. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
75
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha.html">7.8.8. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
76
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha_digit.html">7.8.9. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
77
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit.html">7.8.10. <code class="docutils literal notranslate"><span class="pre">TokenDelimit</span></code></a></li>
78
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit_null.html">7.8.11. <code class="docutils literal notranslate"><span class="pre">TokenDelimitNull</span></code></a></li>
79
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_mecab.html">7.8.12. <code class="docutils literal notranslate"><span class="pre">TokenMecab</span></code></a></li>
80
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_regexp.html">7.8.13. <code class="docutils literal notranslate"><span class="pre">TokenRegexp</span></code></a></li>
81
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_trigram.html">7.8.14. <code class="docutils literal notranslate"><span class="pre">TokenTrigram</span></code></a></li>
82
+ <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_unigram.html">7.8.15. <code class="docutils literal notranslate"><span class="pre">TokenUnigram</span></code></a></li>
924
83
  </ul>
925
- </div></blockquote>
926
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
927
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
928
- <p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
929
- <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
930
- <p>実行例:</p>
931
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
932
- # [
933
- # [
934
- # 0,
935
- # 1337566253.89858,
936
- # 0.000355720520019531
937
- # ],
938
- # [
939
- # {
940
- # &quot;position&quot;: 0,
941
- # &quot;force_prefix&quot;: false,
942
- # &quot;value&quot;: &quot;hello&quot;
943
- # },
944
- # {
945
- # &quot;position&quot;: 1,
946
- # &quot;force_prefix&quot;: false,
947
- # &quot;value&quot;: &quot;日&quot;
948
- # },
949
- # {
950
- # &quot;position&quot;: 2,
951
- # &quot;force_prefix&quot;: false,
952
- # &quot;value&quot;: &quot;本&quot;
953
- # },
954
- # {
955
- # &quot;position&quot;: 3,
956
- # &quot;force_prefix&quot;: false,
957
- # &quot;value&quot;: &quot;語&quot;
958
- # },
959
- # {
960
- # &quot;position&quot;: 4,
961
- # &quot;force_prefix&quot;: false,
962
- # &quot;value&quot;: &quot;!&quot;
963
- # },
964
- # {
965
- # &quot;position&quot;: 5,
966
- # &quot;force_prefix&quot;: false,
967
- # &quot;value&quot;: &quot;!&quot;
968
- # },
969
- # {
970
- # &quot;position&quot;: 6,
971
- # &quot;force_prefix&quot;: false,
972
- # &quot;value&quot;: &quot;!&quot;
973
- # },
974
- # {
975
- # &quot;position&quot;: 7,
976
- # &quot;force_prefix&quot;: false,
977
- # &quot;value&quot;: &quot;777&quot;
978
- # }
979
- # ]
980
- # ]
981
- </pre></div>
982
- </div>
983
- <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> の実行結果です。</p>
984
- <p>実行例:</p>
985
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
986
- # [
987
- # [
988
- # 0,
989
- # 1337566253.89858,
990
- # 0.000355720520019531
991
- # ],
992
- # [
993
- # {
994
- # &quot;position&quot;: 0,
995
- # &quot;force_prefix&quot;: false,
996
- # &quot;value&quot;: &quot;he&quot;
997
- # },
998
- # {
999
- # &quot;position&quot;: 1,
1000
- # &quot;force_prefix&quot;: false,
1001
- # &quot;value&quot;: &quot;el&quot;
1002
- # },
1003
- # {
1004
- # &quot;position&quot;: 2,
1005
- # &quot;force_prefix&quot;: false,
1006
- # &quot;value&quot;: &quot;ll&quot;
1007
- # },
1008
- # {
1009
- # &quot;position&quot;: 3,
1010
- # &quot;force_prefix&quot;: false,
1011
- # &quot;value&quot;: &quot;lo&quot;
1012
- # },
1013
- # {
1014
- # &quot;position&quot;: 4,
1015
- # &quot;force_prefix&quot;: false,
1016
- # &quot;value&quot;: &quot;o日&quot;
1017
- # },
1018
- # {
1019
- # &quot;position&quot;: 5,
1020
- # &quot;force_prefix&quot;: false,
1021
- # &quot;value&quot;: &quot;日本&quot;
1022
- # },
1023
- # {
1024
- # &quot;position&quot;: 6,
1025
- # &quot;force_prefix&quot;: false,
1026
- # &quot;value&quot;: &quot;本語&quot;
1027
- # },
1028
- # {
1029
- # &quot;position&quot;: 7,
1030
- # &quot;force_prefix&quot;: false,
1031
- # &quot;value&quot;: &quot;語!&quot;
1032
- # },
1033
- # {
1034
- # &quot;position&quot;: 8,
1035
- # &quot;force_prefix&quot;: false,
1036
- # &quot;value&quot;: &quot;!!&quot;
1037
- # },
1038
- # {
1039
- # &quot;position&quot;: 9,
1040
- # &quot;force_prefix&quot;: false,
1041
- # &quot;value&quot;: &quot;!!&quot;
1042
- # },
1043
- # {
1044
- # &quot;position&quot;: 10,
1045
- # &quot;force_prefix&quot;: false,
1046
- # &quot;value&quot;: &quot;!7&quot;
1047
- # },
1048
- # {
1049
- # &quot;position&quot;: 11,
1050
- # &quot;force_prefix&quot;: false,
1051
- # &quot;value&quot;: &quot;77&quot;
1052
- # },
1053
- # {
1054
- # &quot;position&quot;: 12,
1055
- # &quot;force_prefix&quot;: false,
1056
- # &quot;value&quot;: &quot;77&quot;
1057
- # },
1058
- # {
1059
- # &quot;position&quot;: 13,
1060
- # &quot;force_prefix&quot;: false,
1061
- # &quot;value&quot;: &quot;7&quot;
1062
- # }
1063
- # ]
1064
- # ]
1065
- </pre></div>
1066
- </div>
1067
- </div>
1068
- <div class="section" id="tokenunigram">
1069
- <span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
1070
- <p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenUnigram</span></code> は各トークンが1文字です。</p>
1071
- <p>実行例:</p>
1072
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
1073
- # [
1074
- # [
1075
- # 0,
1076
- # 1337566253.89858,
1077
- # 0.000355720520019531
1078
- # ],
1079
- # [
1080
- # {
1081
- # &quot;position&quot;: 0,
1082
- # &quot;force_prefix&quot;: false,
1083
- # &quot;value&quot;: &quot;100&quot;
1084
- # },
1085
- # {
1086
- # &quot;position&quot;: 1,
1087
- # &quot;force_prefix&quot;: false,
1088
- # &quot;value&quot;: &quot;cents&quot;
1089
- # },
1090
- # {
1091
- # &quot;position&quot;: 2,
1092
- # &quot;force_prefix&quot;: false,
1093
- # &quot;value&quot;: &quot;!!!&quot;
1094
- # }
1095
- # ]
1096
- # ]
1097
- </pre></div>
1098
- </div>
1099
- </div>
1100
- <div class="section" id="tokentrigram">
1101
- <span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
1102
- <p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenTrigram</span></code> は各トークンが3文字です。</p>
1103
- <p>実行例:</p>
1104
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1105
- # [
1106
- # [
1107
- # 0,
1108
- # 1337566253.89858,
1109
- # 0.000355720520019531
1110
- # ],
1111
- # [
1112
- # {
1113
- # &quot;position&quot;: 0,
1114
- # &quot;force_prefix&quot;: false,
1115
- # &quot;value&quot;: &quot;10000&quot;
1116
- # },
1117
- # {
1118
- # &quot;position&quot;: 1,
1119
- # &quot;force_prefix&quot;: false,
1120
- # &quot;value&quot;: &quot;cents&quot;
1121
- # },
1122
- # {
1123
- # &quot;position&quot;: 2,
1124
- # &quot;force_prefix&quot;: false,
1125
- # &quot;value&quot;: &quot;!!!!!&quot;
1126
- # }
1127
- # ]
1128
- # ]
1129
- </pre></div>
1130
- </div>
1131
- </div>
1132
- <div class="section" id="tokendelimit">
1133
- <span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
1134
- <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> は1つ以上の空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )で分割してトークンを抽出します。たとえば、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">Hello</span></code> と <code class="docutils literal"><span class="pre">World</span></code> にトークナイズされます。</p>
1135
- <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> はタグテキストに適切です。 <code class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></code> というテキストから <code class="docutils literal"><span class="pre">groonga</span></code> 、 <code class="docutils literal"><span class="pre">full-text-search</span></code> 、 <code class="docutils literal"><span class="pre">http</span></code> を抽出します。</p>
1136
- <p>以下は <code class="docutils literal"><span class="pre">TokenDelimit</span></code> の例です。</p>
1137
- <p>実行例:</p>
1138
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1139
- # [
1140
- # [
1141
- # 0,
1142
- # 1337566253.89858,
1143
- # 0.000355720520019531
1144
- # ],
1145
- # [
1146
- # {
1147
- # &quot;position&quot;: 0,
1148
- # &quot;force_prefix&quot;: false,
1149
- # &quot;value&quot;: &quot;groonga&quot;
1150
- # },
1151
- # {
1152
- # &quot;position&quot;: 1,
1153
- # &quot;force_prefix&quot;: false,
1154
- # &quot;value&quot;: &quot;full-text-search&quot;
1155
- # },
1156
- # {
1157
- # &quot;position&quot;: 2,
1158
- # &quot;force_prefix&quot;: false,
1159
- # &quot;value&quot;: &quot;http&quot;
1160
- # }
1161
- # ]
1162
- # ]
1163
- </pre></div>
1164
- </div>
1165
- </div>
1166
- <div class="section" id="tokendelimitnull">
1167
- <span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
1168
- <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> は <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a> は空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )を使いますが、 <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> はNUL文字( <code class="docutils literal"><span class="pre">U+0000</span></code> )を使います。</p>
1169
- <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> もタグテキストに適切です。</p>
1170
- <p>以下は <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> の例です。</p>
1171
- <p>実行例:</p>
1172
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1173
- # [
1174
- # [
1175
- # 0,
1176
- # 1337566253.89858,
1177
- # 0.000355720520019531
1178
- # ],
1179
- # [
1180
- # {
1181
- # &quot;position&quot;: 0,
1182
- # &quot;force_prefix&quot;: false,
1183
- # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1184
- # }
1185
- # ]
1186
- # ]
1187
- </pre></div>
1188
- </div>
1189
- </div>
1190
- <div class="section" id="tokenmecab">
1191
- <span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
1192
- <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は <a class="reference external" href="https://taku910.github.io/mecab/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
1193
- <p>MeCabは日本語に依存していません。その言語用の辞書を用意すれば日本語以外でもMeCabを使えます。日本語用の辞書には <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST Japanese Dictionary</a> を使えます。</p>
1194
- <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> では <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">東京都</span></code> も <code class="docutils literal"><span class="pre">京都</span></code> も見つかりますが、この場合は <code class="docutils literal"><span class="pre">東京都</span></code> は期待した結果ではありません。 <code class="docutils literal"><span class="pre">TokenMecab</span></code> を使うと <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">京都</span></code> だけを見つけられます。</p>
1195
- <p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
1196
- <p>以下は <code class="docutils literal"><span class="pre">TokenMeCab</span></code> の例です。 <code class="docutils literal"><span class="pre">東京都</span></code> は <code class="docutils literal"><span class="pre">東京</span></code> と <code class="docutils literal"><span class="pre">都</span></code> にトークナイズされています。 <code class="docutils literal"><span class="pre">京都</span></code> というトークンはありません。</p>
1197
- <p>実行例:</p>
1198
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenMecab &quot;東京都&quot;
1199
- # [
1200
- # [
1201
- # -22,
1202
- # 1337566253.89858,
1203
- # 0.000355720520019531,
1204
- # &quot;[tokenize] nonexistent tokenizer: &lt;TokenMecab&gt;&quot;,
1205
- # [
1206
- # [
1207
- # &quot;create_lexicon_for_tokenize&quot;,
1208
- # &quot;proc_tokenize.c&quot;,
1209
- # 139
1210
- # ]
1211
- # ]
1212
- # ]
1213
- # ]
1214
- </pre></div>
1215
- </div>
1216
- </div>
1217
- <div class="section" id="tokenregexp">
1218
- <span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
1219
- <div class="versionadded">
1220
- <p><span class="versionmodified">バージョン 5.0.1 で追加.</span></p>
1221
- </div>
1222
- <div class="admonition caution">
1223
- <p class="first admonition-title">ご用心</p>
1224
- <p class="last">このトークナイザーは実験的です。仕様が変わる可能性があります。</p>
1225
- </div>
1226
- <div class="admonition caution">
1227
- <p class="first admonition-title">ご用心</p>
1228
- <p class="last">このトークナイザーはUTF-8でしか使えません。EUC-JPやShift_JISなどと一緒には使えません。</p>
1229
- </div>
1230
- <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
1231
- <p>一般的に、正規表現検索は逐次検索で実行します。しかし、次のケースはインデックスを使って検索できます。</p>
1232
- <blockquote>
1233
- <div><ul class="simple">
1234
- <li><code class="docutils literal"><span class="pre">hello</span></code> のようにリテラルしかないケース</li>
1235
- <li><code class="docutils literal"><span class="pre">\A/home/alice</span></code> のようにテキストの最初でのマッチとリテラルのみのケース</li>
1236
- <li><code class="docutils literal"><span class="pre">\.txt\z</span></code> のようにテキストの最後でのマッチとリテラルのみのケース</li>
1237
- </ul>
1238
- </div></blockquote>
1239
- <p>多くのケースでは、逐次検索よりもインデックスを使った検索の方が高速です。</p>
1240
- <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はベースはバイグラムを使います。 <code class="docutils literal"><span class="pre">TokenRegexp</span></code> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <code class="docutils literal"><span class="pre">U+FFEF</span></code> )を入れ、テキストの最後にテキストの最後であるというマーク( <code class="docutils literal"><span class="pre">U+FFF0</span></code> )を入れます。</p>
1241
- <p>実行例:</p>
1242
- <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1243
- # [
1244
- # [
1245
- # 0,
1246
- # 1337566253.89858,
1247
- # 0.000355720520019531
1248
- # ],
1249
- # [
1250
- # {
1251
- # &quot;position&quot;: 0,
1252
- # &quot;force_prefix&quot;: false,
1253
- # &quot;value&quot;: &quot;￯&quot;
1254
- # },
1255
- # {
1256
- # &quot;position&quot;: 1,
1257
- # &quot;force_prefix&quot;: false,
1258
- # &quot;value&quot;: &quot;/h&quot;
1259
- # },
1260
- # {
1261
- # &quot;position&quot;: 2,
1262
- # &quot;force_prefix&quot;: false,
1263
- # &quot;value&quot;: &quot;ho&quot;
1264
- # },
1265
- # {
1266
- # &quot;position&quot;: 3,
1267
- # &quot;force_prefix&quot;: false,
1268
- # &quot;value&quot;: &quot;om&quot;
1269
- # },
1270
- # {
1271
- # &quot;position&quot;: 4,
1272
- # &quot;force_prefix&quot;: false,
1273
- # &quot;value&quot;: &quot;me&quot;
1274
- # },
1275
- # {
1276
- # &quot;position&quot;: 5,
1277
- # &quot;force_prefix&quot;: false,
1278
- # &quot;value&quot;: &quot;e/&quot;
1279
- # },
1280
- # {
1281
- # &quot;position&quot;: 6,
1282
- # &quot;force_prefix&quot;: false,
1283
- # &quot;value&quot;: &quot;/a&quot;
1284
- # },
1285
- # {
1286
- # &quot;position&quot;: 7,
1287
- # &quot;force_prefix&quot;: false,
1288
- # &quot;value&quot;: &quot;al&quot;
1289
- # },
1290
- # {
1291
- # &quot;position&quot;: 8,
1292
- # &quot;force_prefix&quot;: false,
1293
- # &quot;value&quot;: &quot;li&quot;
1294
- # },
1295
- # {
1296
- # &quot;position&quot;: 9,
1297
- # &quot;force_prefix&quot;: false,
1298
- # &quot;value&quot;: &quot;ic&quot;
1299
- # },
1300
- # {
1301
- # &quot;position&quot;: 10,
1302
- # &quot;force_prefix&quot;: false,
1303
- # &quot;value&quot;: &quot;ce&quot;
1304
- # },
1305
- # {
1306
- # &quot;position&quot;: 11,
1307
- # &quot;force_prefix&quot;: false,
1308
- # &quot;value&quot;: &quot;e/&quot;
1309
- # },
1310
- # {
1311
- # &quot;position&quot;: 12,
1312
- # &quot;force_prefix&quot;: false,
1313
- # &quot;value&quot;: &quot;/t&quot;
1314
- # },
1315
- # {
1316
- # &quot;position&quot;: 13,
1317
- # &quot;force_prefix&quot;: false,
1318
- # &quot;value&quot;: &quot;te&quot;
1319
- # },
1320
- # {
1321
- # &quot;position&quot;: 14,
1322
- # &quot;force_prefix&quot;: false,
1323
- # &quot;value&quot;: &quot;es&quot;
1324
- # },
1325
- # {
1326
- # &quot;position&quot;: 15,
1327
- # &quot;force_prefix&quot;: false,
1328
- # &quot;value&quot;: &quot;st&quot;
1329
- # },
1330
- # {
1331
- # &quot;position&quot;: 16,
1332
- # &quot;force_prefix&quot;: false,
1333
- # &quot;value&quot;: &quot;t.&quot;
1334
- # },
1335
- # {
1336
- # &quot;position&quot;: 17,
1337
- # &quot;force_prefix&quot;: false,
1338
- # &quot;value&quot;: &quot;.t&quot;
1339
- # },
1340
- # {
1341
- # &quot;position&quot;: 18,
1342
- # &quot;force_prefix&quot;: false,
1343
- # &quot;value&quot;: &quot;tx&quot;
1344
- # },
1345
- # {
1346
- # &quot;position&quot;: 19,
1347
- # &quot;force_prefix&quot;: false,
1348
- # &quot;value&quot;: &quot;xt&quot;
1349
- # },
1350
- # {
1351
- # &quot;position&quot;: 20,
1352
- # &quot;force_prefix&quot;: false,
1353
- # &quot;value&quot;: &quot;t&quot;
1354
- # },
1355
- # {
1356
- # &quot;position&quot;: 21,
1357
- # &quot;force_prefix&quot;: false,
1358
- # &quot;value&quot;: &quot;￰&quot;
1359
- # }
1360
- # ]
1361
- # ]
1362
- </pre></div>
1363
- </div>
1364
- </div>
1365
84
  </div>
1366
85
  </div>
1367
86
 
@@ -1371,46 +90,20 @@
1371
90
  </div>
1372
91
  <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1373
92
  <div class="sphinxsidebarwrapper">
1374
- <h3><a href="../index.html">目次</a></h3>
1375
- <ul>
1376
- <li><a class="reference internal" href="#">7.8. トークナイザー</a><ul>
1377
- <li><a class="reference internal" href="#summary">7.8.1. 概要</a></li>
1378
- <li><a class="reference internal" href="#what-is-tokenize">7.8.2. 「トークナイズ」とはなにか</a></li>
1379
- <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. 組み込みトークナイザー</a><ul>
1380
- <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
1381
- <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
1382
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
1383
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
1384
- <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
1385
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
1386
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
1387
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
1388
- <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
1389
- <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
1390
- <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
1391
- <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
1392
- <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
1393
- <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
1394
- </ul>
1395
- </li>
1396
- </ul>
1397
- </li>
1398
- </ul>
1399
-
1400
93
  <h4>前のトピックへ</h4>
1401
- <p class="topless"><a href="normalizers.html"
1402
- title="前の章へ">7.7. ノーマライザー</a></p>
94
+ <p class="topless"><a href="normalizers/normalizer_nfkc51.html"
95
+ title="前の章へ">7.7.2.3. <code class="docutils literal notranslate"><span class="pre">NormalizerNFKC51</span></code></a></p>
1403
96
  <h4>次のトピックへ</h4>
1404
- <p class="topless"><a href="token_filters.html"
1405
- title="次の章へ">7.9. トークンフィルター</a></p>
97
+ <p class="topless"><a href="tokenizer/summary.html"
98
+ title="次の章へ">7.8.1. 概要</a></p>
1406
99
  <div id="searchbox" style="display: none" role="search">
1407
100
  <h3>クイック検索</h3>
101
+ <div class="searchformwrapper">
1408
102
  <form class="search" action="../search.html" method="get">
1409
- <div><input type="text" name="q" /></div>
1410
- <div><input type="submit" value="検索" /></div>
1411
- <input type="hidden" name="check_keywords" value="yes" />
1412
- <input type="hidden" name="area" value="default" />
103
+ <input type="text" name="q" />
104
+ <input type="submit" value="検索" />
1413
105
  </form>
106
+ </div>
1414
107
  </div>
1415
108
  <script type="text/javascript">$('#searchbox').show(0);</script>
1416
109
  </div>
@@ -1424,17 +117,17 @@
1424
117
  <a href="../genindex.html" title="総合索引"
1425
118
  >索引</a></li>
1426
119
  <li class="right" >
1427
- <a href="token_filters.html" title="7.9. トークンフィルター"
120
+ <a href="tokenizer/summary.html" title="7.8.1. 概要"
1428
121
  >次へ</a> |</li>
1429
122
  <li class="right" >
1430
- <a href="normalizers.html" title="7.7. ノーマライザー"
123
+ <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
1431
124
  >前へ</a> |</li>
1432
- <li class="nav-item nav-item-0"><a href="../index.html">Groonga v7.1.0-73-g6d02cfaドキュメント</a> &#187;</li>
125
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2ドキュメント</a> &#187;</li>
1433
126
  <li class="nav-item nav-item-1"><a href="../reference.html" >7. リファレンスマニュアル</a> &#187;</li>
1434
127
  </ul>
1435
128
  </div>
1436
129
  <div class="footer" role="contentinfo">
1437
- &#169; Copyright 2009-2018, Brazil, Inc.
130
+ &#169; Copyright 2009-2019, Brazil, Inc.
1438
131
  </div>
1439
132
  </body>
1440
133
  </html>