rroonga 5.0.4-x86-mingw32 → 5.0.5-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (751) hide show
  1. checksums.yaml +8 -8
  2. data/README.md +2 -2
  3. data/example/measure-data-column-disk-usage.rb +124 -0
  4. data/example/measure-index-column-disk-usage.rb +81 -0
  5. data/example/measure-table-disk-usage.rb +100 -0
  6. data/ext/groonga/rb-grn-database.c +31 -0
  7. data/ext/groonga/rb-grn-double-array-trie.c +1 -8
  8. data/ext/groonga/rb-grn-logger.c +45 -0
  9. data/ext/groonga/rb-grn-object.c +29 -1
  10. data/ext/groonga/rb-grn-patricia-trie.c +1 -8
  11. data/ext/groonga/rb-grn-table-cursor.c +8 -3
  12. data/ext/groonga/rb-grn-table.c +10 -5
  13. data/ext/groonga/rb-grn-thread.c +160 -0
  14. data/ext/groonga/rb-grn-windows-event-logger.c +79 -0
  15. data/ext/groonga/rb-grn.h +3 -1
  16. data/ext/groonga/rb-groonga.c +3 -1
  17. data/lib/1.9/groonga.so +0 -0
  18. data/lib/2.0/groonga.so +0 -0
  19. data/lib/2.1/groonga.so +0 -0
  20. data/lib/2.2/groonga.so +0 -0
  21. data/lib/groonga/dumper.rb +6 -1
  22. data/rroonga-build.rb +4 -4
  23. data/test/groonga-test-utils.rb +5 -1
  24. data/test/test-database.rb +11 -0
  25. data/test/test-logger.rb +6 -0
  26. data/test/test-operator.rb +6 -6
  27. data/test/test-procedure.rb +15 -0
  28. data/test/test-table-dumper.rb +170 -1
  29. data/test/test-thread.rb +42 -0
  30. data/test/test-windows-event-logger.rb +28 -0
  31. data/vendor/local/bin/grndb.exe +0 -0
  32. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  33. data/vendor/local/bin/groonga.exe +0 -0
  34. data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
  35. data/vendor/local/bin/libgroonga-0.dll +0 -0
  36. data/vendor/local/bin/libmecab-1.dll +0 -0
  37. data/vendor/local/bin/libmsgpack-4.dll +0 -0
  38. data/vendor/local/bin/libmsgpackc-2.dll +0 -0
  39. data/vendor/local/bin/libonig-5.dll +0 -0
  40. data/vendor/local/bin/libstdc++-6.dll +0 -0
  41. data/vendor/local/bin/libwinpthread-1.dll +0 -0
  42. data/vendor/local/bin/lz4.exe +0 -0
  43. data/vendor/local/bin/lz4c.exe +0 -0
  44. data/vendor/local/bin/lz4cat +0 -0
  45. data/vendor/local/bin/mecab-config +2 -2
  46. data/vendor/local/bin/mecab.exe +0 -0
  47. data/vendor/local/bin/onig-config +1 -1
  48. data/vendor/local/bin/zlib1.dll +0 -0
  49. data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
  50. data/vendor/local/etc/groonga/windows_event_log/provider.man +38 -0
  51. data/vendor/local/include/groonga/groonga.h +2 -0
  52. data/vendor/local/include/groonga/groonga/command.h +2 -0
  53. data/vendor/local/include/groonga/groonga/groonga.h +5 -0
  54. data/vendor/local/include/groonga/groonga/obj.h +1 -0
  55. data/vendor/local/include/groonga/groonga/portability.h +16 -0
  56. data/vendor/local/include/groonga/groonga/thread.h +42 -0
  57. data/vendor/local/include/groonga/groonga/windows_event_logger.h +33 -0
  58. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  59. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  60. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  61. data/vendor/local/lib/groonga/plugins/functions/vector.la +2 -2
  62. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  63. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  64. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  65. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
  66. data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
  67. data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
  68. data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
  69. data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
  70. data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
  71. data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
  72. data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
  73. data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
  74. data/vendor/local/lib/groonga/plugins/sharding.rb +5 -0
  75. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +43 -6
  76. data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +32 -25
  77. data/vendor/local/lib/groonga/plugins/sharding/logical_parameters.rb +44 -0
  78. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +217 -49
  79. data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +507 -45
  80. data/vendor/local/lib/groonga/plugins/sharding/logical_shard_list.rb +28 -0
  81. data/vendor/local/lib/groonga/plugins/sharding/logical_table_remove.rb +11 -6
  82. data/vendor/local/lib/groonga/plugins/sharding/parameters.rb +10 -0
  83. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  84. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  85. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  86. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
  87. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  88. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  89. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  90. data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
  91. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  92. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  93. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  94. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
  95. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  96. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  97. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  98. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
  99. data/vendor/local/lib/groonga/scripts/ruby/command.rb +31 -1
  100. data/vendor/local/lib/groonga/scripts/ruby/context.rb +18 -2
  101. data/vendor/local/lib/groonga/scripts/ruby/database.rb +12 -4
  102. data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +31 -28
  103. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +1 -0
  104. data/vendor/local/lib/groonga/scripts/ruby/logger/level.rb +4 -2
  105. data/vendor/local/lib/groonga/scripts/ruby/query_logger.rb +9 -0
  106. data/vendor/local/lib/groonga/scripts/ruby/query_logger/flag.rb +39 -0
  107. data/vendor/local/lib/groonga/scripts/ruby/record.rb +12 -0
  108. data/vendor/local/lib/groonga/scripts/ruby/table.rb +35 -1
  109. data/vendor/local/lib/libgroonga.a +0 -0
  110. data/vendor/local/lib/libgroonga.dll.a +0 -0
  111. data/vendor/local/lib/libgroonga.la +2 -2
  112. data/vendor/local/lib/liblz4.dll +0 -0
  113. data/vendor/local/lib/liblz4.dll.1 +0 -0
  114. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  115. data/vendor/local/lib/libmecab.a +0 -0
  116. data/vendor/local/lib/libmecab.dll.a +0 -0
  117. data/vendor/local/lib/libmecab.la +2 -2
  118. data/vendor/local/lib/libmsgpack.a +0 -0
  119. data/vendor/local/lib/libmsgpack.dll.a +0 -0
  120. data/vendor/local/lib/libmsgpack.la +2 -2
  121. data/vendor/local/lib/libmsgpackc.a +0 -0
  122. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  123. data/vendor/local/lib/libmsgpackc.la +2 -2
  124. data/vendor/local/lib/libonig.a +0 -0
  125. data/vendor/local/lib/libonig.dll.a +0 -0
  126. data/vendor/local/lib/libonig.la +2 -2
  127. data/vendor/local/lib/libz.a +0 -0
  128. data/vendor/local/lib/libz.dll.a +0 -0
  129. data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
  130. data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
  131. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  132. data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
  133. data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
  134. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  135. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  136. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  137. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  138. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  139. data/vendor/local/sbin/groonga-httpd-restart +1 -1
  140. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  141. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  142. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development.txt +3 -2
  143. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build.txt +19 -0
  144. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_autotools.txt +101 -0
  145. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_cmake.txt +94 -0
  146. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/windows_cmake.txt +93 -0
  147. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +16 -7
  148. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/repository.txt +7 -3
  149. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/test.txt +4 -0
  150. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  151. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +4 -4
  152. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +3 -3
  153. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  154. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +4 -4
  155. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  156. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -4
  157. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  158. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +319 -0
  159. data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +1 -0
  160. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +1 -1
  161. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_db.txt +23 -0
  162. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_thread.txt +122 -0
  163. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
  164. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_copy.txt +381 -0
  165. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
  166. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -1
  167. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/database_unmap.txt +85 -0
  168. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/io_flush.txt +218 -9
  169. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +1 -3
  170. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_level.txt +1 -1
  171. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +3 -1
  172. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_parameters.txt +138 -0
  173. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +97 -10
  174. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_select.txt +745 -23
  175. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_shard_list.txt +107 -0
  176. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +3 -1
  177. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +2 -3
  178. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalizer_list.txt +1 -2
  179. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_exist.txt +90 -0
  180. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +1 -1
  181. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +1 -1
  182. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +1 -1
  183. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +1 -3
  184. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
  185. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
  186. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +240 -56
  187. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +33 -7
  188. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_rename.txt +90 -0
  189. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +2 -1
  190. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/thread_limit.txt +110 -0
  191. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +2 -1
  192. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenizer_list.txt +1 -3
  193. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -3
  194. data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-httpd.txt +3 -4
  195. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +0 -1
  196. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +0 -1
  197. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +2 -2
  198. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/snippet_html.txt +1 -1
  199. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +1 -1
  200. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +34 -14
  201. data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -2
  202. data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders/tsv.txt +1 -1
  203. data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +3 -0
  204. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +2 -0
  205. data/vendor/local/share/doc/groonga/en/html/_sources/reference/sharding.txt +108 -0
  206. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +0 -21
  207. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tuning.txt +1 -1
  208. data/vendor/local/share/doc/groonga/en/html/_sources/spec/search.txt +1 -1
  209. data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +4 -4
  210. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/introduction.txt +24 -18
  211. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/match_columns.txt +19 -19
  212. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/micro_blog.txt +9 -9
  213. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/query_expansion.txt +1 -1
  214. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +68 -6
  215. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +27 -2
  216. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  217. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  218. data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
  219. data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +10308 -0
  220. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -9404
  221. data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
  222. data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
  223. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
  224. data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +999 -0
  225. data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +31 -1415
  226. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  227. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  228. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
  229. data/vendor/local/share/doc/groonga/en/html/characteristic.html +19 -17
  230. data/vendor/local/share/doc/groonga/en/html/client.html +19 -17
  231. data/vendor/local/share/doc/groonga/en/html/community.html +19 -17
  232. data/vendor/local/share/doc/groonga/en/html/contribution.html +78 -70
  233. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +30 -27
  234. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +146 -0
  235. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +237 -0
  236. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +227 -0
  237. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +231 -0
  238. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +37 -35
  239. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +54 -52
  240. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +80 -78
  241. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +135 -122
  242. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +38 -34
  243. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +58 -54
  244. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +21 -19
  245. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +27 -25
  246. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +23 -21
  247. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +30 -28
  248. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +21 -19
  249. data/vendor/local/share/doc/groonga/en/html/development.html +19 -17
  250. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +30 -28
  251. data/vendor/local/share/doc/groonga/en/html/genindex.html +48 -20
  252. data/vendor/local/share/doc/groonga/en/html/index.html +123 -105
  253. data/vendor/local/share/doc/groonga/en/html/install.html +33 -31
  254. data/vendor/local/share/doc/groonga/en/html/install/centos.html +32 -30
  255. data/vendor/local/share/doc/groonga/en/html/install/debian.html +31 -29
  256. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +29 -27
  257. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +26 -24
  258. data/vendor/local/share/doc/groonga/en/html/install/others.html +92 -90
  259. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +26 -24
  260. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +29 -28
  261. data/vendor/local/share/doc/groonga/en/html/install/windows.html +34 -32
  262. data/vendor/local/share/doc/groonga/en/html/limitations.html +19 -17
  263. data/vendor/local/share/doc/groonga/en/html/news.html +509 -142
  264. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +19 -17
  265. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +20 -18
  266. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +19 -17
  267. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +34 -32
  268. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +29 -27
  269. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +110 -108
  270. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +73 -71
  271. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +111 -109
  272. data/vendor/local/share/doc/groonga/en/html/news/senna.html +19 -17
  273. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  274. data/vendor/local/share/doc/groonga/en/html/reference.html +111 -94
  275. data/vendor/local/share/doc/groonga/en/html/reference/api.html +55 -52
  276. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +51 -49
  277. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +63 -61
  278. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +84 -82
  279. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +46 -44
  280. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +41 -39
  281. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +89 -87
  282. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +88 -50
  283. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +48 -46
  284. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +83 -81
  285. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +46 -44
  286. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +48 -46
  287. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +46 -44
  288. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +45 -43
  289. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +45 -43
  290. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +44 -42
  291. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +93 -91
  292. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +48 -46
  293. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +43 -41
  294. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +79 -77
  295. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +69 -67
  296. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +296 -0
  297. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +45 -43
  298. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +38 -36
  299. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +51 -49
  300. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +60 -58
  301. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +19 -17
  302. data/vendor/local/share/doc/groonga/en/html/reference/column.html +21 -19
  303. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +23 -21
  304. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +28 -26
  305. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +23 -21
  306. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +72 -70
  307. data/vendor/local/share/doc/groonga/en/html/reference/command.html +70 -61
  308. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +23 -21
  309. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +59 -57
  310. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +28 -26
  311. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +100 -98
  312. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -42
  313. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +49 -47
  314. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +33 -31
  315. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +796 -0
  316. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +62 -60
  317. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +96 -94
  318. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +46 -44
  319. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +67 -64
  320. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +236 -0
  321. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +62 -60
  322. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +46 -44
  323. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +59 -57
  324. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +63 -61
  325. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +281 -54
  326. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +66 -64
  327. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +53 -52
  328. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +48 -46
  329. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +48 -46
  330. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +48 -46
  331. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +78 -75
  332. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +283 -0
  333. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +160 -85
  334. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +2071 -83
  335. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +287 -0
  336. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +71 -68
  337. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +86 -84
  338. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +52 -50
  339. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +227 -0
  340. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +57 -55
  341. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +53 -51
  342. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +43 -41
  343. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +43 -41
  344. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +56 -54
  345. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +75 -74
  346. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +59 -57
  347. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +59 -57
  348. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +898 -647
  349. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +43 -41
  350. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +50 -48
  351. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +87 -85
  352. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +175 -152
  353. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +55 -53
  354. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +46 -44
  355. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +327 -0
  356. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +77 -75
  357. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +241 -0
  358. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +108 -106
  359. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +56 -51
  360. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +56 -55
  361. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +24 -22
  362. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +41 -39
  363. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +28 -26
  364. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +33 -31
  365. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +83 -81
  366. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +29 -27
  367. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +23 -21
  368. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +48 -46
  369. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +40 -38
  370. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +84 -82
  371. data/vendor/local/share/doc/groonga/en/html/reference/function.html +22 -20
  372. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +47 -45
  373. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +25 -23
  374. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +85 -83
  375. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +36 -34
  376. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +26 -24
  377. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +66 -64
  378. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +54 -52
  379. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +35 -33
  380. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +34 -32
  381. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +23 -21
  382. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +55 -53
  383. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +24 -22
  384. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +49 -47
  385. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +45 -43
  386. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +33 -31
  387. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +29 -27
  388. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +244 -242
  389. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +327 -325
  390. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -29
  391. data/vendor/local/share/doc/groonga/en/html/reference/log.html +43 -41
  392. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +49 -47
  393. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +21 -19
  394. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +23 -21
  395. data/vendor/local/share/doc/groonga/en/html/reference/output.html +36 -34
  396. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +21 -19
  397. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +62 -60
  398. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +107 -103
  399. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +50 -40
  400. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +27 -25
  401. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +41 -31
  402. data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +19 -17
  403. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +241 -0
  404. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +21 -19
  405. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +30 -28
  406. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +23 -21
  407. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +23 -21
  408. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +23 -21
  409. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +56 -54
  410. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +41 -39
  411. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +341 -289
  412. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +68 -66
  413. data/vendor/local/share/doc/groonga/en/html/reference/types.html +43 -41
  414. data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
  415. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  416. data/vendor/local/share/doc/groonga/en/html/server.html +19 -17
  417. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +22 -20
  418. data/vendor/local/share/doc/groonga/en/html/server/http.html +21 -19
  419. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +37 -35
  420. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +23 -21
  421. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +23 -21
  422. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +21 -19
  423. data/vendor/local/share/doc/groonga/en/html/server/package.html +39 -37
  424. data/vendor/local/share/doc/groonga/en/html/spec.html +23 -21
  425. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +129 -127
  426. data/vendor/local/share/doc/groonga/en/html/spec/search.html +22 -20
  427. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +19 -17
  428. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +25 -23
  429. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -20
  430. data/vendor/local/share/doc/groonga/en/html/tutorial.html +20 -18
  431. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +29 -23
  432. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +35 -33
  433. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +25 -23
  434. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +46 -39
  435. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +21 -19
  436. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +65 -63
  437. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +55 -53
  438. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +25 -23
  439. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +22 -20
  440. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +25 -23
  441. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +37 -35
  442. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  443. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development.txt +3 -2
  444. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build.txt +19 -0
  445. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_autotools.txt +101 -0
  446. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_cmake.txt +94 -0
  447. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/windows_cmake.txt +93 -0
  448. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +16 -7
  449. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/repository.txt +7 -3
  450. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/test.txt +4 -0
  451. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  452. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +4 -4
  453. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +3 -3
  454. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  455. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +4 -4
  456. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  457. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -4
  458. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  459. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +319 -0
  460. data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +1 -0
  461. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +1 -1
  462. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_db.txt +23 -0
  463. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_thread.txt +122 -0
  464. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
  465. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_copy.txt +381 -0
  466. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
  467. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -1
  468. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/database_unmap.txt +85 -0
  469. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/io_flush.txt +218 -9
  470. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +1 -3
  471. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_level.txt +1 -1
  472. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +3 -1
  473. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_parameters.txt +138 -0
  474. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +97 -10
  475. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_select.txt +745 -23
  476. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_shard_list.txt +107 -0
  477. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +3 -1
  478. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +2 -3
  479. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalizer_list.txt +1 -2
  480. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_exist.txt +90 -0
  481. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +1 -1
  482. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +1 -1
  483. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +1 -1
  484. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +1 -3
  485. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
  486. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
  487. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +240 -56
  488. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +33 -7
  489. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_rename.txt +90 -0
  490. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +2 -1
  491. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/thread_limit.txt +110 -0
  492. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +2 -1
  493. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenizer_list.txt +1 -3
  494. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -3
  495. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-httpd.txt +3 -4
  496. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +0 -1
  497. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +0 -1
  498. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +2 -2
  499. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/snippet_html.txt +1 -1
  500. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +1 -1
  501. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +34 -14
  502. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -2
  503. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders/tsv.txt +1 -1
  504. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +3 -0
  505. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +2 -0
  506. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/sharding.txt +108 -0
  507. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +0 -21
  508. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tuning.txt +1 -1
  509. data/vendor/local/share/doc/groonga/ja/html/_sources/spec/search.txt +1 -1
  510. data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +4 -4
  511. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/introduction.txt +24 -18
  512. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/match_columns.txt +19 -19
  513. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/micro_blog.txt +9 -9
  514. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/query_expansion.txt +1 -1
  515. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +68 -6
  516. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +27 -2
  517. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  518. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  519. data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
  520. data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +10308 -0
  521. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -9404
  522. data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
  523. data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
  524. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
  525. data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +999 -0
  526. data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +31 -1415
  527. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  528. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  529. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
  530. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +19 -17
  531. data/vendor/local/share/doc/groonga/ja/html/client.html +19 -17
  532. data/vendor/local/share/doc/groonga/ja/html/community.html +19 -17
  533. data/vendor/local/share/doc/groonga/ja/html/contribution.html +77 -69
  534. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +30 -27
  535. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +144 -0
  536. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +226 -0
  537. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +215 -0
  538. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +229 -0
  539. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +36 -34
  540. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +53 -51
  541. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +79 -77
  542. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +134 -121
  543. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +29 -27
  544. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +57 -53
  545. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +21 -19
  546. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +23 -21
  547. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +23 -21
  548. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +30 -28
  549. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +21 -19
  550. data/vendor/local/share/doc/groonga/ja/html/development.html +19 -17
  551. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +25 -23
  552. data/vendor/local/share/doc/groonga/ja/html/genindex.html +48 -20
  553. data/vendor/local/share/doc/groonga/ja/html/index.html +122 -104
  554. data/vendor/local/share/doc/groonga/ja/html/install.html +33 -31
  555. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +35 -33
  556. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +33 -31
  557. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +30 -28
  558. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +26 -24
  559. data/vendor/local/share/doc/groonga/ja/html/install/others.html +83 -81
  560. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +25 -23
  561. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +30 -29
  562. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +33 -31
  563. data/vendor/local/share/doc/groonga/ja/html/limitations.html +19 -17
  564. data/vendor/local/share/doc/groonga/ja/html/news.html +460 -126
  565. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +19 -17
  566. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +20 -18
  567. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +19 -17
  568. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +34 -32
  569. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +29 -27
  570. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +102 -100
  571. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +66 -64
  572. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +91 -89
  573. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +19 -17
  574. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  575. data/vendor/local/share/doc/groonga/ja/html/reference.html +111 -94
  576. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +55 -52
  577. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +51 -49
  578. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +58 -56
  579. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +84 -82
  580. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +46 -44
  581. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +41 -39
  582. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +85 -83
  583. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +88 -50
  584. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +48 -46
  585. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +78 -76
  586. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +46 -44
  587. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +48 -46
  588. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +46 -44
  589. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +45 -43
  590. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +45 -43
  591. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +44 -42
  592. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +93 -91
  593. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +48 -46
  594. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +43 -41
  595. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +79 -77
  596. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +69 -67
  597. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +286 -0
  598. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +45 -43
  599. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +38 -36
  600. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -46
  601. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +60 -58
  602. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +19 -17
  603. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +21 -19
  604. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +23 -21
  605. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -26
  606. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +23 -21
  607. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +61 -59
  608. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +70 -61
  609. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +23 -21
  610. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +49 -47
  611. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +28 -26
  612. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +99 -97
  613. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +41 -39
  614. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +49 -47
  615. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +33 -31
  616. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +781 -0
  617. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +62 -60
  618. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +88 -86
  619. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +46 -44
  620. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +66 -63
  621. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +229 -0
  622. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +62 -60
  623. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +46 -44
  624. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +58 -56
  625. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +63 -61
  626. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +266 -54
  627. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +59 -57
  628. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +53 -52
  629. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +48 -46
  630. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +48 -46
  631. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +48 -46
  632. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +78 -75
  633. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +276 -0
  634. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +158 -85
  635. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +2008 -80
  636. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +285 -0
  637. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +71 -68
  638. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +82 -79
  639. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +52 -50
  640. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +220 -0
  641. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +53 -51
  642. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +51 -49
  643. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +43 -41
  644. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +43 -41
  645. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +52 -50
  646. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +68 -67
  647. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +59 -57
  648. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +59 -57
  649. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +680 -448
  650. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +43 -41
  651. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +48 -46
  652. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +84 -82
  653. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +146 -126
  654. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +55 -53
  655. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +46 -44
  656. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +322 -0
  657. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +73 -70
  658. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +229 -0
  659. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +94 -91
  660. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +56 -51
  661. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +56 -55
  662. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +24 -22
  663. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +39 -37
  664. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +28 -26
  665. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +33 -31
  666. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +73 -72
  667. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +29 -27
  668. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +23 -21
  669. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +48 -46
  670. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +40 -38
  671. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +84 -82
  672. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +22 -20
  673. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +46 -44
  674. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +25 -23
  675. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +69 -67
  676. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +36 -34
  677. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +26 -24
  678. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +56 -54
  679. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +45 -43
  680. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +34 -32
  681. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +35 -33
  682. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +23 -21
  683. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +48 -46
  684. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +24 -22
  685. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +35 -33
  686. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +37 -35
  687. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +33 -31
  688. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +26 -24
  689. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +143 -141
  690. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +236 -234
  691. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -28
  692. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +43 -41
  693. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +39 -37
  694. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +21 -19
  695. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +23 -21
  696. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +32 -30
  697. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +21 -19
  698. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +42 -39
  699. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +91 -88
  700. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +50 -40
  701. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +27 -25
  702. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +41 -31
  703. data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +19 -17
  704. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +223 -0
  705. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +21 -19
  706. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +27 -25
  707. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +23 -21
  708. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +23 -21
  709. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +23 -21
  710. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +46 -44
  711. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +38 -36
  712. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +303 -243
  713. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +62 -60
  714. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +42 -40
  715. data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
  716. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  717. data/vendor/local/share/doc/groonga/ja/html/server.html +19 -17
  718. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +21 -19
  719. data/vendor/local/share/doc/groonga/ja/html/server/http.html +21 -19
  720. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +31 -29
  721. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +23 -21
  722. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +23 -21
  723. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +21 -19
  724. data/vendor/local/share/doc/groonga/ja/html/server/package.html +38 -36
  725. data/vendor/local/share/doc/groonga/ja/html/spec.html +23 -21
  726. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +128 -126
  727. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +22 -20
  728. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +19 -17
  729. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +25 -23
  730. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -20
  731. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +20 -18
  732. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +29 -23
  733. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +34 -32
  734. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -20
  735. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +46 -39
  736. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +21 -19
  737. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +66 -64
  738. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +52 -50
  739. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +25 -23
  740. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +22 -20
  741. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +25 -23
  742. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +35 -33
  743. data/vendor/local/share/license/groonga/README.md +6 -0
  744. data/vendor/local/share/license/mruby/AUTHORS +1 -0
  745. data/vendor/local/share/license/mruby/MITL +1 -1
  746. data/vendor/local/share/license/mruby/README.md +6 -5
  747. data/vendor/local/share/license/msgpack/README +219 -0
  748. data/vendor/local/share/man/ja/man1/groonga.1 +23512 -15126
  749. data/vendor/local/share/man/man1/groonga.1 +26542 -17745
  750. metadata +77 -3
  751. data/vendor/local/share/license/msgpack/AUTHORS +0 -0
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.9. Token filters &mdash; Groonga v5.0.4-139-g6629adb documentation</title>
10
+ <title>7.9. Token filters &mdash; Groonga v5.0.6-226-gd7da7e7 documentation</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.4-139-g6629adb',
18
+ VERSION: '5.0.6-226-gd7da7e7',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -25,12 +25,12 @@
25
25
  <script type="text/javascript" src="../_static/underscore.js"></script>
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
28
- <link rel="top" title="Groonga v5.0.4-139-g6629adb documentation" href="../index.html" />
28
+ <link rel="top" title="Groonga v5.0.6-226-gd7da7e7 documentation" href="../index.html" />
29
29
  <link rel="up" title="7. Reference manual" href="../reference.html" />
30
30
  <link rel="next" title="7.10. Query expanders" href="query_expanders.html" />
31
31
  <link rel="prev" title="7.8. Tokenizers" href="tokenizers.html" />
32
32
  </head>
33
- <body>
33
+ <body role="document">
34
34
  <div class="header">
35
35
  <h1 class="title">
36
36
  <a id="top-link" href="../index.html">
@@ -48,7 +48,7 @@
48
48
  </div>
49
49
 
50
50
 
51
- <div class="related">
51
+ <div class="related" role="navigation" aria-label="related navigation">
52
52
  <h3>Navigation</h3>
53
53
  <ul>
54
54
  <li class="right" style="margin-right: 10px">
@@ -60,15 +60,15 @@
60
60
  <li class="right" >
61
61
  <a href="tokenizers.html" title="7.8. Tokenizers"
62
62
  accesskey="P">previous</a> |</li>
63
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adb documentation</a> &raquo;</li>
64
- <li><a href="../reference.html" accesskey="U">7. Reference manual</a> &raquo;</li>
63
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> &raquo;</li>
64
+ <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. Reference manual</a> &raquo;</li>
65
65
  </ul>
66
66
  </div>
67
67
 
68
68
  <div class="document">
69
69
  <div class="documentwrapper">
70
70
  <div class="bodywrapper">
71
- <div class="body">
71
+ <div class="body" role="main">
72
72
 
73
73
  <div class="section" id="token-filters">
74
74
  <h1>7.9. Token filters<a class="headerlink" href="#token-filters" title="Permalink to this headline">¶</a></h1>
@@ -78,9 +78,9 @@
78
78
  <p>Token filter module can be added as a plugin.</p>
79
79
  <p>You can customize tokenized tokens by registering your token filter plugins to Groonga.</p>
80
80
  <p>A table can have zero or more token filters. You can attach token
81
- filters to a table by <a class="reference internal" href="commands/table_create.html#table-create-token-filters"><em>token_filters</em></a> option in
81
+ filters to a table by <a class="reference internal" href="commands/table_create.html#table-create-token-filters"><span>token_filters</span></a> option in
82
82
  <a class="reference internal" href="commands/table_create.html"><em>table_create</em></a>.</p>
83
- <p>Here is an example <tt class="docutils literal"><span class="pre">table_create</span></tt> that uses <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt>
83
+ <p>Here is an example <code class="docutils literal"><span class="pre">table_create</span></code> that uses <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code>
84
84
  token filter module:</p>
85
85
  <p>Execution example:</p>
86
86
  <div class="highlight-none"><div class="highlight"><pre>register token_filters/stop_word
@@ -97,17 +97,17 @@ table_create Terms TABLE_PAT_KEY ShortText \
97
97
  <h2>7.9.2. Available token filters<a class="headerlink" href="#available-token-filters" title="Permalink to this headline">¶</a></h2>
98
98
  <p>Here is the list of available token filters:</p>
99
99
  <ul class="simple">
100
- <li><tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt></li>
101
- <li><tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt></li>
100
+ <li><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code></li>
101
+ <li><code class="docutils literal"><span class="pre">TokenFilterStem</span></code></li>
102
102
  </ul>
103
103
  <div class="section" id="tokenfilterstopword">
104
- <span id="token-filter-stop-word"></span><h3>7.9.2.1. <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt><a class="headerlink" href="#tokenfilterstopword" title="Permalink to this headline">¶</a></h3>
105
- <p><tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> removes stop words from tokenized token
104
+ <span id="token-filter-stop-word"></span><h3>7.9.2.1. <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code><a class="headerlink" href="#tokenfilterstopword" title="Permalink to this headline">¶</a></h3>
105
+ <p><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> removes stop words from tokenized token
106
106
  in searching the documents.</p>
107
- <p><tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> can specify stop word after adding the
107
+ <p><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> can specify stop word after adding the
108
108
  documents because it removes token in searching the documents.</p>
109
- <p>The stop word is specified <tt class="docutils literal"><span class="pre">is_stop_word</span></tt> column on lexicon table.</p>
110
- <p>Here is an example that uses <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> token filter:</p>
109
+ <p>The stop word is specified <code class="docutils literal"><span class="pre">is_stop_word</span></code> column on lexicon table.</p>
110
+ <p>Here is an example that uses <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> token filter:</p>
111
111
  <p>Execution example:</p>
112
112
  <div class="highlight-none"><div class="highlight"><pre>register token_filters/stop_word
113
113
  # [[0, 1337566253.89858, 0.000355720520019531], true]
@@ -171,14 +171,14 @@ select Memos --match_columns content --query &quot;Hello and&quot;
171
171
  # ]
172
172
  </pre></div>
173
173
  </div>
174
- <p><tt class="docutils literal"><span class="pre">and</span></tt> token is marked as stop word in <tt class="docutils literal"><span class="pre">Terms</span></tt> table.</p>
175
- <p><tt class="docutils literal"><span class="pre">&quot;Hello&quot;</span></tt> that doesn't have <tt class="docutils literal"><span class="pre">and</span></tt> in content is matched. Because
176
- <tt class="docutils literal"><span class="pre">and</span></tt> is a stop word and <tt class="docutils literal"><span class="pre">and</span></tt> is removed from query.</p>
174
+ <p><code class="docutils literal"><span class="pre">and</span></code> token is marked as stop word in <code class="docutils literal"><span class="pre">Terms</span></code> table.</p>
175
+ <p><code class="docutils literal"><span class="pre">&quot;Hello&quot;</span></code> that doesn't have <code class="docutils literal"><span class="pre">and</span></code> in content is matched. Because
176
+ <code class="docutils literal"><span class="pre">and</span></code> is a stop word and <code class="docutils literal"><span class="pre">and</span></code> is removed from query.</p>
177
177
  </div>
178
178
  <div class="section" id="tokenfilterstem">
179
- <span id="token-filter-stem"></span><h3>7.9.2.2. <tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt><a class="headerlink" href="#tokenfilterstem" title="Permalink to this headline">¶</a></h3>
180
- <p><tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt> stems tokenized token.</p>
181
- <p>Here is an example that uses <tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt> token filter:</p>
179
+ <span id="token-filter-stem"></span><h3>7.9.2.2. <code class="docutils literal"><span class="pre">TokenFilterStem</span></code><a class="headerlink" href="#tokenfilterstem" title="Permalink to this headline">¶</a></h3>
180
+ <p><code class="docutils literal"><span class="pre">TokenFilterStem</span></code> stems tokenized token.</p>
181
+ <p>Here is an example that uses <code class="docutils literal"><span class="pre">TokenFilterStem</span></code> token filter:</p>
182
182
  <p>Execution example:</p>
183
183
  <div class="highlight-none"><div class="highlight"><pre>register token_filters/stem
184
184
  # [[0, 1337566253.89858, 0.000355720520019531], true]
@@ -239,9 +239,9 @@ select Memos --match_columns content --query &quot;develops&quot;
239
239
  # ]
240
240
  </pre></div>
241
241
  </div>
242
- <p>All of <tt class="docutils literal"><span class="pre">develop</span></tt>, <tt class="docutils literal"><span class="pre">developing</span></tt>, <tt class="docutils literal"><span class="pre">developed</span></tt> and <tt class="docutils literal"><span class="pre">develops</span></tt>
243
- tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</span></tt>. So we can find <tt class="docutils literal"><span class="pre">develop</span></tt>,
244
- <tt class="docutils literal"><span class="pre">developing</span></tt> and <tt class="docutils literal"><span class="pre">developed</span></tt> by <tt class="docutils literal"><span class="pre">develops</span></tt> query.</p>
242
+ <p>All of <code class="docutils literal"><span class="pre">develop</span></code>, <code class="docutils literal"><span class="pre">developing</span></code>, <code class="docutils literal"><span class="pre">developed</span></code> and <code class="docutils literal"><span class="pre">develops</span></code>
243
+ tokens are stemmed as <code class="docutils literal"><span class="pre">develop</span></code>. So we can find <code class="docutils literal"><span class="pre">develop</span></code>,
244
+ <code class="docutils literal"><span class="pre">developing</span></code> and <code class="docutils literal"><span class="pre">developed</span></code> by <code class="docutils literal"><span class="pre">develops</span></code> query.</p>
245
245
  </div>
246
246
  </div>
247
247
  <div class="section" id="see-also">
@@ -256,15 +256,15 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
256
256
  </div>
257
257
  </div>
258
258
  </div>
259
- <div class="sphinxsidebar">
259
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
260
260
  <div class="sphinxsidebarwrapper">
261
261
  <h3><a href="../index.html">Table Of Contents</a></h3>
262
262
  <ul>
263
263
  <li><a class="reference internal" href="#">7.9. Token filters</a><ul>
264
264
  <li><a class="reference internal" href="#summary">7.9.1. Summary</a></li>
265
265
  <li><a class="reference internal" href="#available-token-filters">7.9.2. Available token filters</a><ul>
266
- <li><a class="reference internal" href="#tokenfilterstopword">7.9.2.1. <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt></a></li>
267
- <li><a class="reference internal" href="#tokenfilterstem">7.9.2.2. <tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt></a></li>
266
+ <li><a class="reference internal" href="#tokenfilterstopword">7.9.2.1. <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code></a></li>
267
+ <li><a class="reference internal" href="#tokenfilterstem">7.9.2.2. <code class="docutils literal"><span class="pre">TokenFilterStem</span></code></a></li>
268
268
  </ul>
269
269
  </li>
270
270
  <li><a class="reference internal" href="#see-also">7.9.3. See also</a></li>
@@ -278,12 +278,14 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
278
278
  <h4>Next topic</h4>
279
279
  <p class="topless"><a href="query_expanders.html"
280
280
  title="next chapter">7.10. Query expanders</a></p>
281
- <h3>This Page</h3>
282
- <ul class="this-page-menu">
283
- <li><a href="../_sources/reference/token_filters.txt"
284
- rel="nofollow">Show Source</a></li>
285
- </ul>
286
- <div id="searchbox" style="display: none">
281
+ <div role="note" aria-label="source link">
282
+ <h3>This Page</h3>
283
+ <ul class="this-page-menu">
284
+ <li><a href="../_sources/reference/token_filters.txt"
285
+ rel="nofollow">Show Source</a></li>
286
+ </ul>
287
+ </div>
288
+ <div id="searchbox" style="display: none" role="search">
287
289
  <h3>Quick search</h3>
288
290
  <form class="search" action="../search.html" method="get">
289
291
  <input type="text" name="q" />
@@ -300,7 +302,7 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
300
302
  </div>
301
303
  <div class="clearer"></div>
302
304
  </div>
303
- <div class="related">
305
+ <div class="related" role="navigation" aria-label="related navigation">
304
306
  <h3>Navigation</h3>
305
307
  <ul>
306
308
  <li class="right" style="margin-right: 10px">
@@ -312,11 +314,11 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
312
314
  <li class="right" >
313
315
  <a href="tokenizers.html" title="7.8. Tokenizers"
314
316
  >previous</a> |</li>
315
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adb documentation</a> &raquo;</li>
316
- <li><a href="../reference.html" >7. Reference manual</a> &raquo;</li>
317
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> &raquo;</li>
318
+ <li class="nav-item nav-item-1"><a href="../reference.html" >7. Reference manual</a> &raquo;</li>
317
319
  </ul>
318
320
  </div>
319
- <div class="footer">
321
+ <div class="footer" role="contentinfo">
320
322
  &copy; Copyright 2009-2015, Brazil, Inc.
321
323
  </div>
322
324
  </body>
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.8. Tokenizers &mdash; Groonga v5.0.4-139-g6629adb documentation</title>
10
+ <title>7.8. Tokenizers &mdash; Groonga v5.0.6-226-gd7da7e7 documentation</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.4-139-g6629adb',
18
+ VERSION: '5.0.6-226-gd7da7e7',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -25,12 +25,12 @@
25
25
  <script type="text/javascript" src="../_static/underscore.js"></script>
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
28
- <link rel="top" title="Groonga v5.0.4-139-g6629adb documentation" href="../index.html" />
28
+ <link rel="top" title="Groonga v5.0.6-226-gd7da7e7 documentation" href="../index.html" />
29
29
  <link rel="up" title="7. Reference manual" href="../reference.html" />
30
30
  <link rel="next" title="7.9. Token filters" href="token_filters.html" />
31
31
  <link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
32
32
  </head>
33
- <body>
33
+ <body role="document">
34
34
  <div class="header">
35
35
  <h1 class="title">
36
36
  <a id="top-link" href="../index.html">
@@ -48,7 +48,7 @@
48
48
  </div>
49
49
 
50
50
 
51
- <div class="related">
51
+ <div class="related" role="navigation" aria-label="related navigation">
52
52
  <h3>Navigation</h3>
53
53
  <ul>
54
54
  <li class="right" style="margin-right: 10px">
@@ -60,15 +60,15 @@
60
60
  <li class="right" >
61
61
  <a href="normalizers.html" title="7.7. Normalizers"
62
62
  accesskey="P">previous</a> |</li>
63
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adb documentation</a> &raquo;</li>
64
- <li><a href="../reference.html" accesskey="U">7. Reference manual</a> &raquo;</li>
63
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> &raquo;</li>
64
+ <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. Reference manual</a> &raquo;</li>
65
65
  </ul>
66
66
  </div>
67
67
 
68
68
  <div class="document">
69
69
  <div class="documentwrapper">
70
70
  <div class="bodywrapper">
71
- <div class="body">
71
+ <div class="body" role="main">
72
72
 
73
73
  <div class="section" id="tokenizers">
74
74
  <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
@@ -79,15 +79,15 @@ the following cases:</p>
79
79
  <blockquote>
80
80
  <div><ul>
81
81
  <li><p class="first">Indexing text</p>
82
- <div class="figure align-center">
82
+ <div class="figure align-center" id="id1">
83
83
  <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
84
- <p class="caption">Tokenizer is used when indexing text.</p>
84
+ <p class="caption"><span class="caption-text">Tokenizer is used when indexing text.</span></p>
85
85
  </div>
86
86
  </li>
87
87
  <li><p class="first">Searching by query</p>
88
- <div class="figure align-center">
88
+ <div class="figure align-center" id="id2">
89
89
  <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
90
- <p class="caption">Tokenizer is used when searching by query.</p>
90
+ <p class="caption"><span class="caption-text">Tokenizer is used when searching by query.</span></p>
91
91
  </div>
92
92
  </li>
93
93
  </ul>
@@ -95,12 +95,12 @@ the following cases:</p>
95
95
  <p>Tokenizer is an important module for full-text search. You can change
96
96
  trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
97
97
  tokenizer.</p>
98
- <p>Normally, <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> is a suitable tokenizer. If you don't
98
+ <p>Normally, <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> is a suitable tokenizer. If you don't
99
99
  know much about tokenizer, it's recommended that you choose
100
- <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>.</p>
100
+ <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>.</p>
101
101
  <p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> and
102
102
  <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a>. Here is an example to
103
- try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> tokenizer by
103
+ try <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> tokenizer by
104
104
  <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a>:</p>
105
105
  <p>Execution example:</p>
106
106
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
@@ -113,46 +113,57 @@ try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>
113
113
  # [
114
114
  # {
115
115
  # &quot;position&quot;: 0,
116
+ # &quot;force_prefix&quot;: false,
116
117
  # &quot;value&quot;: &quot;He&quot;
117
118
  # },
118
119
  # {
119
120
  # &quot;position&quot;: 1,
121
+ # &quot;force_prefix&quot;: false,
120
122
  # &quot;value&quot;: &quot;el&quot;
121
123
  # },
122
124
  # {
123
125
  # &quot;position&quot;: 2,
126
+ # &quot;force_prefix&quot;: false,
124
127
  # &quot;value&quot;: &quot;ll&quot;
125
128
  # },
126
129
  # {
127
130
  # &quot;position&quot;: 3,
131
+ # &quot;force_prefix&quot;: false,
128
132
  # &quot;value&quot;: &quot;lo&quot;
129
133
  # },
130
134
  # {
131
135
  # &quot;position&quot;: 4,
136
+ # &quot;force_prefix&quot;: false,
132
137
  # &quot;value&quot;: &quot;o &quot;
133
138
  # },
134
139
  # {
135
140
  # &quot;position&quot;: 5,
141
+ # &quot;force_prefix&quot;: false,
136
142
  # &quot;value&quot;: &quot; W&quot;
137
143
  # },
138
144
  # {
139
145
  # &quot;position&quot;: 6,
146
+ # &quot;force_prefix&quot;: false,
140
147
  # &quot;value&quot;: &quot;Wo&quot;
141
148
  # },
142
149
  # {
143
150
  # &quot;position&quot;: 7,
151
+ # &quot;force_prefix&quot;: false,
144
152
  # &quot;value&quot;: &quot;or&quot;
145
153
  # },
146
154
  # {
147
155
  # &quot;position&quot;: 8,
156
+ # &quot;force_prefix&quot;: false,
148
157
  # &quot;value&quot;: &quot;rl&quot;
149
158
  # },
150
159
  # {
151
160
  # &quot;position&quot;: 9,
161
+ # &quot;force_prefix&quot;: false,
152
162
  # &quot;value&quot;: &quot;ld&quot;
153
163
  # },
154
164
  # {
155
165
  # &quot;position&quot;: 10,
166
+ # &quot;force_prefix&quot;: false,
156
167
  # &quot;value&quot;: &quot;d&quot;
157
168
  # }
158
169
  # ]
@@ -164,86 +175,86 @@ try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>
164
175
  <h2>7.8.2. What is &quot;tokenize&quot;?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
165
176
  <p>&quot;tokenize&quot; is the process that extracts zero or more tokens from a
166
177
  text. There are some &quot;tokenize&quot; methods.</p>
167
- <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
178
+ <p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
168
179
  bigram tokenize method:</p>
169
180
  <blockquote>
170
181
  <div><ul class="simple">
171
- <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
172
- <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
173
- <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
174
- <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
175
- <li><tt class="docutils literal"><span class="pre">o_</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
176
- <li><tt class="docutils literal"><span class="pre">_W</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
177
- <li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
178
- <li><tt class="docutils literal"><span class="pre">or</span></tt></li>
179
- <li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
180
- <li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
182
+ <li><code class="docutils literal"><span class="pre">He</span></code></li>
183
+ <li><code class="docutils literal"><span class="pre">el</span></code></li>
184
+ <li><code class="docutils literal"><span class="pre">ll</span></code></li>
185
+ <li><code class="docutils literal"><span class="pre">lo</span></code></li>
186
+ <li><code class="docutils literal"><span class="pre">o_</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
187
+ <li><code class="docutils literal"><span class="pre">_W</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
188
+ <li><code class="docutils literal"><span class="pre">Wo</span></code></li>
189
+ <li><code class="docutils literal"><span class="pre">or</span></code></li>
190
+ <li><code class="docutils literal"><span class="pre">rl</span></code></li>
191
+ <li><code class="docutils literal"><span class="pre">ld</span></code></li>
181
192
  </ul>
182
193
  </div></blockquote>
183
- <p>In the above example, 10 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
184
- <span class="pre">World</span></tt>.</p>
185
- <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
194
+ <p>In the above example, 10 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
195
+ <span class="pre">World</span></code>.</p>
196
+ <p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
186
197
  white-space-separate tokenize method:</p>
187
198
  <blockquote>
188
199
  <div><ul class="simple">
189
- <li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
190
- <li><tt class="docutils literal"><span class="pre">World</span></tt></li>
200
+ <li><code class="docutils literal"><span class="pre">Hello</span></code></li>
201
+ <li><code class="docutils literal"><span class="pre">World</span></code></li>
191
202
  </ul>
192
203
  </div></blockquote>
193
- <p>In the above example, 2 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
194
- <span class="pre">World</span></tt>.</p>
204
+ <p>In the above example, 2 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
205
+ <span class="pre">World</span></code>.</p>
195
206
  <p>Token is used as search key. You can find indexed documents only by
196
207
  tokens that are extracted by used tokenize method. For example, you
197
- can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with bigram tokenize method but you
198
- can't find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with white-space-separate tokenize
208
+ can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with bigram tokenize method but you
209
+ can't find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with white-space-separate tokenize
199
210
  method. Because white-space-separate tokenize method doesn't extract
200
- <tt class="docutils literal"><span class="pre">ll</span></tt> token. It just extracts <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt> tokens.</p>
211
+ <code class="docutils literal"><span class="pre">ll</span></code> token. It just extracts <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code> tokens.</p>
201
212
  <p>In general, tokenize method that generates small tokens increases
202
213
  recall but decreases precision. Tokenize method that generates large
203
214
  tokens increases precision but decreases recall.</p>
204
- <p>For example, we can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> and <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with
205
- bigram tokenize method. <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is a noise for people who
215
+ <p>For example, we can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> and <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with
216
+ bigram tokenize method. <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is a noise for people who
206
217
  want to search &quot;logical and&quot;. It means that precision is
207
218
  decreased. But recall is increased.</p>
208
- <p>We can find only <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with white-space-separate
209
- tokenize method. Because <tt class="docutils literal"><span class="pre">World</span></tt> is tokenized to one token <tt class="docutils literal"><span class="pre">World</span></tt>
219
+ <p>We can find only <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with white-space-separate
220
+ tokenize method. Because <code class="docutils literal"><span class="pre">World</span></code> is tokenized to one token <code class="docutils literal"><span class="pre">World</span></code>
210
221
  with white-space-separate tokenize method. It means that precision is
211
222
  increased for people who want to search &quot;logical and&quot;. But recall is
212
- decreased because <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> that contains <tt class="docutils literal"><span class="pre">or</span></tt> isn't found.</p>
223
+ decreased because <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> that contains <code class="docutils literal"><span class="pre">or</span></code> isn't found.</p>
213
224
  </div>
214
225
  <div class="section" id="built-in-tokenizsers">
215
226
  <h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
216
227
  <p>Here is a list of built-in tokenizers:</p>
217
228
  <blockquote>
218
229
  <div><ul class="simple">
219
- <li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
220
- <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
221
- <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
222
- <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
223
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
224
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
225
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></tt></li>
226
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></tt></li>
227
- <li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
228
- <li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
229
- <li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
230
- <li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
231
- <li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
232
- <li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
230
+ <li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
231
+ <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
232
+ <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
233
+ <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
234
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
235
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
236
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></code></li>
237
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></code></li>
238
+ <li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
239
+ <li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
240
+ <li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
241
+ <li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
242
+ <li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
243
+ <li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
233
244
  </ul>
234
245
  </div></blockquote>
235
246
  <div class="section" id="tokenbigram">
236
- <span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
237
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> is a bigram based tokenizer. It's recommended to use
247
+ <span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
248
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> is a bigram based tokenizer. It's recommended to use
238
249
  this tokenizer for most cases.</p>
239
250
  <p>Bigram tokenize method tokenizes a text to two adjacent characters
240
- tokens. For example, <tt class="docutils literal"><span class="pre">Hello</span></tt> is tokenized to the following tokens:</p>
251
+ tokens. For example, <code class="docutils literal"><span class="pre">Hello</span></code> is tokenized to the following tokens:</p>
241
252
  <blockquote>
242
253
  <div><ul class="simple">
243
- <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
244
- <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
245
- <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
246
- <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
254
+ <li><code class="docutils literal"><span class="pre">He</span></code></li>
255
+ <li><code class="docutils literal"><span class="pre">el</span></code></li>
256
+ <li><code class="docutils literal"><span class="pre">ll</span></code></li>
257
+ <li><code class="docutils literal"><span class="pre">lo</span></code></li>
247
258
  </ul>
248
259
  </div></blockquote>
249
260
  <p>Bigram tokenize method is good for recall because you can find all
@@ -252,15 +263,15 @@ texts by query consists of two or more characters.</p>
252
263
  character because one character token doesn't exist. But you can find
253
264
  all texts by a query that consists of one character in Groonga, because
254
265
  Groonga finds tokens that start with the query by predictive search. For
255
- example, Groonga can find <tt class="docutils literal"><span class="pre">ll</span></tt> and <tt class="docutils literal"><span class="pre">lo</span></tt> tokens by <tt class="docutils literal"><span class="pre">l</span></tt> query.</p>
266
+ example, Groonga can find <code class="docutils literal"><span class="pre">ll</span></code> and <code class="docutils literal"><span class="pre">lo</span></code> tokens by <code class="docutils literal"><span class="pre">l</span></code> query.</p>
256
267
  <p>Bigram tokenize method isn't good for precision because you can find
257
- texts that include the query within a word. For example, you can find <tt class="docutils literal"><span class="pre">world</span></tt>
258
- by <tt class="docutils literal"><span class="pre">or</span></tt>. This is more sensitive for ASCII only languages rather than
259
- non-ASCII languages. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> has solution for this problem
268
+ texts that include the query within a word. For example, you can find <code class="docutils literal"><span class="pre">world</span></code>
269
+ by <code class="docutils literal"><span class="pre">or</span></code>. This is more sensitive for ASCII only languages rather than
270
+ non-ASCII languages. <code class="docutils literal"><span class="pre">TokenBigram</span></code> has solution for this problem
260
271
  described below.</p>
261
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior is different when it is used with any
272
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior is different when it is used with any
262
273
  <a class="reference internal" href="normalizers.html"><em>Normalizers</em></a>.</p>
263
- <p>If no normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses pure bigram (all tokens
274
+ <p>If no normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses pure bigram (all tokens
264
275
  except the last token have two characters) tokenize method:</p>
265
276
  <p>Execution example:</p>
266
277
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
@@ -273,54 +284,65 @@ except the last token have two characters) tokenize method:</p>
273
284
  # [
274
285
  # {
275
286
  # &quot;position&quot;: 0,
287
+ # &quot;force_prefix&quot;: false,
276
288
  # &quot;value&quot;: &quot;He&quot;
277
289
  # },
278
290
  # {
279
291
  # &quot;position&quot;: 1,
292
+ # &quot;force_prefix&quot;: false,
280
293
  # &quot;value&quot;: &quot;el&quot;
281
294
  # },
282
295
  # {
283
296
  # &quot;position&quot;: 2,
297
+ # &quot;force_prefix&quot;: false,
284
298
  # &quot;value&quot;: &quot;ll&quot;
285
299
  # },
286
300
  # {
287
301
  # &quot;position&quot;: 3,
302
+ # &quot;force_prefix&quot;: false,
288
303
  # &quot;value&quot;: &quot;lo&quot;
289
304
  # },
290
305
  # {
291
306
  # &quot;position&quot;: 4,
307
+ # &quot;force_prefix&quot;: false,
292
308
  # &quot;value&quot;: &quot;o &quot;
293
309
  # },
294
310
  # {
295
311
  # &quot;position&quot;: 5,
312
+ # &quot;force_prefix&quot;: false,
296
313
  # &quot;value&quot;: &quot; W&quot;
297
314
  # },
298
315
  # {
299
316
  # &quot;position&quot;: 6,
317
+ # &quot;force_prefix&quot;: false,
300
318
  # &quot;value&quot;: &quot;Wo&quot;
301
319
  # },
302
320
  # {
303
321
  # &quot;position&quot;: 7,
322
+ # &quot;force_prefix&quot;: false,
304
323
  # &quot;value&quot;: &quot;or&quot;
305
324
  # },
306
325
  # {
307
326
  # &quot;position&quot;: 8,
327
+ # &quot;force_prefix&quot;: false,
308
328
  # &quot;value&quot;: &quot;rl&quot;
309
329
  # },
310
330
  # {
311
331
  # &quot;position&quot;: 9,
332
+ # &quot;force_prefix&quot;: false,
312
333
  # &quot;value&quot;: &quot;ld&quot;
313
334
  # },
314
335
  # {
315
336
  # &quot;position&quot;: 10,
337
+ # &quot;force_prefix&quot;: false,
316
338
  # &quot;value&quot;: &quot;d&quot;
317
339
  # }
318
340
  # ]
319
341
  # ]
320
342
  </pre></div>
321
343
  </div>
322
- <p>If normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses white-space-separate like
323
- tokenize method for ASCII characters. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram
344
+ <p>If normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses white-space-separate like
345
+ tokenize method for ASCII characters. <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram
324
346
  tokenize method for non-ASCII characters.</p>
325
347
  <p>You may be confused by this combined behavior. But it's reasonable
326
348
  for most use cases such as English text (only ASCII characters) and
@@ -332,10 +354,10 @@ the case.</p>
332
354
  word separator. Bigram tokenize method is suitable for the case.</p>
333
355
  <p>Mixed tokenize method is suitable for mixed language case.</p>
334
356
  <p>If you want to use bigram tokenize method for ASCII character, see
335
- <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> type tokenizers such as
336
- <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a>.</p>
337
- <p>Let's confirm <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior by example.</p>
338
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses one or more white-spaces as token delimiter for
357
+ <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> type tokenizers such as
358
+ <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span>TokenBigramSplitSymbolAlpha</span></a>.</p>
359
+ <p>Let's confirm <code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior by example.</p>
360
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses one or more white-spaces as token delimiter for
339
361
  ASCII characters:</p>
340
362
  <p>Execution example:</p>
341
363
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
@@ -348,23 +370,25 @@ ASCII characters:</p>
348
370
  # [
349
371
  # {
350
372
  # &quot;position&quot;: 0,
373
+ # &quot;force_prefix&quot;: false,
351
374
  # &quot;value&quot;: &quot;hello&quot;
352
375
  # },
353
376
  # {
354
377
  # &quot;position&quot;: 1,
378
+ # &quot;force_prefix&quot;: false,
355
379
  # &quot;value&quot;: &quot;world&quot;
356
380
  # }
357
381
  # ]
358
382
  # ]
359
383
  </pre></div>
360
384
  </div>
361
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses character type change as token delimiter for
385
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses character type change as token delimiter for
362
386
  ASCII characters. A character type is one of the following:</p>
363
387
  <blockquote>
364
388
  <div><ul class="simple">
365
389
  <li>Alphabet</li>
366
390
  <li>Digit</li>
367
- <li>Symbol (such as <tt class="docutils literal"><span class="pre">(</span></tt>, <tt class="docutils literal"><span class="pre">)</span></tt> and <tt class="docutils literal"><span class="pre">!</span></tt>)</li>
391
+ <li>Symbol (such as <code class="docutils literal"><span class="pre">(</span></code>, <code class="docutils literal"><span class="pre">)</span></code> and <code class="docutils literal"><span class="pre">!</span></code>)</li>
368
392
  <li>Hiragana</li>
369
393
  <li>Katakana</li>
370
394
  <li>Kanji</li>
@@ -374,8 +398,8 @@ ASCII characters. Character type is one of them:</p>
374
398
  <p>The following example shows two token delimiters:</p>
375
399
  <blockquote>
376
400
  <div><ul class="simple">
377
- <li>between <tt class="docutils literal"><span class="pre">100</span></tt> (digits) and <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets)</li>
378
- <li>between <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets) and <tt class="docutils literal"><span class="pre">!!!</span></tt> (symbols)</li>
401
+ <li>between <code class="docutils literal"><span class="pre">100</span></code> (digits) and <code class="docutils literal"><span class="pre">cents</span></code> (alphabets)</li>
402
+ <li>between <code class="docutils literal"><span class="pre">cents</span></code> (alphabets) and <code class="docutils literal"><span class="pre">!!!</span></code> (symbols)</li>
379
403
  </ul>
380
404
  </div></blockquote>
381
405
  <p>Execution example:</p>
@@ -389,21 +413,24 @@ ASCII characters. Character type is one of them:</p>
389
413
  # [
390
414
  # {
391
415
  # &quot;position&quot;: 0,
416
+ # &quot;force_prefix&quot;: false,
392
417
  # &quot;value&quot;: &quot;100&quot;
393
418
  # },
394
419
  # {
395
420
  # &quot;position&quot;: 1,
421
+ # &quot;force_prefix&quot;: false,
396
422
  # &quot;value&quot;: &quot;cents&quot;
397
423
  # },
398
424
  # {
399
425
  # &quot;position&quot;: 2,
426
+ # &quot;force_prefix&quot;: false,
400
427
  # &quot;value&quot;: &quot;!!!&quot;
401
428
  # }
402
429
  # ]
403
430
  # ]
404
431
  </pre></div>
405
432
  </div>
406
- <p>Here is an example that <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram tokenize method
433
+ <p>Here is an example that <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram tokenize method
407
434
  for non-ASCII characters.</p>
408
435
  <p>Execution example:</p>
409
436
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
@@ -416,26 +443,32 @@ for non-ASCII characters.</p>
416
443
  # [
417
444
  # {
418
445
  # &quot;position&quot;: 0,
446
+ # &quot;force_prefix&quot;: false,
419
447
  # &quot;value&quot;: &quot;日本&quot;
420
448
  # },
421
449
  # {
422
450
  # &quot;position&quot;: 1,
451
+ # &quot;force_prefix&quot;: false,
423
452
  # &quot;value&quot;: &quot;本語&quot;
424
453
  # },
425
454
  # {
426
455
  # &quot;position&quot;: 2,
456
+ # &quot;force_prefix&quot;: false,
427
457
  # &quot;value&quot;: &quot;語の&quot;
428
458
  # },
429
459
  # {
430
460
  # &quot;position&quot;: 3,
461
+ # &quot;force_prefix&quot;: false,
431
462
  # &quot;value&quot;: &quot;の勉&quot;
432
463
  # },
433
464
  # {
434
465
  # &quot;position&quot;: 4,
466
+ # &quot;force_prefix&quot;: false,
435
467
  # &quot;value&quot;: &quot;勉強&quot;
436
468
  # },
437
469
  # {
438
470
  # &quot;position&quot;: 5,
471
+ # &quot;force_prefix&quot;: false,
439
472
  # &quot;value&quot;: &quot;強&quot;
440
473
  # }
441
474
  # ]
@@ -444,9 +477,9 @@ for non-ASCII characters.</p>
444
477
  </div>
445
478
  </div>
446
479
  <div class="section" id="tokenbigramsplitsymbol">
447
- <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
448
- <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
449
- difference between them is symbol handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt>
480
+ <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
481
+ <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The
482
+ difference between them is symbol handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code>
450
483
  tokenizes symbols by bigram tokenize method:</p>
451
484
  <p>Execution example:</p>
452
485
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
@@ -459,22 +492,27 @@ tokenizes symbols by bigram tokenize method:</p>
459
492
  # [
460
493
  # {
461
494
  # &quot;position&quot;: 0,
495
+ # &quot;force_prefix&quot;: false,
462
496
  # &quot;value&quot;: &quot;100&quot;
463
497
  # },
464
498
  # {
465
499
  # &quot;position&quot;: 1,
500
+ # &quot;force_prefix&quot;: false,
466
501
  # &quot;value&quot;: &quot;cents&quot;
467
502
  # },
468
503
  # {
469
504
  # &quot;position&quot;: 2,
505
+ # &quot;force_prefix&quot;: false,
470
506
  # &quot;value&quot;: &quot;!!&quot;
471
507
  # },
472
508
  # {
473
509
  # &quot;position&quot;: 3,
510
+ # &quot;force_prefix&quot;: false,
474
511
  # &quot;value&quot;: &quot;!!&quot;
475
512
  # },
476
513
  # {
477
514
  # &quot;position&quot;: 4,
515
+ # &quot;force_prefix&quot;: false,
478
516
  # &quot;value&quot;: &quot;!&quot;
479
517
  # }
480
518
  # ]
@@ -483,10 +521,10 @@ tokenizes symbols by bigram tokenize method:</p>
483
521
  </div>
484
522
  </div>
485
523
  <div class="section" id="tokenbigramsplitsymbolalpha">
486
- <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
487
- <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
524
+ <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
525
+ <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The
488
526
  difference between them is symbol and alphabet
489
- handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> tokenizes symbols and
527
+ handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> tokenizes symbols and
490
528
  alphabets by bigram tokenize method:</p>
491
529
  <p>Execution example:</p>
492
530
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
@@ -499,38 +537,47 @@ alphabets by bigram tokenize method:</p>
499
537
  # [
500
538
  # {
501
539
  # &quot;position&quot;: 0,
540
+ # &quot;force_prefix&quot;: false,
502
541
  # &quot;value&quot;: &quot;100&quot;
503
542
  # },
504
543
  # {
505
544
  # &quot;position&quot;: 1,
545
+ # &quot;force_prefix&quot;: false,
506
546
  # &quot;value&quot;: &quot;ce&quot;
507
547
  # },
508
548
  # {
509
549
  # &quot;position&quot;: 2,
550
+ # &quot;force_prefix&quot;: false,
510
551
  # &quot;value&quot;: &quot;en&quot;
511
552
  # },
512
553
  # {
513
554
  # &quot;position&quot;: 3,
555
+ # &quot;force_prefix&quot;: false,
514
556
  # &quot;value&quot;: &quot;nt&quot;
515
557
  # },
516
558
  # {
517
559
  # &quot;position&quot;: 4,
560
+ # &quot;force_prefix&quot;: false,
518
561
  # &quot;value&quot;: &quot;ts&quot;
519
562
  # },
520
563
  # {
521
564
  # &quot;position&quot;: 5,
565
+ # &quot;force_prefix&quot;: false,
522
566
  # &quot;value&quot;: &quot;s!&quot;
523
567
  # },
524
568
  # {
525
569
  # &quot;position&quot;: 6,
570
+ # &quot;force_prefix&quot;: false,
526
571
  # &quot;value&quot;: &quot;!!&quot;
527
572
  # },
528
573
  # {
529
574
  # &quot;position&quot;: 7,
575
+ # &quot;force_prefix&quot;: false,
530
576
  # &quot;value&quot;: &quot;!!&quot;
531
577
  # },
532
578
  # {
533
579
  # &quot;position&quot;: 8,
580
+ # &quot;force_prefix&quot;: false,
534
581
  # &quot;value&quot;: &quot;!&quot;
535
582
  # }
536
583
  # ]
@@ -539,10 +586,10 @@ alphabets by bigram tokenize method:</p>
539
586
  </div>
540
587
  </div>
541
588
  <div class="section" id="tokenbigramsplitsymbolalphadigit">
542
- <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
543
- <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> is similar to
544
- <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference between them is symbol, alphabet
545
- and digit handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> tokenizes
589
+ <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
590
+ <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> is similar to
591
+ <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The difference between them is symbol, alphabet
592
+ and digit handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> tokenizes
546
593
  symbols, alphabets and digits by bigram tokenize method. It means that
547
594
  all characters are tokenized by bigram tokenize method:</p>
548
595
  <p>Execution example:</p>
@@ -556,46 +603,57 @@ all characters are tokenized by bigram tokenize method:</p>
556
603
  # [
557
604
  # {
558
605
  # &quot;position&quot;: 0,
606
+ # &quot;force_prefix&quot;: false,
559
607
  # &quot;value&quot;: &quot;10&quot;
560
608
  # },
561
609
  # {
562
610
  # &quot;position&quot;: 1,
611
+ # &quot;force_prefix&quot;: false,
563
612
  # &quot;value&quot;: &quot;00&quot;
564
613
  # },
565
614
  # {
566
615
  # &quot;position&quot;: 2,
616
+ # &quot;force_prefix&quot;: false,
567
617
  # &quot;value&quot;: &quot;0c&quot;
568
618
  # },
569
619
  # {
570
620
  # &quot;position&quot;: 3,
621
+ # &quot;force_prefix&quot;: false,
571
622
  # &quot;value&quot;: &quot;ce&quot;
572
623
  # },
573
624
  # {
574
625
  # &quot;position&quot;: 4,
626
+ # &quot;force_prefix&quot;: false,
575
627
  # &quot;value&quot;: &quot;en&quot;
576
628
  # },
577
629
  # {
578
630
  # &quot;position&quot;: 5,
631
+ # &quot;force_prefix&quot;: false,
579
632
  # &quot;value&quot;: &quot;nt&quot;
580
633
  # },
581
634
  # {
582
635
  # &quot;position&quot;: 6,
636
+ # &quot;force_prefix&quot;: false,
583
637
  # &quot;value&quot;: &quot;ts&quot;
584
638
  # },
585
639
  # {
586
640
  # &quot;position&quot;: 7,
641
+ # &quot;force_prefix&quot;: false,
587
642
  # &quot;value&quot;: &quot;s!&quot;
588
643
  # },
589
644
  # {
590
645
  # &quot;position&quot;: 8,
646
+ # &quot;force_prefix&quot;: false,
591
647
  # &quot;value&quot;: &quot;!!&quot;
592
648
  # },
593
649
  # {
594
650
  # &quot;position&quot;: 9,
651
+ # &quot;force_prefix&quot;: false,
595
652
  # &quot;value&quot;: &quot;!!&quot;
596
653
  # },
597
654
  # {
598
655
  # &quot;position&quot;: 10,
656
+ # &quot;force_prefix&quot;: false,
599
657
  # &quot;value&quot;: &quot;!&quot;
600
658
  # }
601
659
  # ]
@@ -604,13 +662,13 @@ all characters are tokenized by bigram tokenize method:</p>
604
662
  </div>
605
663
  </div>
606
664
  <div class="section" id="tokenbigramignoreblank">
607
- <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
608
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
609
- difference between them is blank handling. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>
665
+ <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
666
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The
667
+ difference between them is blank handling. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>
610
668
  ignores white-spaces in continuous symbols and non-ASCII characters.</p>
611
- <p>You can find difference of them by <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
669
+ <p>You can find difference of them by <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
612
670
  has symbols and non-ASCII characters.</p>
613
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
671
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
614
672
  <p>Execution example:</p>
615
673
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
616
674
  # [
@@ -622,33 +680,39 @@ has symbols and non-ASCII characters.</p>
622
680
  # [
623
681
  # {
624
682
  # &quot;position&quot;: 0,
683
+ # &quot;force_prefix&quot;: false,
625
684
  # &quot;value&quot;: &quot;日&quot;
626
685
  # },
627
686
  # {
628
687
  # &quot;position&quot;: 1,
688
+ # &quot;force_prefix&quot;: false,
629
689
  # &quot;value&quot;: &quot;本&quot;
630
690
  # },
631
691
  # {
632
692
  # &quot;position&quot;: 2,
693
+ # &quot;force_prefix&quot;: false,
633
694
  # &quot;value&quot;: &quot;語&quot;
634
695
  # },
635
696
  # {
636
697
  # &quot;position&quot;: 3,
698
+ # &quot;force_prefix&quot;: false,
637
699
  # &quot;value&quot;: &quot;!&quot;
638
700
  # },
639
701
  # {
640
702
  # &quot;position&quot;: 4,
703
+ # &quot;force_prefix&quot;: false,
641
704
  # &quot;value&quot;: &quot;!&quot;
642
705
  # },
643
706
  # {
644
707
  # &quot;position&quot;: 5,
708
+ # &quot;force_prefix&quot;: false,
645
709
  # &quot;value&quot;: &quot;!&quot;
646
710
  # }
647
711
  # ]
648
712
  # ]
649
713
  </pre></div>
650
714
  </div>
651
- <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>:</p>
715
+ <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>:</p>
652
716
  <p>Execution example:</p>
653
717
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
654
718
  # [
@@ -660,18 +724,22 @@ has symbols and non-ASCII characters.</p>
660
724
  # [
661
725
  # {
662
726
  # &quot;position&quot;: 0,
727
+ # &quot;force_prefix&quot;: false,
663
728
  # &quot;value&quot;: &quot;日本&quot;
664
729
  # },
665
730
  # {
666
731
  # &quot;position&quot;: 1,
732
+ # &quot;force_prefix&quot;: false,
667
733
  # &quot;value&quot;: &quot;本語&quot;
668
734
  # },
669
735
  # {
670
736
  # &quot;position&quot;: 2,
737
+ # &quot;force_prefix&quot;: false,
671
738
  # &quot;value&quot;: &quot;語&quot;
672
739
  # },
673
740
  # {
674
741
  # &quot;position&quot;: 3,
742
+ # &quot;force_prefix&quot;: false,
675
743
  # &quot;value&quot;: &quot;!!!&quot;
676
744
  # }
677
745
  # ]
@@ -680,22 +748,22 @@ has symbols and non-ASCII characters.</p>
680
748
  </div>
681
749
  </div>
682
750
  <div class="section" id="tokenbigramignoreblanksplitsymbol">
683
- <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
684
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> is similar to
685
- <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the following:</p>
751
+ <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
752
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> is similar to
753
+ <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The differences between them are the following:</p>
686
754
  <blockquote>
687
755
  <div><ul class="simple">
688
756
  <li>Blank handling</li>
689
757
  <li>Symbol handling</li>
690
758
  </ul>
691
759
  </div></blockquote>
692
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> ignores white-spaces in
760
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> ignores white-spaces in
693
761
  continuous symbols and non-ASCII characters.</p>
694
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> tokenizes symbols by bigram
762
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> tokenizes symbols by bigram
695
763
  tokenize method.</p>
696
- <p>You can find difference of them by <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
764
+ <p>You can find difference of them by <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
697
765
  has symbols and non-ASCII characters.</p>
698
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
766
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
699
767
  <p>Execution example:</p>
700
768
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
701
769
  # [
@@ -707,33 +775,39 @@ has symbols and non-ASCII characters.</p>
707
775
  # [
708
776
  # {
709
777
  # &quot;position&quot;: 0,
778
+ # &quot;force_prefix&quot;: false,
710
779
  # &quot;value&quot;: &quot;日&quot;
711
780
  # },
712
781
  # {
713
782
  # &quot;position&quot;: 1,
783
+ # &quot;force_prefix&quot;: false,
714
784
  # &quot;value&quot;: &quot;本&quot;
715
785
  # },
716
786
  # {
717
787
  # &quot;position&quot;: 2,
788
+ # &quot;force_prefix&quot;: false,
718
789
  # &quot;value&quot;: &quot;語&quot;
719
790
  # },
720
791
  # {
721
792
  # &quot;position&quot;: 3,
793
+ # &quot;force_prefix&quot;: false,
722
794
  # &quot;value&quot;: &quot;!&quot;
723
795
  # },
724
796
  # {
725
797
  # &quot;position&quot;: 4,
798
+ # &quot;force_prefix&quot;: false,
726
799
  # &quot;value&quot;: &quot;!&quot;
727
800
  # },
728
801
  # {
729
802
  # &quot;position&quot;: 5,
803
+ # &quot;force_prefix&quot;: false,
730
804
  # &quot;value&quot;: &quot;!&quot;
731
805
  # }
732
806
  # ]
733
807
  # ]
734
808
  </pre></div>
735
809
  </div>
736
- <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt>:</p>
810
+ <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code>:</p>
737
811
  <p>Execution example:</p>
738
812
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
739
813
  # [
@@ -745,26 +819,32 @@ has symbols and non-ASCII characters.</p>
745
819
  # [
746
820
  # {
747
821
  # &quot;position&quot;: 0,
822
+ # &quot;force_prefix&quot;: false,
748
823
  # &quot;value&quot;: &quot;日本&quot;
749
824
  # },
750
825
  # {
751
826
  # &quot;position&quot;: 1,
827
+ # &quot;force_prefix&quot;: false,
752
828
  # &quot;value&quot;: &quot;本語&quot;
753
829
  # },
754
830
  # {
755
831
  # &quot;position&quot;: 2,
832
+ # &quot;force_prefix&quot;: false,
756
833
  # &quot;value&quot;: &quot;語!&quot;
757
834
  # },
758
835
  # {
759
836
  # &quot;position&quot;: 3,
837
+ # &quot;force_prefix&quot;: false,
760
838
  # &quot;value&quot;: &quot;!!&quot;
761
839
  # },
762
840
  # {
763
841
  # &quot;position&quot;: 4,
842
+ # &quot;force_prefix&quot;: false,
764
843
  # &quot;value&quot;: &quot;!!&quot;
765
844
  # },
766
845
  # {
767
846
  # &quot;position&quot;: 5,
847
+ # &quot;force_prefix&quot;: false,
768
848
  # &quot;value&quot;: &quot;!&quot;
769
849
  # }
770
850
  # ]
@@ -773,22 +853,22 @@ has symbols and non-ASCII characters.</p>
773
853
  </div>
774
854
  </div>
775
855
  <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
776
- <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
777
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> is similar to
778
- <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the following:</p>
856
+ <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
857
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> is similar to
858
+ <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The differences between them are the following:</p>
779
859
  <blockquote>
780
860
  <div><ul class="simple">
781
861
  <li>Blank handling</li>
782
862
  <li>Symbol and alphabet handling</li>
783
863
  </ul>
784
864
  </div></blockquote>
785
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> ignores white-spaces in
865
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> ignores white-spaces in
786
866
  continuous symbols and non-ASCII characters.</p>
787
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> tokenizes symbols and
867
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> tokenizes symbols and
788
868
  alphabets by bigram tokenize method.</p>
789
- <p>You can find difference of them by <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
869
+ <p>You can find difference of them by <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
790
870
  has symbols and non-ASCII characters with white spaces and alphabets.</p>
791
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
871
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
792
872
  <p>Execution example:</p>
793
873
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
794
874
  # [
@@ -800,37 +880,44 @@ has symbols and non-ASCII characters with white spaces and alphabets.</p>
800
880
  # [
801
881
  # {
802
882
  # &quot;position&quot;: 0,
883
+ # &quot;force_prefix&quot;: false,
803
884
  # &quot;value&quot;: &quot;hello&quot;
804
885
  # },
805
886
  # {
806
887
  # &quot;position&quot;: 1,
888
+ # &quot;force_prefix&quot;: false,
807
889
  # &quot;value&quot;: &quot;日&quot;
808
890
  # },
809
891
  # {
810
892
  # &quot;position&quot;: 2,
893
+ # &quot;force_prefix&quot;: false,
811
894
  # &quot;value&quot;: &quot;本&quot;
812
895
  # },
813
896
  # {
814
897
  # &quot;position&quot;: 3,
898
+ # &quot;force_prefix&quot;: false,
815
899
  # &quot;value&quot;: &quot;語&quot;
816
900
  # },
817
901
  # {
818
902
  # &quot;position&quot;: 4,
903
+ # &quot;force_prefix&quot;: false,
819
904
  # &quot;value&quot;: &quot;!&quot;
820
905
  # },
821
906
  # {
822
907
  # &quot;position&quot;: 5,
908
+ # &quot;force_prefix&quot;: false,
823
909
  # &quot;value&quot;: &quot;!&quot;
824
910
  # },
825
911
  # {
826
912
  # &quot;position&quot;: 6,
913
+ # &quot;force_prefix&quot;: false,
827
914
  # &quot;value&quot;: &quot;!&quot;
828
915
  # }
829
916
  # ]
830
917
  # ]
831
918
  </pre></div>
832
919
  </div>
833
- <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt>:</p>
920
+ <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code>:</p>
834
921
  <p>Execution example:</p>
835
922
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
836
923
  # [
@@ -842,46 +929,57 @@ has symbols and non-ASCII characters with white spaces and alphabets.</p>
842
929
  # [
843
930
  # {
844
931
  # &quot;position&quot;: 0,
932
+ # &quot;force_prefix&quot;: false,
845
933
  # &quot;value&quot;: &quot;he&quot;
846
934
  # },
847
935
  # {
848
936
  # &quot;position&quot;: 1,
937
+ # &quot;force_prefix&quot;: false,
849
938
  # &quot;value&quot;: &quot;el&quot;
850
939
  # },
851
940
  # {
852
941
  # &quot;position&quot;: 2,
942
+ # &quot;force_prefix&quot;: false,
853
943
  # &quot;value&quot;: &quot;ll&quot;
854
944
  # },
855
945
  # {
856
946
  # &quot;position&quot;: 3,
947
+ # &quot;force_prefix&quot;: false,
857
948
  # &quot;value&quot;: &quot;lo&quot;
858
949
  # },
859
950
  # {
860
951
  # &quot;position&quot;: 4,
952
+ # &quot;force_prefix&quot;: false,
861
953
  # &quot;value&quot;: &quot;o日&quot;
862
954
  # },
863
955
  # {
864
956
  # &quot;position&quot;: 5,
957
+ # &quot;force_prefix&quot;: false,
865
958
  # &quot;value&quot;: &quot;日本&quot;
866
959
  # },
867
960
  # {
868
961
  # &quot;position&quot;: 6,
962
+ # &quot;force_prefix&quot;: false,
869
963
  # &quot;value&quot;: &quot;本語&quot;
870
964
  # },
871
965
  # {
872
966
  # &quot;position&quot;: 7,
967
+ # &quot;force_prefix&quot;: false,
873
968
  # &quot;value&quot;: &quot;語!&quot;
874
969
  # },
875
970
  # {
876
971
  # &quot;position&quot;: 8,
972
+ # &quot;force_prefix&quot;: false,
877
973
  # &quot;value&quot;: &quot;!!&quot;
878
974
  # },
879
975
  # {
880
976
  # &quot;position&quot;: 9,
977
+ # &quot;force_prefix&quot;: false,
881
978
  # &quot;value&quot;: &quot;!!&quot;
882
979
  # },
883
980
  # {
884
981
  # &quot;position&quot;: 10,
982
+ # &quot;force_prefix&quot;: false,
885
983
  # &quot;value&quot;: &quot;!&quot;
886
984
  # }
887
985
  # ]
@@ -890,24 +988,24 @@ has symbols and non-ASCII characters with white spaces and alphabets.</p>
890
988
  </div>
891
989
  </div>
892
990
  <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
893
- <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
894
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> is similar to
895
- <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the followings:</p>
991
+ <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
992
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> is similar to
993
+ <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The differences between them are as follows:</p>
896
994
  <blockquote>
897
995
  <div><ul class="simple">
898
996
  <li>Blank handling</li>
899
997
  <li>Symbol, alphabet and digit handling</li>
900
998
  </ul>
901
999
  </div></blockquote>
902
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> ignores white-spaces
1000
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> ignores white-spaces
903
1001
  in continuous symbols and non-ASCII characters.</p>
904
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> tokenizes symbols,
1002
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> tokenizes symbols,
905
1003
  alphabets and digits by bigram tokenize method. It means that all
906
1004
  characters are tokenized by bigram tokenize method.</p>
907
- <p>You can find difference of them by <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> text
1005
+ <p>You can see the difference between them with the <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> text
908
1006
  because it has symbols and non-ASCII characters with white spaces,
909
1007
  alphabets and digits.</p>
910
- <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
1008
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
911
1009
  <p>Execution example:</p>
912
1010
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
913
1011
  # [
@@ -919,41 +1017,49 @@ alphabets and digits.</p>
919
1017
  # [
920
1018
  # {
921
1019
  # &quot;position&quot;: 0,
1020
+ # &quot;force_prefix&quot;: false,
922
1021
  # &quot;value&quot;: &quot;hello&quot;
923
1022
  # },
924
1023
  # {
925
1024
  # &quot;position&quot;: 1,
1025
+ # &quot;force_prefix&quot;: false,
926
1026
  # &quot;value&quot;: &quot;日&quot;
927
1027
  # },
928
1028
  # {
929
1029
  # &quot;position&quot;: 2,
1030
+ # &quot;force_prefix&quot;: false,
930
1031
  # &quot;value&quot;: &quot;本&quot;
931
1032
  # },
932
1033
  # {
933
1034
  # &quot;position&quot;: 3,
1035
+ # &quot;force_prefix&quot;: false,
934
1036
  # &quot;value&quot;: &quot;語&quot;
935
1037
  # },
936
1038
  # {
937
1039
  # &quot;position&quot;: 4,
1040
+ # &quot;force_prefix&quot;: false,
938
1041
  # &quot;value&quot;: &quot;!&quot;
939
1042
  # },
940
1043
  # {
941
1044
  # &quot;position&quot;: 5,
1045
+ # &quot;force_prefix&quot;: false,
942
1046
  # &quot;value&quot;: &quot;!&quot;
943
1047
  # },
944
1048
  # {
945
1049
  # &quot;position&quot;: 6,
1050
+ # &quot;force_prefix&quot;: false,
946
1051
  # &quot;value&quot;: &quot;!&quot;
947
1052
  # },
948
1053
  # {
949
1054
  # &quot;position&quot;: 7,
1055
+ # &quot;force_prefix&quot;: false,
950
1056
  # &quot;value&quot;: &quot;777&quot;
951
1057
  # }
952
1058
  # ]
953
1059
  # ]
954
1060
  </pre></div>
955
1061
  </div>
956
- <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt>:</p>
1062
+ <p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code>:</p>
957
1063
  <p>Execution example:</p>
958
1064
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
959
1065
  # [
@@ -965,58 +1071,72 @@ alphabets and digits.</p>
965
1071
  # [
966
1072
  # {
967
1073
  # &quot;position&quot;: 0,
1074
+ # &quot;force_prefix&quot;: false,
968
1075
  # &quot;value&quot;: &quot;he&quot;
969
1076
  # },
970
1077
  # {
971
1078
  # &quot;position&quot;: 1,
1079
+ # &quot;force_prefix&quot;: false,
972
1080
  # &quot;value&quot;: &quot;el&quot;
973
1081
  # },
974
1082
  # {
975
1083
  # &quot;position&quot;: 2,
1084
+ # &quot;force_prefix&quot;: false,
976
1085
  # &quot;value&quot;: &quot;ll&quot;
977
1086
  # },
978
1087
  # {
979
1088
  # &quot;position&quot;: 3,
1089
+ # &quot;force_prefix&quot;: false,
980
1090
  # &quot;value&quot;: &quot;lo&quot;
981
1091
  # },
982
1092
  # {
983
1093
  # &quot;position&quot;: 4,
1094
+ # &quot;force_prefix&quot;: false,
984
1095
  # &quot;value&quot;: &quot;o日&quot;
985
1096
  # },
986
1097
  # {
987
1098
  # &quot;position&quot;: 5,
1099
+ # &quot;force_prefix&quot;: false,
988
1100
  # &quot;value&quot;: &quot;日本&quot;
989
1101
  # },
990
1102
  # {
991
1103
  # &quot;position&quot;: 6,
1104
+ # &quot;force_prefix&quot;: false,
992
1105
  # &quot;value&quot;: &quot;本語&quot;
993
1106
  # },
994
1107
  # {
995
1108
  # &quot;position&quot;: 7,
1109
+ # &quot;force_prefix&quot;: false,
996
1110
  # &quot;value&quot;: &quot;語!&quot;
997
1111
  # },
998
1112
  # {
999
1113
  # &quot;position&quot;: 8,
1114
+ # &quot;force_prefix&quot;: false,
1000
1115
  # &quot;value&quot;: &quot;!!&quot;
1001
1116
  # },
1002
1117
  # {
1003
1118
  # &quot;position&quot;: 9,
1119
+ # &quot;force_prefix&quot;: false,
1004
1120
  # &quot;value&quot;: &quot;!!&quot;
1005
1121
  # },
1006
1122
  # {
1007
1123
  # &quot;position&quot;: 10,
1124
+ # &quot;force_prefix&quot;: false,
1008
1125
  # &quot;value&quot;: &quot;!7&quot;
1009
1126
  # },
1010
1127
  # {
1011
1128
  # &quot;position&quot;: 11,
1129
+ # &quot;force_prefix&quot;: false,
1012
1130
  # &quot;value&quot;: &quot;77&quot;
1013
1131
  # },
1014
1132
  # {
1015
1133
  # &quot;position&quot;: 12,
1134
+ # &quot;force_prefix&quot;: false,
1016
1135
  # &quot;value&quot;: &quot;77&quot;
1017
1136
  # },
1018
1137
  # {
1019
1138
  # &quot;position&quot;: 13,
1139
+ # &quot;force_prefix&quot;: false,
1020
1140
  # &quot;value&quot;: &quot;7&quot;
1021
1141
  # }
1022
1142
  # ]
@@ -1025,10 +1145,10 @@ alphabets and digits.</p>
1025
1145
  </div>
1026
1146
  </div>
1027
1147
  <div class="section" id="tokenunigram">
1028
- <span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
1029
- <p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences
1030
- between them is token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
1031
- token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> uses 1 character per token.</p>
1148
+ <span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
1149
+ <p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The difference
1150
+ between them is token unit. <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> uses 2 characters per
1151
+ token. <code class="docutils literal"><span class="pre">TokenUnigram</span></code> uses 1 character per token.</p>
1032
1152
  <p>Execution example:</p>
1033
1153
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
1034
1154
  # [
@@ -1040,14 +1160,17 @@ token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> u
1040
1160
  # [
1041
1161
  # {
1042
1162
  # &quot;position&quot;: 0,
1163
+ # &quot;force_prefix&quot;: false,
1043
1164
  # &quot;value&quot;: &quot;100&quot;
1044
1165
  # },
1045
1166
  # {
1046
1167
  # &quot;position&quot;: 1,
1168
+ # &quot;force_prefix&quot;: false,
1047
1169
  # &quot;value&quot;: &quot;cents&quot;
1048
1170
  # },
1049
1171
  # {
1050
1172
  # &quot;position&quot;: 2,
1173
+ # &quot;force_prefix&quot;: false,
1051
1174
  # &quot;value&quot;: &quot;!!!&quot;
1052
1175
  # }
1053
1176
  # ]
@@ -1056,10 +1179,10 @@ token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> u
1056
1179
  </div>
1057
1180
  </div>
1058
1181
  <div class="section" id="tokentrigram">
1059
- <span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
1060
- <p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences
1061
- between them is token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
1062
- token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> uses 3 characters per token.</p>
1182
+ <span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
1183
+ <p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The difference
1184
+ between them is token unit. <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> uses 2 characters per
1185
+ token. <code class="docutils literal"><span class="pre">TokenTrigram</span></code> uses 3 characters per token.</p>
1063
1186
  <p>Execution example:</p>
1064
1187
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1065
1188
  # [
@@ -1071,14 +1194,17 @@ token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> u
1071
1194
  # [
1072
1195
  # {
1073
1196
  # &quot;position&quot;: 0,
1197
+ # &quot;force_prefix&quot;: false,
1074
1198
  # &quot;value&quot;: &quot;10000&quot;
1075
1199
  # },
1076
1200
  # {
1077
1201
  # &quot;position&quot;: 1,
1202
+ # &quot;force_prefix&quot;: false,
1078
1203
  # &quot;value&quot;: &quot;cents&quot;
1079
1204
  # },
1080
1205
  # {
1081
1206
  # &quot;position&quot;: 2,
1207
+ # &quot;force_prefix&quot;: false,
1082
1208
  # &quot;value&quot;: &quot;!!!!!&quot;
1083
1209
  # }
1084
1210
  # ]
@@ -1087,14 +1213,14 @@ token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> u
1087
1213
  </div>
1088
1214
  </div>
1089
1215
  <div class="section" id="tokendelimit">
1090
- <span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
1091
- <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> extracts token by splitting one or more space
1092
- characters (<tt class="docutils literal"><span class="pre">U+0020</span></tt>). For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to
1093
- <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt>.</p>
1094
- <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> is suitable for tag text. You can extract <tt class="docutils literal"><span class="pre">groonga</span></tt>
1095
- and <tt class="docutils literal"><span class="pre">full-text-search</span></tt> and <tt class="docutils literal"><span class="pre">http</span></tt> as tags from <tt class="docutils literal"><span class="pre">groonga</span>
1096
- <span class="pre">full-text-search</span> <span class="pre">http</span></tt>.</p>
1097
- <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt>:</p>
1216
+ <span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
1217
+ <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> extracts token by splitting one or more space
1218
+ characters (<code class="docutils literal"><span class="pre">U+0020</span></code>). For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to
1219
+ <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code>.</p>
1220
+ <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> is suitable for tag text. You can extract <code class="docutils literal"><span class="pre">groonga</span></code>
1221
+ and <code class="docutils literal"><span class="pre">full-text-search</span></code> and <code class="docutils literal"><span class="pre">http</span></code> as tags from <code class="docutils literal"><span class="pre">groonga</span>
1222
+ <span class="pre">full-text-search</span> <span class="pre">http</span></code>.</p>
1223
+ <p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimit</span></code>:</p>
1098
1224
  <p>Execution example:</p>
1099
1225
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1100
1226
  # [
@@ -1106,14 +1232,17 @@ and <tt class="docutils literal"><span class="pre">full-text-search</span></tt>
1106
1232
  # [
1107
1233
  # {
1108
1234
  # &quot;position&quot;: 0,
1235
+ # &quot;force_prefix&quot;: false,
1109
1236
  # &quot;value&quot;: &quot;groonga&quot;
1110
1237
  # },
1111
1238
  # {
1112
1239
  # &quot;position&quot;: 1,
1240
+ # &quot;force_prefix&quot;: false,
1113
1241
  # &quot;value&quot;: &quot;full-text-search&quot;
1114
1242
  # },
1115
1243
  # {
1116
1244
  # &quot;position&quot;: 2,
1245
+ # &quot;force_prefix&quot;: false,
1117
1246
  # &quot;value&quot;: &quot;http&quot;
1118
1247
  # }
1119
1248
  # ]
@@ -1122,13 +1251,13 @@ and <tt class="docutils literal"><span class="pre">full-text-search</span></tt>
1122
1251
  </div>
1123
1252
  </div>
1124
1253
  <div class="section" id="tokendelimitnull">
1125
- <span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
1126
- <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is similar to <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>. The
1127
- difference between them is separator character. <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>
1128
- uses space character (<tt class="docutils literal"><span class="pre">U+0020</span></tt>) but <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> uses NUL
1129
- character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</p>
1130
- <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is also suitable for tag text.</p>
1131
- <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt>:</p>
1254
+ <span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
1255
+ <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is similar to <a class="reference internal" href="#token-delimit"><span>TokenDelimit</span></a>. The
1256
+ difference between them is separator character. <a class="reference internal" href="#token-delimit"><span>TokenDelimit</span></a>
1257
+ uses space character (<code class="docutils literal"><span class="pre">U+0020</span></code>) but <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> uses NUL
1258
+ character (<code class="docutils literal"><span class="pre">U+0000</span></code>).</p>
1259
+ <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is also suitable for tag text.</p>
1260
+ <p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code>:</p>
1132
1261
  <p>Execution example:</p>
1133
1262
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1134
1263
  # [
@@ -1140,6 +1269,7 @@ character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</
1140
1269
  # [
1141
1270
  # {
1142
1271
  # &quot;position&quot;: 0,
1272
+ # &quot;force_prefix&quot;: false,
1143
1273
  # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1144
1274
  # }
1145
1275
  # ]
@@ -1148,23 +1278,23 @@ character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</
1148
1278
  </div>
1149
1279
  </div>
1150
1280
  <div class="section" id="tokenmecab">
1151
- <span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
1152
- <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
1281
+ <span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
1282
+ <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
1153
1283
  morphological analyzer.</p>
1154
1284
  <p>MeCab doesn't depend on Japanese. You can use MeCab for other
1155
1285
  languages by creating dictionary for the languages. You can use <a class="reference external" href="http://osdn.jp/projects/naist-jdic/">NAIST
1156
1286
  Japanese Dictionary</a>
1157
1287
  for Japanese.</p>
1158
- <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is good for precision rather than recall. You can find
1159
- <tt class="docutils literal"><span class="pre">東京都</span></tt> and <tt class="docutils literal"><span class="pre">京都</span></tt> texts by <tt class="docutils literal"><span class="pre">京都</span></tt> query with
1160
- <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> but <tt class="docutils literal"><span class="pre">東京都</span></tt> isn't expected. You can find only
1161
- <tt class="docutils literal"><span class="pre">京都</span></tt> text by <tt class="docutils literal"><span class="pre">京都</span></tt> query with <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>.</p>
1288
+ <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is good for precision rather than recall. You can find
1289
+ <code class="docutils literal"><span class="pre">東京都</span></code> and <code class="docutils literal"><span class="pre">京都</span></code> texts by <code class="docutils literal"><span class="pre">京都</span></code> query with
1290
+ <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> but <code class="docutils literal"><span class="pre">東京都</span></code> isn't expected. You can find only
1291
+ <code class="docutils literal"><span class="pre">京都</span></code> text by <code class="docutils literal"><span class="pre">京都</span></code> query with <code class="docutils literal"><span class="pre">TokenMecab</span></code>.</p>
1162
1292
  <p>If you want to support neologisms, you need to keep updating your
1163
- MeCab dictionary. It needs maintain cost. (<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't
1164
- require dictionary maintenance because <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't use
1293
+ MeCab dictionary. This incurs a maintenance cost. (<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> doesn't
1294
+ require dictionary maintenance because <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> doesn't use
1165
1295
  dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
1166
- <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenMeCab</span></tt>. <tt class="docutils literal"><span class="pre">東京都</span></tt> is tokenized to <tt class="docutils literal"><span class="pre">東京</span></tt>
1167
- and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't include <tt class="docutils literal"><span class="pre">京都</span></tt>:</p>
1296
+ <p>Here is an example of <code class="docutils literal"><span class="pre">TokenMecab</span></code>. <code class="docutils literal"><span class="pre">東京都</span></code> is tokenized to <code class="docutils literal"><span class="pre">東京</span></code>
1297
+ and <code class="docutils literal"><span class="pre">都</span></code>. They don't include <code class="docutils literal"><span class="pre">京都</span></code>:</p>
1168
1298
  <p>Execution example:</p>
1169
1299
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab &quot;東京都&quot;
1170
1300
  # [
@@ -1176,10 +1306,12 @@ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't i
1176
1306
  # [
1177
1307
  # {
1178
1308
  # &quot;position&quot;: 0,
1309
+ # &quot;force_prefix&quot;: false,
1179
1310
  # &quot;value&quot;: &quot;東京&quot;
1180
1311
  # },
1181
1312
  # {
1182
1313
  # &quot;position&quot;: 1,
1314
+ # &quot;force_prefix&quot;: false,
1183
1315
  # &quot;value&quot;: &quot;都&quot;
1184
1316
  # }
1185
1317
  # ]
@@ -1188,7 +1320,7 @@ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't i
1188
1320
  </div>
1189
1321
  </div>
1190
1322
  <div class="section" id="tokenregexp">
1191
- <span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
1323
+ <span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
1192
1324
  <div class="versionadded">
1193
1325
  <p><span class="versionmodified">New in version 5.0.1.</span></p>
1194
1326
  </div>
@@ -1201,21 +1333,21 @@ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't i
1201
1333
  <p class="last">This tokenizer can be used only with UTF-8. You can't use this
1202
1334
  tokenizer with EUC-JP, Shift_JIS and so on.</p>
1203
1335
  </div>
1204
- <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is a tokenizer for supporting regular expression
1336
+ <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is a tokenizer for supporting regular expression
1205
1337
  search by index.</p>
1206
1338
  <p>In general, regular expression search is evaluated as sequential
1207
1339
  search. But the following cases can be evaluated as index search:</p>
1208
1340
  <blockquote>
1209
1341
  <div><ul class="simple">
1210
- <li>Literal only case such as <tt class="docutils literal"><span class="pre">hello</span></tt></li>
1211
- <li>The beginning of text and literal case such as <tt class="docutils literal"><span class="pre">\A/home/alice</span></tt></li>
1212
- <li>The end of text and literal case such as <tt class="docutils literal"><span class="pre">\.txt\z</span></tt></li>
1342
+ <li>Literal only case such as <code class="docutils literal"><span class="pre">hello</span></code></li>
1343
+ <li>The beginning of text and literal case such as <code class="docutils literal"><span class="pre">\A/home/alice</span></code></li>
1344
+ <li>The end of text and literal case such as <code class="docutils literal"><span class="pre">\.txt\z</span></code></li>
1213
1345
  </ul>
1214
1346
  </div></blockquote>
1215
1347
  <p>In most cases, index search is faster than sequential search.</p>
1216
- <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is based on bigram tokenize method. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt>
1217
- adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) at the begging of text
1218
- and the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) to the end of text when you
1348
+ <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is based on bigram tokenize method. <code class="docutils literal"><span class="pre">TokenRegexp</span></code>
1349
+ adds the beginning of text mark (<code class="docutils literal"><span class="pre">U+FFEF</span></code>) at the beginning of text
1350
+ and the end of text mark (<code class="docutils literal"><span class="pre">U+FFF0</span></code>) to the end of text when you
1219
1351
  index text:</p>
1220
1352
  <p>Execution example:</p>
1221
1353
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
@@ -1228,194 +1360,112 @@ index text:</p>
1228
1360
  # [
1229
1361
  # {
1230
1362
  # &quot;position&quot;: 0,
1363
+ # &quot;force_prefix&quot;: false,
1231
1364
  # &quot;value&quot;: &quot;￯&quot;
1232
1365
  # },
1233
1366
  # {
1234
1367
  # &quot;position&quot;: 1,
1368
+ # &quot;force_prefix&quot;: false,
1235
1369
  # &quot;value&quot;: &quot;/h&quot;
1236
1370
  # },
1237
1371
  # {
1238
1372
  # &quot;position&quot;: 2,
1373
+ # &quot;force_prefix&quot;: false,
1239
1374
  # &quot;value&quot;: &quot;ho&quot;
1240
1375
  # },
1241
1376
  # {
1242
1377
  # &quot;position&quot;: 3,
1378
+ # &quot;force_prefix&quot;: false,
1243
1379
  # &quot;value&quot;: &quot;om&quot;
1244
1380
  # },
1245
1381
  # {
1246
1382
  # &quot;position&quot;: 4,
1383
+ # &quot;force_prefix&quot;: false,
1247
1384
  # &quot;value&quot;: &quot;me&quot;
1248
1385
  # },
1249
1386
  # {
1250
1387
  # &quot;position&quot;: 5,
1388
+ # &quot;force_prefix&quot;: false,
1251
1389
  # &quot;value&quot;: &quot;e/&quot;
1252
1390
  # },
1253
1391
  # {
1254
1392
  # &quot;position&quot;: 6,
1393
+ # &quot;force_prefix&quot;: false,
1255
1394
  # &quot;value&quot;: &quot;/a&quot;
1256
1395
  # },
1257
1396
  # {
1258
1397
  # &quot;position&quot;: 7,
1398
+ # &quot;force_prefix&quot;: false,
1259
1399
  # &quot;value&quot;: &quot;al&quot;
1260
1400
  # },
1261
1401
  # {
1262
1402
  # &quot;position&quot;: 8,
1403
+ # &quot;force_prefix&quot;: false,
1263
1404
  # &quot;value&quot;: &quot;li&quot;
1264
1405
  # },
1265
1406
  # {
1266
1407
  # &quot;position&quot;: 9,
1408
+ # &quot;force_prefix&quot;: false,
1267
1409
  # &quot;value&quot;: &quot;ic&quot;
1268
1410
  # },
1269
1411
  # {
1270
1412
  # &quot;position&quot;: 10,
1413
+ # &quot;force_prefix&quot;: false,
1271
1414
  # &quot;value&quot;: &quot;ce&quot;
1272
1415
  # },
1273
1416
  # {
1274
1417
  # &quot;position&quot;: 11,
1418
+ # &quot;force_prefix&quot;: false,
1275
1419
  # &quot;value&quot;: &quot;e/&quot;
1276
1420
  # },
1277
1421
  # {
1278
1422
  # &quot;position&quot;: 12,
1423
+ # &quot;force_prefix&quot;: false,
1279
1424
  # &quot;value&quot;: &quot;/t&quot;
1280
1425
  # },
1281
1426
  # {
1282
1427
  # &quot;position&quot;: 13,
1428
+ # &quot;force_prefix&quot;: false,
1283
1429
  # &quot;value&quot;: &quot;te&quot;
1284
1430
  # },
1285
1431
  # {
1286
1432
  # &quot;position&quot;: 14,
1433
+ # &quot;force_prefix&quot;: false,
1287
1434
  # &quot;value&quot;: &quot;es&quot;
1288
1435
  # },
1289
1436
  # {
1290
1437
  # &quot;position&quot;: 15,
1438
+ # &quot;force_prefix&quot;: false,
1291
1439
  # &quot;value&quot;: &quot;st&quot;
1292
1440
  # },
1293
1441
  # {
1294
1442
  # &quot;position&quot;: 16,
1443
+ # &quot;force_prefix&quot;: false,
1295
1444
  # &quot;value&quot;: &quot;t.&quot;
1296
1445
  # },
1297
1446
  # {
1298
1447
  # &quot;position&quot;: 17,
1448
+ # &quot;force_prefix&quot;: false,
1299
1449
  # &quot;value&quot;: &quot;.t&quot;
1300
1450
  # },
1301
1451
  # {
1302
1452
  # &quot;position&quot;: 18,
1453
+ # &quot;force_prefix&quot;: false,
1303
1454
  # &quot;value&quot;: &quot;tx&quot;
1304
1455
  # },
1305
1456
  # {
1306
1457
  # &quot;position&quot;: 19,
1458
+ # &quot;force_prefix&quot;: false,
1307
1459
  # &quot;value&quot;: &quot;xt&quot;
1308
1460
  # },
1309
1461
  # {
1310
1462
  # &quot;position&quot;: 20,
1463
+ # &quot;force_prefix&quot;: false,
1311
1464
  # &quot;value&quot;: &quot;t&quot;
1312
1465
  # },
1313
1466
  # {
1314
1467
  # &quot;position&quot;: 21,
1315
- # &quot;value&quot;: &quot;￰&quot;
1316
- # }
1317
- # ]
1318
- # ]
1319
- </pre></div>
1320
- </div>
1321
- <p>The beginning of text mark is used for the beginning of text search by
1322
- <tt class="docutils literal"><span class="pre">\A</span></tt>. If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query,
1323
- <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) as the
1324
- first token. Because the beginning of text mark must appear first,
1325
- you can get results of the beginning of text search.</p>
1326
- <p>Execution example:</p>
1327
- <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\A/home/alice/&quot; NormalizerAuto --mode GET
1328
- # [
1329
- # [
1330
- # 0,
1331
- # 1337566253.89858,
1332
- # 0.000355720520019531
1333
- # ],
1334
- # [
1335
- # {
1336
- # &quot;position&quot;: 0,
1337
- # &quot;value&quot;: &quot;￯&quot;
1338
- # },
1339
- # {
1340
- # &quot;position&quot;: 1,
1341
- # &quot;value&quot;: &quot;/h&quot;
1342
- # },
1343
- # {
1344
- # &quot;position&quot;: 2,
1345
- # &quot;value&quot;: &quot;ho&quot;
1346
- # },
1347
- # {
1348
- # &quot;position&quot;: 3,
1349
- # &quot;value&quot;: &quot;om&quot;
1350
- # },
1351
- # {
1352
- # &quot;position&quot;: 4,
1353
- # &quot;value&quot;: &quot;me&quot;
1354
- # },
1355
- # {
1356
- # &quot;position&quot;: 5,
1357
- # &quot;value&quot;: &quot;e/&quot;
1358
- # },
1359
- # {
1360
- # &quot;position&quot;: 6,
1361
- # &quot;value&quot;: &quot;/a&quot;
1362
- # },
1363
- # {
1364
- # &quot;position&quot;: 7,
1365
- # &quot;value&quot;: &quot;al&quot;
1366
- # },
1367
- # {
1368
- # &quot;position&quot;: 8,
1369
- # &quot;value&quot;: &quot;li&quot;
1370
- # },
1371
- # {
1372
- # &quot;position&quot;: 9,
1373
- # &quot;value&quot;: &quot;ic&quot;
1374
- # },
1375
- # {
1376
- # &quot;position&quot;: 10,
1377
- # &quot;value&quot;: &quot;ce&quot;
1378
- # },
1379
- # {
1380
- # &quot;position&quot;: 11,
1381
- # &quot;value&quot;: &quot;e/&quot;
1382
- # }
1383
- # ]
1384
- # ]
1385
- </pre></div>
1386
- </div>
1387
- <p>The end of text mark is used for the end of text search by <tt class="docutils literal"><span class="pre">\z</span></tt>.
1388
- If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query, <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds
1389
- the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) as the last token. The end of text
1390
- mark must appear at the end, so you can get results of the end of
1391
- text search.</p>
1392
- <p>Execution example:</p>
1393
- <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\.txt\\z&quot; NormalizerAuto --mode GET
1394
- # [
1395
- # [
1396
- # 0,
1397
- # 1337566253.89858,
1398
- # 0.000355720520019531
1399
- # ],
1400
- # [
1401
- # {
1402
- # &quot;position&quot;: 0,
1403
- # &quot;value&quot;: &quot;\\.&quot;
1404
- # },
1405
- # {
1406
- # &quot;position&quot;: 1,
1407
- # &quot;value&quot;: &quot;.t&quot;
1408
- # },
1409
- # {
1410
- # &quot;position&quot;: 2,
1411
- # &quot;value&quot;: &quot;tx&quot;
1412
- # },
1413
- # {
1414
- # &quot;position&quot;: 3,
1415
- # &quot;value&quot;: &quot;xt&quot;
1416
- # },
1417
- # {
1418
- # &quot;position&quot;: 5,
1468
+ # &quot;force_prefix&quot;: false,
1419
1469
  # &quot;value&quot;: &quot;￰&quot;
1420
1470
  # }
1421
1471
  # ]
@@ -1430,7 +1480,7 @@ text search.</p>
1430
1480
  </div>
1431
1481
  </div>
1432
1482
  </div>
1433
- <div class="sphinxsidebar">
1483
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1434
1484
  <div class="sphinxsidebarwrapper">
1435
1485
  <h3><a href="../index.html">Table Of Contents</a></h3>
1436
1486
  <ul>
@@ -1438,20 +1488,20 @@ text search.</p>
1438
1488
  <li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
1439
1489
  <li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is &quot;tokenize&quot;?</a></li>
1440
1490
  <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizers</a><ul>
1441
- <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
1442
- <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
1443
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
1444
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
1445
- <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
1446
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
1447
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
1448
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
1449
- <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
1450
- <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
1451
- <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
1452
- <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
1453
- <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
1454
- <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
1491
+ <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
1492
+ <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
1493
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
1494
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
1495
+ <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
1496
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
1497
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
1498
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
1499
+ <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
1500
+ <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
1501
+ <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
1502
+ <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
1503
+ <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
1504
+ <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
1455
1505
  </ul>
1456
1506
  </li>
1457
1507
  </ul>
@@ -1464,12 +1514,14 @@ text search.</p>
1464
1514
  <h4>Next topic</h4>
1465
1515
  <p class="topless"><a href="token_filters.html"
1466
1516
  title="next chapter">7.9. Token filters</a></p>
1467
- <h3>This Page</h3>
1468
- <ul class="this-page-menu">
1469
- <li><a href="../_sources/reference/tokenizers.txt"
1470
- rel="nofollow">Show Source</a></li>
1471
- </ul>
1472
- <div id="searchbox" style="display: none">
1517
+ <div role="note" aria-label="source link">
1518
+ <h3>This Page</h3>
1519
+ <ul class="this-page-menu">
1520
+ <li><a href="../_sources/reference/tokenizers.txt"
1521
+ rel="nofollow">Show Source</a></li>
1522
+ </ul>
1523
+ </div>
1524
+ <div id="searchbox" style="display: none" role="search">
1473
1525
  <h3>Quick search</h3>
1474
1526
  <form class="search" action="../search.html" method="get">
1475
1527
  <input type="text" name="q" />
@@ -1486,7 +1538,7 @@ text search.</p>
1486
1538
  </div>
1487
1539
  <div class="clearer"></div>
1488
1540
  </div>
1489
- <div class="related">
1541
+ <div class="related" role="navigation" aria-label="related navigation">
1490
1542
  <h3>Navigation</h3>
1491
1543
  <ul>
1492
1544
  <li class="right" style="margin-right: 10px">
@@ -1498,11 +1550,11 @@ text search.</p>
1498
1550
  <li class="right" >
1499
1551
  <a href="normalizers.html" title="7.7. Normalizers"
1500
1552
  >previous</a> |</li>
1501
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adb documentation</a> &raquo;</li>
1502
- <li><a href="../reference.html" >7. Reference manual</a> &raquo;</li>
1553
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> &raquo;</li>
1554
+ <li class="nav-item nav-item-1"><a href="../reference.html" >7. Reference manual</a> &raquo;</li>
1503
1555
  </ul>
1504
1556
  </div>
1505
- <div class="footer">
1557
+ <div class="footer" role="contentinfo">
1506
1558
  &copy; Copyright 2009-2015, Brazil, Inc.
1507
1559
  </div>
1508
1560
  </body>