rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (733) hide show
  1. checksums.yaml +8 -8
  2. data/.yardopts +1 -0
  3. data/Rakefile +1 -16
  4. data/example/bookmark.rb +1 -6
  5. data/example/index-html.rb +0 -1
  6. data/ext/groonga/extconf.rb +4 -7
  7. data/ext/groonga/rb-grn-array.c +1 -1
  8. data/ext/groonga/rb-grn-column.c +33 -67
  9. data/ext/groonga/rb-grn-context.c +5 -5
  10. data/ext/groonga/rb-grn-database.c +2 -2
  11. data/ext/groonga/rb-grn-double-array-trie.c +4 -2
  12. data/ext/groonga/rb-grn-encoding-support.c +7 -1
  13. data/ext/groonga/rb-grn-equal-operator.c +85 -0
  14. data/ext/groonga/rb-grn-exception.c +17 -0
  15. data/ext/groonga/rb-grn-expression.c +85 -43
  16. data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
  17. data/ext/groonga/rb-grn-greater-operator.c +85 -0
  18. data/ext/groonga/rb-grn-hash.c +1 -1
  19. data/ext/groonga/rb-grn-index-column.c +150 -11
  20. data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
  21. data/ext/groonga/rb-grn-less-operator.c +85 -0
  22. data/ext/groonga/rb-grn-logger.c +5 -5
  23. data/ext/groonga/rb-grn-match-operator.c +86 -0
  24. data/ext/groonga/rb-grn-normalizer.c +8 -1
  25. data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
  26. data/ext/groonga/rb-grn-object.c +170 -36
  27. data/ext/groonga/rb-grn-operator.c +395 -172
  28. data/ext/groonga/rb-grn-patricia-trie.c +10 -8
  29. data/ext/groonga/rb-grn-plugin.c +51 -3
  30. data/ext/groonga/rb-grn-prefix-operator.c +86 -0
  31. data/ext/groonga/rb-grn-procedure-type.c +4 -0
  32. data/ext/groonga/rb-grn-query-logger.c +4 -4
  33. data/ext/groonga/rb-grn-regexp-operator.c +85 -0
  34. data/ext/groonga/rb-grn-snippet.c +1 -1
  35. data/ext/groonga/rb-grn-table-key-support.c +9 -5
  36. data/ext/groonga/rb-grn-table.c +52 -66
  37. data/ext/groonga/rb-grn-type.c +1 -1
  38. data/ext/groonga/rb-grn-utils.c +22 -3
  39. data/ext/groonga/rb-grn.h +31 -4
  40. data/ext/groonga/rb-groonga.c +9 -9
  41. data/lib/1.9/groonga.so +0 -0
  42. data/lib/2.0/groonga.so +0 -0
  43. data/lib/2.1/groonga.so +0 -0
  44. data/lib/2.2/groonga.so +0 -0
  45. data/lib/groonga/context.rb +31 -0
  46. data/lib/groonga/expression-builder.rb +14 -1
  47. data/lib/groonga/record.rb +10 -8
  48. data/lib/groonga/schema.rb +3 -1
  49. data/rroonga-build.rb +2 -2
  50. data/rroonga.gemspec +3 -3
  51. data/test/groonga-test-utils.rb +4 -0
  52. data/test/test-column.rb +28 -26
  53. data/test/test-exception.rb +1 -0
  54. data/test/test-expression-builder.rb +83 -1
  55. data/test/test-expression.rb +80 -48
  56. data/test/test-index-column.rb +102 -29
  57. data/test/test-normalizer.rb +35 -29
  58. data/test/test-operator.rb +214 -0
  59. data/test/test-plugin.rb +24 -6
  60. data/test/test-procedure.rb +29 -0
  61. data/test/test-schema-type.rb +14 -0
  62. data/test/test-table-select-mecab.rb +1 -4
  63. data/test/test-table.rb +7 -0
  64. data/test/test-token-regexp.rb +30 -0
  65. data/test/test-type.rb +24 -0
  66. data/vendor/local/bin/grndb.exe +0 -0
  67. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  68. data/vendor/local/bin/groonga.exe +0 -0
  69. data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
  70. data/vendor/local/bin/libgroonga-0.dll +0 -0
  71. data/vendor/local/bin/libmecab-1.dll +0 -0
  72. data/vendor/local/bin/libmsgpack-3.dll +0 -0
  73. data/vendor/local/bin/libmsgpackc-2.dll +0 -0
  74. data/vendor/local/bin/libonig-5.dll +0 -0
  75. data/vendor/local/bin/libstdc++-6.dll +0 -0
  76. data/vendor/local/bin/lz4.exe +0 -0
  77. data/vendor/local/bin/lz4c.exe +0 -0
  78. data/vendor/local/bin/lz4cat +0 -0
  79. data/vendor/local/bin/mecab-config +2 -2
  80. data/vendor/local/bin/mecab.exe +0 -0
  81. data/vendor/local/bin/onig-config +1 -1
  82. data/vendor/local/bin/zlib1.dll +0 -0
  83. data/vendor/local/etc/groonga/groonga.conf +1 -1
  84. data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
  85. data/vendor/local/include/groonga/groonga.h +1 -0
  86. data/vendor/local/include/groonga/groonga/expr.h +2 -0
  87. data/vendor/local/include/groonga/groonga/groonga.h +32 -5
  88. data/vendor/local/include/groonga/groonga/ii.h +7 -0
  89. data/vendor/local/include/groonga/groonga/obj.h +37 -0
  90. data/vendor/local/include/groonga/groonga/scorer.h +95 -0
  91. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  92. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  93. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  94. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
  95. data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
  96. data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
  97. data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
  98. data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
  99. data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
  100. data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
  101. data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
  102. data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
  103. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
  104. data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
  105. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
  106. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  107. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  108. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  109. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
  110. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  111. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  112. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  113. data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
  114. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  115. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  116. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  117. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
  118. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  119. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  120. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  121. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
  122. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
  123. data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
  124. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
  125. data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
  126. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
  127. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
  128. data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
  129. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
  130. data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
  131. data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
  132. data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
  133. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
  134. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
  135. data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
  136. data/vendor/local/lib/libgroonga.a +0 -0
  137. data/vendor/local/lib/libgroonga.dll.a +0 -0
  138. data/vendor/local/lib/libgroonga.la +2 -2
  139. data/vendor/local/lib/liblz4.a +0 -0
  140. data/vendor/local/lib/liblz4.dll +0 -0
  141. data/vendor/local/lib/liblz4.dll.1 +0 -0
  142. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  143. data/vendor/local/lib/libmecab.a +0 -0
  144. data/vendor/local/lib/libmecab.dll.a +0 -0
  145. data/vendor/local/lib/libmecab.la +2 -2
  146. data/vendor/local/lib/libmsgpack.a +0 -0
  147. data/vendor/local/lib/libmsgpack.dll.a +0 -0
  148. data/vendor/local/lib/libmsgpack.la +2 -2
  149. data/vendor/local/lib/libmsgpackc.a +0 -0
  150. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  151. data/vendor/local/lib/libmsgpackc.la +2 -2
  152. data/vendor/local/lib/libonig.a +0 -0
  153. data/vendor/local/lib/libonig.dll.a +0 -0
  154. data/vendor/local/lib/libonig.la +2 -2
  155. data/vendor/local/lib/libz.a +0 -0
  156. data/vendor/local/lib/libz.dll.a +0 -0
  157. data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
  158. data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
  159. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  160. data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
  161. data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
  162. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  163. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  164. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  165. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  166. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  167. data/vendor/local/sbin/groonga-httpd-restart +1 -1
  168. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  169. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  170. data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
  171. data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
  172. data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
  173. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
  174. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  175. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
  176. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
  177. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  178. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
  179. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  180. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
  181. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  182. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
  183. data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
  184. data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
  185. data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
  186. data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
  187. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
  188. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
  189. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
  190. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
  191. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
  192. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
  193. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
  194. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
  195. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
  196. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
  197. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
  198. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  199. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
  200. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
  201. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  202. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
  203. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
  204. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
  205. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
  206. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
  207. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
  208. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
  209. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
  210. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
  211. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
  212. data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
  213. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
  214. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
  215. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
  216. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
  217. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
  218. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
  219. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
  220. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
  221. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  222. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  223. data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
  224. data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
  225. data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
  226. data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
  227. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
  228. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  229. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  230. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
  231. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
  232. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
  233. data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
  234. data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
  235. data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
  236. data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  237. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
  238. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
  239. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
  240. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  241. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  242. data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
  243. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
  244. data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
  245. data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
  246. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
  247. data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
  248. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  249. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  250. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
  251. data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
  252. data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
  253. data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
  254. data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
  255. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
  256. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
  257. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
  258. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
  259. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
  260. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
  261. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
  262. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
  263. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
  264. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
  265. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
  266. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
  267. data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
  268. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
  269. data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
  270. data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
  271. data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
  272. data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
  273. data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
  274. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
  275. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
  276. data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
  277. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
  278. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
  279. data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
  280. data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
  281. data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
  282. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
  283. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
  284. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
  285. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
  286. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
  287. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
  288. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
  289. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
  290. data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
  291. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  292. data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
  293. data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
  294. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
  295. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
  296. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
  297. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
  298. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
  299. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
  300. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
  301. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
  302. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
  303. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
  304. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
  305. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
  306. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
  307. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
  308. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
  309. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
  310. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
  311. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
  312. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
  313. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
  314. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
  315. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
  316. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
  317. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
  318. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
  319. data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
  320. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
  321. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
  322. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
  323. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
  324. data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
  325. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
  326. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
  327. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
  328. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
  329. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
  330. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
  331. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
  332. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
  333. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
  334. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
  335. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
  336. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
  337. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
  338. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
  339. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
  340. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
  341. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
  342. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
  343. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
  344. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
  345. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
  346. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
  347. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
  348. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
  349. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
  350. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
  351. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
  352. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
  353. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
  354. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  355. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
  356. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
  357. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
  358. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
  359. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
  360. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
  361. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
  362. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
  363. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
  364. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
  365. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
  366. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
  367. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
  368. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
  369. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
  370. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
  371. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
  372. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
  373. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
  374. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  375. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
  376. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
  377. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
  378. data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
  379. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
  380. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
  381. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
  382. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
  383. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
  384. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
  385. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
  386. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
  387. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
  388. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
  389. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
  390. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
  391. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
  392. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
  393. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
  394. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
  395. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
  396. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
  397. data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
  398. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
  399. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
  400. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
  401. data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
  402. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
  403. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
  404. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
  405. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
  406. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
  407. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
  408. data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
  409. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
  410. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
  411. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
  412. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
  413. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
  414. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
  415. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
  416. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
  417. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
  418. data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
  419. data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
  420. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  421. data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
  422. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
  423. data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
  424. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
  425. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
  426. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
  427. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
  428. data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
  429. data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
  430. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
  431. data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
  432. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
  433. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  434. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  435. data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
  436. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
  437. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
  438. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
  439. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
  440. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
  441. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
  442. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
  443. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
  444. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
  445. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
  446. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
  447. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  448. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
  449. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
  450. data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
  451. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
  452. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  453. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
  454. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
  455. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  456. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
  457. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  458. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
  459. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  460. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
  461. data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
  462. data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
  463. data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
  464. data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
  465. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
  466. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
  467. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
  468. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
  469. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
  470. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
  471. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
  472. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
  473. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
  474. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
  475. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
  476. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  477. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
  478. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
  479. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  480. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
  481. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
  482. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
  483. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
  484. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
  485. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
  486. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
  487. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
  488. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
  489. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
  490. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
  491. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
  492. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
  493. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
  494. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
  495. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
  496. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
  497. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
  498. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
  499. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  500. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  501. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
  502. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
  503. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
  504. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
  505. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
  506. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  507. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  508. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
  509. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
  510. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
  511. data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
  512. data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
  513. data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
  514. data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  515. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
  516. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
  517. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
  518. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  519. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  520. data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
  521. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
  522. data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
  523. data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
  524. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
  525. data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
  526. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  527. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  528. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
  529. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
  530. data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
  531. data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
  532. data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
  533. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
  534. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
  535. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
  536. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
  537. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
  538. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
  539. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
  540. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
  541. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
  542. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
  543. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
  544. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
  545. data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
  546. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
  547. data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
  548. data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
  549. data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
  550. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
  551. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
  552. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
  553. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
  554. data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
  555. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
  556. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
  557. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
  558. data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
  559. data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
  560. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
  561. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
  562. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
  563. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
  564. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
  565. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
  566. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
  567. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
  568. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
  569. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  570. data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
  571. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
  572. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
  573. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
  574. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
  575. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
  576. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
  577. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
  578. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
  579. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
  580. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
  581. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
  582. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
  583. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
  584. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
  585. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
  586. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
  587. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
  588. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
  589. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
  590. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
  591. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
  592. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
  593. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
  594. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
  595. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
  596. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
  597. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
  598. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
  599. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
  600. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
  601. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
  602. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
  603. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
  604. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
  605. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
  606. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
  607. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
  608. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
  609. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
  610. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
  611. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
  612. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
  613. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
  614. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
  615. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
  616. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
  617. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
  618. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
  619. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
  620. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
  621. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
  622. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
  623. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
  624. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
  625. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
  626. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
  627. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
  628. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
  629. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
  630. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
  631. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
  632. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
  633. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
  634. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
  635. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
  636. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
  637. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
  638. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
  639. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
  640. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
  641. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
  642. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
  643. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
  644. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
  645. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
  646. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
  647. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
  648. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
  649. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
  650. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
  651. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
  652. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  653. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
  654. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
  655. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
  656. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
  657. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
  658. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
  659. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
  660. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
  661. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
  662. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
  663. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
  664. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
  665. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
  666. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
  667. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
  668. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
  669. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
  670. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
  671. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
  672. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
  673. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
  674. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
  675. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
  676. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
  677. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
  678. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
  679. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
  680. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
  681. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
  682. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
  683. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
  684. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
  685. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
  686. data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
  687. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
  688. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
  689. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
  690. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
  691. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
  692. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
  693. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
  694. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
  695. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
  696. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
  697. data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
  698. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  699. data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
  700. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
  701. data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
  702. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
  703. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
  704. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
  705. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
  706. data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
  707. data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
  708. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
  709. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
  710. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
  711. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  712. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  713. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
  714. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
  715. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
  716. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
  717. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
  718. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
  719. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
  720. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
  721. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
  722. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
  723. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
  724. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
  725. data/vendor/local/share/license/mruby/README.md +2 -2
  726. data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
  727. data/vendor/local/share/man/man1/groonga.1 +7210 -3029
  728. metadata +75 -11
  729. data/doc/text/news.textile +0 -1217
  730. data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
  731. data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
  732. data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
  733. data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.8. Tokenizers &mdash; Groonga v5.0.0 documentation</title>
10
+ <title>7.8. Tokenizers &mdash; Groonga v5.0.1-42-g4d10df1 documentation</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.0',
18
+ VERSION: '5.0.1-42-g4d10df1',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -25,12 +25,12 @@
25
25
  <script type="text/javascript" src="../_static/underscore.js"></script>
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
28
- <link rel="top" title="Groonga v5.0.0 documentation" href="../index.html" />
28
+ <link rel="top" title="Groonga v5.0.1-42-g4d10df1 documentation" href="../index.html" />
29
29
  <link rel="up" title="7. Reference manual" href="../reference.html" />
30
30
  <link rel="next" title="7.9. Token filters" href="token_filters.html" />
31
31
  <link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
32
32
  </head>
33
- <body role="document">
33
+ <body>
34
34
  <div class="header">
35
35
  <h1 class="title">
36
36
  <a id="top-link" href="../index.html">
@@ -48,7 +48,7 @@
48
48
  </div>
49
49
 
50
50
 
51
- <div class="related" role="navigation" aria-label="related navigation">
51
+ <div class="related">
52
52
  <h3>Navigation</h3>
53
53
  <ul>
54
54
  <li class="right" style="margin-right: 10px">
@@ -60,7 +60,7 @@
60
60
  <li class="right" >
61
61
  <a href="normalizers.html" title="7.7. Normalizers"
62
62
  accesskey="P">previous</a> |</li>
63
- <li><a href="../index.html">Groonga v5.0.0 documentation</a> &raquo;</li>
63
+ <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> &raquo;</li>
64
64
  <li><a href="../reference.html" accesskey="U">7. Reference manual</a> &raquo;</li>
65
65
  </ul>
66
66
  </div>
@@ -68,48 +68,1408 @@
68
68
  <div class="document">
69
69
  <div class="documentwrapper">
70
70
  <div class="bodywrapper">
71
- <div class="body" role="main">
71
+ <div class="body">
72
72
 
73
73
  <div class="section" id="tokenizers">
74
74
  <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
75
- <p>TODO: Write me.</p>
76
- <p>Here are the list of built-in tokenizers:</p>
77
- <ul class="simple">
78
- <li>TokenBigram</li>
79
- <li>TokenBigramSplitSymbol</li>
80
- <li>TokenBigramSplitSymbolAlpha</li>
81
- <li>TokenBigramSplitSymbolAlphaDigit</li>
82
- <li>TokenBigramIgnoreBlank</li>
83
- <li>TokenBigramIgnoreBlankSplitSymbol</li>
84
- <li>TokenBigramIgnoreBlankSplitAlpha</li>
85
- <li>TokenBigramIgnoreBlankSplitAlphaDigit</li>
86
- <li>TokenDelimit</li>
87
- <li>TokenDelimitNull</li>
88
- <li>TokenTrigram</li>
89
- <li>TokenUnigram</li>
75
+ <div class="section" id="summary">
76
+ <h2>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h2>
77
+ <p>Groonga has a tokenizer module that tokenizes text. It is used in
78
+ the following cases:</p>
79
+ <blockquote>
80
+ <div><ul>
81
+ <li><p class="first">Indexing text</p>
82
+ <div class="figure align-center">
83
+ <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
84
+ <p class="caption">Tokenizer is used when indexing text.</p>
85
+ </div>
86
+ </li>
87
+ <li><p class="first">Searching by query</p>
88
+ <div class="figure align-center">
89
+ <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
90
+ <p class="caption">Tokenizer is used when searching by query.</p>
91
+ </div>
92
+ </li>
93
+ </ul>
94
+ </div></blockquote>
95
+ <p>The tokenizer is an important module for full-text search. You can change the
96
+ trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
97
+ tokenizer.</p>
98
+ <p>Normally, <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> is a suitable tokenizer. If you don't
99
+ know much about tokenizer, it's recommended that you choose
100
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>.</p>
101
+ <p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> and
102
+ <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a>. Here is an example to
103
+ try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> tokenizer by
104
+ <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a>:</p>
105
+ <p>Execution example:</p>
106
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
107
+ # [
108
+ # [
109
+ # 0,
110
+ # 1337566253.89858,
111
+ # 0.000355720520019531
112
+ # ],
113
+ # [
114
+ # {
115
+ # &quot;position&quot;: 0,
116
+ # &quot;value&quot;: &quot;He&quot;
117
+ # },
118
+ # {
119
+ # &quot;position&quot;: 1,
120
+ # &quot;value&quot;: &quot;el&quot;
121
+ # },
122
+ # {
123
+ # &quot;position&quot;: 2,
124
+ # &quot;value&quot;: &quot;ll&quot;
125
+ # },
126
+ # {
127
+ # &quot;position&quot;: 3,
128
+ # &quot;value&quot;: &quot;lo&quot;
129
+ # },
130
+ # {
131
+ # &quot;position&quot;: 4,
132
+ # &quot;value&quot;: &quot;o &quot;
133
+ # },
134
+ # {
135
+ # &quot;position&quot;: 5,
136
+ # &quot;value&quot;: &quot; W&quot;
137
+ # },
138
+ # {
139
+ # &quot;position&quot;: 6,
140
+ # &quot;value&quot;: &quot;Wo&quot;
141
+ # },
142
+ # {
143
+ # &quot;position&quot;: 7,
144
+ # &quot;value&quot;: &quot;or&quot;
145
+ # },
146
+ # {
147
+ # &quot;position&quot;: 8,
148
+ # &quot;value&quot;: &quot;rl&quot;
149
+ # },
150
+ # {
151
+ # &quot;position&quot;: 9,
152
+ # &quot;value&quot;: &quot;ld&quot;
153
+ # },
154
+ # {
155
+ # &quot;position&quot;: 10,
156
+ # &quot;value&quot;: &quot;d&quot;
157
+ # }
158
+ # ]
159
+ # ]
160
+ </pre></div>
161
+ </div>
162
+ </div>
163
+ <div class="section" id="what-is-tokenize">
164
+ <h2>7.8.2. What is &quot;tokenize&quot;?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
165
+ <p>&quot;tokenize&quot; is the process that extracts zero or more tokens from a
166
+ text. There are some &quot;tokenize&quot; methods.</p>
167
+ <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
168
+ bigram tokenize method:</p>
169
+ <blockquote>
170
+ <div><ul class="simple">
171
+ <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
172
+ <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
173
+ <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
174
+ <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
175
+ <li><tt class="docutils literal"><span class="pre">o_</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
176
+ <li><tt class="docutils literal"><span class="pre">_W</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
177
+ <li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
178
+ <li><tt class="docutils literal"><span class="pre">or</span></tt></li>
179
+ <li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
180
+ <li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
181
+ </ul>
182
+ </div></blockquote>
183
+ <p>In the above example, 10 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
184
+ <span class="pre">World</span></tt>.</p>
185
+ <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
186
+ white-space-separate tokenize method:</p>
187
+ <blockquote>
188
+ <div><ul class="simple">
189
+ <li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
190
+ <li><tt class="docutils literal"><span class="pre">World</span></tt></li>
191
+ </ul>
192
+ </div></blockquote>
193
+ <p>In the above example, 2 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
194
+ <span class="pre">World</span></tt>.</p>
195
+ <p>Token is used as search key. You can find indexed documents only by
196
+ tokens that are extracted by used tokenize method. For example, you
197
+ can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with bigram tokenize method but you
198
+ can't find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with white-space-separate tokenize
199
+ method. Because white-space-separate tokenize method doesn't extract
200
+ <tt class="docutils literal"><span class="pre">ll</span></tt> token. It just extracts <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt> tokens.</p>
201
+ <p>In general, tokenize method that generates small tokens increases
202
+ recall but decreases precision. Tokenize method that generates large
203
+ tokens increases precision but decreases recall.</p>
204
+ <p>For example, we can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> and <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with
205
+ bigram tokenize method. <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is a noise for people who
206
+ want to search for &quot;logical or&quot;. It means that precision is
207
+ decreased. But recall is increased.</p>
208
+ <p>We can find only <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with white-space-separate
209
+ tokenize method. Because <tt class="docutils literal"><span class="pre">World</span></tt> is tokenized to one token <tt class="docutils literal"><span class="pre">World</span></tt>
210
+ with white-space-separate tokenize method. It means that precision is
211
+ increased for people who want to search for &quot;logical or&quot;. But recall is
212
+ decreased because <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> that contains <tt class="docutils literal"><span class="pre">or</span></tt> isn't found.</p>
213
+ </div>
214
+ <div class="section" id="built-in-tokenizsers">
215
+ <h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
216
+ <p>Here is a list of built-in tokenizers:</p>
217
+ <blockquote>
218
+ <div><ul class="simple">
219
+ <li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
220
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
221
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
222
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
223
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
224
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
225
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></tt></li>
226
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></tt></li>
227
+ <li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
228
+ <li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
229
+ <li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
230
+ <li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
231
+ <li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
232
+ <li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
90
233
  </ul>
234
+ </div></blockquote>
235
+ <div class="section" id="tokenbigram">
236
+ <span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
237
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> is a bigram based tokenizer. It's recommended to use
238
+ this tokenizer for most cases.</p>
239
+ <p>Bigram tokenize method tokenizes a text to two adjacent characters
240
+ tokens. For example, <tt class="docutils literal"><span class="pre">Hello</span></tt> is tokenized to the following tokens:</p>
241
+ <blockquote>
242
+ <div><ul class="simple">
243
+ <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
244
+ <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
245
+ <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
246
+ <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
247
+ </ul>
248
+ </div></blockquote>
249
+ <p>Bigram tokenize method is good for recall because you can find all
250
+ texts by a query that consists of two or more characters.</p>
251
+ <p>In general, you can't find all texts by a query that consists of one
252
+ character because one character token doesn't exist. But you can find
253
+ all texts by a query that consists of one character in Groonga, because
254
+ Groonga finds tokens that start with the query by predictive search. For
255
+ example, Groonga can find <tt class="docutils literal"><span class="pre">ll</span></tt> and <tt class="docutils literal"><span class="pre">lo</span></tt> tokens by <tt class="docutils literal"><span class="pre">l</span></tt> query.</p>
256
+ <p>Bigram tokenize method isn't good for precision because you can find
257
+ texts that include the query inside a word. For example, you can find <tt class="docutils literal"><span class="pre">world</span></tt>
258
+ by <tt class="docutils literal"><span class="pre">or</span></tt>. This is more sensitive for ASCII only languages rather than
259
+ non-ASCII languages. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> has a solution for this problem
260
+ described below.</p>
261
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior is different when it works with any
262
+ <a class="reference internal" href="normalizers.html"><em>Normalizers</em></a>.</p>
263
+ <p>If no normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses pure bigram (all tokens
264
+ except the last token have two characters) tokenize method:</p>
265
+ <p>Execution example:</p>
266
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
267
+ # [
268
+ # [
269
+ # 0,
270
+ # 1337566253.89858,
271
+ # 0.000355720520019531
272
+ # ],
273
+ # [
274
+ # {
275
+ # &quot;position&quot;: 0,
276
+ # &quot;value&quot;: &quot;He&quot;
277
+ # },
278
+ # {
279
+ # &quot;position&quot;: 1,
280
+ # &quot;value&quot;: &quot;el&quot;
281
+ # },
282
+ # {
283
+ # &quot;position&quot;: 2,
284
+ # &quot;value&quot;: &quot;ll&quot;
285
+ # },
286
+ # {
287
+ # &quot;position&quot;: 3,
288
+ # &quot;value&quot;: &quot;lo&quot;
289
+ # },
290
+ # {
291
+ # &quot;position&quot;: 4,
292
+ # &quot;value&quot;: &quot;o &quot;
293
+ # },
294
+ # {
295
+ # &quot;position&quot;: 5,
296
+ # &quot;value&quot;: &quot; W&quot;
297
+ # },
298
+ # {
299
+ # &quot;position&quot;: 6,
300
+ # &quot;value&quot;: &quot;Wo&quot;
301
+ # },
302
+ # {
303
+ # &quot;position&quot;: 7,
304
+ # &quot;value&quot;: &quot;or&quot;
305
+ # },
306
+ # {
307
+ # &quot;position&quot;: 8,
308
+ # &quot;value&quot;: &quot;rl&quot;
309
+ # },
310
+ # {
311
+ # &quot;position&quot;: 9,
312
+ # &quot;value&quot;: &quot;ld&quot;
313
+ # },
314
+ # {
315
+ # &quot;position&quot;: 10,
316
+ # &quot;value&quot;: &quot;d&quot;
317
+ # }
318
+ # ]
319
+ # ]
320
+ </pre></div>
321
+ </div>
322
+ <p>If normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses white-space-separate like
323
+ tokenize method for ASCII characters. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram
324
+ tokenize method for non-ASCII characters.</p>
325
+ <p>You may be confused with this combined behavior. But it's reasonable
326
+ for most use cases such as English text (only ASCII characters) and
327
+ Japanese text (ASCII and non-ASCII characters are mixed).</p>
328
+ <p>Most languages that consist of only ASCII characters use white-space for
329
+ word separator. White-space-separate tokenize method is suitable for
330
+ the case.</p>
331
+ <p>Languages that consist of non-ASCII characters don't use white-space for
332
+ word separator. Bigram tokenize method is suitable for the case.</p>
333
+ <p>Mixed tokenize method is suitable for mixed language case.</p>
334
+ <p>If you want to use bigram tokenize method for ASCII character, see
335
+ <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> type tokenizers such as
336
+ <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a>.</p>
337
+ <p>Let's confirm <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior by example.</p>
338
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses one or more white-spaces as token delimiter for
339
+ ASCII characters:</p>
340
+ <p>Execution example:</p>
341
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
342
+ # [
343
+ # [
344
+ # 0,
345
+ # 1337566253.89858,
346
+ # 0.000355720520019531
347
+ # ],
348
+ # [
349
+ # {
350
+ # &quot;position&quot;: 0,
351
+ # &quot;value&quot;: &quot;hello&quot;
352
+ # },
353
+ # {
354
+ # &quot;position&quot;: 1,
355
+ # &quot;value&quot;: &quot;world&quot;
356
+ # }
357
+ # ]
358
+ # ]
359
+ </pre></div>
360
+ </div>
361
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses character type change as token delimiter for
362
+ ASCII characters. Character type is one of the following:</p>
363
+ <blockquote>
364
+ <div><ul class="simple">
365
+ <li>Alphabet</li>
366
+ <li>Digit</li>
367
+ <li>Symbol (such as <tt class="docutils literal"><span class="pre">(</span></tt>, <tt class="docutils literal"><span class="pre">)</span></tt> and <tt class="docutils literal"><span class="pre">!</span></tt>)</li>
368
+ <li>Hiragana</li>
369
+ <li>Katakana</li>
370
+ <li>Kanji</li>
371
+ <li>Others</li>
372
+ </ul>
373
+ </div></blockquote>
374
+ <p>The following example shows two token delimiters:</p>
375
+ <blockquote>
376
+ <div><ul class="simple">
377
+ <li>between <tt class="docutils literal"><span class="pre">100</span></tt> (digits) and <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets)</li>
379
+ <li>between <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets) and <tt class="docutils literal"><span class="pre">!!!</span></tt> (symbols)</li>
379
+ </ul>
380
+ </div></blockquote>
381
+ <p>Execution example:</p>
382
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;100cents!!!&quot; NormalizerAuto
383
+ # [
384
+ # [
385
+ # 0,
386
+ # 1337566253.89858,
387
+ # 0.000355720520019531
388
+ # ],
389
+ # [
390
+ # {
391
+ # &quot;position&quot;: 0,
392
+ # &quot;value&quot;: &quot;100&quot;
393
+ # },
394
+ # {
395
+ # &quot;position&quot;: 1,
396
+ # &quot;value&quot;: &quot;cents&quot;
397
+ # },
398
+ # {
399
+ # &quot;position&quot;: 2,
400
+ # &quot;value&quot;: &quot;!!!&quot;
401
+ # }
402
+ # ]
403
+ # ]
404
+ </pre></div>
405
+ </div>
406
+ <p>Here is an example that <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram tokenize method
407
+ for non-ASCII characters.</p>
408
+ <p>Execution example:</p>
409
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
410
+ # [
411
+ # [
412
+ # 0,
413
+ # 1337566253.89858,
414
+ # 0.000355720520019531
415
+ # ],
416
+ # [
417
+ # {
418
+ # &quot;position&quot;: 0,
419
+ # &quot;value&quot;: &quot;日本&quot;
420
+ # },
421
+ # {
422
+ # &quot;position&quot;: 1,
423
+ # &quot;value&quot;: &quot;本語&quot;
424
+ # },
425
+ # {
426
+ # &quot;position&quot;: 2,
427
+ # &quot;value&quot;: &quot;語の&quot;
428
+ # },
429
+ # {
430
+ # &quot;position&quot;: 3,
431
+ # &quot;value&quot;: &quot;の勉&quot;
432
+ # },
433
+ # {
434
+ # &quot;position&quot;: 4,
435
+ # &quot;value&quot;: &quot;勉強&quot;
436
+ # },
437
+ # {
438
+ # &quot;position&quot;: 5,
439
+ # &quot;value&quot;: &quot;強&quot;
440
+ # }
441
+ # ]
442
+ # ]
443
+ </pre></div>
444
+ </div>
445
+ </div>
446
+ <div class="section" id="tokenbigramsplitsymbol">
447
+ <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
448
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
449
+ difference between them is symbol handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt>
450
+ tokenizes symbols by bigram tokenize method:</p>
451
+ <p>Execution example:</p>
452
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
453
+ # [
454
+ # [
455
+ # 0,
456
+ # 1337566253.89858,
457
+ # 0.000355720520019531
458
+ # ],
459
+ # [
460
+ # {
461
+ # &quot;position&quot;: 0,
462
+ # &quot;value&quot;: &quot;100&quot;
463
+ # },
464
+ # {
465
+ # &quot;position&quot;: 1,
466
+ # &quot;value&quot;: &quot;cents&quot;
467
+ # },
468
+ # {
469
+ # &quot;position&quot;: 2,
470
+ # &quot;value&quot;: &quot;!!&quot;
471
+ # },
472
+ # {
473
+ # &quot;position&quot;: 3,
474
+ # &quot;value&quot;: &quot;!!&quot;
475
+ # },
476
+ # {
477
+ # &quot;position&quot;: 4,
478
+ # &quot;value&quot;: &quot;!&quot;
479
+ # }
480
+ # ]
481
+ # ]
482
+ </pre></div>
483
+ </div>
484
+ </div>
485
+ <div class="section" id="tokenbigramsplitsymbolalpha">
486
+ <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
487
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
488
+ difference between them is symbol and alphabet
489
+ handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> tokenizes symbols and
490
+ alphabets by bigram tokenize method:</p>
491
+ <p>Execution example:</p>
492
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
493
+ # [
494
+ # [
495
+ # 0,
496
+ # 1337566253.89858,
497
+ # 0.000355720520019531
498
+ # ],
499
+ # [
500
+ # {
501
+ # &quot;position&quot;: 0,
502
+ # &quot;value&quot;: &quot;100&quot;
503
+ # },
504
+ # {
505
+ # &quot;position&quot;: 1,
506
+ # &quot;value&quot;: &quot;ce&quot;
507
+ # },
508
+ # {
509
+ # &quot;position&quot;: 2,
510
+ # &quot;value&quot;: &quot;en&quot;
511
+ # },
512
+ # {
513
+ # &quot;position&quot;: 3,
514
+ # &quot;value&quot;: &quot;nt&quot;
515
+ # },
516
+ # {
517
+ # &quot;position&quot;: 4,
518
+ # &quot;value&quot;: &quot;ts&quot;
519
+ # },
520
+ # {
521
+ # &quot;position&quot;: 5,
522
+ # &quot;value&quot;: &quot;s!&quot;
523
+ # },
524
+ # {
525
+ # &quot;position&quot;: 6,
526
+ # &quot;value&quot;: &quot;!!&quot;
527
+ # },
528
+ # {
529
+ # &quot;position&quot;: 7,
530
+ # &quot;value&quot;: &quot;!!&quot;
531
+ # },
532
+ # {
533
+ # &quot;position&quot;: 8,
534
+ # &quot;value&quot;: &quot;!&quot;
535
+ # }
536
+ # ]
537
+ # ]
538
+ </pre></div>
539
+ </div>
540
+ </div>
541
+ <div class="section" id="tokenbigramsplitsymbolalphadigit">
542
+ <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
543
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> is similar to
544
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference between them is symbol, alphabet
545
+ and digit handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> tokenizes
546
+ symbols, alphabets and digits by bigram tokenize method. It means that
547
+ all characters are tokenized by bigram tokenize method:</p>
548
+ <p>Execution example:</p>
549
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
550
+ # [
551
+ # [
552
+ # 0,
553
+ # 1337566253.89858,
554
+ # 0.000355720520019531
555
+ # ],
556
+ # [
557
+ # {
558
+ # &quot;position&quot;: 0,
559
+ # &quot;value&quot;: &quot;10&quot;
560
+ # },
561
+ # {
562
+ # &quot;position&quot;: 1,
563
+ # &quot;value&quot;: &quot;00&quot;
564
+ # },
565
+ # {
566
+ # &quot;position&quot;: 2,
567
+ # &quot;value&quot;: &quot;0c&quot;
568
+ # },
569
+ # {
570
+ # &quot;position&quot;: 3,
571
+ # &quot;value&quot;: &quot;ce&quot;
572
+ # },
573
+ # {
574
+ # &quot;position&quot;: 4,
575
+ # &quot;value&quot;: &quot;en&quot;
576
+ # },
577
+ # {
578
+ # &quot;position&quot;: 5,
579
+ # &quot;value&quot;: &quot;nt&quot;
580
+ # },
581
+ # {
582
+ # &quot;position&quot;: 6,
583
+ # &quot;value&quot;: &quot;ts&quot;
584
+ # },
585
+ # {
586
+ # &quot;position&quot;: 7,
587
+ # &quot;value&quot;: &quot;s!&quot;
588
+ # },
589
+ # {
590
+ # &quot;position&quot;: 8,
591
+ # &quot;value&quot;: &quot;!!&quot;
592
+ # },
593
+ # {
594
+ # &quot;position&quot;: 9,
595
+ # &quot;value&quot;: &quot;!!&quot;
596
+ # },
597
+ # {
598
+ # &quot;position&quot;: 10,
599
+ # &quot;value&quot;: &quot;!&quot;
600
+ # }
601
+ # ]
602
+ # ]
603
+ </pre></div>
604
+ </div>
605
+ </div>
606
+ <div class="section" id="tokenbigramignoreblank">
607
+ <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
608
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
609
+ difference between them is blank handling. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>
610
+ ignores white-spaces in continuous symbols and non-ASCII characters.</p>
611
+ <p>You can find difference of them by <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
612
+ has symbols and non-ASCII characters.</p>
613
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
614
+ <p>Execution example:</p>
615
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
616
+ # [
617
+ # [
618
+ # 0,
619
+ # 1337566253.89858,
620
+ # 0.000355720520019531
621
+ # ],
622
+ # [
623
+ # {
624
+ # &quot;position&quot;: 0,
625
+ # &quot;value&quot;: &quot;日&quot;
626
+ # },
627
+ # {
628
+ # &quot;position&quot;: 1,
629
+ # &quot;value&quot;: &quot;本&quot;
630
+ # },
631
+ # {
632
+ # &quot;position&quot;: 2,
633
+ # &quot;value&quot;: &quot;語&quot;
634
+ # },
635
+ # {
636
+ # &quot;position&quot;: 3,
637
+ # &quot;value&quot;: &quot;!&quot;
638
+ # },
639
+ # {
640
+ # &quot;position&quot;: 4,
641
+ # &quot;value&quot;: &quot;!&quot;
642
+ # },
643
+ # {
644
+ # &quot;position&quot;: 5,
645
+ # &quot;value&quot;: &quot;!&quot;
646
+ # }
647
+ # ]
648
+ # ]
649
+ </pre></div>
650
+ </div>
651
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>:</p>
652
+ <p>Execution example:</p>
653
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
654
+ # [
655
+ # [
656
+ # 0,
657
+ # 1337566253.89858,
658
+ # 0.000355720520019531
659
+ # ],
660
+ # [
661
+ # {
662
+ # &quot;position&quot;: 0,
663
+ # &quot;value&quot;: &quot;日本&quot;
664
+ # },
665
+ # {
666
+ # &quot;position&quot;: 1,
667
+ # &quot;value&quot;: &quot;本語&quot;
668
+ # },
669
+ # {
670
+ # &quot;position&quot;: 2,
671
+ # &quot;value&quot;: &quot;語&quot;
672
+ # },
673
+ # {
674
+ # &quot;position&quot;: 3,
675
+ # &quot;value&quot;: &quot;!!!&quot;
676
+ # }
677
+ # ]
678
+ # ]
679
+ </pre></div>
680
+ </div>
681
+ </div>
682
+ <div class="section" id="tokenbigramignoreblanksplitsymbol">
683
+ <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
684
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> is similar to
685
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
686
+ <blockquote>
687
+ <div><ul class="simple">
688
+ <li>Blank handling</li>
689
+ <li>Symbol handling</li>
690
+ </ul>
691
+ </div></blockquote>
692
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> ignores white-spaces in
693
+ continuous symbols and non-ASCII characters.</p>
694
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> tokenizes symbols by bigram
695
+ tokenize method.</p>
696
+ <p>You can find difference of them by <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
697
+ has symbols and non-ASCII characters.</p>
698
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
699
+ <p>Execution example:</p>
700
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
701
+ # [
702
+ # [
703
+ # 0,
704
+ # 1337566253.89858,
705
+ # 0.000355720520019531
706
+ # ],
707
+ # [
708
+ # {
709
+ # &quot;position&quot;: 0,
710
+ # &quot;value&quot;: &quot;日&quot;
711
+ # },
712
+ # {
713
+ # &quot;position&quot;: 1,
714
+ # &quot;value&quot;: &quot;本&quot;
715
+ # },
716
+ # {
717
+ # &quot;position&quot;: 2,
718
+ # &quot;value&quot;: &quot;語&quot;
719
+ # },
720
+ # {
721
+ # &quot;position&quot;: 3,
722
+ # &quot;value&quot;: &quot;!&quot;
723
+ # },
724
+ # {
725
+ # &quot;position&quot;: 4,
726
+ # &quot;value&quot;: &quot;!&quot;
727
+ # },
728
+ # {
729
+ # &quot;position&quot;: 5,
730
+ # &quot;value&quot;: &quot;!&quot;
731
+ # }
732
+ # ]
733
+ # ]
734
+ </pre></div>
735
+ </div>
736
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt>:</p>
737
+ <p>Execution example:</p>
738
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
739
+ # [
740
+ # [
741
+ # 0,
742
+ # 1337566253.89858,
743
+ # 0.000355720520019531
744
+ # ],
745
+ # [
746
+ # {
747
+ # &quot;position&quot;: 0,
748
+ # &quot;value&quot;: &quot;日本&quot;
749
+ # },
750
+ # {
751
+ # &quot;position&quot;: 1,
752
+ # &quot;value&quot;: &quot;本語&quot;
753
+ # },
754
+ # {
755
+ # &quot;position&quot;: 2,
756
+ # &quot;value&quot;: &quot;語!&quot;
757
+ # },
758
+ # {
759
+ # &quot;position&quot;: 3,
760
+ # &quot;value&quot;: &quot;!!&quot;
761
+ # },
762
+ # {
763
+ # &quot;position&quot;: 4,
764
+ # &quot;value&quot;: &quot;!!&quot;
765
+ # },
766
+ # {
767
+ # &quot;position&quot;: 5,
768
+ # &quot;value&quot;: &quot;!&quot;
769
+ # }
770
+ # ]
771
+ # ]
772
+ </pre></div>
773
+ </div>
774
+ </div>
775
+ <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
776
+ <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
777
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> is similar to
778
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
779
+ <blockquote>
780
+ <div><ul class="simple">
781
+ <li>Blank handling</li>
782
+ <li>Symbol and alphabet handling</li>
783
+ </ul>
784
+ </div></blockquote>
785
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> ignores white-spaces in
786
+ continuous symbols and non-ASCII characters.</p>
787
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> tokenizes symbols and
788
+ alphabets by bigram tokenize method.</p>
789
+ <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
790
+ has symbols and non-ASCII characters with white spaces and alphabets.</p>
791
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
792
+ <p>Execution example:</p>
793
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
794
+ # [
795
+ # [
796
+ # 0,
797
+ # 1337566253.89858,
798
+ # 0.000355720520019531
799
+ # ],
800
+ # [
801
+ # {
802
+ # &quot;position&quot;: 0,
803
+ # &quot;value&quot;: &quot;hello&quot;
804
+ # },
805
+ # {
806
+ # &quot;position&quot;: 1,
807
+ # &quot;value&quot;: &quot;日&quot;
808
+ # },
809
+ # {
810
+ # &quot;position&quot;: 2,
811
+ # &quot;value&quot;: &quot;本&quot;
812
+ # },
813
+ # {
814
+ # &quot;position&quot;: 3,
815
+ # &quot;value&quot;: &quot;語&quot;
816
+ # },
817
+ # {
818
+ # &quot;position&quot;: 4,
819
+ # &quot;value&quot;: &quot;!&quot;
820
+ # },
821
+ # {
822
+ # &quot;position&quot;: 5,
823
+ # &quot;value&quot;: &quot;!&quot;
824
+ # },
825
+ # {
826
+ # &quot;position&quot;: 6,
827
+ # &quot;value&quot;: &quot;!&quot;
828
+ # }
829
+ # ]
830
+ # ]
831
+ </pre></div>
832
+ </div>
833
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt>:</p>
834
+ <p>Execution example:</p>
835
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
836
+ # [
837
+ # [
838
+ # 0,
839
+ # 1337566253.89858,
840
+ # 0.000355720520019531
841
+ # ],
842
+ # [
843
+ # {
844
+ # &quot;position&quot;: 0,
845
+ # &quot;value&quot;: &quot;he&quot;
846
+ # },
847
+ # {
848
+ # &quot;position&quot;: 1,
849
+ # &quot;value&quot;: &quot;el&quot;
850
+ # },
851
+ # {
852
+ # &quot;position&quot;: 2,
853
+ # &quot;value&quot;: &quot;ll&quot;
854
+ # },
855
+ # {
856
+ # &quot;position&quot;: 3,
857
+ # &quot;value&quot;: &quot;lo&quot;
858
+ # },
859
+ # {
860
+ # &quot;position&quot;: 4,
861
+ # &quot;value&quot;: &quot;o日&quot;
862
+ # },
863
+ # {
864
+ # &quot;position&quot;: 5,
865
+ # &quot;value&quot;: &quot;日本&quot;
866
+ # },
867
+ # {
868
+ # &quot;position&quot;: 6,
869
+ # &quot;value&quot;: &quot;本語&quot;
870
+ # },
871
+ # {
872
+ # &quot;position&quot;: 7,
873
+ # &quot;value&quot;: &quot;語!&quot;
874
+ # },
875
+ # {
876
+ # &quot;position&quot;: 8,
877
+ # &quot;value&quot;: &quot;!!&quot;
878
+ # },
879
+ # {
880
+ # &quot;position&quot;: 9,
881
+ # &quot;value&quot;: &quot;!!&quot;
882
+ # },
883
+ # {
884
+ # &quot;position&quot;: 10,
885
+ # &quot;value&quot;: &quot;!&quot;
886
+ # }
887
+ # ]
888
+ # ]
889
+ </pre></div>
890
+ </div>
891
+ </div>
892
+ <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
893
+ <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
894
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> is similar to
895
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
896
+ <blockquote>
897
+ <div><ul class="simple">
898
+ <li>Blank handling</li>
899
+ <li>Symbol, alphabet and digit handling</li>
900
+ </ul>
901
+ </div></blockquote>
902
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> ignores white-spaces
903
+ in continuous symbols and non-ASCII characters.</p>
904
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> tokenizes symbols,
905
+ alphabets and digits by bigram tokenize method. It means that all
906
+ characters are tokenized by bigram tokenize method.</p>
907
+ <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> text
908
+ because it has symbols and non-ASCII characters with white spaces,
909
+ alphabets and digits.</p>
910
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
911
+ <p>Execution example:</p>
912
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
913
+ # [
914
+ # [
915
+ # 0,
916
+ # 1337566253.89858,
917
+ # 0.000355720520019531
918
+ # ],
919
+ # [
920
+ # {
921
+ # &quot;position&quot;: 0,
922
+ # &quot;value&quot;: &quot;hello&quot;
923
+ # },
924
+ # {
925
+ # &quot;position&quot;: 1,
926
+ # &quot;value&quot;: &quot;日&quot;
927
+ # },
928
+ # {
929
+ # &quot;position&quot;: 2,
930
+ # &quot;value&quot;: &quot;本&quot;
931
+ # },
932
+ # {
933
+ # &quot;position&quot;: 3,
934
+ # &quot;value&quot;: &quot;語&quot;
935
+ # },
936
+ # {
937
+ # &quot;position&quot;: 4,
938
+ # &quot;value&quot;: &quot;!&quot;
939
+ # },
940
+ # {
941
+ # &quot;position&quot;: 5,
942
+ # &quot;value&quot;: &quot;!&quot;
943
+ # },
944
+ # {
945
+ # &quot;position&quot;: 6,
946
+ # &quot;value&quot;: &quot;!&quot;
947
+ # },
948
+ # {
949
+ # &quot;position&quot;: 7,
950
+ # &quot;value&quot;: &quot;777&quot;
951
+ # }
952
+ # ]
953
+ # ]
954
+ </pre></div>
955
+ </div>
956
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt>:</p>
957
+ <p>Execution example:</p>
958
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
959
+ # [
960
+ # [
961
+ # 0,
962
+ # 1337566253.89858,
963
+ # 0.000355720520019531
964
+ # ],
965
+ # [
966
+ # {
967
+ # &quot;position&quot;: 0,
968
+ # &quot;value&quot;: &quot;he&quot;
969
+ # },
970
+ # {
971
+ # &quot;position&quot;: 1,
972
+ # &quot;value&quot;: &quot;el&quot;
973
+ # },
974
+ # {
975
+ # &quot;position&quot;: 2,
976
+ # &quot;value&quot;: &quot;ll&quot;
977
+ # },
978
+ # {
979
+ # &quot;position&quot;: 3,
980
+ # &quot;value&quot;: &quot;lo&quot;
981
+ # },
982
+ # {
983
+ # &quot;position&quot;: 4,
984
+ # &quot;value&quot;: &quot;o日&quot;
985
+ # },
986
+ # {
987
+ # &quot;position&quot;: 5,
988
+ # &quot;value&quot;: &quot;日本&quot;
989
+ # },
990
+ # {
991
+ # &quot;position&quot;: 6,
992
+ # &quot;value&quot;: &quot;本語&quot;
993
+ # },
994
+ # {
995
+ # &quot;position&quot;: 7,
996
+ # &quot;value&quot;: &quot;語!&quot;
997
+ # },
998
+ # {
999
+ # &quot;position&quot;: 8,
1000
+ # &quot;value&quot;: &quot;!!&quot;
1001
+ # },
1002
+ # {
1003
+ # &quot;position&quot;: 9,
1004
+ # &quot;value&quot;: &quot;!!&quot;
1005
+ # },
1006
+ # {
1007
+ # &quot;position&quot;: 10,
1008
+ # &quot;value&quot;: &quot;!7&quot;
1009
+ # },
1010
+ # {
1011
+ # &quot;position&quot;: 11,
1012
+ # &quot;value&quot;: &quot;77&quot;
1013
+ # },
1014
+ # {
1015
+ # &quot;position&quot;: 12,
1016
+ # &quot;value&quot;: &quot;77&quot;
1017
+ # },
1018
+ # {
1019
+ # &quot;position&quot;: 13,
1020
+ # &quot;value&quot;: &quot;7&quot;
1021
+ # }
1022
+ # ]
1023
+ # ]
1024
+ </pre></div>
1025
+ </div>
1026
+ </div>
1027
+ <div class="section" id="tokenunigram">
1028
+ <span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
1029
+ <p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
1030
+ between them is the token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
1031
+ token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> uses 1 character per token.</p>
1032
+ <p>Execution example:</p>
1033
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
1034
+ # [
1035
+ # [
1036
+ # 0,
1037
+ # 1337566253.89858,
1038
+ # 0.000355720520019531
1039
+ # ],
1040
+ # [
1041
+ # {
1042
+ # &quot;position&quot;: 0,
1043
+ # &quot;value&quot;: &quot;100&quot;
1044
+ # },
1045
+ # {
1046
+ # &quot;position&quot;: 1,
1047
+ # &quot;value&quot;: &quot;cents&quot;
1048
+ # },
1049
+ # {
1050
+ # &quot;position&quot;: 2,
1051
+ # &quot;value&quot;: &quot;!!!&quot;
1052
+ # }
1053
+ # ]
1054
+ # ]
1055
+ </pre></div>
1056
+ </div>
1057
+ </div>
1058
+ <div class="section" id="tokentrigram">
1059
+ <span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
1060
+ <p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
1061
+ between them is the token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
1062
+ token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> uses 3 characters per token.</p>
1063
+ <p>Execution example:</p>
1064
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1065
+ # [
1066
+ # [
1067
+ # 0,
1068
+ # 1337566253.89858,
1069
+ # 0.000355720520019531
1070
+ # ],
1071
+ # [
1072
+ # {
1073
+ # &quot;position&quot;: 0,
1074
+ # &quot;value&quot;: &quot;10000&quot;
1075
+ # },
1076
+ # {
1077
+ # &quot;position&quot;: 1,
1078
+ # &quot;value&quot;: &quot;cents&quot;
1079
+ # },
1080
+ # {
1081
+ # &quot;position&quot;: 2,
1082
+ # &quot;value&quot;: &quot;!!!!!&quot;
1083
+ # }
1084
+ # ]
1085
+ # ]
1086
+ </pre></div>
1087
+ </div>
1088
+ </div>
1089
+ <div class="section" id="tokendelimit">
1090
+ <span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
1091
+ <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> extracts tokens by splitting text on one or more space
1092
+ characters (<tt class="docutils literal"><span class="pre">U+0020</span></tt>). For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to
1093
+ <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt>.</p>
1094
+ <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> is suitable for tag text. You can extract <tt class="docutils literal"><span class="pre">groonga</span></tt>
1095
+ and <tt class="docutils literal"><span class="pre">full-text-search</span></tt> and <tt class="docutils literal"><span class="pre">http</span></tt> as tags from <tt class="docutils literal"><span class="pre">groonga</span>
1096
+ <span class="pre">full-text-search</span> <span class="pre">http</span></tt>.</p>
1097
+ <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt>:</p>
1098
+ <p>Execution example:</p>
1099
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1100
+ # [
1101
+ # [
1102
+ # 0,
1103
+ # 1337566253.89858,
1104
+ # 0.000355720520019531
1105
+ # ],
1106
+ # [
1107
+ # {
1108
+ # &quot;position&quot;: 0,
1109
+ # &quot;value&quot;: &quot;groonga&quot;
1110
+ # },
1111
+ # {
1112
+ # &quot;position&quot;: 1,
1113
+ # &quot;value&quot;: &quot;full-text-search&quot;
1114
+ # },
1115
+ # {
1116
+ # &quot;position&quot;: 2,
1117
+ # &quot;value&quot;: &quot;http&quot;
1118
+ # }
1119
+ # ]
1120
+ # ]
1121
+ </pre></div>
1122
+ </div>
1123
+ </div>
1124
+ <div class="section" id="tokendelimitnull">
1125
+ <span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
1126
+ <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is similar to <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>. The
1127
+ difference between them is separator character. <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>
1128
+ uses space character (<tt class="docutils literal"><span class="pre">U+0020</span></tt>) but <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> uses NUL
1129
+ character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</p>
1130
+ <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is also suitable for tag text.</p>
1131
+ <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt>:</p>
1132
+ <p>Execution example:</p>
1133
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1134
+ # [
1135
+ # [
1136
+ # 0,
1137
+ # 1337566253.89858,
1138
+ # 0.000355720520019531
1139
+ # ],
1140
+ # [
1141
+ # {
1142
+ # &quot;position&quot;: 0,
1143
+ # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1144
+ # }
1145
+ # ]
1146
+ # ]
1147
+ </pre></div>
1148
+ </div>
1149
+ </div>
1150
+ <div class="section" id="tokenmecab">
1151
+ <span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
1152
+ <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
1153
+ morphological analyzer.</p>
1154
+ <p>MeCab doesn't depend on Japanese. You can use MeCab for other
1155
+ languages by creating a dictionary for each language. You can use <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST
1156
+ Japanese Dictionary</a>
1157
+ for Japanese.</p>
1158
+ <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is good for precision rather than recall. You can find
1159
+ <tt class="docutils literal"><span class="pre">東京都</span></tt> and <tt class="docutils literal"><span class="pre">京都</span></tt> texts by <tt class="docutils literal"><span class="pre">京都</span></tt> query with
1160
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> but <tt class="docutils literal"><span class="pre">東京都</span></tt> isn't expected. You can find only
1161
+ <tt class="docutils literal"><span class="pre">京都</span></tt> text by <tt class="docutils literal"><span class="pre">京都</span></tt> query with <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>.</p>
1162
+ <p>If you want to support neologisms, you need to keep updating your
1163
+ MeCab dictionary. This has a maintenance cost. (<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't
1164
+ require dictionary maintenance because <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't use
1165
+ dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
1166
+ <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>. <tt class="docutils literal"><span class="pre">東京都</span></tt> is tokenized to <tt class="docutils literal"><span class="pre">東京</span></tt>
1167
+ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't include <tt class="docutils literal"><span class="pre">京都</span></tt>:</p>
1168
+ <p>Execution example:</p>
1169
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab &quot;東京都&quot;
1170
+ # [
1171
+ # [
1172
+ # 0,
1173
+ # 1337566253.89858,
1174
+ # 0.000355720520019531
1175
+ # ],
1176
+ # [
1177
+ # {
1178
+ # &quot;position&quot;: 0,
1179
+ # &quot;value&quot;: &quot;東京&quot;
1180
+ # },
1181
+ # {
1182
+ # &quot;position&quot;: 1,
1183
+ # &quot;value&quot;: &quot;都&quot;
1184
+ # }
1185
+ # ]
1186
+ # ]
1187
+ </pre></div>
1188
+ </div>
1189
+ </div>
1190
+ <div class="section" id="tokenregexp">
1191
+ <span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
1192
+ <div class="versionadded">
1193
+ <p><span class="versionmodified">New in version 5.0.1.</span></p>
1194
+ </div>
1195
+ <div class="admonition caution">
1196
+ <p class="first admonition-title">Caution</p>
1197
+ <p class="last">This tokenizer is experimental. Specification may be changed.</p>
1198
+ </div>
1199
+ <div class="admonition caution">
1200
+ <p class="first admonition-title">Caution</p>
1201
+ <p class="last">This tokenizer can be used only with UTF-8. You can't use this
1202
+ tokenizer with EUC-JP, Shift_JIS and so on.</p>
1203
+ </div>
1204
+ <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is a tokenizer for supporting regular expression
1205
+ search by index.</p>
1206
+ <p>In general, regular expression search is evaluated as sequential
1207
+ search. But the following cases can be evaluated as index search:</p>
1208
+ <blockquote>
1209
+ <div><ul class="simple">
1210
+ <li>Literal only case such as <tt class="docutils literal"><span class="pre">hello</span></tt></li>
1211
+ <li>The beginning of text and literal case such as <tt class="docutils literal"><span class="pre">\A/home/alice</span></tt></li>
1212
+ <li>The end of text and literal case such as <tt class="docutils literal"><span class="pre">\.txt\z</span></tt></li>
1213
+ </ul>
1214
+ </div></blockquote>
1215
+ <p>In most cases, index search is faster than sequential search.</p>
1216
+ <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is based on bigram tokenize method. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt>
1217
+ adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) at the beginning of text
1218
+ and the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) to the end of text when you
1219
+ index text:</p>
1220
+ <p>Execution example:</p>
1221
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1222
+ # [
1223
+ # [
1224
+ # 0,
1225
+ # 1337566253.89858,
1226
+ # 0.000355720520019531
1227
+ # ],
1228
+ # [
1229
+ # {
1230
+ # &quot;position&quot;: 0,
1231
+ # &quot;value&quot;: &quot;￯&quot;
1232
+ # },
1233
+ # {
1234
+ # &quot;position&quot;: 1,
1235
+ # &quot;value&quot;: &quot;/h&quot;
1236
+ # },
1237
+ # {
1238
+ # &quot;position&quot;: 2,
1239
+ # &quot;value&quot;: &quot;ho&quot;
1240
+ # },
1241
+ # {
1242
+ # &quot;position&quot;: 3,
1243
+ # &quot;value&quot;: &quot;om&quot;
1244
+ # },
1245
+ # {
1246
+ # &quot;position&quot;: 4,
1247
+ # &quot;value&quot;: &quot;me&quot;
1248
+ # },
1249
+ # {
1250
+ # &quot;position&quot;: 5,
1251
+ # &quot;value&quot;: &quot;e/&quot;
1252
+ # },
1253
+ # {
1254
+ # &quot;position&quot;: 6,
1255
+ # &quot;value&quot;: &quot;/a&quot;
1256
+ # },
1257
+ # {
1258
+ # &quot;position&quot;: 7,
1259
+ # &quot;value&quot;: &quot;al&quot;
1260
+ # },
1261
+ # {
1262
+ # &quot;position&quot;: 8,
1263
+ # &quot;value&quot;: &quot;li&quot;
1264
+ # },
1265
+ # {
1266
+ # &quot;position&quot;: 9,
1267
+ # &quot;value&quot;: &quot;ic&quot;
1268
+ # },
1269
+ # {
1270
+ # &quot;position&quot;: 10,
1271
+ # &quot;value&quot;: &quot;ce&quot;
1272
+ # },
1273
+ # {
1274
+ # &quot;position&quot;: 11,
1275
+ # &quot;value&quot;: &quot;e/&quot;
1276
+ # },
1277
+ # {
1278
+ # &quot;position&quot;: 12,
1279
+ # &quot;value&quot;: &quot;/t&quot;
1280
+ # },
1281
+ # {
1282
+ # &quot;position&quot;: 13,
1283
+ # &quot;value&quot;: &quot;te&quot;
1284
+ # },
1285
+ # {
1286
+ # &quot;position&quot;: 14,
1287
+ # &quot;value&quot;: &quot;es&quot;
1288
+ # },
1289
+ # {
1290
+ # &quot;position&quot;: 15,
1291
+ # &quot;value&quot;: &quot;st&quot;
1292
+ # },
1293
+ # {
1294
+ # &quot;position&quot;: 16,
1295
+ # &quot;value&quot;: &quot;t.&quot;
1296
+ # },
1297
+ # {
1298
+ # &quot;position&quot;: 17,
1299
+ # &quot;value&quot;: &quot;.t&quot;
1300
+ # },
1301
+ # {
1302
+ # &quot;position&quot;: 18,
1303
+ # &quot;value&quot;: &quot;tx&quot;
1304
+ # },
1305
+ # {
1306
+ # &quot;position&quot;: 19,
1307
+ # &quot;value&quot;: &quot;xt&quot;
1308
+ # },
1309
+ # {
1310
+ # &quot;position&quot;: 20,
1311
+ # &quot;value&quot;: &quot;t&quot;
1312
+ # },
1313
+ # {
1314
+ # &quot;position&quot;: 21,
1315
+ # &quot;value&quot;: &quot;￰&quot;
1316
+ # }
1317
+ # ]
1318
+ # ]
1319
+ </pre></div>
1320
+ </div>
1321
+ <p>The beginning of text mark is used for the beginning of text search by
1322
+ <tt class="docutils literal"><span class="pre">\A</span></tt>. If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query,
1323
+ <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) as the
1324
+ first token. Because the beginning of text mark must appear first,
1325
+ you can get results of the beginning of text search.</p>
1326
+ <p>Execution example:</p>
1327
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\A/home/alice/&quot; NormalizerAuto --mode GET
1328
+ # [
1329
+ # [
1330
+ # 0,
1331
+ # 1337566253.89858,
1332
+ # 0.000355720520019531
1333
+ # ],
1334
+ # [
1335
+ # {
1336
+ # &quot;position&quot;: 0,
1337
+ # &quot;value&quot;: &quot;￯&quot;
1338
+ # },
1339
+ # {
1340
+ # &quot;position&quot;: 1,
1341
+ # &quot;value&quot;: &quot;/h&quot;
1342
+ # },
1343
+ # {
1344
+ # &quot;position&quot;: 2,
1345
+ # &quot;value&quot;: &quot;ho&quot;
1346
+ # },
1347
+ # {
1348
+ # &quot;position&quot;: 3,
1349
+ # &quot;value&quot;: &quot;om&quot;
1350
+ # },
1351
+ # {
1352
+ # &quot;position&quot;: 4,
1353
+ # &quot;value&quot;: &quot;me&quot;
1354
+ # },
1355
+ # {
1356
+ # &quot;position&quot;: 5,
1357
+ # &quot;value&quot;: &quot;e/&quot;
1358
+ # },
1359
+ # {
1360
+ # &quot;position&quot;: 6,
1361
+ # &quot;value&quot;: &quot;/a&quot;
1362
+ # },
1363
+ # {
1364
+ # &quot;position&quot;: 7,
1365
+ # &quot;value&quot;: &quot;al&quot;
1366
+ # },
1367
+ # {
1368
+ # &quot;position&quot;: 8,
1369
+ # &quot;value&quot;: &quot;li&quot;
1370
+ # },
1371
+ # {
1372
+ # &quot;position&quot;: 9,
1373
+ # &quot;value&quot;: &quot;ic&quot;
1374
+ # },
1375
+ # {
1376
+ # &quot;position&quot;: 10,
1377
+ # &quot;value&quot;: &quot;ce&quot;
1378
+ # },
1379
+ # {
1380
+ # &quot;position&quot;: 11,
1381
+ # &quot;value&quot;: &quot;e/&quot;
1382
+ # }
1383
+ # ]
1384
+ # ]
1385
+ </pre></div>
1386
+ </div>
1387
+ <p>The end of text mark is used for the end of text search by <tt class="docutils literal"><span class="pre">\z</span></tt>.
1388
+ If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query, <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds
1389
+ the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) as the last token. Because the end of text
1390
+ mark must appear at the end, you can get results of the end of
1391
+ text search.</p>
1392
+ <p>Execution example:</p>
1393
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\.txt\\z&quot; NormalizerAuto --mode GET
1394
+ # [
1395
+ # [
1396
+ # 0,
1397
+ # 1337566253.89858,
1398
+ # 0.000355720520019531
1399
+ # ],
1400
+ # [
1401
+ # {
1402
+ # &quot;position&quot;: 0,
1403
+ # &quot;value&quot;: &quot;\\.&quot;
1404
+ # },
1405
+ # {
1406
+ # &quot;position&quot;: 1,
1407
+ # &quot;value&quot;: &quot;.t&quot;
1408
+ # },
1409
+ # {
1410
+ # &quot;position&quot;: 2,
1411
+ # &quot;value&quot;: &quot;tx&quot;
1412
+ # },
1413
+ # {
1414
+ # &quot;position&quot;: 3,
1415
+ # &quot;value&quot;: &quot;xt&quot;
1416
+ # },
1417
+ # {
1418
+ # &quot;position&quot;: 5,
1419
+ # &quot;value&quot;: &quot;￰&quot;
1420
+ # }
1421
+ # ]
1422
+ # ]
1423
+ </pre></div>
1424
+ </div>
1425
+ </div>
1426
+ </div>
91
1427
  </div>
92
1428
 
93
1429
 
94
1430
  </div>
95
1431
  </div>
96
1432
  </div>
97
- <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1433
+ <div class="sphinxsidebar">
98
1434
  <div class="sphinxsidebarwrapper">
1435
+ <h3><a href="../index.html">Table Of Contents</a></h3>
1436
+ <ul>
1437
+ <li><a class="reference internal" href="#">7.8. Tokenizers</a><ul>
1438
+ <li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
1439
+ <li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is &quot;tokenize&quot;?</a></li>
1440
+ <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizers</a><ul>
1441
+ <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
1442
+ <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
1443
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
1444
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
1445
+ <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
1446
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
1447
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
1448
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
1449
+ <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
1450
+ <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
1451
+ <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
1452
+ <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
1453
+ <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
1454
+ <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
1455
+ </ul>
1456
+ </li>
1457
+ </ul>
1458
+ </li>
1459
+ </ul>
1460
+
99
1461
  <h4>Previous topic</h4>
100
1462
  <p class="topless"><a href="normalizers.html"
101
1463
  title="previous chapter">7.7. Normalizers</a></p>
102
1464
  <h4>Next topic</h4>
103
1465
  <p class="topless"><a href="token_filters.html"
104
1466
  title="next chapter">7.9. Token filters</a></p>
105
- <div role="note" aria-label="source link">
106
- <h3>This Page</h3>
107
- <ul class="this-page-menu">
108
- <li><a href="../_sources/reference/tokenizers.txt"
109
- rel="nofollow">Show Source</a></li>
110
- </ul>
111
- </div>
112
- <div id="searchbox" style="display: none" role="search">
1467
+ <h3>This Page</h3>
1468
+ <ul class="this-page-menu">
1469
+ <li><a href="../_sources/reference/tokenizers.txt"
1470
+ rel="nofollow">Show Source</a></li>
1471
+ </ul>
1472
+ <div id="searchbox" style="display: none">
113
1473
  <h3>Quick search</h3>
114
1474
  <form class="search" action="../search.html" method="get">
115
1475
  <input type="text" name="q" />
@@ -126,7 +1486,7 @@
126
1486
  </div>
127
1487
  <div class="clearer"></div>
128
1488
  </div>
129
- <div class="related" role="navigation" aria-label="related navigation">
1489
+ <div class="related">
130
1490
  <h3>Navigation</h3>
131
1491
  <ul>
132
1492
  <li class="right" style="margin-right: 10px">
@@ -138,11 +1498,11 @@
138
1498
  <li class="right" >
139
1499
  <a href="normalizers.html" title="7.7. Normalizers"
140
1500
  >previous</a> |</li>
141
- <li><a href="../index.html">Groonga v5.0.0 documentation</a> &raquo;</li>
1501
+ <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> &raquo;</li>
142
1502
  <li><a href="../reference.html" >7. Reference manual</a> &raquo;</li>
143
1503
  </ul>
144
1504
  </div>
145
- <div class="footer" role="contentinfo">
1505
+ <div class="footer">
146
1506
  &copy; Copyright 2009-2015, Brazil, Inc.
147
1507
  </div>
148
1508
  </body>