rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (733)
  1. checksums.yaml +8 -8
  2. data/.yardopts +1 -0
  3. data/Rakefile +1 -16
  4. data/example/bookmark.rb +1 -6
  5. data/example/index-html.rb +0 -1
  6. data/ext/groonga/extconf.rb +4 -7
  7. data/ext/groonga/rb-grn-array.c +1 -1
  8. data/ext/groonga/rb-grn-column.c +33 -67
  9. data/ext/groonga/rb-grn-context.c +5 -5
  10. data/ext/groonga/rb-grn-database.c +2 -2
  11. data/ext/groonga/rb-grn-double-array-trie.c +4 -2
  12. data/ext/groonga/rb-grn-encoding-support.c +7 -1
  13. data/ext/groonga/rb-grn-equal-operator.c +85 -0
  14. data/ext/groonga/rb-grn-exception.c +17 -0
  15. data/ext/groonga/rb-grn-expression.c +85 -43
  16. data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
  17. data/ext/groonga/rb-grn-greater-operator.c +85 -0
  18. data/ext/groonga/rb-grn-hash.c +1 -1
  19. data/ext/groonga/rb-grn-index-column.c +150 -11
  20. data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
  21. data/ext/groonga/rb-grn-less-operator.c +85 -0
  22. data/ext/groonga/rb-grn-logger.c +5 -5
  23. data/ext/groonga/rb-grn-match-operator.c +86 -0
  24. data/ext/groonga/rb-grn-normalizer.c +8 -1
  25. data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
  26. data/ext/groonga/rb-grn-object.c +170 -36
  27. data/ext/groonga/rb-grn-operator.c +395 -172
  28. data/ext/groonga/rb-grn-patricia-trie.c +10 -8
  29. data/ext/groonga/rb-grn-plugin.c +51 -3
  30. data/ext/groonga/rb-grn-prefix-operator.c +86 -0
  31. data/ext/groonga/rb-grn-procedure-type.c +4 -0
  32. data/ext/groonga/rb-grn-query-logger.c +4 -4
  33. data/ext/groonga/rb-grn-regexp-operator.c +85 -0
  34. data/ext/groonga/rb-grn-snippet.c +1 -1
  35. data/ext/groonga/rb-grn-table-key-support.c +9 -5
  36. data/ext/groonga/rb-grn-table.c +52 -66
  37. data/ext/groonga/rb-grn-type.c +1 -1
  38. data/ext/groonga/rb-grn-utils.c +22 -3
  39. data/ext/groonga/rb-grn.h +31 -4
  40. data/ext/groonga/rb-groonga.c +9 -9
  41. data/lib/1.9/groonga.so +0 -0
  42. data/lib/2.0/groonga.so +0 -0
  43. data/lib/2.1/groonga.so +0 -0
  44. data/lib/2.2/groonga.so +0 -0
  45. data/lib/groonga/context.rb +31 -0
  46. data/lib/groonga/expression-builder.rb +14 -1
  47. data/lib/groonga/record.rb +10 -8
  48. data/lib/groonga/schema.rb +3 -1
  49. data/rroonga-build.rb +2 -2
  50. data/rroonga.gemspec +3 -3
  51. data/test/groonga-test-utils.rb +4 -0
  52. data/test/test-column.rb +28 -26
  53. data/test/test-exception.rb +1 -0
  54. data/test/test-expression-builder.rb +83 -1
  55. data/test/test-expression.rb +80 -48
  56. data/test/test-index-column.rb +102 -29
  57. data/test/test-normalizer.rb +35 -29
  58. data/test/test-operator.rb +214 -0
  59. data/test/test-plugin.rb +24 -6
  60. data/test/test-procedure.rb +29 -0
  61. data/test/test-schema-type.rb +14 -0
  62. data/test/test-table-select-mecab.rb +1 -4
  63. data/test/test-table.rb +7 -0
  64. data/test/test-token-regexp.rb +30 -0
  65. data/test/test-type.rb +24 -0
  66. data/vendor/local/bin/grndb.exe +0 -0
  67. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  68. data/vendor/local/bin/groonga.exe +0 -0
  69. data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
  70. data/vendor/local/bin/libgroonga-0.dll +0 -0
  71. data/vendor/local/bin/libmecab-1.dll +0 -0
  72. data/vendor/local/bin/libmsgpack-3.dll +0 -0
  73. data/vendor/local/bin/libmsgpackc-2.dll +0 -0
  74. data/vendor/local/bin/libonig-5.dll +0 -0
  75. data/vendor/local/bin/libstdc++-6.dll +0 -0
  76. data/vendor/local/bin/lz4.exe +0 -0
  77. data/vendor/local/bin/lz4c.exe +0 -0
  78. data/vendor/local/bin/lz4cat +0 -0
  79. data/vendor/local/bin/mecab-config +2 -2
  80. data/vendor/local/bin/mecab.exe +0 -0
  81. data/vendor/local/bin/onig-config +1 -1
  82. data/vendor/local/bin/zlib1.dll +0 -0
  83. data/vendor/local/etc/groonga/groonga.conf +1 -1
  84. data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
  85. data/vendor/local/include/groonga/groonga.h +1 -0
  86. data/vendor/local/include/groonga/groonga/expr.h +2 -0
  87. data/vendor/local/include/groonga/groonga/groonga.h +32 -5
  88. data/vendor/local/include/groonga/groonga/ii.h +7 -0
  89. data/vendor/local/include/groonga/groonga/obj.h +37 -0
  90. data/vendor/local/include/groonga/groonga/scorer.h +95 -0
  91. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  92. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  93. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  94. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
  95. data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
  96. data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
  97. data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
  98. data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
  99. data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
  100. data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
  101. data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
  102. data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
  103. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
  104. data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
  105. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
  106. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  107. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  108. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  109. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
  110. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  111. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  112. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  113. data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
  114. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  115. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  116. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  117. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
  118. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  119. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  120. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  121. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
  122. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
  123. data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
  124. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
  125. data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
  126. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
  127. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
  128. data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
  129. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
  130. data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
  131. data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
  132. data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
  133. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
  134. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
  135. data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
  136. data/vendor/local/lib/libgroonga.a +0 -0
  137. data/vendor/local/lib/libgroonga.dll.a +0 -0
  138. data/vendor/local/lib/libgroonga.la +2 -2
  139. data/vendor/local/lib/liblz4.a +0 -0
  140. data/vendor/local/lib/liblz4.dll +0 -0
  141. data/vendor/local/lib/liblz4.dll.1 +0 -0
  142. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  143. data/vendor/local/lib/libmecab.a +0 -0
  144. data/vendor/local/lib/libmecab.dll.a +0 -0
  145. data/vendor/local/lib/libmecab.la +2 -2
  146. data/vendor/local/lib/libmsgpack.a +0 -0
  147. data/vendor/local/lib/libmsgpack.dll.a +0 -0
  148. data/vendor/local/lib/libmsgpack.la +2 -2
  149. data/vendor/local/lib/libmsgpackc.a +0 -0
  150. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  151. data/vendor/local/lib/libmsgpackc.la +2 -2
  152. data/vendor/local/lib/libonig.a +0 -0
  153. data/vendor/local/lib/libonig.dll.a +0 -0
  154. data/vendor/local/lib/libonig.la +2 -2
  155. data/vendor/local/lib/libz.a +0 -0
  156. data/vendor/local/lib/libz.dll.a +0 -0
  157. data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
  158. data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
  159. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  160. data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
  161. data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
  162. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  163. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  164. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  165. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  166. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  167. data/vendor/local/sbin/groonga-httpd-restart +1 -1
  168. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  169. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  170. data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
  171. data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
  172. data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
  173. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
  174. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  175. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
  176. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
  177. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  178. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
  179. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  180. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
  181. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  182. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
  183. data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
  184. data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
  185. data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
  186. data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
  187. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
  188. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
  189. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
  190. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
  191. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
  192. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
  193. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
  194. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
  195. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
  196. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
  197. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
  198. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  199. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
  200. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
  201. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  202. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
  203. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
  204. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
  205. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
  206. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
  207. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
  208. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
  209. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
  210. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
  211. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
  212. data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
  213. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
  214. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
  215. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
  216. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
  217. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
  218. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
  219. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
  220. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
  221. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  222. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  223. data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
  224. data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
  225. data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
  226. data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
  227. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
  228. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  229. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  230. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
  231. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
  232. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
  233. data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
  234. data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
  235. data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
  236. data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  237. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
  238. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
  239. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
  240. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  241. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  242. data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
  243. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
  244. data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
  245. data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
  246. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
  247. data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
  248. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  249. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  250. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
  251. data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
  252. data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
  253. data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
  254. data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
  255. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
  256. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
  257. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
  258. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
  259. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
  260. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
  261. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
  262. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
  263. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
  264. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
  265. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
  266. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
  267. data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
  268. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
  269. data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
  270. data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
  271. data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
  272. data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
  273. data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
  274. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
  275. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
  276. data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
  277. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
  278. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
  279. data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
  280. data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
  281. data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
  282. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
  283. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
  284. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
  285. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
  286. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
  287. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
  288. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
  289. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
  290. data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
  291. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  292. data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
  293. data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
  294. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
  295. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
  296. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
  297. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
  298. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
  299. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
  300. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
  301. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
  302. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
  303. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
  304. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
  305. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
  306. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
  307. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
  308. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
  309. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
  310. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
  311. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
  312. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
  313. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
  314. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
  315. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
  316. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
  317. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
  318. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
  319. data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
  320. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
  321. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
  322. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
  323. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
  324. data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
  325. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
  326. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
  327. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
  328. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
  329. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
  330. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
  331. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
  332. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
  333. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
  334. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
  335. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
  336. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
  337. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
  338. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
  339. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
  340. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
  341. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
  342. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
  343. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
  344. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
  345. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
  346. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
  347. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
  348. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
  349. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
  350. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
  351. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
  352. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
  353. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
  354. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  355. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
  356. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
  357. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
  358. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
  359. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
  360. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
  361. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
  362. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
  363. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
  364. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
  365. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
  366. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
  367. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
  368. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
  369. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
  370. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
  371. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
  372. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
  373. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
  374. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  375. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
  376. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
  377. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
  378. data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
  379. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
  380. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
  381. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
  382. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
  383. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
  384. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
  385. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
  386. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
  387. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
  388. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
  389. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
  390. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
  391. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
  392. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
  393. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
  394. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
  395. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
  396. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
  397. data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
  398. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
  399. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
  400. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
  401. data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
  402. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
  403. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
  404. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
  405. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
  406. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
  407. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
  408. data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
  409. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
  410. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
  411. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
  412. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
  413. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
  414. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
  415. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
  416. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
  417. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
  418. data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
  419. data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
  420. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  421. data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
  422. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
  423. data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
  424. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
  425. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
  426. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
  427. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
  428. data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
  429. data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
  430. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
  431. data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
  432. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
  433. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  434. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  435. data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
  436. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
  437. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
  438. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
  439. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
  440. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
  441. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
  442. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
  443. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
  444. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
  445. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
  446. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
  447. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  448. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
  449. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
  450. data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
  451. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
  452. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  453. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
  454. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
  455. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  456. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
  457. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  458. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
  459. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  460. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
  461. data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
  462. data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
  463. data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
  464. data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
  465. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
  466. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
  467. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
  468. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
  469. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
  470. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
  471. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
  472. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
  473. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
  474. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
  475. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
  476. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  477. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
  478. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
  479. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  480. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
  481. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
  482. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
  483. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
  484. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
  485. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
  486. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
  487. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
  488. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
  489. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
  490. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
  491. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
  492. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
  493. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
  494. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
  495. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
  496. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
  497. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
  498. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
  499. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  500. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  501. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
  502. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
  503. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
  504. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
  505. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
  506. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  507. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  508. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
  509. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
  510. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
  511. data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
  512. data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
  513. data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
  514. data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  515. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
  516. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
  517. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
  518. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  519. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  520. data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
  521. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
  522. data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
  523. data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
  524. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
  525. data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
  526. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  527. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  528. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
  529. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
  530. data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
  531. data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
  532. data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
  533. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
  534. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
  535. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
  536. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
  537. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
  538. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
  539. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
  540. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
  541. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
  542. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
  543. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
  544. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
  545. data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
  546. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
  547. data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
  548. data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
  549. data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
  550. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
  551. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
  552. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
  553. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
  554. data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
  555. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
  556. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
  557. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
  558. data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
  559. data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
  560. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
  561. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
  562. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
  563. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
  564. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
  565. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
  566. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
  567. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
  568. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
  569. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  570. data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
  571. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
  572. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
  573. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
  574. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
  575. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
  576. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
  577. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
  578. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
  579. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
  580. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
  581. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
  582. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
  583. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
  584. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
  585. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
  586. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
  587. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
  588. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
  589. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
  590. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
  591. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
  592. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
  593. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
  594. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
  595. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
  596. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
  597. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
  598. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
  599. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
  600. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
  601. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
  602. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
  603. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
  604. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
  605. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
  606. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
  607. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
  608. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
  609. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
  610. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
  611. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
  612. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
  613. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
  614. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
  615. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
  616. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
  617. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
  618. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
  619. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
  620. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
  621. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
  622. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
  623. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
  624. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
  625. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
  626. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
  627. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
  628. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
  629. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
  630. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
  631. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
  632. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
  633. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
  634. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
  635. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
  636. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
  637. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
  638. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
  639. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
  640. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
  641. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
  642. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
  643. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
  644. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
  645. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
  646. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
  647. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
  648. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
  649. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
  650. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
  651. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
  652. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  653. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
  654. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
  655. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
  656. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
  657. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
  658. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
  659. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
  660. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
  661. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
  662. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
  663. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
  664. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
  665. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
  666. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
  667. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
  668. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
  669. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
  670. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
  671. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
  672. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
  673. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
  674. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
  675. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
  676. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
  677. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
  678. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
  679. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
  680. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
  681. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
  682. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
  683. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
  684. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
  685. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
  686. data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
  687. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
  688. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
  689. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
  690. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
  691. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
  692. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
  693. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
  694. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
  695. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
  696. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
  697. data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
  698. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  699. data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
  700. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
  701. data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
  702. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
  703. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
  704. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
  705. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
  706. data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
  707. data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
  708. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
  709. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
  710. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
  711. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  712. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  713. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
  714. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
  715. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
  716. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
  717. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
  718. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
  719. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
  720. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
  721. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
  722. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
  723. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
  724. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
  725. data/vendor/local/share/license/mruby/README.md +2 -2
  726. data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
  727. data/vendor/local/share/man/man1/groonga.1 +7210 -3029
  728. metadata +75 -11
  729. data/doc/text/news.textile +0 -1217
  730. data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
  731. data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
  732. data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
  733. data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.8. Tokenizers &mdash; Groonga v5.0.0 documentation</title>
10
+ <title>7.8. Tokenizers &mdash; Groonga v5.0.1-42-g4d10df1 documentation</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.0',
18
+ VERSION: '5.0.1-42-g4d10df1',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -25,12 +25,12 @@
25
25
  <script type="text/javascript" src="../_static/underscore.js"></script>
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
28
- <link rel="top" title="Groonga v5.0.0 documentation" href="../index.html" />
28
+ <link rel="top" title="Groonga v5.0.1-42-g4d10df1 documentation" href="../index.html" />
29
29
  <link rel="up" title="7. Reference manual" href="../reference.html" />
30
30
  <link rel="next" title="7.9. Token filters" href="token_filters.html" />
31
31
  <link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
32
32
  </head>
33
- <body role="document">
33
+ <body>
34
34
  <div class="header">
35
35
  <h1 class="title">
36
36
  <a id="top-link" href="../index.html">
@@ -48,7 +48,7 @@
48
48
  </div>
49
49
 
50
50
 
51
- <div class="related" role="navigation" aria-label="related navigation">
51
+ <div class="related">
52
52
  <h3>Navigation</h3>
53
53
  <ul>
54
54
  <li class="right" style="margin-right: 10px">
@@ -60,7 +60,7 @@
60
60
  <li class="right" >
61
61
  <a href="normalizers.html" title="7.7. Normalizers"
62
62
  accesskey="P">previous</a> |</li>
63
- <li><a href="../index.html">Groonga v5.0.0 documentation</a> &raquo;</li>
63
+ <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> &raquo;</li>
64
64
  <li><a href="../reference.html" accesskey="U">7. Reference manual</a> &raquo;</li>
65
65
  </ul>
66
66
  </div>
@@ -68,48 +68,1408 @@
68
68
  <div class="document">
69
69
  <div class="documentwrapper">
70
70
  <div class="bodywrapper">
71
- <div class="body" role="main">
71
+ <div class="body">
72
72
 
73
73
  <div class="section" id="tokenizers">
74
74
  <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
75
- <p>TODO: Write me.</p>
76
- <p>Here are the list of built-in tokenizers:</p>
77
- <ul class="simple">
78
- <li>TokenBigram</li>
79
- <li>TokenBigramSplitSymbol</li>
80
- <li>TokenBigramSplitSymbolAlpha</li>
81
- <li>TokenBigramSplitSymbolAlphaDigit</li>
82
- <li>TokenBigramIgnoreBlank</li>
83
- <li>TokenBigramIgnoreBlankSplitSymbol</li>
84
- <li>TokenBigramIgnoreBlankSplitAlpha</li>
85
- <li>TokenBigramIgnoreBlankSplitAlphaDigit</li>
86
- <li>TokenDelimit</li>
87
- <li>TokenDelimitNull</li>
88
- <li>TokenTrigram</li>
89
- <li>TokenUnigram</li>
75
+ <div class="section" id="summary">
76
+ <h2>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h2>
77
+ <p>Groonga has a tokenizer module that tokenizes text. It is used in
78
+ the following cases:</p>
79
+ <blockquote>
80
+ <div><ul>
81
+ <li><p class="first">Indexing text</p>
82
+ <div class="figure align-center">
83
+ <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
84
+ <p class="caption">Tokenizer is used when indexing text.</p>
85
+ </div>
86
+ </li>
87
+ <li><p class="first">Searching by query</p>
88
+ <div class="figure align-center">
89
+ <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
90
+ <p class="caption">Tokenizer is used when searching by query.</p>
91
+ </div>
92
+ </li>
93
+ </ul>
94
+ </div></blockquote>
95
+ <p>Tokenizer is an important module for full-text search. You can change
96
+ trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
97
+ tokenizer.</p>
98
+ <p>Normally, <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> is a suitable tokenizer. If you don't
99
+ know much about tokenizer, it's recommended that you choose
100
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>.</p>
101
+ <p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> and
102
+ <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a>. Here is an example to
103
+ try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> tokenizer by
104
+ <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a>:</p>
105
+ <p>Execution example:</p>
106
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
107
+ # [
108
+ # [
109
+ # 0,
110
+ # 1337566253.89858,
111
+ # 0.000355720520019531
112
+ # ],
113
+ # [
114
+ # {
115
+ # &quot;position&quot;: 0,
116
+ # &quot;value&quot;: &quot;He&quot;
117
+ # },
118
+ # {
119
+ # &quot;position&quot;: 1,
120
+ # &quot;value&quot;: &quot;el&quot;
121
+ # },
122
+ # {
123
+ # &quot;position&quot;: 2,
124
+ # &quot;value&quot;: &quot;ll&quot;
125
+ # },
126
+ # {
127
+ # &quot;position&quot;: 3,
128
+ # &quot;value&quot;: &quot;lo&quot;
129
+ # },
130
+ # {
131
+ # &quot;position&quot;: 4,
132
+ # &quot;value&quot;: &quot;o &quot;
133
+ # },
134
+ # {
135
+ # &quot;position&quot;: 5,
136
+ # &quot;value&quot;: &quot; W&quot;
137
+ # },
138
+ # {
139
+ # &quot;position&quot;: 6,
140
+ # &quot;value&quot;: &quot;Wo&quot;
141
+ # },
142
+ # {
143
+ # &quot;position&quot;: 7,
144
+ # &quot;value&quot;: &quot;or&quot;
145
+ # },
146
+ # {
147
+ # &quot;position&quot;: 8,
148
+ # &quot;value&quot;: &quot;rl&quot;
149
+ # },
150
+ # {
151
+ # &quot;position&quot;: 9,
152
+ # &quot;value&quot;: &quot;ld&quot;
153
+ # },
154
+ # {
155
+ # &quot;position&quot;: 10,
156
+ # &quot;value&quot;: &quot;d&quot;
157
+ # }
158
+ # ]
159
+ # ]
160
+ </pre></div>
161
+ </div>
162
+ </div>
163
+ <div class="section" id="what-is-tokenize">
164
+ <h2>7.8.2. What is &quot;tokenize&quot;?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
165
+ <p>&quot;tokenize&quot; is the process that extracts zero or more tokens from a
166
+ text. There are some &quot;tokenize&quot; methods.</p>
167
+ <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
168
+ bigram tokenize method:</p>
169
+ <blockquote>
170
+ <div><ul class="simple">
171
+ <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
172
+ <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
173
+ <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
174
+ <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
175
+ <li><tt class="docutils literal"><span class="pre">o_</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
176
+ <li><tt class="docutils literal"><span class="pre">_W</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
177
+ <li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
178
+ <li><tt class="docutils literal"><span class="pre">or</span></tt></li>
179
+ <li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
180
+ <li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
181
+ </ul>
182
+ </div></blockquote>
183
+ <p>In the above example, 10 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
184
+ <span class="pre">World</span></tt>.</p>
185
+ <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
186
+ white-space-separate tokenize method:</p>
187
+ <blockquote>
188
+ <div><ul class="simple">
189
+ <li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
190
+ <li><tt class="docutils literal"><span class="pre">World</span></tt></li>
191
+ </ul>
192
+ </div></blockquote>
193
+ <p>In the above example, 2 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
194
+ <span class="pre">World</span></tt>.</p>
195
+ <p>Token is used as search key. You can find indexed documents only by
196
+ tokens that are extracted by used tokenize method. For example, you
197
+ can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with bigram tokenize method but you
198
+ can't find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with white-space-separate tokenize
199
+ method. Because white-space-separate tokenize method doesn't extract
200
+ <tt class="docutils literal"><span class="pre">ll</span></tt> token. It just extracts <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt> tokens.</p>
201
+ <p>In general, tokenize method that generates small tokens increases
202
+ recall but decreases precision. Tokenize method that generates large
203
+ tokens increases precision but decreases recall.</p>
204
+ <p>For example, we can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> and <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with
205
+ bigram tokenize method. <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is a noise for people who
206
+ want to search &quot;logical or&quot;. It means that precision is
207
+ decreased. But recall is increased.</p>
208
+ <p>We can find only <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with white-space-separate
209
+ tokenize method. Because <tt class="docutils literal"><span class="pre">World</span></tt> is tokenized to one token <tt class="docutils literal"><span class="pre">World</span></tt>
210
+ with white-space-separate tokenize method. It means that precision is
211
+ increased for people who want to search &quot;logical or&quot;. But recall is
212
+ decreased because <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> that contains <tt class="docutils literal"><span class="pre">or</span></tt> isn't found.</p>
213
+ </div>
214
+ <div class="section" id="built-in-tokenizsers">
215
+ <h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
216
+ <p>Here is a list of built-in tokenizers:</p>
217
+ <blockquote>
218
+ <div><ul class="simple">
219
+ <li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
220
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
221
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
222
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
223
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
224
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
225
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></tt></li>
226
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></tt></li>
227
+ <li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
228
+ <li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
229
+ <li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
230
+ <li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
231
+ <li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
232
+ <li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
90
233
  </ul>
234
+ </div></blockquote>
235
+ <div class="section" id="tokenbigram">
236
+ <span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
237
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> is a bigram based tokenizer. It's recommended to use
238
+ this tokenizer for most cases.</p>
239
+ <p>Bigram tokenize method tokenizes a text to two adjacent characters
240
+ tokens. For example, <tt class="docutils literal"><span class="pre">Hello</span></tt> is tokenized to the following tokens:</p>
241
+ <blockquote>
242
+ <div><ul class="simple">
243
+ <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
244
+ <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
245
+ <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
246
+ <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
247
+ </ul>
248
+ </div></blockquote>
249
+ <p>Bigram tokenize method is good for recall because you can find all
250
+ texts by a query that consists of two or more characters.</p>
251
+ <p>In general, you can't find all texts by a query that consists of one
252
+ character because one character token doesn't exist. But you can find
253
+ all texts by a query that consists of one character in Groonga. Because
254
+ Groonga finds tokens that start with the query by predictive search. For
255
+ example, Groonga can find <tt class="docutils literal"><span class="pre">ll</span></tt> and <tt class="docutils literal"><span class="pre">lo</span></tt> tokens by <tt class="docutils literal"><span class="pre">l</span></tt> query.</p>
256
+ <p>Bigram tokenize method isn't good for precision because you can find
257
+ texts that include the query inside a word. For example, you can find <tt class="docutils literal"><span class="pre">world</span></tt>
258
+ by <tt class="docutils literal"><span class="pre">or</span></tt>. This is more sensitive for ASCII only languages rather than
259
+ non-ASCII languages. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> has solution for this problem
260
+ described below.</p>
261
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior is different when it's worked with any
262
+ <a class="reference internal" href="normalizers.html"><em>Normalizers</em></a>.</p>
263
+ <p>If no normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses pure bigram (all tokens
264
+ except the last token have two characters) tokenize method:</p>
265
+ <p>Execution example:</p>
266
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
267
+ # [
268
+ # [
269
+ # 0,
270
+ # 1337566253.89858,
271
+ # 0.000355720520019531
272
+ # ],
273
+ # [
274
+ # {
275
+ # &quot;position&quot;: 0,
276
+ # &quot;value&quot;: &quot;He&quot;
277
+ # },
278
+ # {
279
+ # &quot;position&quot;: 1,
280
+ # &quot;value&quot;: &quot;el&quot;
281
+ # },
282
+ # {
283
+ # &quot;position&quot;: 2,
284
+ # &quot;value&quot;: &quot;ll&quot;
285
+ # },
286
+ # {
287
+ # &quot;position&quot;: 3,
288
+ # &quot;value&quot;: &quot;lo&quot;
289
+ # },
290
+ # {
291
+ # &quot;position&quot;: 4,
292
+ # &quot;value&quot;: &quot;o &quot;
293
+ # },
294
+ # {
295
+ # &quot;position&quot;: 5,
296
+ # &quot;value&quot;: &quot; W&quot;
297
+ # },
298
+ # {
299
+ # &quot;position&quot;: 6,
300
+ # &quot;value&quot;: &quot;Wo&quot;
301
+ # },
302
+ # {
303
+ # &quot;position&quot;: 7,
304
+ # &quot;value&quot;: &quot;or&quot;
305
+ # },
306
+ # {
307
+ # &quot;position&quot;: 8,
308
+ # &quot;value&quot;: &quot;rl&quot;
309
+ # },
310
+ # {
311
+ # &quot;position&quot;: 9,
312
+ # &quot;value&quot;: &quot;ld&quot;
313
+ # },
314
+ # {
315
+ # &quot;position&quot;: 10,
316
+ # &quot;value&quot;: &quot;d&quot;
317
+ # }
318
+ # ]
319
+ # ]
320
+ </pre></div>
321
+ </div>
322
+ <p>If normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses white-space-separate like
323
+ tokenize method for ASCII characters. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram
324
+ tokenize method for non-ASCII characters.</p>
325
+ <p>You may be confused with this combined behavior. But it's reasonable
326
+ for most use cases such as English text (only ASCII characters) and
327
+ Japanese text (ASCII and non-ASCII characters are mixed).</p>
328
+ <p>Most languages that consist of only ASCII characters use white-space for
329
+ word separator. White-space-separate tokenize method is suitable for
330
+ the case.</p>
331
+ <p>Languages that consist of non-ASCII characters don't use white-space for
332
+ word separator. Bigram tokenize method is suitable for the case.</p>
333
+ <p>Mixed tokenize method is suitable for mixed language case.</p>
334
+ <p>If you want to use bigram tokenize method for ASCII character, see
335
+ <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> type tokenizers such as
336
+ <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a>.</p>
337
+ <p>Let's confirm <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior by example.</p>
338
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses one or more white-spaces as token delimiter for
339
+ ASCII characters:</p>
340
+ <p>Execution example:</p>
341
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
342
+ # [
343
+ # [
344
+ # 0,
345
+ # 1337566253.89858,
346
+ # 0.000355720520019531
347
+ # ],
348
+ # [
349
+ # {
350
+ # &quot;position&quot;: 0,
351
+ # &quot;value&quot;: &quot;hello&quot;
352
+ # },
353
+ # {
354
+ # &quot;position&quot;: 1,
355
+ # &quot;value&quot;: &quot;world&quot;
356
+ # }
357
+ # ]
358
+ # ]
359
+ </pre></div>
360
+ </div>
361
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses character type change as token delimiter for
362
+ ASCII characters. Character type is one of the following:</p>
363
+ <blockquote>
364
+ <div><ul class="simple">
365
+ <li>Alphabet</li>
366
+ <li>Digit</li>
367
+ <li>Symbol (such as <tt class="docutils literal"><span class="pre">(</span></tt>, <tt class="docutils literal"><span class="pre">)</span></tt> and <tt class="docutils literal"><span class="pre">!</span></tt>)</li>
368
+ <li>Hiragana</li>
369
+ <li>Katakana</li>
370
+ <li>Kanji</li>
371
+ <li>Others</li>
372
+ </ul>
373
+ </div></blockquote>
374
+ <p>The following example shows two token delimiters:</p>
375
+ <blockquote>
376
+ <div><ul class="simple">
377
+ <li>at between <tt class="docutils literal"><span class="pre">100</span></tt> (digits) and <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets)</li>
378
+ <li>at between <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets) and <tt class="docutils literal"><span class="pre">!!!</span></tt> (symbols)</li>
379
+ </ul>
380
+ </div></blockquote>
381
+ <p>Execution example:</p>
382
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;100cents!!!&quot; NormalizerAuto
383
+ # [
384
+ # [
385
+ # 0,
386
+ # 1337566253.89858,
387
+ # 0.000355720520019531
388
+ # ],
389
+ # [
390
+ # {
391
+ # &quot;position&quot;: 0,
392
+ # &quot;value&quot;: &quot;100&quot;
393
+ # },
394
+ # {
395
+ # &quot;position&quot;: 1,
396
+ # &quot;value&quot;: &quot;cents&quot;
397
+ # },
398
+ # {
399
+ # &quot;position&quot;: 2,
400
+ # &quot;value&quot;: &quot;!!!&quot;
401
+ # }
402
+ # ]
403
+ # ]
404
+ </pre></div>
405
+ </div>
406
+ <p>Here is an example that <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram tokenize method
407
+ for non-ASCII characters.</p>
408
+ <p>Execution example:</p>
409
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
410
+ # [
411
+ # [
412
+ # 0,
413
+ # 1337566253.89858,
414
+ # 0.000355720520019531
415
+ # ],
416
+ # [
417
+ # {
418
+ # &quot;position&quot;: 0,
419
+ # &quot;value&quot;: &quot;日本&quot;
420
+ # },
421
+ # {
422
+ # &quot;position&quot;: 1,
423
+ # &quot;value&quot;: &quot;本語&quot;
424
+ # },
425
+ # {
426
+ # &quot;position&quot;: 2,
427
+ # &quot;value&quot;: &quot;語の&quot;
428
+ # },
429
+ # {
430
+ # &quot;position&quot;: 3,
431
+ # &quot;value&quot;: &quot;の勉&quot;
432
+ # },
433
+ # {
434
+ # &quot;position&quot;: 4,
435
+ # &quot;value&quot;: &quot;勉強&quot;
436
+ # },
437
+ # {
438
+ # &quot;position&quot;: 5,
439
+ # &quot;value&quot;: &quot;強&quot;
440
+ # }
441
+ # ]
442
+ # ]
443
+ </pre></div>
444
+ </div>
445
+ </div>
446
+ <div class="section" id="tokenbigramsplitsymbol">
447
+ <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
448
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
449
+ difference between them is symbol handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt>
450
+ tokenizes symbols by bigram tokenize method:</p>
451
+ <p>Execution example:</p>
452
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
453
+ # [
454
+ # [
455
+ # 0,
456
+ # 1337566253.89858,
457
+ # 0.000355720520019531
458
+ # ],
459
+ # [
460
+ # {
461
+ # &quot;position&quot;: 0,
462
+ # &quot;value&quot;: &quot;100&quot;
463
+ # },
464
+ # {
465
+ # &quot;position&quot;: 1,
466
+ # &quot;value&quot;: &quot;cents&quot;
467
+ # },
468
+ # {
469
+ # &quot;position&quot;: 2,
470
+ # &quot;value&quot;: &quot;!!&quot;
471
+ # },
472
+ # {
473
+ # &quot;position&quot;: 3,
474
+ # &quot;value&quot;: &quot;!!&quot;
475
+ # },
476
+ # {
477
+ # &quot;position&quot;: 4,
478
+ # &quot;value&quot;: &quot;!&quot;
479
+ # }
480
+ # ]
481
+ # ]
482
+ </pre></div>
483
+ </div>
484
+ </div>
485
+ <div class="section" id="tokenbigramsplitsymbolalpha">
486
+ <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
487
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
488
+ difference between them is symbol and alphabet
489
+ handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> tokenizes symbols and
490
+ alphabets by bigram tokenize method:</p>
491
+ <p>Execution example:</p>
492
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
493
+ # [
494
+ # [
495
+ # 0,
496
+ # 1337566253.89858,
497
+ # 0.000355720520019531
498
+ # ],
499
+ # [
500
+ # {
501
+ # &quot;position&quot;: 0,
502
+ # &quot;value&quot;: &quot;100&quot;
503
+ # },
504
+ # {
505
+ # &quot;position&quot;: 1,
506
+ # &quot;value&quot;: &quot;ce&quot;
507
+ # },
508
+ # {
509
+ # &quot;position&quot;: 2,
510
+ # &quot;value&quot;: &quot;en&quot;
511
+ # },
512
+ # {
513
+ # &quot;position&quot;: 3,
514
+ # &quot;value&quot;: &quot;nt&quot;
515
+ # },
516
+ # {
517
+ # &quot;position&quot;: 4,
518
+ # &quot;value&quot;: &quot;ts&quot;
519
+ # },
520
+ # {
521
+ # &quot;position&quot;: 5,
522
+ # &quot;value&quot;: &quot;s!&quot;
523
+ # },
524
+ # {
525
+ # &quot;position&quot;: 6,
526
+ # &quot;value&quot;: &quot;!!&quot;
527
+ # },
528
+ # {
529
+ # &quot;position&quot;: 7,
530
+ # &quot;value&quot;: &quot;!!&quot;
531
+ # },
532
+ # {
533
+ # &quot;position&quot;: 8,
534
+ # &quot;value&quot;: &quot;!&quot;
535
+ # }
536
+ # ]
537
+ # ]
538
+ </pre></div>
539
+ </div>
540
+ </div>
541
+ <div class="section" id="tokenbigramsplitsymbolalphadigit">
542
+ <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
543
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> is similar to
544
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference between them is symbol, alphabet
545
+ and digit handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> tokenizes
546
+ symbols, alphabets and digits by bigram tokenize method. It means that
547
+ all characters are tokenized by bigram tokenize method:</p>
548
+ <p>Execution example:</p>
549
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
550
+ # [
551
+ # [
552
+ # 0,
553
+ # 1337566253.89858,
554
+ # 0.000355720520019531
555
+ # ],
556
+ # [
557
+ # {
558
+ # &quot;position&quot;: 0,
559
+ # &quot;value&quot;: &quot;10&quot;
560
+ # },
561
+ # {
562
+ # &quot;position&quot;: 1,
563
+ # &quot;value&quot;: &quot;00&quot;
564
+ # },
565
+ # {
566
+ # &quot;position&quot;: 2,
567
+ # &quot;value&quot;: &quot;0c&quot;
568
+ # },
569
+ # {
570
+ # &quot;position&quot;: 3,
571
+ # &quot;value&quot;: &quot;ce&quot;
572
+ # },
573
+ # {
574
+ # &quot;position&quot;: 4,
575
+ # &quot;value&quot;: &quot;en&quot;
576
+ # },
577
+ # {
578
+ # &quot;position&quot;: 5,
579
+ # &quot;value&quot;: &quot;nt&quot;
580
+ # },
581
+ # {
582
+ # &quot;position&quot;: 6,
583
+ # &quot;value&quot;: &quot;ts&quot;
584
+ # },
585
+ # {
586
+ # &quot;position&quot;: 7,
587
+ # &quot;value&quot;: &quot;s!&quot;
588
+ # },
589
+ # {
590
+ # &quot;position&quot;: 8,
591
+ # &quot;value&quot;: &quot;!!&quot;
592
+ # },
593
+ # {
594
+ # &quot;position&quot;: 9,
595
+ # &quot;value&quot;: &quot;!!&quot;
596
+ # },
597
+ # {
598
+ # &quot;position&quot;: 10,
599
+ # &quot;value&quot;: &quot;!&quot;
600
+ # }
601
+ # ]
602
+ # ]
603
+ </pre></div>
604
+ </div>
605
+ </div>
606
+ <div class="section" id="tokenbigramignoreblank">
607
+ <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
608
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
609
+ difference between them is blank handling. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>
610
+ ignores white-spaces in continuous symbols and non-ASCII characters.</p>
611
+ <p>You can find difference of them by <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
612
+ has symbols and non-ASCII characters.</p>
613
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
614
+ <p>Execution example:</p>
615
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
616
+ # [
617
+ # [
618
+ # 0,
619
+ # 1337566253.89858,
620
+ # 0.000355720520019531
621
+ # ],
622
+ # [
623
+ # {
624
+ # &quot;position&quot;: 0,
625
+ # &quot;value&quot;: &quot;日&quot;
626
+ # },
627
+ # {
628
+ # &quot;position&quot;: 1,
629
+ # &quot;value&quot;: &quot;本&quot;
630
+ # },
631
+ # {
632
+ # &quot;position&quot;: 2,
633
+ # &quot;value&quot;: &quot;語&quot;
634
+ # },
635
+ # {
636
+ # &quot;position&quot;: 3,
637
+ # &quot;value&quot;: &quot;!&quot;
638
+ # },
639
+ # {
640
+ # &quot;position&quot;: 4,
641
+ # &quot;value&quot;: &quot;!&quot;
642
+ # },
643
+ # {
644
+ # &quot;position&quot;: 5,
645
+ # &quot;value&quot;: &quot;!&quot;
646
+ # }
647
+ # ]
648
+ # ]
649
+ </pre></div>
650
+ </div>
651
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>:</p>
652
+ <p>Execution example:</p>
653
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
654
+ # [
655
+ # [
656
+ # 0,
657
+ # 1337566253.89858,
658
+ # 0.000355720520019531
659
+ # ],
660
+ # [
661
+ # {
662
+ # &quot;position&quot;: 0,
663
+ # &quot;value&quot;: &quot;日本&quot;
664
+ # },
665
+ # {
666
+ # &quot;position&quot;: 1,
667
+ # &quot;value&quot;: &quot;本語&quot;
668
+ # },
669
+ # {
670
+ # &quot;position&quot;: 2,
671
+ # &quot;value&quot;: &quot;語&quot;
672
+ # },
673
+ # {
674
+ # &quot;position&quot;: 3,
675
+ # &quot;value&quot;: &quot;!!!&quot;
676
+ # }
677
+ # ]
678
+ # ]
679
+ </pre></div>
680
+ </div>
681
+ </div>
682
+ <div class="section" id="tokenbigramignoreblanksplitsymbol">
683
+ <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
684
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> is similar to
685
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the following:</p>
686
+ <blockquote>
687
+ <div><ul class="simple">
688
+ <li>Blank handling</li>
689
+ <li>Symbol handling</li>
690
+ </ul>
691
+ </div></blockquote>
692
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> ignores white-spaces in
693
+ continuous symbols and non-ASCII characters.</p>
694
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> tokenizes symbols by bigram
695
+ tokenize method.</p>
696
+ <p>You can find difference of them by <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
697
+ has symbols and non-ASCII characters.</p>
698
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
699
+ <p>Execution example:</p>
700
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
701
+ # [
702
+ # [
703
+ # 0,
704
+ # 1337566253.89858,
705
+ # 0.000355720520019531
706
+ # ],
707
+ # [
708
+ # {
709
+ # &quot;position&quot;: 0,
710
+ # &quot;value&quot;: &quot;日&quot;
711
+ # },
712
+ # {
713
+ # &quot;position&quot;: 1,
714
+ # &quot;value&quot;: &quot;本&quot;
715
+ # },
716
+ # {
717
+ # &quot;position&quot;: 2,
718
+ # &quot;value&quot;: &quot;語&quot;
719
+ # },
720
+ # {
721
+ # &quot;position&quot;: 3,
722
+ # &quot;value&quot;: &quot;!&quot;
723
+ # },
724
+ # {
725
+ # &quot;position&quot;: 4,
726
+ # &quot;value&quot;: &quot;!&quot;
727
+ # },
728
+ # {
729
+ # &quot;position&quot;: 5,
730
+ # &quot;value&quot;: &quot;!&quot;
731
+ # }
732
+ # ]
733
+ # ]
734
+ </pre></div>
735
+ </div>
736
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt>:</p>
737
+ <p>Execution example:</p>
738
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
739
+ # [
740
+ # [
741
+ # 0,
742
+ # 1337566253.89858,
743
+ # 0.000355720520019531
744
+ # ],
745
+ # [
746
+ # {
747
+ # &quot;position&quot;: 0,
748
+ # &quot;value&quot;: &quot;日本&quot;
749
+ # },
750
+ # {
751
+ # &quot;position&quot;: 1,
752
+ # &quot;value&quot;: &quot;本語&quot;
753
+ # },
754
+ # {
755
+ # &quot;position&quot;: 2,
756
+ # &quot;value&quot;: &quot;語!&quot;
757
+ # },
758
+ # {
759
+ # &quot;position&quot;: 3,
760
+ # &quot;value&quot;: &quot;!!&quot;
761
+ # },
762
+ # {
763
+ # &quot;position&quot;: 4,
764
+ # &quot;value&quot;: &quot;!!&quot;
765
+ # },
766
+ # {
767
+ # &quot;position&quot;: 5,
768
+ # &quot;value&quot;: &quot;!&quot;
769
+ # }
770
+ # ]
771
+ # ]
772
+ </pre></div>
773
+ </div>
774
+ </div>
775
+ <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
776
+ <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
777
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> is similar to
778
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the following:</p>
779
+ <blockquote>
780
+ <div><ul class="simple">
781
+ <li>Blank handling</li>
782
+ <li>Symbol and alphabet handling</li>
783
+ </ul>
784
+ </div></blockquote>
785
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> ignores white-spaces in
786
+ continuous symbols and non-ASCII characters.</p>
787
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> tokenizes symbols and
788
+ alphabets by bigram tokenize method.</p>
789
+ <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
790
+ has symbols and non-ASCII characters with white spaces and alphabets.</p>
791
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
792
+ <p>Execution example:</p>
793
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
794
+ # [
795
+ # [
796
+ # 0,
797
+ # 1337566253.89858,
798
+ # 0.000355720520019531
799
+ # ],
800
+ # [
801
+ # {
802
+ # &quot;position&quot;: 0,
803
+ # &quot;value&quot;: &quot;hello&quot;
804
+ # },
805
+ # {
806
+ # &quot;position&quot;: 1,
807
+ # &quot;value&quot;: &quot;日&quot;
808
+ # },
809
+ # {
810
+ # &quot;position&quot;: 2,
811
+ # &quot;value&quot;: &quot;本&quot;
812
+ # },
813
+ # {
814
+ # &quot;position&quot;: 3,
815
+ # &quot;value&quot;: &quot;語&quot;
816
+ # },
817
+ # {
818
+ # &quot;position&quot;: 4,
819
+ # &quot;value&quot;: &quot;!&quot;
820
+ # },
821
+ # {
822
+ # &quot;position&quot;: 5,
823
+ # &quot;value&quot;: &quot;!&quot;
824
+ # },
825
+ # {
826
+ # &quot;position&quot;: 6,
827
+ # &quot;value&quot;: &quot;!&quot;
828
+ # }
829
+ # ]
830
+ # ]
831
+ </pre></div>
832
+ </div>
833
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt>:</p>
834
+ <p>Execution example:</p>
835
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
836
+ # [
837
+ # [
838
+ # 0,
839
+ # 1337566253.89858,
840
+ # 0.000355720520019531
841
+ # ],
842
+ # [
843
+ # {
844
+ # &quot;position&quot;: 0,
845
+ # &quot;value&quot;: &quot;he&quot;
846
+ # },
847
+ # {
848
+ # &quot;position&quot;: 1,
849
+ # &quot;value&quot;: &quot;el&quot;
850
+ # },
851
+ # {
852
+ # &quot;position&quot;: 2,
853
+ # &quot;value&quot;: &quot;ll&quot;
854
+ # },
855
+ # {
856
+ # &quot;position&quot;: 3,
857
+ # &quot;value&quot;: &quot;lo&quot;
858
+ # },
859
+ # {
860
+ # &quot;position&quot;: 4,
861
+ # &quot;value&quot;: &quot;o日&quot;
862
+ # },
863
+ # {
864
+ # &quot;position&quot;: 5,
865
+ # &quot;value&quot;: &quot;日本&quot;
866
+ # },
867
+ # {
868
+ # &quot;position&quot;: 6,
869
+ # &quot;value&quot;: &quot;本語&quot;
870
+ # },
871
+ # {
872
+ # &quot;position&quot;: 7,
873
+ # &quot;value&quot;: &quot;語!&quot;
874
+ # },
875
+ # {
876
+ # &quot;position&quot;: 8,
877
+ # &quot;value&quot;: &quot;!!&quot;
878
+ # },
879
+ # {
880
+ # &quot;position&quot;: 9,
881
+ # &quot;value&quot;: &quot;!!&quot;
882
+ # },
883
+ # {
884
+ # &quot;position&quot;: 10,
885
+ # &quot;value&quot;: &quot;!&quot;
886
+ # }
887
+ # ]
888
+ # ]
889
+ </pre></div>
890
+ </div>
891
+ </div>
892
+ <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
893
+ <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
894
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> is similar to
895
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the following:</p>
896
+ <blockquote>
897
+ <div><ul class="simple">
898
+ <li>Blank handling</li>
899
+ <li>Symbol, alphabet and digit handling</li>
900
+ </ul>
901
+ </div></blockquote>
902
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> ignores white-spaces
903
+ in continuous symbols and non-ASCII characters.</p>
904
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> tokenizes symbols,
905
+ alphabets and digits by bigram tokenize method. It means that all
906
+ characters are tokenized by bigram tokenize method.</p>
907
+ <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> text
908
+ because it has symbols and non-ASCII characters with white spaces,
909
+ alphabets and digits.</p>
910
+ <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
911
+ <p>Execution example:</p>
912
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
913
+ # [
914
+ # [
915
+ # 0,
916
+ # 1337566253.89858,
917
+ # 0.000355720520019531
918
+ # ],
919
+ # [
920
+ # {
921
+ # &quot;position&quot;: 0,
922
+ # &quot;value&quot;: &quot;hello&quot;
923
+ # },
924
+ # {
925
+ # &quot;position&quot;: 1,
926
+ # &quot;value&quot;: &quot;日&quot;
927
+ # },
928
+ # {
929
+ # &quot;position&quot;: 2,
930
+ # &quot;value&quot;: &quot;本&quot;
931
+ # },
932
+ # {
933
+ # &quot;position&quot;: 3,
934
+ # &quot;value&quot;: &quot;語&quot;
935
+ # },
936
+ # {
937
+ # &quot;position&quot;: 4,
938
+ # &quot;value&quot;: &quot;!&quot;
939
+ # },
940
+ # {
941
+ # &quot;position&quot;: 5,
942
+ # &quot;value&quot;: &quot;!&quot;
943
+ # },
944
+ # {
945
+ # &quot;position&quot;: 6,
946
+ # &quot;value&quot;: &quot;!&quot;
947
+ # },
948
+ # {
949
+ # &quot;position&quot;: 7,
950
+ # &quot;value&quot;: &quot;777&quot;
951
+ # }
952
+ # ]
953
+ # ]
954
+ </pre></div>
955
+ </div>
956
+ <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt>:</p>
957
+ <p>Execution example:</p>
958
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
959
+ # [
960
+ # [
961
+ # 0,
962
+ # 1337566253.89858,
963
+ # 0.000355720520019531
964
+ # ],
965
+ # [
966
+ # {
967
+ # &quot;position&quot;: 0,
968
+ # &quot;value&quot;: &quot;he&quot;
969
+ # },
970
+ # {
971
+ # &quot;position&quot;: 1,
972
+ # &quot;value&quot;: &quot;el&quot;
973
+ # },
974
+ # {
975
+ # &quot;position&quot;: 2,
976
+ # &quot;value&quot;: &quot;ll&quot;
977
+ # },
978
+ # {
979
+ # &quot;position&quot;: 3,
980
+ # &quot;value&quot;: &quot;lo&quot;
981
+ # },
982
+ # {
983
+ # &quot;position&quot;: 4,
984
+ # &quot;value&quot;: &quot;o日&quot;
985
+ # },
986
+ # {
987
+ # &quot;position&quot;: 5,
988
+ # &quot;value&quot;: &quot;日本&quot;
989
+ # },
990
+ # {
991
+ # &quot;position&quot;: 6,
992
+ # &quot;value&quot;: &quot;本語&quot;
993
+ # },
994
+ # {
995
+ # &quot;position&quot;: 7,
996
+ # &quot;value&quot;: &quot;語!&quot;
997
+ # },
998
+ # {
999
+ # &quot;position&quot;: 8,
1000
+ # &quot;value&quot;: &quot;!!&quot;
1001
+ # },
1002
+ # {
1003
+ # &quot;position&quot;: 9,
1004
+ # &quot;value&quot;: &quot;!!&quot;
1005
+ # },
1006
+ # {
1007
+ # &quot;position&quot;: 10,
1008
+ # &quot;value&quot;: &quot;!7&quot;
1009
+ # },
1010
+ # {
1011
+ # &quot;position&quot;: 11,
1012
+ # &quot;value&quot;: &quot;77&quot;
1013
+ # },
1014
+ # {
1015
+ # &quot;position&quot;: 12,
1016
+ # &quot;value&quot;: &quot;77&quot;
1017
+ # },
1018
+ # {
1019
+ # &quot;position&quot;: 13,
1020
+ # &quot;value&quot;: &quot;7&quot;
1021
+ # }
1022
+ # ]
1023
+ # ]
1024
+ </pre></div>
1025
+ </div>
1026
+ </div>
1027
+ <div class="section" id="tokenunigram">
1028
+ <span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
1029
+ <p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
1030
+ between them is the token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
1031
+ token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> uses 1 character per token.</p>
1032
+ <p>Execution example:</p>
1033
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
1034
+ # [
1035
+ # [
1036
+ # 0,
1037
+ # 1337566253.89858,
1038
+ # 0.000355720520019531
1039
+ # ],
1040
+ # [
1041
+ # {
1042
+ # &quot;position&quot;: 0,
1043
+ # &quot;value&quot;: &quot;100&quot;
1044
+ # },
1045
+ # {
1046
+ # &quot;position&quot;: 1,
1047
+ # &quot;value&quot;: &quot;cents&quot;
1048
+ # },
1049
+ # {
1050
+ # &quot;position&quot;: 2,
1051
+ # &quot;value&quot;: &quot;!!!&quot;
1052
+ # }
1053
+ # ]
1054
+ # ]
1055
+ </pre></div>
1056
+ </div>
1057
+ </div>
1058
+ <div class="section" id="tokentrigram">
1059
+ <span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
1060
+ <p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
1061
+ between them is the token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
1062
+ token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> uses 3 characters per token.</p>
1063
+ <p>Execution example:</p>
1064
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1065
+ # [
1066
+ # [
1067
+ # 0,
1068
+ # 1337566253.89858,
1069
+ # 0.000355720520019531
1070
+ # ],
1071
+ # [
1072
+ # {
1073
+ # &quot;position&quot;: 0,
1074
+ # &quot;value&quot;: &quot;10000&quot;
1075
+ # },
1076
+ # {
1077
+ # &quot;position&quot;: 1,
1078
+ # &quot;value&quot;: &quot;cents&quot;
1079
+ # },
1080
+ # {
1081
+ # &quot;position&quot;: 2,
1082
+ # &quot;value&quot;: &quot;!!!!!&quot;
1083
+ # }
1084
+ # ]
1085
+ # ]
1086
+ </pre></div>
1087
+ </div>
1088
+ </div>
1089
+ <div class="section" id="tokendelimit">
1090
+ <span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
1091
+ <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> extracts tokens by splitting text on one or more space
1092
+ characters (<tt class="docutils literal"><span class="pre">U+0020</span></tt>). For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to
1093
+ <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt>.</p>
1094
+ <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> is suitable for tag text. You can extract <tt class="docutils literal"><span class="pre">groonga</span></tt>
1095
+ and <tt class="docutils literal"><span class="pre">full-text-search</span></tt> and <tt class="docutils literal"><span class="pre">http</span></tt> as tags from <tt class="docutils literal"><span class="pre">groonga</span>
1096
+ <span class="pre">full-text-search</span> <span class="pre">http</span></tt>.</p>
1097
+ <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt>:</p>
1098
+ <p>Execution example:</p>
1099
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1100
+ # [
1101
+ # [
1102
+ # 0,
1103
+ # 1337566253.89858,
1104
+ # 0.000355720520019531
1105
+ # ],
1106
+ # [
1107
+ # {
1108
+ # &quot;position&quot;: 0,
1109
+ # &quot;value&quot;: &quot;groonga&quot;
1110
+ # },
1111
+ # {
1112
+ # &quot;position&quot;: 1,
1113
+ # &quot;value&quot;: &quot;full-text-search&quot;
1114
+ # },
1115
+ # {
1116
+ # &quot;position&quot;: 2,
1117
+ # &quot;value&quot;: &quot;http&quot;
1118
+ # }
1119
+ # ]
1120
+ # ]
1121
+ </pre></div>
1122
+ </div>
1123
+ </div>
1124
+ <div class="section" id="tokendelimitnull">
1125
+ <span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
1126
+ <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is similar to <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>. The
1127
+ difference between them is the separator character. <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>
1128
+ uses space character (<tt class="docutils literal"><span class="pre">U+0020</span></tt>) but <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> uses NUL
1129
+ character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</p>
1130
+ <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is also suitable for tag text.</p>
1131
+ <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt>:</p>
1132
+ <p>Execution example:</p>
1133
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1134
+ # [
1135
+ # [
1136
+ # 0,
1137
+ # 1337566253.89858,
1138
+ # 0.000355720520019531
1139
+ # ],
1140
+ # [
1141
+ # {
1142
+ # &quot;position&quot;: 0,
1143
+ # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1144
+ # }
1145
+ # ]
1146
+ # ]
1147
+ </pre></div>
1148
+ </div>
1149
+ </div>
1150
+ <div class="section" id="tokenmecab">
1151
+ <span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
1152
+ <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
1153
+ morphological analyzer.</p>
1154
+ <p>MeCab doesn't depend on Japanese. You can use MeCab for other
1155
+ languages by creating dictionary for the languages. You can use <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST
1156
+ Japanese Dictionary</a>
1157
+ for Japanese.</p>
1158
+ <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is good for precision rather than recall. You can find
1159
+ <tt class="docutils literal"><span class="pre">東京都</span></tt> and <tt class="docutils literal"><span class="pre">京都</span></tt> texts by <tt class="docutils literal"><span class="pre">京都</span></tt> query with
1160
+ <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> but <tt class="docutils literal"><span class="pre">東京都</span></tt> isn't expected. You can find only
1161
+ <tt class="docutils literal"><span class="pre">京都</span></tt> text by <tt class="docutils literal"><span class="pre">京都</span></tt> query with <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>.</p>
1162
+ <p>If you want to support neologisms, you need to keep updating your
1163
+ MeCab dictionary. This has a maintenance cost. (<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't
1164
+ require dictionary maintenance because <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't use
1165
+ dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
1166
+ <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>. <tt class="docutils literal"><span class="pre">東京都</span></tt> is tokenized to <tt class="docutils literal"><span class="pre">東京</span></tt>
1167
+ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't include <tt class="docutils literal"><span class="pre">京都</span></tt>:</p>
1168
+ <p>Execution example:</p>
1169
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab &quot;東京都&quot;
1170
+ # [
1171
+ # [
1172
+ # 0,
1173
+ # 1337566253.89858,
1174
+ # 0.000355720520019531
1175
+ # ],
1176
+ # [
1177
+ # {
1178
+ # &quot;position&quot;: 0,
1179
+ # &quot;value&quot;: &quot;東京&quot;
1180
+ # },
1181
+ # {
1182
+ # &quot;position&quot;: 1,
1183
+ # &quot;value&quot;: &quot;都&quot;
1184
+ # }
1185
+ # ]
1186
+ # ]
1187
+ </pre></div>
1188
+ </div>
1189
+ </div>
1190
+ <div class="section" id="tokenregexp">
1191
+ <span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
1192
+ <div class="versionadded">
1193
+ <p><span class="versionmodified">New in version 5.0.1.</span></p>
1194
+ </div>
1195
+ <div class="admonition caution">
1196
+ <p class="first admonition-title">Caution</p>
1197
+ <p class="last">This tokenizer is experimental. Specification may be changed.</p>
1198
+ </div>
1199
+ <div class="admonition caution">
1200
+ <p class="first admonition-title">Caution</p>
1201
+ <p class="last">This tokenizer can be used only with UTF-8. You can't use this
1202
+ tokenizer with EUC-JP, Shift_JIS and so on.</p>
1203
+ </div>
1204
+ <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is a tokenizer for supporting regular expression
1205
+ search by index.</p>
1206
+ <p>In general, regular expression search is evaluated as sequential
1207
+ search. But the following cases can be evaluated as index search:</p>
1208
+ <blockquote>
1209
+ <div><ul class="simple">
1210
+ <li>Literal only case such as <tt class="docutils literal"><span class="pre">hello</span></tt></li>
1211
+ <li>The beginning of text and literal case such as <tt class="docutils literal"><span class="pre">\A/home/alice</span></tt></li>
1212
+ <li>The end of text and literal case such as <tt class="docutils literal"><span class="pre">\.txt\z</span></tt></li>
1213
+ </ul>
1214
+ </div></blockquote>
1215
+ <p>In most cases, index search is faster than sequential search.</p>
1216
+ <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is based on bigram tokenize method. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt>
1217
+ adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) at the beginning of text
1218
+ and the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) to the end of text when you
1219
+ index text:</p>
1220
+ <p>Execution example:</p>
1221
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1222
+ # [
1223
+ # [
1224
+ # 0,
1225
+ # 1337566253.89858,
1226
+ # 0.000355720520019531
1227
+ # ],
1228
+ # [
1229
+ # {
1230
+ # &quot;position&quot;: 0,
1231
+ # &quot;value&quot;: &quot;￯&quot;
1232
+ # },
1233
+ # {
1234
+ # &quot;position&quot;: 1,
1235
+ # &quot;value&quot;: &quot;/h&quot;
1236
+ # },
1237
+ # {
1238
+ # &quot;position&quot;: 2,
1239
+ # &quot;value&quot;: &quot;ho&quot;
1240
+ # },
1241
+ # {
1242
+ # &quot;position&quot;: 3,
1243
+ # &quot;value&quot;: &quot;om&quot;
1244
+ # },
1245
+ # {
1246
+ # &quot;position&quot;: 4,
1247
+ # &quot;value&quot;: &quot;me&quot;
1248
+ # },
1249
+ # {
1250
+ # &quot;position&quot;: 5,
1251
+ # &quot;value&quot;: &quot;e/&quot;
1252
+ # },
1253
+ # {
1254
+ # &quot;position&quot;: 6,
1255
+ # &quot;value&quot;: &quot;/a&quot;
1256
+ # },
1257
+ # {
1258
+ # &quot;position&quot;: 7,
1259
+ # &quot;value&quot;: &quot;al&quot;
1260
+ # },
1261
+ # {
1262
+ # &quot;position&quot;: 8,
1263
+ # &quot;value&quot;: &quot;li&quot;
1264
+ # },
1265
+ # {
1266
+ # &quot;position&quot;: 9,
1267
+ # &quot;value&quot;: &quot;ic&quot;
1268
+ # },
1269
+ # {
1270
+ # &quot;position&quot;: 10,
1271
+ # &quot;value&quot;: &quot;ce&quot;
1272
+ # },
1273
+ # {
1274
+ # &quot;position&quot;: 11,
1275
+ # &quot;value&quot;: &quot;e/&quot;
1276
+ # },
1277
+ # {
1278
+ # &quot;position&quot;: 12,
1279
+ # &quot;value&quot;: &quot;/t&quot;
1280
+ # },
1281
+ # {
1282
+ # &quot;position&quot;: 13,
1283
+ # &quot;value&quot;: &quot;te&quot;
1284
+ # },
1285
+ # {
1286
+ # &quot;position&quot;: 14,
1287
+ # &quot;value&quot;: &quot;es&quot;
1288
+ # },
1289
+ # {
1290
+ # &quot;position&quot;: 15,
1291
+ # &quot;value&quot;: &quot;st&quot;
1292
+ # },
1293
+ # {
1294
+ # &quot;position&quot;: 16,
1295
+ # &quot;value&quot;: &quot;t.&quot;
1296
+ # },
1297
+ # {
1298
+ # &quot;position&quot;: 17,
1299
+ # &quot;value&quot;: &quot;.t&quot;
1300
+ # },
1301
+ # {
1302
+ # &quot;position&quot;: 18,
1303
+ # &quot;value&quot;: &quot;tx&quot;
1304
+ # },
1305
+ # {
1306
+ # &quot;position&quot;: 19,
1307
+ # &quot;value&quot;: &quot;xt&quot;
1308
+ # },
1309
+ # {
1310
+ # &quot;position&quot;: 20,
1311
+ # &quot;value&quot;: &quot;t&quot;
1312
+ # },
1313
+ # {
1314
+ # &quot;position&quot;: 21,
1315
+ # &quot;value&quot;: &quot;￰&quot;
1316
+ # }
1317
+ # ]
1318
+ # ]
1319
+ </pre></div>
1320
+ </div>
1321
+ <p>The beginning of text mark is used for the beginning of text search by
1322
+ <tt class="docutils literal"><span class="pre">\A</span></tt>. If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query,
1323
+ <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) as the
1324
+ first token. Because the beginning of text mark must appear first,
1325
+ you can get results of the beginning of text search.</p>
1326
+ <p>Execution example:</p>
1327
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\A/home/alice/&quot; NormalizerAuto --mode GET
1328
+ # [
1329
+ # [
1330
+ # 0,
1331
+ # 1337566253.89858,
1332
+ # 0.000355720520019531
1333
+ # ],
1334
+ # [
1335
+ # {
1336
+ # &quot;position&quot;: 0,
1337
+ # &quot;value&quot;: &quot;￯&quot;
1338
+ # },
1339
+ # {
1340
+ # &quot;position&quot;: 1,
1341
+ # &quot;value&quot;: &quot;/h&quot;
1342
+ # },
1343
+ # {
1344
+ # &quot;position&quot;: 2,
1345
+ # &quot;value&quot;: &quot;ho&quot;
1346
+ # },
1347
+ # {
1348
+ # &quot;position&quot;: 3,
1349
+ # &quot;value&quot;: &quot;om&quot;
1350
+ # },
1351
+ # {
1352
+ # &quot;position&quot;: 4,
1353
+ # &quot;value&quot;: &quot;me&quot;
1354
+ # },
1355
+ # {
1356
+ # &quot;position&quot;: 5,
1357
+ # &quot;value&quot;: &quot;e/&quot;
1358
+ # },
1359
+ # {
1360
+ # &quot;position&quot;: 6,
1361
+ # &quot;value&quot;: &quot;/a&quot;
1362
+ # },
1363
+ # {
1364
+ # &quot;position&quot;: 7,
1365
+ # &quot;value&quot;: &quot;al&quot;
1366
+ # },
1367
+ # {
1368
+ # &quot;position&quot;: 8,
1369
+ # &quot;value&quot;: &quot;li&quot;
1370
+ # },
1371
+ # {
1372
+ # &quot;position&quot;: 9,
1373
+ # &quot;value&quot;: &quot;ic&quot;
1374
+ # },
1375
+ # {
1376
+ # &quot;position&quot;: 10,
1377
+ # &quot;value&quot;: &quot;ce&quot;
1378
+ # },
1379
+ # {
1380
+ # &quot;position&quot;: 11,
1381
+ # &quot;value&quot;: &quot;e/&quot;
1382
+ # }
1383
+ # ]
1384
+ # ]
1385
+ </pre></div>
1386
+ </div>
1387
+ <p>The end of text mark is used for the end of text search by <tt class="docutils literal"><span class="pre">\z</span></tt>.
1388
+ If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query, <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds
1389
+ the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) as the last token. Because the end of text
1390
+ mark must appear at the end, you can get results of the end of
1391
+ text search.</p>
1392
+ <p>Execution example:</p>
1393
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\.txt\\z&quot; NormalizerAuto --mode GET
1394
+ # [
1395
+ # [
1396
+ # 0,
1397
+ # 1337566253.89858,
1398
+ # 0.000355720520019531
1399
+ # ],
1400
+ # [
1401
+ # {
1402
+ # &quot;position&quot;: 0,
1403
+ # &quot;value&quot;: &quot;\\.&quot;
1404
+ # },
1405
+ # {
1406
+ # &quot;position&quot;: 1,
1407
+ # &quot;value&quot;: &quot;.t&quot;
1408
+ # },
1409
+ # {
1410
+ # &quot;position&quot;: 2,
1411
+ # &quot;value&quot;: &quot;tx&quot;
1412
+ # },
1413
+ # {
1414
+ # &quot;position&quot;: 3,
1415
+ # &quot;value&quot;: &quot;xt&quot;
1416
+ # },
1417
+ # {
1418
+ # &quot;position&quot;: 5,
1419
+ # &quot;value&quot;: &quot;￰&quot;
1420
+ # }
1421
+ # ]
1422
+ # ]
1423
+ </pre></div>
1424
+ </div>
1425
+ </div>
1426
+ </div>
91
1427
  </div>
92
1428
 
93
1429
 
94
1430
  </div>
95
1431
  </div>
96
1432
  </div>
97
- <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1433
+ <div class="sphinxsidebar">
98
1434
  <div class="sphinxsidebarwrapper">
1435
+ <h3><a href="../index.html">Table Of Contents</a></h3>
1436
+ <ul>
1437
+ <li><a class="reference internal" href="#">7.8. Tokenizers</a><ul>
1438
+ <li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
1439
+ <li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is &quot;tokenize&quot;?</a></li>
1440
+ <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizers</a><ul>
1441
+ <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
1442
+ <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
1443
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
1444
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
1445
+ <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
1446
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
1447
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
1448
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
1449
+ <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
1450
+ <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
1451
+ <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
1452
+ <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
1453
+ <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
1454
+ <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
1455
+ </ul>
1456
+ </li>
1457
+ </ul>
1458
+ </li>
1459
+ </ul>
1460
+
99
1461
  <h4>Previous topic</h4>
100
1462
  <p class="topless"><a href="normalizers.html"
101
1463
  title="previous chapter">7.7. Normalizers</a></p>
102
1464
  <h4>Next topic</h4>
103
1465
  <p class="topless"><a href="token_filters.html"
104
1466
  title="next chapter">7.9. Token filters</a></p>
105
- <div role="note" aria-label="source link">
106
- <h3>This Page</h3>
107
- <ul class="this-page-menu">
108
- <li><a href="../_sources/reference/tokenizers.txt"
109
- rel="nofollow">Show Source</a></li>
110
- </ul>
111
- </div>
112
- <div id="searchbox" style="display: none" role="search">
1467
+ <h3>This Page</h3>
1468
+ <ul class="this-page-menu">
1469
+ <li><a href="../_sources/reference/tokenizers.txt"
1470
+ rel="nofollow">Show Source</a></li>
1471
+ </ul>
1472
+ <div id="searchbox" style="display: none">
113
1473
  <h3>Quick search</h3>
114
1474
  <form class="search" action="../search.html" method="get">
115
1475
  <input type="text" name="q" />
@@ -126,7 +1486,7 @@
126
1486
  </div>
127
1487
  <div class="clearer"></div>
128
1488
  </div>
129
- <div class="related" role="navigation" aria-label="related navigation">
1489
+ <div class="related">
130
1490
  <h3>Navigation</h3>
131
1491
  <ul>
132
1492
  <li class="right" style="margin-right: 10px">
@@ -138,11 +1498,11 @@
138
1498
  <li class="right" >
139
1499
  <a href="normalizers.html" title="7.7. Normalizers"
140
1500
  >previous</a> |</li>
141
- <li><a href="../index.html">Groonga v5.0.0 documentation</a> &raquo;</li>
1501
+ <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> &raquo;</li>
142
1502
  <li><a href="../reference.html" >7. Reference manual</a> &raquo;</li>
143
1503
  </ul>
144
1504
  </div>
145
- <div class="footer" role="contentinfo">
1505
+ <div class="footer">
146
1506
  &copy; Copyright 2009-2015, Brazil, Inc.
147
1507
  </div>
148
1508
  </body>