rroonga 5.0.0-x64-mingw32 → 5.0.1-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (733)
  1. checksums.yaml +8 -8
  2. data/.yardopts +1 -0
  3. data/Rakefile +1 -16
  4. data/example/bookmark.rb +1 -6
  5. data/example/index-html.rb +0 -1
  6. data/ext/groonga/extconf.rb +4 -7
  7. data/ext/groonga/rb-grn-array.c +1 -1
  8. data/ext/groonga/rb-grn-column.c +33 -67
  9. data/ext/groonga/rb-grn-context.c +5 -5
  10. data/ext/groonga/rb-grn-database.c +2 -2
  11. data/ext/groonga/rb-grn-double-array-trie.c +4 -2
  12. data/ext/groonga/rb-grn-encoding-support.c +7 -1
  13. data/ext/groonga/rb-grn-equal-operator.c +85 -0
  14. data/ext/groonga/rb-grn-exception.c +17 -0
  15. data/ext/groonga/rb-grn-expression.c +85 -43
  16. data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
  17. data/ext/groonga/rb-grn-greater-operator.c +85 -0
  18. data/ext/groonga/rb-grn-hash.c +1 -1
  19. data/ext/groonga/rb-grn-index-column.c +150 -11
  20. data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
  21. data/ext/groonga/rb-grn-less-operator.c +85 -0
  22. data/ext/groonga/rb-grn-logger.c +5 -5
  23. data/ext/groonga/rb-grn-match-operator.c +86 -0
  24. data/ext/groonga/rb-grn-normalizer.c +8 -1
  25. data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
  26. data/ext/groonga/rb-grn-object.c +170 -36
  27. data/ext/groonga/rb-grn-operator.c +395 -172
  28. data/ext/groonga/rb-grn-patricia-trie.c +10 -8
  29. data/ext/groonga/rb-grn-plugin.c +51 -3
  30. data/ext/groonga/rb-grn-prefix-operator.c +86 -0
  31. data/ext/groonga/rb-grn-procedure-type.c +4 -0
  32. data/ext/groonga/rb-grn-query-logger.c +4 -4
  33. data/ext/groonga/rb-grn-regexp-operator.c +85 -0
  34. data/ext/groonga/rb-grn-snippet.c +1 -1
  35. data/ext/groonga/rb-grn-table-key-support.c +9 -5
  36. data/ext/groonga/rb-grn-table.c +52 -66
  37. data/ext/groonga/rb-grn-type.c +1 -1
  38. data/ext/groonga/rb-grn-utils.c +22 -3
  39. data/ext/groonga/rb-grn.h +31 -4
  40. data/ext/groonga/rb-groonga.c +9 -9
  41. data/lib/1.9/groonga.so +0 -0
  42. data/lib/2.0/groonga.so +0 -0
  43. data/lib/2.1/groonga.so +0 -0
  44. data/lib/2.2/groonga.so +0 -0
  45. data/lib/groonga/context.rb +31 -0
  46. data/lib/groonga/expression-builder.rb +14 -1
  47. data/lib/groonga/record.rb +10 -8
  48. data/lib/groonga/schema.rb +3 -1
  49. data/rroonga-build.rb +2 -2
  50. data/rroonga.gemspec +3 -3
  51. data/test/groonga-test-utils.rb +4 -0
  52. data/test/test-column.rb +28 -26
  53. data/test/test-exception.rb +1 -0
  54. data/test/test-expression-builder.rb +83 -1
  55. data/test/test-expression.rb +80 -48
  56. data/test/test-index-column.rb +102 -29
  57. data/test/test-normalizer.rb +35 -29
  58. data/test/test-operator.rb +214 -0
  59. data/test/test-plugin.rb +24 -6
  60. data/test/test-procedure.rb +29 -0
  61. data/test/test-schema-type.rb +14 -0
  62. data/test/test-table-select-mecab.rb +1 -4
  63. data/test/test-table.rb +7 -0
  64. data/test/test-token-regexp.rb +30 -0
  65. data/test/test-type.rb +24 -0
  66. data/vendor/local/bin/grndb.exe +0 -0
  67. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  68. data/vendor/local/bin/groonga.exe +0 -0
  69. data/vendor/local/bin/libgcc_s_seh-1.dll +0 -0
  70. data/vendor/local/bin/libgroonga-0.dll +0 -0
  71. data/vendor/local/bin/libmecab-1.dll +0 -0
  72. data/vendor/local/bin/libmsgpack-3.dll +0 -0
  73. data/vendor/local/bin/libmsgpackc-2.dll +0 -0
  74. data/vendor/local/bin/libonig-5.dll +0 -0
  75. data/vendor/local/bin/libstdc++-6.dll +0 -0
  76. data/vendor/local/bin/lz4.exe +0 -0
  77. data/vendor/local/bin/lz4c.exe +0 -0
  78. data/vendor/local/bin/lz4cat +0 -0
  79. data/vendor/local/bin/mecab-config +2 -2
  80. data/vendor/local/bin/mecab.exe +0 -0
  81. data/vendor/local/bin/onig-config +1 -1
  82. data/vendor/local/bin/zlib1.dll +0 -0
  83. data/vendor/local/etc/groonga/groonga.conf +1 -1
  84. data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
  85. data/vendor/local/include/groonga/groonga/expr.h +2 -0
  86. data/vendor/local/include/groonga/groonga/groonga.h +32 -5
  87. data/vendor/local/include/groonga/groonga/ii.h +7 -0
  88. data/vendor/local/include/groonga/groonga/obj.h +37 -0
  89. data/vendor/local/include/groonga/groonga/scorer.h +95 -0
  90. data/vendor/local/include/groonga/groonga.h +1 -0
  91. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  92. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  93. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  94. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
  95. data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
  96. data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
  97. data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
  98. data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
  99. data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
  100. data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
  101. data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
  102. data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
  103. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
  104. data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
  105. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
  106. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  107. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  108. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  109. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
  110. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  111. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  112. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  113. data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
  114. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  115. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  116. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  117. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
  118. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  119. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  120. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  121. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
  122. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
  123. data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
  124. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
  125. data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
  126. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
  127. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
  128. data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
  129. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
  130. data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
  131. data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
  132. data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
  133. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
  134. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
  135. data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
  136. data/vendor/local/lib/libgroonga.a +0 -0
  137. data/vendor/local/lib/libgroonga.dll.a +0 -0
  138. data/vendor/local/lib/libgroonga.la +2 -2
  139. data/vendor/local/lib/liblz4.a +0 -0
  140. data/vendor/local/lib/liblz4.dll +0 -0
  141. data/vendor/local/lib/liblz4.dll.1 +0 -0
  142. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  143. data/vendor/local/lib/libmecab.a +0 -0
  144. data/vendor/local/lib/libmecab.dll.a +0 -0
  145. data/vendor/local/lib/libmecab.la +2 -2
  146. data/vendor/local/lib/libmsgpack.a +0 -0
  147. data/vendor/local/lib/libmsgpack.dll.a +0 -0
  148. data/vendor/local/lib/libmsgpack.la +2 -2
  149. data/vendor/local/lib/libmsgpackc.a +0 -0
  150. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  151. data/vendor/local/lib/libmsgpackc.la +2 -2
  152. data/vendor/local/lib/libonig.a +0 -0
  153. data/vendor/local/lib/libonig.dll.a +0 -0
  154. data/vendor/local/lib/libonig.la +2 -2
  155. data/vendor/local/lib/libz.a +0 -0
  156. data/vendor/local/lib/libz.dll.a +0 -0
  157. data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
  158. data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
  159. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  160. data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
  161. data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
  162. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  163. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  164. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  165. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  166. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  167. data/vendor/local/sbin/groonga-httpd-restart +1 -1
  168. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  169. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  170. data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
  171. data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
  172. data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
  173. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
  174. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  175. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
  176. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
  177. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  178. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
  179. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  180. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
  181. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  182. data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
  183. data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
  184. data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
  185. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
  186. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
  187. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
  188. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
  189. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
  190. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
  191. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
  192. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
  193. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
  194. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
  195. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
  196. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
  197. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  198. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
  199. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
  200. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  201. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
  202. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
  203. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
  204. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
  205. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
  206. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
  207. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
  208. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
  209. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
  210. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
  211. data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
  212. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
  213. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
  214. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
  215. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
  216. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
  217. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
  218. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
  219. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
  220. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  221. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  222. data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
  223. data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
  224. data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
  225. data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
  226. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
  227. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  228. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  229. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
  230. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
  231. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
  232. data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
  233. data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
  234. data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
  235. data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
  236. data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  237. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
  238. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
  239. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
  240. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  241. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  242. data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
  243. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
  244. data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
  245. data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
  246. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
  247. data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
  248. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  249. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  250. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
  251. data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
  252. data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
  253. data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
  254. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
  255. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
  256. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
  257. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
  258. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
  259. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
  260. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
  261. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
  262. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
  263. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
  264. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
  265. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
  266. data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
  267. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
  268. data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
  269. data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
  270. data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
  271. data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
  272. data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
  273. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
  274. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
  275. data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
  276. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
  277. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
  278. data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
  279. data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
  280. data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
  281. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
  282. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
  283. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
  284. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
  285. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
  286. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
  287. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
  288. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
  289. data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
  290. data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
  291. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  292. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
  293. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
  294. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
  295. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
  296. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
  297. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
  298. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
  299. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
  300. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
  301. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
  302. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
  303. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
  304. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
  305. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
  306. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
  307. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
  308. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
  309. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
  310. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
  311. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
  312. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
  313. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
  314. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
  315. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
  316. data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
  317. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
  318. data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
  319. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
  320. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
  321. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
  322. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
  323. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
  324. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
  325. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
  326. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
  327. data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
  328. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
  329. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
  330. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
  331. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
  332. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
  333. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
  334. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
  335. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
  336. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
  337. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
  338. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
  339. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
  340. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
  341. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
  342. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
  343. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
  344. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
  345. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
  346. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
  347. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
  348. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
  349. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
  350. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
  351. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
  352. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
  353. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  354. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
  355. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
  356. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
  357. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
  358. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
  359. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
  360. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
  361. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
  362. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
  363. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
  364. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
  365. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
  366. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
  367. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
  368. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
  369. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
  370. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
  371. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
  372. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  373. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
  374. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
  375. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
  376. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
  377. data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
  378. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
  379. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
  380. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
  381. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
  382. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
  383. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
  384. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
  385. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
  386. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
  387. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
  388. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
  389. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
  390. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
  391. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
  392. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
  393. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
  394. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
  395. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
  396. data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
  397. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
  398. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
  399. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
  400. data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
  401. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
  402. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
  403. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
  404. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
  405. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
  406. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
  407. data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
  408. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
  409. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
  410. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
  411. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
  412. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
  413. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
  414. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
  415. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
  416. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
  417. data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
  418. data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
  419. data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
  420. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  421. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
  422. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
  423. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
  424. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
  425. data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
  426. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
  427. data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
  428. data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
  429. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
  430. data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
  431. data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
  432. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  433. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  434. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
  435. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
  436. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
  437. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
  438. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
  439. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
  440. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
  441. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
  442. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
  443. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
  444. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
  445. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
  446. data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
  447. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  448. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
  449. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
  450. data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
  451. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
  452. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  453. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
  454. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
  455. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  456. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
  457. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  458. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
  459. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  460. data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
  461. data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
  462. data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
  463. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
  464. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
  465. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
  466. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
  467. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
  468. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
  469. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
  470. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
  471. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
  472. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
  473. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
  474. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
  475. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  476. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
  477. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
  478. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  479. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
  480. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
  481. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
  482. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
  483. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
  484. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
  485. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
  486. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
  487. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
  488. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
  489. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
  490. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
  491. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
  492. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
  493. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
  494. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
  495. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
  496. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
  497. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
  498. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  499. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  500. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
  501. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
  502. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
  503. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
  504. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
  505. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  506. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  507. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
  508. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
  509. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
  510. data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
  511. data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
  512. data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
  513. data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
  514. data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  515. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
  516. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
  517. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
  518. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  519. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  520. data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
  521. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
  522. data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
  523. data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
  524. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
  525. data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
  526. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  527. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  528. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
  529. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
  530. data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
  531. data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
  532. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
  533. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
  534. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
  535. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
  536. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
  537. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
  538. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
  539. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
  540. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
  541. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
  542. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
  543. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
  544. data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
  545. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
  546. data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
  547. data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
  548. data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
  549. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
  550. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
  551. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
  552. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
  553. data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
  554. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
  555. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
  556. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
  557. data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
  558. data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
  559. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
  560. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
  561. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
  562. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
  563. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
  564. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
  565. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
  566. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
  567. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
  568. data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
  569. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  570. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
  571. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
  572. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
  573. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
  574. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
  575. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
  576. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
  577. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
  578. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
  579. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
  580. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
  581. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
  582. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
  583. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
  584. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
  585. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
  586. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
  587. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
  588. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
  589. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
  590. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
  591. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
  592. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
  593. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
  594. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
  595. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
  596. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
  597. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
  598. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
  599. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
  600. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
  601. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
  602. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
  603. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
  604. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
  605. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
  606. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
  607. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
  608. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
  609. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
  610. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
  611. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
  612. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
  613. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
  614. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
  615. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
  616. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
  617. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
  618. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
  619. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
  620. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
  621. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
  622. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
  623. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
  624. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
  625. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
  626. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
  627. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
  628. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
  629. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
  630. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
  631. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
  632. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
  633. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
  634. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
  635. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
  636. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
  637. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
  638. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
  639. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
  640. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
  641. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
  642. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
  643. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
  644. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
  645. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
  646. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
  647. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
  648. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
  649. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
  650. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  651. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
  652. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
  653. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
  654. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
  655. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
  656. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
  657. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
  658. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
  659. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
  660. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
  661. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
  662. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
  663. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
  664. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
  665. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
  666. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
  667. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
  668. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
  669. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
  670. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
  671. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
  672. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
  673. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
  674. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
  675. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
  676. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
  677. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
  678. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
  679. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
  680. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
  681. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
  682. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
  683. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
  684. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
  685. data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
  686. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
  687. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
  688. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
  689. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
  690. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
  691. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
  692. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
  693. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
  694. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
  695. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
  696. data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
  697. data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
  698. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  699. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
  700. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
  701. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
  702. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
  703. data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
  704. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
  705. data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
  706. data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
  707. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
  708. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
  709. data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
  710. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  711. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  712. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
  713. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
  714. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
  715. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
  716. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
  717. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
  718. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
  719. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
  720. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
  721. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
  722. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
  723. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
  724. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
  725. data/vendor/local/share/license/mruby/README.md +2 -2
  726. data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
  727. data/vendor/local/share/man/man1/groonga.1 +7210 -3029
  728. metadata +75 -11
  729. data/doc/text/news.textile +0 -1217
  730. data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
  731. data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
  732. data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
  733. data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.8. Tokenizers &mdash; Groonga v5.0.0ドキュメント</title>
10
+ <title>7.8. トークナイザー &mdash; Groonga v5.0.1-42-g4d10df1ドキュメント</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.0',
18
+ VERSION: '5.0.1-42-g4d10df1',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -26,12 +26,12 @@
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <script type="text/javascript" src="../_static/translations.js"></script>
28
28
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
29
- <link rel="top" title="Groonga v5.0.0ドキュメント" href="../index.html" />
29
+ <link rel="top" title="Groonga v5.0.1-42-g4d10df1ドキュメント" href="../index.html" />
30
30
  <link rel="up" title="7. リファレンスマニュアル" href="../reference.html" />
31
31
  <link rel="next" title="7.9. トークンフィルター" href="token_filters.html" />
32
32
  <link rel="prev" title="7.7. ノーマライザー" href="normalizers.html" />
33
33
  </head>
34
- <body role="document">
34
+ <body>
35
35
  <div class="header">
36
36
  <h1 class="title">
37
37
  <a id="top-link" href="../index.html">
@@ -49,7 +49,7 @@
49
49
  </div>
50
50
 
51
51
 
52
- <div class="related" role="navigation" aria-label="related navigation">
52
+ <div class="related">
53
53
  <h3>ナビゲーション</h3>
54
54
  <ul>
55
55
  <li class="right" style="margin-right: 10px">
@@ -61,7 +61,7 @@
61
61
  <li class="right" >
62
62
  <a href="normalizers.html" title="7.7. ノーマライザー"
63
63
  accesskey="P">前へ</a> |</li>
64
- <li><a href="../index.html">Groonga v5.0.0ドキュメント</a> &raquo;</li>
64
+ <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1ドキュメント</a> &raquo;</li>
65
65
  <li><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> &raquo;</li>
66
66
  </ul>
67
67
  </div>
@@ -69,48 +69,1314 @@
69
69
  <div class="document">
70
70
  <div class="documentwrapper">
71
71
  <div class="bodywrapper">
72
- <div class="body" role="main">
72
+ <div class="body">
73
73
 
74
74
  <div class="section" id="tokenizers">
75
- <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
76
- <p>TODO: Write me.</p>
75
+ <h1>7.8. トークナイザー<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
76
+ <div class="section" id="summary">
77
+ <h2>7.8.1. 概要<a class="headerlink" href="#summary" title="このヘッドラインへのパーマリンク">¶</a></h2>
78
+ <p>Groongaにはテキストをトークナイズするトークナイザーモジュールがあります。次のケースのときにトークナイザーを使います。</p>
79
+ <blockquote>
80
+ <div><ul>
81
+ <li><p class="first">テキストのインデックスを構築するとき</p>
82
+ <div class="figure align-center">
83
+ <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
84
+ <p class="caption">テキストのインデックスを構築するときにトークナイザーを使います。</p>
85
+ </div>
86
+ </li>
87
+ <li><p class="first">クエリーで検索するとき</p>
88
+ <div class="figure align-center">
89
+ <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
90
+ <p class="caption">クエリーで検索するときにトークナイザーを使います。</p>
91
+ </div>
92
+ </li>
93
+ </ul>
94
+ </div></blockquote>
95
+ <p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
96
+ <p>一般的に <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> を使うことをオススメします。</p>
97
+ <p><a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドを使って <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> トークナイザーを試す例を以下に示します。</p>
98
+ <p>実行例:</p>
99
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
100
+ # [
101
+ # [
102
+ # 0,
103
+ # 1337566253.89858,
104
+ # 0.000355720520019531
105
+ # ],
106
+ # [
107
+ # {
108
+ # &quot;position&quot;: 0,
109
+ # &quot;value&quot;: &quot;He&quot;
110
+ # },
111
+ # {
112
+ # &quot;position&quot;: 1,
113
+ # &quot;value&quot;: &quot;el&quot;
114
+ # },
115
+ # {
116
+ # &quot;position&quot;: 2,
117
+ # &quot;value&quot;: &quot;ll&quot;
118
+ # },
119
+ # {
120
+ # &quot;position&quot;: 3,
121
+ # &quot;value&quot;: &quot;lo&quot;
122
+ # },
123
+ # {
124
+ # &quot;position&quot;: 4,
125
+ # &quot;value&quot;: &quot;o &quot;
126
+ # },
127
+ # {
128
+ # &quot;position&quot;: 5,
129
+ # &quot;value&quot;: &quot; W&quot;
130
+ # },
131
+ # {
132
+ # &quot;position&quot;: 6,
133
+ # &quot;value&quot;: &quot;Wo&quot;
134
+ # },
135
+ # {
136
+ # &quot;position&quot;: 7,
137
+ # &quot;value&quot;: &quot;or&quot;
138
+ # },
139
+ # {
140
+ # &quot;position&quot;: 8,
141
+ # &quot;value&quot;: &quot;rl&quot;
142
+ # },
143
+ # {
144
+ # &quot;position&quot;: 9,
145
+ # &quot;value&quot;: &quot;ld&quot;
146
+ # },
147
+ # {
148
+ # &quot;position&quot;: 10,
149
+ # &quot;value&quot;: &quot;d&quot;
150
+ # }
151
+ # ]
152
+ # ]
153
+ </pre></div>
154
+ </div>
155
+ </div>
156
+ <div class="section" id="what-is-tokenize">
157
+ <h2>7.8.2. 「トークナイズ」とはなにか<a class="headerlink" href="#what-is-tokenize" title="このヘッドラインへのパーマリンク">¶</a></h2>
158
+ <p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
159
+ <p>例えば、バイグラムというトークナイズ方法では <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は次のトークンにトークナイズされます。</p>
160
+ <blockquote>
161
+ <div><ul class="simple">
162
+ <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
163
+ <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
164
+ <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
165
+ <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
166
+ <li><p class="first"><tt class="docutils literal"><span class="pre">o_</span></tt> ( <tt class="docutils literal"><span class="pre">_</span></tt> は空白文字という意味)</p>
167
+ </li>
168
+ <li><p class="first"><tt class="docutils literal"><span class="pre">_W</span></tt> ( <tt class="docutils literal"><span class="pre">_</span></tt> は空白文字という意味)</p>
169
+ </li>
170
+ <li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
171
+ <li><tt class="docutils literal"><span class="pre">or</span></tt></li>
172
+ <li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
173
+ <li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
174
+ </ul>
175
+ </div></blockquote>
176
+ <p>上記の例では、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> から10個のトークンを抽出しました。</p>
177
+ <p>例えば、空白区切りのトークナイズ方法では <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は次のトークンにトークナイズされます。</p>
178
+ <blockquote>
179
+ <div><ul class="simple">
180
+ <li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
181
+ <li><tt class="docutils literal"><span class="pre">World</span></tt></li>
182
+ </ul>
183
+ </div></blockquote>
184
+ <p>上記の例では、<tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> から2つのトークンを抽出しました。</p>
185
+ <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <tt class="docutils literal"><span class="pre">ll</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <tt class="docutils literal"><span class="pre">ll</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <tt class="docutils literal"><span class="pre">ll</span></tt> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <tt class="docutils literal"><span class="pre">Hello</span></tt> というトークンと <tt class="docutils literal"><span class="pre">World</span></tt> というトークンしか抽出していません。</p>
186
+ <p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
187
+ <p>例えば、バイグラムというトークナイズ方法では <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> と <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> を検索できます。しかし、「論理和」を検索したい人にとっては <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
188
+ <p>空白区切りのトークナイズ方法を使った場合は <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <tt class="docutils literal"><span class="pre">World</span></tt> は <tt class="docutils literal"><span class="pre">World</span></tt> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> も <tt class="docutils literal"><span class="pre">or</span></tt> を含んでいるのに見つかっていないので再現率が下がっています。</p>
189
+ </div>
190
+ <div class="section" id="built-in-tokenizsers">
191
+ <h2>7.8.3. 組み込みトークナイザー<a class="headerlink" href="#built-in-tokenizsers" title="このヘッドラインへのパーマリンク">¶</a></h2>
77
192
  <p>以下は組み込みのトークナイザーのリストです。</p>
78
- <ul class="simple">
79
- <li>TokenBigram</li>
80
- <li>TokenBigramSplitSymbol</li>
81
- <li>TokenBigramSplitSymbolAlpha</li>
82
- <li>TokenBigramSplitSymbolAlphaDigit</li>
83
- <li>TokenBigramIgnoreBlank</li>
84
- <li>TokenBigramIgnoreBlankSplitSymbol</li>
85
- <li>TokenBigramIgnoreBlankSplitAlpha</li>
86
- <li>TokenBigramIgnoreBlankSplitAlphaDigit</li>
87
- <li>TokenDelimit</li>
88
- <li>TokenDelimitNull</li>
89
- <li>TokenTrigram</li>
90
- <li>TokenUnigram</li>
193
+ <blockquote>
194
+ <div><ul class="simple">
195
+ <li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
196
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
197
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
198
+ <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
199
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
200
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
201
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></li>
202
+ <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></li>
203
+ <li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
204
+ <li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
205
+ <li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
206
+ <li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
207
+ <li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
208
+ <li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
91
209
  </ul>
210
+ </div></blockquote>
211
+ <div class="section" id="tokenbigram">
212
+ <span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
213
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
214
+ <p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <tt class="docutils literal"><span class="pre">Hello</span></tt> は次のトークンにトークナイズします。</p>
215
+ <blockquote>
216
+ <div><ul class="simple">
217
+ <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
218
+ <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
219
+ <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
220
+ <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
221
+ </ul>
222
+ </div></blockquote>
223
+ <p>バイグラムというトークナイズ方法は再現率に優れています。なぜなら、2文字以上の文字のクエリーに対してはすべてのテキストを見つけることができるからです。</p>
224
+ <p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <tt class="docutils literal"><span class="pre">l</span></tt> というクエリーから <tt class="docutils literal"><span class="pre">ll</span></tt> というトークンと <tt class="docutils literal"><span class="pre">lo</span></tt> というトークンを見つけることができます。</p>
225
+ <p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">world</span></tt> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はこの問題を解決しています。</p>
226
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> の挙動は <a class="reference internal" href="normalizers.html"><em>ノーマライザー</em></a> を使うかどうかで変わります。</p>
227
+ <p>ノーマライザーを使っていない場合は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
228
+ <p>実行例:</p>
229
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
230
+ # [
231
+ # [
232
+ # 0,
233
+ # 1337566253.89858,
234
+ # 0.000355720520019531
235
+ # ],
236
+ # [
237
+ # {
238
+ # &quot;position&quot;: 0,
239
+ # &quot;value&quot;: &quot;He&quot;
240
+ # },
241
+ # {
242
+ # &quot;position&quot;: 1,
243
+ # &quot;value&quot;: &quot;el&quot;
244
+ # },
245
+ # {
246
+ # &quot;position&quot;: 2,
247
+ # &quot;value&quot;: &quot;ll&quot;
248
+ # },
249
+ # {
250
+ # &quot;position&quot;: 3,
251
+ # &quot;value&quot;: &quot;lo&quot;
252
+ # },
253
+ # {
254
+ # &quot;position&quot;: 4,
255
+ # &quot;value&quot;: &quot;o &quot;
256
+ # },
257
+ # {
258
+ # &quot;position&quot;: 5,
259
+ # &quot;value&quot;: &quot; W&quot;
260
+ # },
261
+ # {
262
+ # &quot;position&quot;: 6,
263
+ # &quot;value&quot;: &quot;Wo&quot;
264
+ # },
265
+ # {
266
+ # &quot;position&quot;: 7,
267
+ # &quot;value&quot;: &quot;or&quot;
268
+ # },
269
+ # {
270
+ # &quot;position&quot;: 8,
271
+ # &quot;value&quot;: &quot;rl&quot;
272
+ # },
273
+ # {
274
+ # &quot;position&quot;: 9,
275
+ # &quot;value&quot;: &quot;ld&quot;
276
+ # },
277
+ # {
278
+ # &quot;position&quot;: 10,
279
+ # &quot;value&quot;: &quot;d&quot;
280
+ # }
281
+ # ]
282
+ # ]
283
+ </pre></div>
284
+ </div>
285
+ <p>ノーマライザーを使っている場合は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
286
+ <p>もしかしたら、複数の方法が混ざったこの挙動はわかりにくいかもしれません。しかし、英語のテキスト(ASCII文字列のみ)や日本語テキスト(ASCII文字列と非ASCII文字列が混ざっている)のような多くのユースケースでは合理的な方法です。</p>
287
+ <p>ASCII文字しか使わない多くの言語は単語の区切りに空白文字を使っています。このようなケースに空白区切りのトークナイズ方法は適切です。</p>
288
+ <p>非ASCII文字を使う言語では単語の区切りに空白文字を使いません。このケースにはバイグラムなトークナイズ方法は適切です。</p>
289
+ <p>複数の言語が混ざっている場合は、複数の方法を組み合わせたトークナイズ方法が適切です。</p>
290
+ <p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a> のような <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> というトークナイザーを参照してください。</p>
291
+ <p>例を使いながら <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> の挙動を確認しましょう。</p>
292
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
293
+ <p>実行例:</p>
294
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
295
+ # [
296
+ # [
297
+ # 0,
298
+ # 1337566253.89858,
299
+ # 0.000355720520019531
300
+ # ],
301
+ # [
302
+ # {
303
+ # &quot;position&quot;: 0,
304
+ # &quot;value&quot;: &quot;hello&quot;
305
+ # },
306
+ # {
307
+ # &quot;position&quot;: 1,
308
+ # &quot;value&quot;: &quot;world&quot;
309
+ # }
310
+ # ]
311
+ # ]
312
+ </pre></div>
313
+ </div>
314
+ <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
315
+ <blockquote>
316
+ <div><ul class="simple">
317
+ <li><p class="first">アルファベット</p>
318
+ </li>
319
+ <li><p class="first">数字</p>
320
+ </li>
321
+ <li><p class="first">記号(たとえば <tt class="docutils literal"><span class="pre">(</span></tt> 、 <tt class="docutils literal"><span class="pre">)</span></tt> 、 <tt class="docutils literal"><span class="pre">!</span></tt> など)</p>
322
+ </li>
323
+ <li><p class="first">ひらがな</p>
324
+ </li>
325
+ <li><p class="first">カタカナ</p>
326
+ </li>
327
+ <li><p class="first">漢字</p>
328
+ </li>
329
+ <li><p class="first">その他</p>
330
+ </li>
331
+ </ul>
332
+ </div></blockquote>
333
+ <p>次の例は2つのトークン区切りを示しています。</p>
334
+ <blockquote>
335
+ <div><ul class="simple">
336
+ <li><p class="first"><tt class="docutils literal"><span class="pre">100</span></tt> (数字)と <tt class="docutils literal"><span class="pre">cents</span></tt> (アルファベット)の間のところ</p>
337
+ </li>
338
+ <li><p class="first"><tt class="docutils literal"><span class="pre">cents</span></tt> (アルファベット)と <tt class="docutils literal"><span class="pre">!!!</span></tt> (記号)の間のところ</p>
339
+ </li>
340
+ </ul>
341
+ </div></blockquote>
342
+ <p>実行例:</p>
343
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;100cents!!!&quot; NormalizerAuto
344
+ # [
345
+ # [
346
+ # 0,
347
+ # 1337566253.89858,
348
+ # 0.000355720520019531
349
+ # ],
350
+ # [
351
+ # {
352
+ # &quot;position&quot;: 0,
353
+ # &quot;value&quot;: &quot;100&quot;
354
+ # },
355
+ # {
356
+ # &quot;position&quot;: 1,
357
+ # &quot;value&quot;: &quot;cents&quot;
358
+ # },
359
+ # {
360
+ # &quot;position&quot;: 2,
361
+ # &quot;value&quot;: &quot;!!!&quot;
362
+ # }
363
+ # ]
364
+ # ]
365
+ </pre></div>
366
+ </div>
367
+ <p>以下は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
368
+ <p>実行例:</p>
369
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
370
+ # [
371
+ # [
372
+ # 0,
373
+ # 1337566253.89858,
374
+ # 0.000355720520019531
375
+ # ],
376
+ # [
377
+ # {
378
+ # &quot;position&quot;: 0,
379
+ # &quot;value&quot;: &quot;日本&quot;
380
+ # },
381
+ # {
382
+ # &quot;position&quot;: 1,
383
+ # &quot;value&quot;: &quot;本語&quot;
384
+ # },
385
+ # {
386
+ # &quot;position&quot;: 2,
387
+ # &quot;value&quot;: &quot;語の&quot;
388
+ # },
389
+ # {
390
+ # &quot;position&quot;: 3,
391
+ # &quot;value&quot;: &quot;の勉&quot;
392
+ # },
393
+ # {
394
+ # &quot;position&quot;: 4,
395
+ # &quot;value&quot;: &quot;勉強&quot;
396
+ # },
397
+ # {
398
+ # &quot;position&quot;: 5,
399
+ # &quot;value&quot;: &quot;強&quot;
400
+ # }
401
+ # ]
402
+ # ]
403
+ </pre></div>
404
+ </div>
405
+ </div>
406
+ <div class="section" id="tokenbigramsplitsymbol">
407
+ <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
408
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> は記号のトークナイズ方法にバイグラムを使います。</p>
409
+ <p>実行例:</p>
410
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
411
+ # [
412
+ # [
413
+ # 0,
414
+ # 1337566253.89858,
415
+ # 0.000355720520019531
416
+ # ],
417
+ # [
418
+ # {
419
+ # &quot;position&quot;: 0,
420
+ # &quot;value&quot;: &quot;100&quot;
421
+ # },
422
+ # {
423
+ # &quot;position&quot;: 1,
424
+ # &quot;value&quot;: &quot;cents&quot;
425
+ # },
426
+ # {
427
+ # &quot;position&quot;: 2,
428
+ # &quot;value&quot;: &quot;!!&quot;
429
+ # },
430
+ # {
431
+ # &quot;position&quot;: 3,
432
+ # &quot;value&quot;: &quot;!!&quot;
433
+ # },
434
+ # {
435
+ # &quot;position&quot;: 4,
436
+ # &quot;value&quot;: &quot;!&quot;
437
+ # }
438
+ # ]
439
+ # ]
440
+ </pre></div>
441
+ </div>
442
+ </div>
443
+ <div class="section" id="tokenbigramsplitsymbolalpha">
444
+ <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
445
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号とアルファベットの扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
446
+ <p>実行例:</p>
447
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
448
+ # [
449
+ # [
450
+ # 0,
451
+ # 1337566253.89858,
452
+ # 0.000355720520019531
453
+ # ],
454
+ # [
455
+ # {
456
+ # &quot;position&quot;: 0,
457
+ # &quot;value&quot;: &quot;100&quot;
458
+ # },
459
+ # {
460
+ # &quot;position&quot;: 1,
461
+ # &quot;value&quot;: &quot;ce&quot;
462
+ # },
463
+ # {
464
+ # &quot;position&quot;: 2,
465
+ # &quot;value&quot;: &quot;en&quot;
466
+ # },
467
+ # {
468
+ # &quot;position&quot;: 3,
469
+ # &quot;value&quot;: &quot;nt&quot;
470
+ # },
471
+ # {
472
+ # &quot;position&quot;: 4,
473
+ # &quot;value&quot;: &quot;ts&quot;
474
+ # },
475
+ # {
476
+ # &quot;position&quot;: 5,
477
+ # &quot;value&quot;: &quot;s!&quot;
478
+ # },
479
+ # {
480
+ # &quot;position&quot;: 6,
481
+ # &quot;value&quot;: &quot;!!&quot;
482
+ # },
483
+ # {
484
+ # &quot;position&quot;: 7,
485
+ # &quot;value&quot;: &quot;!!&quot;
486
+ # },
487
+ # {
488
+ # &quot;position&quot;: 8,
489
+ # &quot;value&quot;: &quot;!&quot;
490
+ # }
491
+ # ]
492
+ # ]
493
+ </pre></div>
494
+ </div>
495
+ </div>
496
+ <div class="section" id="tokenbigramsplitsymbolalphadigit">
497
+ <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
498
+ <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
499
+ <p>実行例:</p>
500
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
501
+ # [
502
+ # [
503
+ # 0,
504
+ # 1337566253.89858,
505
+ # 0.000355720520019531
506
+ # ],
507
+ # [
508
+ # {
509
+ # &quot;position&quot;: 0,
510
+ # &quot;value&quot;: &quot;10&quot;
511
+ # },
512
+ # {
513
+ # &quot;position&quot;: 1,
514
+ # &quot;value&quot;: &quot;00&quot;
515
+ # },
516
+ # {
517
+ # &quot;position&quot;: 2,
518
+ # &quot;value&quot;: &quot;0c&quot;
519
+ # },
520
+ # {
521
+ # &quot;position&quot;: 3,
522
+ # &quot;value&quot;: &quot;ce&quot;
523
+ # },
524
+ # {
525
+ # &quot;position&quot;: 4,
526
+ # &quot;value&quot;: &quot;en&quot;
527
+ # },
528
+ # {
529
+ # &quot;position&quot;: 5,
530
+ # &quot;value&quot;: &quot;nt&quot;
531
+ # },
532
+ # {
533
+ # &quot;position&quot;: 6,
534
+ # &quot;value&quot;: &quot;ts&quot;
535
+ # },
536
+ # {
537
+ # &quot;position&quot;: 7,
538
+ # &quot;value&quot;: &quot;s!&quot;
539
+ # },
540
+ # {
541
+ # &quot;position&quot;: 8,
542
+ # &quot;value&quot;: &quot;!!&quot;
543
+ # },
544
+ # {
545
+ # &quot;position&quot;: 9,
546
+ # &quot;value&quot;: &quot;!!&quot;
547
+ # },
548
+ # {
549
+ # &quot;position&quot;: 10,
550
+ # &quot;value&quot;: &quot;!&quot;
551
+ # }
552
+ # ]
553
+ # ]
554
+ </pre></div>
555
+ </div>
556
+ </div>
557
+ <div class="section" id="tokenbigramignoreblank">
558
+ <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
559
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは空白文字の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
560
+ <p><tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
561
+ <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
562
+ <p>実行例:</p>
563
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
564
+ # [
565
+ # [
566
+ # 0,
567
+ # 1337566253.89858,
568
+ # 0.000355720520019531
569
+ # ],
570
+ # [
571
+ # {
572
+ # &quot;position&quot;: 0,
573
+ # &quot;value&quot;: &quot;日&quot;
574
+ # },
575
+ # {
576
+ # &quot;position&quot;: 1,
577
+ # &quot;value&quot;: &quot;本&quot;
578
+ # },
579
+ # {
580
+ # &quot;position&quot;: 2,
581
+ # &quot;value&quot;: &quot;語&quot;
582
+ # },
583
+ # {
584
+ # &quot;position&quot;: 3,
585
+ # &quot;value&quot;: &quot;!&quot;
586
+ # },
587
+ # {
588
+ # &quot;position&quot;: 4,
589
+ # &quot;value&quot;: &quot;!&quot;
590
+ # },
591
+ # {
592
+ # &quot;position&quot;: 5,
593
+ # &quot;value&quot;: &quot;!&quot;
594
+ # }
595
+ # ]
596
+ # ]
597
+ </pre></div>
598
+ </div>
599
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> での実行結果です。</p>
600
+ <p>実行例:</p>
601
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
602
+ # [
603
+ # [
604
+ # 0,
605
+ # 1337566253.89858,
606
+ # 0.000355720520019531
607
+ # ],
608
+ # [
609
+ # {
610
+ # &quot;position&quot;: 0,
611
+ # &quot;value&quot;: &quot;日本&quot;
612
+ # },
613
+ # {
614
+ # &quot;position&quot;: 1,
615
+ # &quot;value&quot;: &quot;本語&quot;
616
+ # },
617
+ # {
618
+ # &quot;position&quot;: 2,
619
+ # &quot;value&quot;: &quot;語&quot;
620
+ # },
621
+ # {
622
+ # &quot;position&quot;: 3,
623
+ # &quot;value&quot;: &quot;!!!&quot;
624
+ # }
625
+ # ]
626
+ # ]
627
+ </pre></div>
628
+ </div>
629
+ </div>
630
+ <div class="section" id="tokenbigramignoreblanksplitsymbol">
631
+ <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
632
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
633
+ <blockquote>
634
+ <div><ul class="simple">
635
+ <li><p class="first">空白文字の扱い</p>
636
+ </li>
637
+ <li><p class="first">記号の扱い</p>
638
+ </li>
639
+ </ul>
640
+ </div></blockquote>
641
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
642
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は記号をバイグラムでトークナイズします。</p>
643
+ <p><tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
644
+ <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
645
+ <p>実行例:</p>
646
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
647
+ # [
648
+ # [
649
+ # 0,
650
+ # 1337566253.89858,
651
+ # 0.000355720520019531
652
+ # ],
653
+ # [
654
+ # {
655
+ # &quot;position&quot;: 0,
656
+ # &quot;value&quot;: &quot;日&quot;
657
+ # },
658
+ # {
659
+ # &quot;position&quot;: 1,
660
+ # &quot;value&quot;: &quot;本&quot;
661
+ # },
662
+ # {
663
+ # &quot;position&quot;: 2,
664
+ # &quot;value&quot;: &quot;語&quot;
665
+ # },
666
+ # {
667
+ # &quot;position&quot;: 3,
668
+ # &quot;value&quot;: &quot;!&quot;
669
+ # },
670
+ # {
671
+ # &quot;position&quot;: 4,
672
+ # &quot;value&quot;: &quot;!&quot;
673
+ # },
674
+ # {
675
+ # &quot;position&quot;: 5,
676
+ # &quot;value&quot;: &quot;!&quot;
677
+ # }
678
+ # ]
679
+ # ]
680
+ </pre></div>
681
+ </div>
682
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> の実行結果です。</p>
683
+ <p>実行例:</p>
684
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
685
+ # [
686
+ # [
687
+ # 0,
688
+ # 1337566253.89858,
689
+ # 0.000355720520019531
690
+ # ],
691
+ # [
692
+ # {
693
+ # &quot;position&quot;: 0,
694
+ # &quot;value&quot;: &quot;日本&quot;
695
+ # },
696
+ # {
697
+ # &quot;position&quot;: 1,
698
+ # &quot;value&quot;: &quot;本語&quot;
699
+ # },
700
+ # {
701
+ # &quot;position&quot;: 2,
702
+ # &quot;value&quot;: &quot;語!&quot;
703
+ # },
704
+ # {
705
+ # &quot;position&quot;: 3,
706
+ # &quot;value&quot;: &quot;!!&quot;
707
+ # },
708
+ # {
709
+ # &quot;position&quot;: 4,
710
+ # &quot;value&quot;: &quot;!!&quot;
711
+ # },
712
+ # {
713
+ # &quot;position&quot;: 5,
714
+ # &quot;value&quot;: &quot;!&quot;
715
+ # }
716
+ # ]
717
+ # ]
718
+ </pre></div>
719
+ </div>
720
+ </div>
721
+ <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
722
+ <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
723
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
724
+ <blockquote>
725
+ <div><ul class="simple">
726
+ <li><p class="first">空白文字の扱い</p>
727
+ </li>
728
+ <li><p class="first">記号とアルファベットの扱い</p>
729
+ </li>
730
+ </ul>
731
+ </div></blockquote>
732
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
733
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は記号とアルファベットをバイグラムでトークナイズします。</p>
734
+ <p><tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
735
+ <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
736
+ <p>実行例:</p>
737
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
738
+ # [
739
+ # [
740
+ # 0,
741
+ # 1337566253.89858,
742
+ # 0.000355720520019531
743
+ # ],
744
+ # [
745
+ # {
746
+ # &quot;position&quot;: 0,
747
+ # &quot;value&quot;: &quot;hello&quot;
748
+ # },
749
+ # {
750
+ # &quot;position&quot;: 1,
751
+ # &quot;value&quot;: &quot;日&quot;
752
+ # },
753
+ # {
754
+ # &quot;position&quot;: 2,
755
+ # &quot;value&quot;: &quot;本&quot;
756
+ # },
757
+ # {
758
+ # &quot;position&quot;: 3,
759
+ # &quot;value&quot;: &quot;語&quot;
760
+ # },
761
+ # {
762
+ # &quot;position&quot;: 4,
763
+ # &quot;value&quot;: &quot;!&quot;
764
+ # },
765
+ # {
766
+ # &quot;position&quot;: 5,
767
+ # &quot;value&quot;: &quot;!&quot;
768
+ # },
769
+ # {
770
+ # &quot;position&quot;: 6,
771
+ # &quot;value&quot;: &quot;!&quot;
772
+ # }
773
+ # ]
774
+ # ]
775
+ </pre></div>
776
+ </div>
777
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> の実行結果です。</p>
778
+ <p>実行例:</p>
779
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
780
+ # [
781
+ # [
782
+ # 0,
783
+ # 1337566253.89858,
784
+ # 0.000355720520019531
785
+ # ],
786
+ # [
787
+ # {
788
+ # &quot;position&quot;: 0,
789
+ # &quot;value&quot;: &quot;he&quot;
790
+ # },
791
+ # {
792
+ # &quot;position&quot;: 1,
793
+ # &quot;value&quot;: &quot;el&quot;
794
+ # },
795
+ # {
796
+ # &quot;position&quot;: 2,
797
+ # &quot;value&quot;: &quot;ll&quot;
798
+ # },
799
+ # {
800
+ # &quot;position&quot;: 3,
801
+ # &quot;value&quot;: &quot;lo&quot;
802
+ # },
803
+ # {
804
+ # &quot;position&quot;: 4,
805
+ # &quot;value&quot;: &quot;o日&quot;
806
+ # },
807
+ # {
808
+ # &quot;position&quot;: 5,
809
+ # &quot;value&quot;: &quot;日本&quot;
810
+ # },
811
+ # {
812
+ # &quot;position&quot;: 6,
813
+ # &quot;value&quot;: &quot;本語&quot;
814
+ # },
815
+ # {
816
+ # &quot;position&quot;: 7,
817
+ # &quot;value&quot;: &quot;語!&quot;
818
+ # },
819
+ # {
820
+ # &quot;position&quot;: 8,
821
+ # &quot;value&quot;: &quot;!!&quot;
822
+ # },
823
+ # {
824
+ # &quot;position&quot;: 9,
825
+ # &quot;value&quot;: &quot;!!&quot;
826
+ # },
827
+ # {
828
+ # &quot;position&quot;: 10,
829
+ # &quot;value&quot;: &quot;!&quot;
830
+ # }
831
+ # ]
832
+ # ]
833
+ </pre></div>
834
+ </div>
835
+ </div>
836
+ <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
837
+ <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
838
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
839
+ <blockquote>
840
+ <div><ul class="simple">
841
+ <li><p class="first">空白文字の扱い</p>
842
+ </li>
843
+ <li><p class="first">記号とアルファベットと数字の扱い</p>
844
+ </li>
845
+ </ul>
846
+ </div></blockquote>
847
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
848
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
849
+ <p><tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
850
+ <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
851
+ <p>実行例:</p>
852
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
853
+ # [
854
+ # [
855
+ # 0,
856
+ # 1337566253.89858,
857
+ # 0.000355720520019531
858
+ # ],
859
+ # [
860
+ # {
861
+ # &quot;position&quot;: 0,
862
+ # &quot;value&quot;: &quot;hello&quot;
863
+ # },
864
+ # {
865
+ # &quot;position&quot;: 1,
866
+ # &quot;value&quot;: &quot;日&quot;
867
+ # },
868
+ # {
869
+ # &quot;position&quot;: 2,
870
+ # &quot;value&quot;: &quot;本&quot;
871
+ # },
872
+ # {
873
+ # &quot;position&quot;: 3,
874
+ # &quot;value&quot;: &quot;語&quot;
875
+ # },
876
+ # {
877
+ # &quot;position&quot;: 4,
878
+ # &quot;value&quot;: &quot;!&quot;
879
+ # },
880
+ # {
881
+ # &quot;position&quot;: 5,
882
+ # &quot;value&quot;: &quot;!&quot;
883
+ # },
884
+ # {
885
+ # &quot;position&quot;: 6,
886
+ # &quot;value&quot;: &quot;!&quot;
887
+ # },
888
+ # {
889
+ # &quot;position&quot;: 7,
890
+ # &quot;value&quot;: &quot;777&quot;
891
+ # }
892
+ # ]
893
+ # ]
894
+ </pre></div>
895
+ </div>
896
+ <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> の実行結果です。</p>
897
+ <p>実行例:</p>
898
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
899
+ # [
900
+ # [
901
+ # 0,
902
+ # 1337566253.89858,
903
+ # 0.000355720520019531
904
+ # ],
905
+ # [
906
+ # {
907
+ # &quot;position&quot;: 0,
908
+ # &quot;value&quot;: &quot;he&quot;
909
+ # },
910
+ # {
911
+ # &quot;position&quot;: 1,
912
+ # &quot;value&quot;: &quot;el&quot;
913
+ # },
914
+ # {
915
+ # &quot;position&quot;: 2,
916
+ # &quot;value&quot;: &quot;ll&quot;
917
+ # },
918
+ # {
919
+ # &quot;position&quot;: 3,
920
+ # &quot;value&quot;: &quot;lo&quot;
921
+ # },
922
+ # {
923
+ # &quot;position&quot;: 4,
924
+ # &quot;value&quot;: &quot;o日&quot;
925
+ # },
926
+ # {
927
+ # &quot;position&quot;: 5,
928
+ # &quot;value&quot;: &quot;日本&quot;
929
+ # },
930
+ # {
931
+ # &quot;position&quot;: 6,
932
+ # &quot;value&quot;: &quot;本語&quot;
933
+ # },
934
+ # {
935
+ # &quot;position&quot;: 7,
936
+ # &quot;value&quot;: &quot;語!&quot;
937
+ # },
938
+ # {
939
+ # &quot;position&quot;: 8,
940
+ # &quot;value&quot;: &quot;!!&quot;
941
+ # },
942
+ # {
943
+ # &quot;position&quot;: 9,
944
+ # &quot;value&quot;: &quot;!!&quot;
945
+ # },
946
+ # {
947
+ # &quot;position&quot;: 10,
948
+ # &quot;value&quot;: &quot;!7&quot;
949
+ # },
950
+ # {
951
+ # &quot;position&quot;: 11,
952
+ # &quot;value&quot;: &quot;77&quot;
953
+ # },
954
+ # {
955
+ # &quot;position&quot;: 12,
956
+ # &quot;value&quot;: &quot;77&quot;
957
+ # },
958
+ # {
959
+ # &quot;position&quot;: 13,
960
+ # &quot;value&quot;: &quot;7&quot;
961
+ # }
962
+ # ]
963
+ # ]
964
+ </pre></div>
965
+ </div>
966
+ </div>
967
+ <div class="section" id="tokenunigram">
968
+ <span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
969
+ <p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は各トークンが2文字ですが、 <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> は各トークンが1文字です。</p>
970
+ <p>実行例:</p>
971
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
972
+ # [
973
+ # [
974
+ # 0,
975
+ # 1337566253.89858,
976
+ # 0.000355720520019531
977
+ # ],
978
+ # [
979
+ # {
980
+ # &quot;position&quot;: 0,
981
+ # &quot;value&quot;: &quot;100&quot;
982
+ # },
983
+ # {
984
+ # &quot;position&quot;: 1,
985
+ # &quot;value&quot;: &quot;cents&quot;
986
+ # },
987
+ # {
988
+ # &quot;position&quot;: 2,
989
+ # &quot;value&quot;: &quot;!!!&quot;
990
+ # }
991
+ # ]
992
+ # ]
993
+ </pre></div>
994
+ </div>
995
+ </div>
996
+ <div class="section" id="tokentrigram">
997
+ <span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
998
+ <p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は各トークンが2文字ですが、 <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> は各トークンが3文字です。</p>
999
+ <p>実行例:</p>
1000
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1001
+ # [
1002
+ # [
1003
+ # 0,
1004
+ # 1337566253.89858,
1005
+ # 0.000355720520019531
1006
+ # ],
1007
+ # [
1008
+ # {
1009
+ # &quot;position&quot;: 0,
1010
+ # &quot;value&quot;: &quot;10000&quot;
1011
+ # },
1012
+ # {
1013
+ # &quot;position&quot;: 1,
1014
+ # &quot;value&quot;: &quot;cents&quot;
1015
+ # },
1016
+ # {
1017
+ # &quot;position&quot;: 2,
1018
+ # &quot;value&quot;: &quot;!!!!!&quot;
1019
+ # }
1020
+ # ]
1021
+ # ]
1022
+ </pre></div>
1023
+ </div>
1024
+ </div>
1025
+ <div class="section" id="tokendelimit">
1026
+ <span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
1027
+ <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> は1つ以上の空白文字( <tt class="docutils literal"><span class="pre">U+0020</span></tt> )で分割してトークンを抽出します。たとえば、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は <tt class="docutils literal"><span class="pre">Hello</span></tt> と <tt class="docutils literal"><span class="pre">World</span></tt> にトークナイズされます。</p>
1028
+ <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> はタグテキストに適切です。 <tt class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></tt> というテキストから <tt class="docutils literal"><span class="pre">groonga</span></tt> 、 <tt class="docutils literal"><span class="pre">full-text-search</span></tt> 、 <tt class="docutils literal"><span class="pre">http</span></tt> を抽出します。</p>
1029
+ <p>以下は <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> の例です。</p>
1030
+ <p>実行例:</p>
1031
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1032
+ # [
1033
+ # [
1034
+ # 0,
1035
+ # 1337566253.89858,
1036
+ # 0.000355720520019531
1037
+ # ],
1038
+ # [
1039
+ # {
1040
+ # &quot;position&quot;: 0,
1041
+ # &quot;value&quot;: &quot;groonga&quot;
1042
+ # },
1043
+ # {
1044
+ # &quot;position&quot;: 1,
1045
+ # &quot;value&quot;: &quot;full-text-search&quot;
1046
+ # },
1047
+ # {
1048
+ # &quot;position&quot;: 2,
1049
+ # &quot;value&quot;: &quot;http&quot;
1050
+ # }
1051
+ # ]
1052
+ # ]
1053
+ </pre></div>
1054
+ </div>
1055
+ </div>
1056
+ <div class="section" id="tokendelimitnull">
1057
+ <span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
1058
+ <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> は <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a> は空白文字( <tt class="docutils literal"><span class="pre">U+0020</span></tt> )を使いますが、 <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> はNUL文字( <tt class="docutils literal"><span class="pre">U+0000</span></tt> )を使います。</p>
1059
+ <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> もタグテキストに適切です。</p>
1060
+ <p>以下は <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> の例です。</p>
1061
+ <p>実行例:</p>
1062
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1063
+ # [
1064
+ # [
1065
+ # 0,
1066
+ # 1337566253.89858,
1067
+ # 0.000355720520019531
1068
+ # ],
1069
+ # [
1070
+ # {
1071
+ # &quot;position&quot;: 0,
1072
+ # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1073
+ # }
1074
+ # ]
1075
+ # ]
1076
+ </pre></div>
1077
+ </div>
1078
+ </div>
1079
+ <div class="section" id="tokenmecab">
1080
+ <span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
1081
+ <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> は <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
1082
+ <p>MeCabは日本語に依存していません。その言語用の辞書を用意すれば日本語以外でもMeCabを使えます。日本語用の辞書には <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST Japanese Dictionary</a> を使えます。</p>
1083
+ <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> では <tt class="docutils literal"><span class="pre">京都</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">東京都</span></tt> も <tt class="docutils literal"><span class="pre">京都</span></tt> も見つかりますが、この場合は <tt class="docutils literal"><span class="pre">東京都</span></tt> は期待した結果ではありません。 <tt class="docutils literal"><span class="pre">TokenMecab</span></tt> を使うと <tt class="docutils literal"><span class="pre">京都</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">京都</span></tt> だけを見つけられます。</p>
1084
+ <p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
1085
+ <p>以下は <tt class="docutils literal"><span class="pre">TokenMecab</span></tt> の例です。 <tt class="docutils literal"><span class="pre">東京都</span></tt> は <tt class="docutils literal"><span class="pre">東京</span></tt> と <tt class="docutils literal"><span class="pre">都</span></tt> にトークナイズされています。 <tt class="docutils literal"><span class="pre">京都</span></tt> というトークンはありません。</p>
1086
+ <p>実行例:</p>
1087
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab &quot;東京都&quot;
1088
+ # [
1089
+ # [
1090
+ # 0,
1091
+ # 1337566253.89858,
1092
+ # 0.000355720520019531
1093
+ # ],
1094
+ # [
1095
+ # {
1096
+ # &quot;position&quot;: 0,
1097
+ # &quot;value&quot;: &quot;東京&quot;
1098
+ # },
1099
+ # {
1100
+ # &quot;position&quot;: 1,
1101
+ # &quot;value&quot;: &quot;都&quot;
1102
+ # }
1103
+ # ]
1104
+ # ]
1105
+ </pre></div>
1106
+ </div>
1107
+ </div>
1108
+ <div class="section" id="tokenregexp">
1109
+ <span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
1110
+ <div class="versionadded">
1111
+ <p><span class="versionmodified">バージョン 5.0.1 で追加.</span></p>
1112
+ </div>
1113
+ <div class="admonition caution">
1114
+ <p class="first admonition-title">ご用心</p>
1115
+ <p class="last">このトークナイザーは実験的です。仕様が変わる可能性があります。</p>
1116
+ </div>
1117
+ <div class="admonition caution">
1118
+ <p class="first admonition-title">ご用心</p>
1119
+ <p class="last">このトークナイザーはUTF-8でしか使えません。EUC-JPやShift_JISなどと一緒には使えません。</p>
1120
+ </div>
1121
+ <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
1122
+ <p>一般的に、正規表現検索は逐次検索で実行します。しかし、次のケースはインデックスを使って検索できます。</p>
1123
+ <blockquote>
1124
+ <div><ul class="simple">
1125
+ <li><p class="first"><tt class="docutils literal"><span class="pre">hello</span></tt> のようにリテラルしかないケース</p>
1126
+ </li>
1127
+ <li><p class="first"><tt class="docutils literal"><span class="pre">\A/home/alice</span></tt> のようにテキストの最初でのマッチとリテラルのみのケース</p>
1128
+ </li>
1129
+ <li><p class="first"><tt class="docutils literal"><span class="pre">\.txt\z</span></tt> のようにテキストの最後でのマッチとリテラルのみのケース</p>
1130
+ </li>
1131
+ </ul>
1132
+ </div></blockquote>
1133
+ <p>多くのケースでは、逐次検索よりもインデックスを使った検索の方が高速です。</p>
1134
+ <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> はベースとしてバイグラムを使います。 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <tt class="docutils literal"><span class="pre">U+FFEF</span></tt> )を入れ、テキストの最後にテキストの最後であるというマーク( <tt class="docutils literal"><span class="pre">U+FFF0</span></tt> )を入れます。</p>
1135
+ <p>実行例:</p>
1136
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1137
+ # [
1138
+ # [
1139
+ # 0,
1140
+ # 1337566253.89858,
1141
+ # 0.000355720520019531
1142
+ # ],
1143
+ # [
1144
+ # {
1145
+ # &quot;position&quot;: 0,
1146
+ # &quot;value&quot;: &quot;￯&quot;
1147
+ # },
1148
+ # {
1149
+ # &quot;position&quot;: 1,
1150
+ # &quot;value&quot;: &quot;/h&quot;
1151
+ # },
1152
+ # {
1153
+ # &quot;position&quot;: 2,
1154
+ # &quot;value&quot;: &quot;ho&quot;
1155
+ # },
1156
+ # {
1157
+ # &quot;position&quot;: 3,
1158
+ # &quot;value&quot;: &quot;om&quot;
1159
+ # },
1160
+ # {
1161
+ # &quot;position&quot;: 4,
1162
+ # &quot;value&quot;: &quot;me&quot;
1163
+ # },
1164
+ # {
1165
+ # &quot;position&quot;: 5,
1166
+ # &quot;value&quot;: &quot;e/&quot;
1167
+ # },
1168
+ # {
1169
+ # &quot;position&quot;: 6,
1170
+ # &quot;value&quot;: &quot;/a&quot;
1171
+ # },
1172
+ # {
1173
+ # &quot;position&quot;: 7,
1174
+ # &quot;value&quot;: &quot;al&quot;
1175
+ # },
1176
+ # {
1177
+ # &quot;position&quot;: 8,
1178
+ # &quot;value&quot;: &quot;li&quot;
1179
+ # },
1180
+ # {
1181
+ # &quot;position&quot;: 9,
1182
+ # &quot;value&quot;: &quot;ic&quot;
1183
+ # },
1184
+ # {
1185
+ # &quot;position&quot;: 10,
1186
+ # &quot;value&quot;: &quot;ce&quot;
1187
+ # },
1188
+ # {
1189
+ # &quot;position&quot;: 11,
1190
+ # &quot;value&quot;: &quot;e/&quot;
1191
+ # },
1192
+ # {
1193
+ # &quot;position&quot;: 12,
1194
+ # &quot;value&quot;: &quot;/t&quot;
1195
+ # },
1196
+ # {
1197
+ # &quot;position&quot;: 13,
1198
+ # &quot;value&quot;: &quot;te&quot;
1199
+ # },
1200
+ # {
1201
+ # &quot;position&quot;: 14,
1202
+ # &quot;value&quot;: &quot;es&quot;
1203
+ # },
1204
+ # {
1205
+ # &quot;position&quot;: 15,
1206
+ # &quot;value&quot;: &quot;st&quot;
1207
+ # },
1208
+ # {
1209
+ # &quot;position&quot;: 16,
1210
+ # &quot;value&quot;: &quot;t.&quot;
1211
+ # },
1212
+ # {
1213
+ # &quot;position&quot;: 17,
1214
+ # &quot;value&quot;: &quot;.t&quot;
1215
+ # },
1216
+ # {
1217
+ # &quot;position&quot;: 18,
1218
+ # &quot;value&quot;: &quot;tx&quot;
1219
+ # },
1220
+ # {
1221
+ # &quot;position&quot;: 19,
1222
+ # &quot;value&quot;: &quot;xt&quot;
1223
+ # },
1224
+ # {
1225
+ # &quot;position&quot;: 20,
1226
+ # &quot;value&quot;: &quot;t&quot;
1227
+ # },
1228
+ # {
1229
+ # &quot;position&quot;: 21,
1230
+ # &quot;value&quot;: &quot;￰&quot;
1231
+ # }
1232
+ # ]
1233
+ # ]
1234
+ </pre></div>
1235
+ </div>
1236
+ <p><tt class="docutils literal"><span class="pre">\A</span></tt> で検索したとき、テキストの先頭であるというマークを使います。クエリーをトークナイズするために <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> を使うときは、 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は最初のトークンとしてテキストの先頭であるというマーク( <tt class="docutils literal"><span class="pre">U+FFEF</span></tt> )を追加します。テキストの先頭であるというマークは先頭にしか存在しないはずなので、テキストの先頭であるという検索結果を得ることができます。</p>
1237
+ <p>実行例:</p>
1238
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\A/home/alice/&quot; NormalizerAuto --mode GET
1239
+ # [
1240
+ # [
1241
+ # 0,
1242
+ # 1337566253.89858,
1243
+ # 0.000355720520019531
1244
+ # ],
1245
+ # [
1246
+ # {
1247
+ # &quot;position&quot;: 0,
1248
+ # &quot;value&quot;: &quot;￯&quot;
1249
+ # },
1250
+ # {
1251
+ # &quot;position&quot;: 1,
1252
+ # &quot;value&quot;: &quot;/h&quot;
1253
+ # },
1254
+ # {
1255
+ # &quot;position&quot;: 2,
1256
+ # &quot;value&quot;: &quot;ho&quot;
1257
+ # },
1258
+ # {
1259
+ # &quot;position&quot;: 3,
1260
+ # &quot;value&quot;: &quot;om&quot;
1261
+ # },
1262
+ # {
1263
+ # &quot;position&quot;: 4,
1264
+ # &quot;value&quot;: &quot;me&quot;
1265
+ # },
1266
+ # {
1267
+ # &quot;position&quot;: 5,
1268
+ # &quot;value&quot;: &quot;e/&quot;
1269
+ # },
1270
+ # {
1271
+ # &quot;position&quot;: 6,
1272
+ # &quot;value&quot;: &quot;/a&quot;
1273
+ # },
1274
+ # {
1275
+ # &quot;position&quot;: 7,
1276
+ # &quot;value&quot;: &quot;al&quot;
1277
+ # },
1278
+ # {
1279
+ # &quot;position&quot;: 8,
1280
+ # &quot;value&quot;: &quot;li&quot;
1281
+ # },
1282
+ # {
1283
+ # &quot;position&quot;: 9,
1284
+ # &quot;value&quot;: &quot;ic&quot;
1285
+ # },
1286
+ # {
1287
+ # &quot;position&quot;: 10,
1288
+ # &quot;value&quot;: &quot;ce&quot;
1289
+ # },
1290
+ # {
1291
+ # &quot;position&quot;: 11,
1292
+ # &quot;value&quot;: &quot;e/&quot;
1293
+ # }
1294
+ # ]
1295
+ # ]
1296
+ </pre></div>
1297
+ </div>
1298
+ <p><tt class="docutils literal"><span class="pre">\z</span></tt> で検索したとき、テキストの最後であるというマークを使います。クエリーをトークナイズするために <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> を使うときは、 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は最後のトークンとしてテキストの最後であるというマーク( <tt class="docutils literal"><span class="pre">U+FFF0</span></tt> )を追加します。テキストの最後であるというマークは最後にしか存在しないはずなので、テキストの最後であるという検索結果を得ることができます。</p>
1299
+ <p>実行例:</p>
1300
+ <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\.txt\\z&quot; NormalizerAuto --mode GET
1301
+ # [
1302
+ # [
1303
+ # 0,
1304
+ # 1337566253.89858,
1305
+ # 0.000355720520019531
1306
+ # ],
1307
+ # [
1308
+ # {
1309
+ # &quot;position&quot;: 0,
1310
+ # &quot;value&quot;: &quot;\\.&quot;
1311
+ # },
1312
+ # {
1313
+ # &quot;position&quot;: 1,
1314
+ # &quot;value&quot;: &quot;.t&quot;
1315
+ # },
1316
+ # {
1317
+ # &quot;position&quot;: 2,
1318
+ # &quot;value&quot;: &quot;tx&quot;
1319
+ # },
1320
+ # {
1321
+ # &quot;position&quot;: 3,
1322
+ # &quot;value&quot;: &quot;xt&quot;
1323
+ # },
1324
+ # {
1325
+ # &quot;position&quot;: 5,
1326
+ # &quot;value&quot;: &quot;￰&quot;
1327
+ # }
1328
+ # ]
1329
+ # ]
1330
+ </pre></div>
1331
+ </div>
1332
+ </div>
1333
+ </div>
92
1334
  </div>
93
1335
 
94
1336
 
95
1337
  </div>
96
1338
  </div>
97
1339
  </div>
98
- <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1340
+ <div class="sphinxsidebar">
99
1341
  <div class="sphinxsidebarwrapper">
1342
+ <h3><a href="../index.html">目次</a></h3>
1343
+ <ul>
1344
+ <li><a class="reference internal" href="#">7.8. トークナイザー</a><ul>
1345
+ <li><a class="reference internal" href="#summary">7.8.1. 概要</a></li>
1346
+ <li><a class="reference internal" href="#what-is-tokenize">7.8.2. 「トークナイズ」とはなにか</a></li>
1347
+ <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. 組み込みトークナイザー</a><ul>
1348
+ <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
1349
+ <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
1350
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
1351
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
1352
+ <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
1353
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
1354
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
1355
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
1356
+ <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
1357
+ <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
1358
+ <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
1359
+ <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
1360
+ <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
1361
+ <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
1362
+ </ul>
1363
+ </li>
1364
+ </ul>
1365
+ </li>
1366
+ </ul>
1367
+
100
1368
  <h4>前のトピックへ</h4>
101
1369
  <p class="topless"><a href="normalizers.html"
102
1370
  title="前の章へ">7.7. ノーマライザー</a></p>
103
1371
  <h4>次のトピックへ</h4>
104
1372
  <p class="topless"><a href="token_filters.html"
105
1373
  title="次の章へ">7.9. トークンフィルター</a></p>
106
- <div role="note" aria-label="source link">
107
- <h3>このページ</h3>
108
- <ul class="this-page-menu">
109
- <li><a href="../_sources/reference/tokenizers.txt"
110
- rel="nofollow">ソースコードを表示</a></li>
111
- </ul>
112
- </div>
113
- <div id="searchbox" style="display: none" role="search">
1374
+ <h3>このページ</h3>
1375
+ <ul class="this-page-menu">
1376
+ <li><a href="../_sources/reference/tokenizers.txt"
1377
+ rel="nofollow">ソースコードを表示</a></li>
1378
+ </ul>
1379
+ <div id="searchbox" style="display: none">
114
1380
  <h3>クイック検索</h3>
115
1381
  <form class="search" action="../search.html" method="get">
116
1382
  <input type="text" name="q" />
@@ -127,7 +1393,7 @@
127
1393
  </div>
128
1394
  <div class="clearer"></div>
129
1395
  </div>
130
- <div class="related" role="navigation" aria-label="related navigation">
1396
+ <div class="related">
131
1397
  <h3>ナビゲーション</h3>
132
1398
  <ul>
133
1399
  <li class="right" style="margin-right: 10px">
@@ -139,11 +1405,11 @@
139
1405
  <li class="right" >
140
1406
  <a href="normalizers.html" title="7.7. ノーマライザー"
141
1407
  >前へ</a> |</li>
142
- <li><a href="../index.html">Groonga v5.0.0ドキュメント</a> &raquo;</li>
1408
+ <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1ドキュメント</a> &raquo;</li>
143
1409
  <li><a href="../reference.html" >7. リファレンスマニュアル</a> &raquo;</li>
144
1410
  </ul>
145
1411
  </div>
146
- <div class="footer" role="contentinfo">
1412
+ <div class="footer">
147
1413
  &copy; Copyright 2009-2015, Brazil, Inc.
148
1414
  </div>
149
1415
  </body>