rroonga 5.0.4-x86-mingw32 → 5.0.5-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (751) hide show
  1. checksums.yaml +8 -8
  2. data/README.md +2 -2
  3. data/example/measure-data-column-disk-usage.rb +124 -0
  4. data/example/measure-index-column-disk-usage.rb +81 -0
  5. data/example/measure-table-disk-usage.rb +100 -0
  6. data/ext/groonga/rb-grn-database.c +31 -0
  7. data/ext/groonga/rb-grn-double-array-trie.c +1 -8
  8. data/ext/groonga/rb-grn-logger.c +45 -0
  9. data/ext/groonga/rb-grn-object.c +29 -1
  10. data/ext/groonga/rb-grn-patricia-trie.c +1 -8
  11. data/ext/groonga/rb-grn-table-cursor.c +8 -3
  12. data/ext/groonga/rb-grn-table.c +10 -5
  13. data/ext/groonga/rb-grn-thread.c +160 -0
  14. data/ext/groonga/rb-grn-windows-event-logger.c +79 -0
  15. data/ext/groonga/rb-grn.h +3 -1
  16. data/ext/groonga/rb-groonga.c +3 -1
  17. data/lib/1.9/groonga.so +0 -0
  18. data/lib/2.0/groonga.so +0 -0
  19. data/lib/2.1/groonga.so +0 -0
  20. data/lib/2.2/groonga.so +0 -0
  21. data/lib/groonga/dumper.rb +6 -1
  22. data/rroonga-build.rb +4 -4
  23. data/test/groonga-test-utils.rb +5 -1
  24. data/test/test-database.rb +11 -0
  25. data/test/test-logger.rb +6 -0
  26. data/test/test-operator.rb +6 -6
  27. data/test/test-procedure.rb +15 -0
  28. data/test/test-table-dumper.rb +170 -1
  29. data/test/test-thread.rb +42 -0
  30. data/test/test-windows-event-logger.rb +28 -0
  31. data/vendor/local/bin/grndb.exe +0 -0
  32. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  33. data/vendor/local/bin/groonga.exe +0 -0
  34. data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
  35. data/vendor/local/bin/libgroonga-0.dll +0 -0
  36. data/vendor/local/bin/libmecab-1.dll +0 -0
  37. data/vendor/local/bin/libmsgpack-4.dll +0 -0
  38. data/vendor/local/bin/libmsgpackc-2.dll +0 -0
  39. data/vendor/local/bin/libonig-5.dll +0 -0
  40. data/vendor/local/bin/libstdc++-6.dll +0 -0
  41. data/vendor/local/bin/libwinpthread-1.dll +0 -0
  42. data/vendor/local/bin/lz4.exe +0 -0
  43. data/vendor/local/bin/lz4c.exe +0 -0
  44. data/vendor/local/bin/lz4cat +0 -0
  45. data/vendor/local/bin/mecab-config +2 -2
  46. data/vendor/local/bin/mecab.exe +0 -0
  47. data/vendor/local/bin/onig-config +1 -1
  48. data/vendor/local/bin/zlib1.dll +0 -0
  49. data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
  50. data/vendor/local/etc/groonga/windows_event_log/provider.man +38 -0
  51. data/vendor/local/include/groonga/groonga.h +2 -0
  52. data/vendor/local/include/groonga/groonga/command.h +2 -0
  53. data/vendor/local/include/groonga/groonga/groonga.h +5 -0
  54. data/vendor/local/include/groonga/groonga/obj.h +1 -0
  55. data/vendor/local/include/groonga/groonga/portability.h +16 -0
  56. data/vendor/local/include/groonga/groonga/thread.h +42 -0
  57. data/vendor/local/include/groonga/groonga/windows_event_logger.h +33 -0
  58. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  59. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  60. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  61. data/vendor/local/lib/groonga/plugins/functions/vector.la +2 -2
  62. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  63. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  64. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  65. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
  66. data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
  67. data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
  68. data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
  69. data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
  70. data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
  71. data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
  72. data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
  73. data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
  74. data/vendor/local/lib/groonga/plugins/sharding.rb +5 -0
  75. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +43 -6
  76. data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +32 -25
  77. data/vendor/local/lib/groonga/plugins/sharding/logical_parameters.rb +44 -0
  78. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +217 -49
  79. data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +507 -45
  80. data/vendor/local/lib/groonga/plugins/sharding/logical_shard_list.rb +28 -0
  81. data/vendor/local/lib/groonga/plugins/sharding/logical_table_remove.rb +11 -6
  82. data/vendor/local/lib/groonga/plugins/sharding/parameters.rb +10 -0
  83. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  84. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  85. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  86. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
  87. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  88. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  89. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  90. data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
  91. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  92. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  93. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  94. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
  95. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  96. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  97. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  98. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
  99. data/vendor/local/lib/groonga/scripts/ruby/command.rb +31 -1
  100. data/vendor/local/lib/groonga/scripts/ruby/context.rb +18 -2
  101. data/vendor/local/lib/groonga/scripts/ruby/database.rb +12 -4
  102. data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +31 -28
  103. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +1 -0
  104. data/vendor/local/lib/groonga/scripts/ruby/logger/level.rb +4 -2
  105. data/vendor/local/lib/groonga/scripts/ruby/query_logger.rb +9 -0
  106. data/vendor/local/lib/groonga/scripts/ruby/query_logger/flag.rb +39 -0
  107. data/vendor/local/lib/groonga/scripts/ruby/record.rb +12 -0
  108. data/vendor/local/lib/groonga/scripts/ruby/table.rb +35 -1
  109. data/vendor/local/lib/libgroonga.a +0 -0
  110. data/vendor/local/lib/libgroonga.dll.a +0 -0
  111. data/vendor/local/lib/libgroonga.la +2 -2
  112. data/vendor/local/lib/liblz4.dll +0 -0
  113. data/vendor/local/lib/liblz4.dll.1 +0 -0
  114. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  115. data/vendor/local/lib/libmecab.a +0 -0
  116. data/vendor/local/lib/libmecab.dll.a +0 -0
  117. data/vendor/local/lib/libmecab.la +2 -2
  118. data/vendor/local/lib/libmsgpack.a +0 -0
  119. data/vendor/local/lib/libmsgpack.dll.a +0 -0
  120. data/vendor/local/lib/libmsgpack.la +2 -2
  121. data/vendor/local/lib/libmsgpackc.a +0 -0
  122. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  123. data/vendor/local/lib/libmsgpackc.la +2 -2
  124. data/vendor/local/lib/libonig.a +0 -0
  125. data/vendor/local/lib/libonig.dll.a +0 -0
  126. data/vendor/local/lib/libonig.la +2 -2
  127. data/vendor/local/lib/libz.a +0 -0
  128. data/vendor/local/lib/libz.dll.a +0 -0
  129. data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
  130. data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
  131. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  132. data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
  133. data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
  134. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  135. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  136. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  137. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  138. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  139. data/vendor/local/sbin/groonga-httpd-restart +1 -1
  140. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  141. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  142. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development.txt +3 -2
  143. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build.txt +19 -0
  144. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_autotools.txt +101 -0
  145. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_cmake.txt +94 -0
  146. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/windows_cmake.txt +93 -0
  147. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +16 -7
  148. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/repository.txt +7 -3
  149. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/test.txt +4 -0
  150. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  151. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +4 -4
  152. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +3 -3
  153. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  154. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +4 -4
  155. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  156. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -4
  157. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  158. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +319 -0
  159. data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +1 -0
  160. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +1 -1
  161. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_db.txt +23 -0
  162. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_thread.txt +122 -0
  163. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
  164. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_copy.txt +381 -0
  165. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
  166. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -1
  167. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/database_unmap.txt +85 -0
  168. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/io_flush.txt +218 -9
  169. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +1 -3
  170. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_level.txt +1 -1
  171. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +3 -1
  172. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_parameters.txt +138 -0
  173. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +97 -10
  174. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_select.txt +745 -23
  175. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_shard_list.txt +107 -0
  176. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +3 -1
  177. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +2 -3
  178. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalizer_list.txt +1 -2
  179. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_exist.txt +90 -0
  180. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +1 -1
  181. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +1 -1
  182. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +1 -1
  183. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +1 -3
  184. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
  185. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
  186. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +240 -56
  187. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +33 -7
  188. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_rename.txt +90 -0
  189. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +2 -1
  190. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/thread_limit.txt +110 -0
  191. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +2 -1
  192. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenizer_list.txt +1 -3
  193. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -3
  194. data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-httpd.txt +3 -4
  195. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +0 -1
  196. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +0 -1
  197. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +2 -2
  198. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/snippet_html.txt +1 -1
  199. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +1 -1
  200. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +34 -14
  201. data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -2
  202. data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders/tsv.txt +1 -1
  203. data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +3 -0
  204. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +2 -0
  205. data/vendor/local/share/doc/groonga/en/html/_sources/reference/sharding.txt +108 -0
  206. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +0 -21
  207. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tuning.txt +1 -1
  208. data/vendor/local/share/doc/groonga/en/html/_sources/spec/search.txt +1 -1
  209. data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +4 -4
  210. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/introduction.txt +24 -18
  211. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/match_columns.txt +19 -19
  212. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/micro_blog.txt +9 -9
  213. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/query_expansion.txt +1 -1
  214. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +68 -6
  215. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +27 -2
  216. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  217. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  218. data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
  219. data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +10308 -0
  220. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -9404
  221. data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
  222. data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
  223. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
  224. data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +999 -0
  225. data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +31 -1415
  226. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  227. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  228. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
  229. data/vendor/local/share/doc/groonga/en/html/characteristic.html +19 -17
  230. data/vendor/local/share/doc/groonga/en/html/client.html +19 -17
  231. data/vendor/local/share/doc/groonga/en/html/community.html +19 -17
  232. data/vendor/local/share/doc/groonga/en/html/contribution.html +78 -70
  233. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +30 -27
  234. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +146 -0
  235. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +237 -0
  236. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +227 -0
  237. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +231 -0
  238. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +37 -35
  239. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +54 -52
  240. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +80 -78
  241. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +135 -122
  242. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +38 -34
  243. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +58 -54
  244. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +21 -19
  245. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +27 -25
  246. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +23 -21
  247. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +30 -28
  248. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +21 -19
  249. data/vendor/local/share/doc/groonga/en/html/development.html +19 -17
  250. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +30 -28
  251. data/vendor/local/share/doc/groonga/en/html/genindex.html +48 -20
  252. data/vendor/local/share/doc/groonga/en/html/index.html +123 -105
  253. data/vendor/local/share/doc/groonga/en/html/install.html +33 -31
  254. data/vendor/local/share/doc/groonga/en/html/install/centos.html +32 -30
  255. data/vendor/local/share/doc/groonga/en/html/install/debian.html +31 -29
  256. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +29 -27
  257. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +26 -24
  258. data/vendor/local/share/doc/groonga/en/html/install/others.html +92 -90
  259. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +26 -24
  260. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +29 -28
  261. data/vendor/local/share/doc/groonga/en/html/install/windows.html +34 -32
  262. data/vendor/local/share/doc/groonga/en/html/limitations.html +19 -17
  263. data/vendor/local/share/doc/groonga/en/html/news.html +509 -142
  264. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +19 -17
  265. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +20 -18
  266. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +19 -17
  267. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +34 -32
  268. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +29 -27
  269. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +110 -108
  270. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +73 -71
  271. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +111 -109
  272. data/vendor/local/share/doc/groonga/en/html/news/senna.html +19 -17
  273. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  274. data/vendor/local/share/doc/groonga/en/html/reference.html +111 -94
  275. data/vendor/local/share/doc/groonga/en/html/reference/api.html +55 -52
  276. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +51 -49
  277. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +63 -61
  278. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +84 -82
  279. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +46 -44
  280. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +41 -39
  281. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +89 -87
  282. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +88 -50
  283. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +48 -46
  284. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +83 -81
  285. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +46 -44
  286. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +48 -46
  287. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +46 -44
  288. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +45 -43
  289. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +45 -43
  290. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +44 -42
  291. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +93 -91
  292. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +48 -46
  293. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +43 -41
  294. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +79 -77
  295. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +69 -67
  296. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +296 -0
  297. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +45 -43
  298. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +38 -36
  299. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +51 -49
  300. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +60 -58
  301. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +19 -17
  302. data/vendor/local/share/doc/groonga/en/html/reference/column.html +21 -19
  303. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +23 -21
  304. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +28 -26
  305. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +23 -21
  306. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +72 -70
  307. data/vendor/local/share/doc/groonga/en/html/reference/command.html +70 -61
  308. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +23 -21
  309. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +59 -57
  310. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +28 -26
  311. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +100 -98
  312. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -42
  313. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +49 -47
  314. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +33 -31
  315. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +796 -0
  316. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +62 -60
  317. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +96 -94
  318. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +46 -44
  319. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +67 -64
  320. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +236 -0
  321. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +62 -60
  322. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +46 -44
  323. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +59 -57
  324. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +63 -61
  325. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +281 -54
  326. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +66 -64
  327. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +53 -52
  328. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +48 -46
  329. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +48 -46
  330. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +48 -46
  331. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +78 -75
  332. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +283 -0
  333. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +160 -85
  334. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +2071 -83
  335. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +287 -0
  336. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +71 -68
  337. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +86 -84
  338. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +52 -50
  339. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +227 -0
  340. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +57 -55
  341. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +53 -51
  342. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +43 -41
  343. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +43 -41
  344. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +56 -54
  345. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +75 -74
  346. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +59 -57
  347. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +59 -57
  348. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +898 -647
  349. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +43 -41
  350. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +50 -48
  351. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +87 -85
  352. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +175 -152
  353. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +55 -53
  354. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +46 -44
  355. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +327 -0
  356. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +77 -75
  357. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +241 -0
  358. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +108 -106
  359. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +56 -51
  360. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +56 -55
  361. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +24 -22
  362. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +41 -39
  363. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +28 -26
  364. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +33 -31
  365. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +83 -81
  366. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +29 -27
  367. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +23 -21
  368. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +48 -46
  369. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +40 -38
  370. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +84 -82
  371. data/vendor/local/share/doc/groonga/en/html/reference/function.html +22 -20
  372. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +47 -45
  373. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +25 -23
  374. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +85 -83
  375. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +36 -34
  376. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +26 -24
  377. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +66 -64
  378. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +54 -52
  379. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +35 -33
  380. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +34 -32
  381. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +23 -21
  382. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +55 -53
  383. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +24 -22
  384. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +49 -47
  385. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +45 -43
  386. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +33 -31
  387. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +29 -27
  388. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +244 -242
  389. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +327 -325
  390. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -29
  391. data/vendor/local/share/doc/groonga/en/html/reference/log.html +43 -41
  392. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +49 -47
  393. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +21 -19
  394. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +23 -21
  395. data/vendor/local/share/doc/groonga/en/html/reference/output.html +36 -34
  396. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +21 -19
  397. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +62 -60
  398. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +107 -103
  399. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +50 -40
  400. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +27 -25
  401. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +41 -31
  402. data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +19 -17
  403. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +241 -0
  404. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +21 -19
  405. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +30 -28
  406. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +23 -21
  407. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +23 -21
  408. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +23 -21
  409. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +56 -54
  410. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +41 -39
  411. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +341 -289
  412. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +68 -66
  413. data/vendor/local/share/doc/groonga/en/html/reference/types.html +43 -41
  414. data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
  415. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  416. data/vendor/local/share/doc/groonga/en/html/server.html +19 -17
  417. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +22 -20
  418. data/vendor/local/share/doc/groonga/en/html/server/http.html +21 -19
  419. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +37 -35
  420. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +23 -21
  421. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +23 -21
  422. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +21 -19
  423. data/vendor/local/share/doc/groonga/en/html/server/package.html +39 -37
  424. data/vendor/local/share/doc/groonga/en/html/spec.html +23 -21
  425. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +129 -127
  426. data/vendor/local/share/doc/groonga/en/html/spec/search.html +22 -20
  427. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +19 -17
  428. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +25 -23
  429. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -20
  430. data/vendor/local/share/doc/groonga/en/html/tutorial.html +20 -18
  431. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +29 -23
  432. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +35 -33
  433. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +25 -23
  434. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +46 -39
  435. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +21 -19
  436. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +65 -63
  437. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +55 -53
  438. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +25 -23
  439. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +22 -20
  440. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +25 -23
  441. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +37 -35
  442. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  443. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development.txt +3 -2
  444. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build.txt +19 -0
  445. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_autotools.txt +101 -0
  446. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_cmake.txt +94 -0
  447. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/windows_cmake.txt +93 -0
  448. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +16 -7
  449. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/repository.txt +7 -3
  450. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/test.txt +4 -0
  451. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  452. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +4 -4
  453. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +3 -3
  454. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  455. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +4 -4
  456. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  457. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -4
  458. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  459. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +319 -0
  460. data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +1 -0
  461. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +1 -1
  462. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_db.txt +23 -0
  463. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_thread.txt +122 -0
  464. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
  465. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_copy.txt +381 -0
  466. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
  467. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -1
  468. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/database_unmap.txt +85 -0
  469. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/io_flush.txt +218 -9
  470. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +1 -3
  471. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_level.txt +1 -1
  472. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +3 -1
  473. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_parameters.txt +138 -0
  474. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +97 -10
  475. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_select.txt +745 -23
  476. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_shard_list.txt +107 -0
  477. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +3 -1
  478. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +2 -3
  479. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalizer_list.txt +1 -2
  480. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_exist.txt +90 -0
  481. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +1 -1
  482. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +1 -1
  483. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +1 -1
  484. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +1 -3
  485. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
  486. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
  487. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +240 -56
  488. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +33 -7
  489. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_rename.txt +90 -0
  490. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +2 -1
  491. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/thread_limit.txt +110 -0
  492. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +2 -1
  493. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenizer_list.txt +1 -3
  494. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -3
  495. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-httpd.txt +3 -4
  496. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +0 -1
  497. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +0 -1
  498. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +2 -2
  499. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/snippet_html.txt +1 -1
  500. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +1 -1
  501. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +34 -14
  502. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -2
  503. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders/tsv.txt +1 -1
  504. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +3 -0
  505. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +2 -0
  506. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/sharding.txt +108 -0
  507. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +0 -21
  508. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tuning.txt +1 -1
  509. data/vendor/local/share/doc/groonga/ja/html/_sources/spec/search.txt +1 -1
  510. data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +4 -4
  511. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/introduction.txt +24 -18
  512. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/match_columns.txt +19 -19
  513. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/micro_blog.txt +9 -9
  514. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/query_expansion.txt +1 -1
  515. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +68 -6
  516. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +27 -2
  517. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  518. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  519. data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
  520. data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +10308 -0
  521. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -9404
  522. data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
  523. data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
  524. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
  525. data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +999 -0
  526. data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +31 -1415
  527. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  528. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  529. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
  530. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +19 -17
  531. data/vendor/local/share/doc/groonga/ja/html/client.html +19 -17
  532. data/vendor/local/share/doc/groonga/ja/html/community.html +19 -17
  533. data/vendor/local/share/doc/groonga/ja/html/contribution.html +77 -69
  534. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +30 -27
  535. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +144 -0
  536. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +226 -0
  537. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +215 -0
  538. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +229 -0
  539. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +36 -34
  540. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +53 -51
  541. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +79 -77
  542. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +134 -121
  543. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +29 -27
  544. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +57 -53
  545. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +21 -19
  546. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +23 -21
  547. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +23 -21
  548. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +30 -28
  549. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +21 -19
  550. data/vendor/local/share/doc/groonga/ja/html/development.html +19 -17
  551. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +25 -23
  552. data/vendor/local/share/doc/groonga/ja/html/genindex.html +48 -20
  553. data/vendor/local/share/doc/groonga/ja/html/index.html +122 -104
  554. data/vendor/local/share/doc/groonga/ja/html/install.html +33 -31
  555. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +35 -33
  556. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +33 -31
  557. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +30 -28
  558. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +26 -24
  559. data/vendor/local/share/doc/groonga/ja/html/install/others.html +83 -81
  560. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +25 -23
  561. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +30 -29
  562. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +33 -31
  563. data/vendor/local/share/doc/groonga/ja/html/limitations.html +19 -17
  564. data/vendor/local/share/doc/groonga/ja/html/news.html +460 -126
  565. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +19 -17
  566. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +20 -18
  567. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +19 -17
  568. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +34 -32
  569. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +29 -27
  570. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +102 -100
  571. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +66 -64
  572. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +91 -89
  573. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +19 -17
  574. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  575. data/vendor/local/share/doc/groonga/ja/html/reference.html +111 -94
  576. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +55 -52
  577. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +51 -49
  578. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +58 -56
  579. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +84 -82
  580. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +46 -44
  581. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +41 -39
  582. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +85 -83
  583. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +88 -50
  584. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +48 -46
  585. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +78 -76
  586. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +46 -44
  587. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +48 -46
  588. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +46 -44
  589. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +45 -43
  590. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +45 -43
  591. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +44 -42
  592. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +93 -91
  593. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +48 -46
  594. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +43 -41
  595. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +79 -77
  596. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +69 -67
  597. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +286 -0
  598. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +45 -43
  599. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +38 -36
  600. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -46
  601. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +60 -58
  602. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +19 -17
  603. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +21 -19
  604. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +23 -21
  605. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -26
  606. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +23 -21
  607. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +61 -59
  608. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +70 -61
  609. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +23 -21
  610. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +49 -47
  611. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +28 -26
  612. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +99 -97
  613. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +41 -39
  614. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +49 -47
  615. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +33 -31
  616. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +781 -0
  617. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +62 -60
  618. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +88 -86
  619. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +46 -44
  620. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +66 -63
  621. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +229 -0
  622. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +62 -60
  623. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +46 -44
  624. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +58 -56
  625. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +63 -61
  626. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +266 -54
  627. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +59 -57
  628. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +53 -52
  629. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +48 -46
  630. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +48 -46
  631. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +48 -46
  632. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +78 -75
  633. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +276 -0
  634. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +158 -85
  635. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +2008 -80
  636. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +285 -0
  637. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +71 -68
  638. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +82 -79
  639. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +52 -50
  640. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +220 -0
  641. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +53 -51
  642. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +51 -49
  643. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +43 -41
  644. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +43 -41
  645. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +52 -50
  646. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +68 -67
  647. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +59 -57
  648. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +59 -57
  649. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +680 -448
  650. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +43 -41
  651. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +48 -46
  652. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +84 -82
  653. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +146 -126
  654. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +55 -53
  655. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +46 -44
  656. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +322 -0
  657. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +73 -70
  658. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +229 -0
  659. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +94 -91
  660. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +56 -51
  661. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +56 -55
  662. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +24 -22
  663. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +39 -37
  664. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +28 -26
  665. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +33 -31
  666. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +73 -72
  667. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +29 -27
  668. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +23 -21
  669. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +48 -46
  670. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +40 -38
  671. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +84 -82
  672. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +22 -20
  673. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +46 -44
  674. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +25 -23
  675. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +69 -67
  676. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +36 -34
  677. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +26 -24
  678. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +56 -54
  679. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +45 -43
  680. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +34 -32
  681. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +35 -33
  682. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +23 -21
  683. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +48 -46
  684. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +24 -22
  685. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +35 -33
  686. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +37 -35
  687. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +33 -31
  688. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +26 -24
  689. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +143 -141
  690. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +236 -234
  691. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -28
  692. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +43 -41
  693. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +39 -37
  694. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +21 -19
  695. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +23 -21
  696. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +32 -30
  697. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +21 -19
  698. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +42 -39
  699. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +91 -88
  700. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +50 -40
  701. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +27 -25
  702. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +41 -31
  703. data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +19 -17
  704. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +223 -0
  705. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +21 -19
  706. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +27 -25
  707. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +23 -21
  708. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +23 -21
  709. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +23 -21
  710. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +46 -44
  711. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +38 -36
  712. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +303 -243
  713. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +62 -60
  714. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +42 -40
  715. data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
  716. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  717. data/vendor/local/share/doc/groonga/ja/html/server.html +19 -17
  718. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +21 -19
  719. data/vendor/local/share/doc/groonga/ja/html/server/http.html +21 -19
  720. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +31 -29
  721. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +23 -21
  722. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +23 -21
  723. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +21 -19
  724. data/vendor/local/share/doc/groonga/ja/html/server/package.html +38 -36
  725. data/vendor/local/share/doc/groonga/ja/html/spec.html +23 -21
  726. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +128 -126
  727. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +22 -20
  728. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +19 -17
  729. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +25 -23
  730. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -20
  731. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +20 -18
  732. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +29 -23
  733. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +34 -32
  734. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -20
  735. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +46 -39
  736. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +21 -19
  737. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +66 -64
  738. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +52 -50
  739. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +25 -23
  740. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +22 -20
  741. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +25 -23
  742. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +35 -33
  743. data/vendor/local/share/license/groonga/README.md +6 -0
  744. data/vendor/local/share/license/mruby/AUTHORS +1 -0
  745. data/vendor/local/share/license/mruby/MITL +1 -1
  746. data/vendor/local/share/license/mruby/README.md +6 -5
  747. data/vendor/local/share/license/msgpack/README +219 -0
  748. data/vendor/local/share/man/ja/man1/groonga.1 +23512 -15126
  749. data/vendor/local/share/man/man1/groonga.1 +26542 -17745
  750. metadata +77 -3
  751. data/vendor/local/share/license/msgpack/AUTHORS +0 -0
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.9. トークンフィルター &mdash; Groonga v5.0.4-139-g6629adbドキュメント</title>
10
+ <title>7.9. トークンフィルター &mdash; Groonga v5.0.6-226-gd7da7e7ドキュメント</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.4-139-g6629adb',
18
+ VERSION: '5.0.6-226-gd7da7e7',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -26,12 +26,12 @@
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <script type="text/javascript" src="../_static/translations.js"></script>
28
28
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
29
- <link rel="top" title="Groonga v5.0.4-139-g6629adbドキュメント" href="../index.html" />
29
+ <link rel="top" title="Groonga v5.0.6-226-gd7da7e7ドキュメント" href="../index.html" />
30
30
  <link rel="up" title="7. リファレンスマニュアル" href="../reference.html" />
31
31
  <link rel="next" title="7.10. クエリー展開オブジェクト一覧" href="query_expanders.html" />
32
32
  <link rel="prev" title="7.8. トークナイザー" href="tokenizers.html" />
33
33
  </head>
34
- <body>
34
+ <body role="document">
35
35
  <div class="header">
36
36
  <h1 class="title">
37
37
  <a id="top-link" href="../index.html">
@@ -49,7 +49,7 @@
49
49
  </div>
50
50
 
51
51
 
52
- <div class="related">
52
+ <div class="related" role="navigation" aria-label="related navigation">
53
53
  <h3>ナビゲーション</h3>
54
54
  <ul>
55
55
  <li class="right" style="margin-right: 10px">
@@ -61,15 +61,15 @@
61
61
  <li class="right" >
62
62
  <a href="tokenizers.html" title="7.8. トークナイザー"
63
63
  accesskey="P">前へ</a> |</li>
64
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adbドキュメント</a> &raquo;</li>
65
- <li><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> &raquo;</li>
64
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7ドキュメント</a> &raquo;</li>
65
+ <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> &raquo;</li>
66
66
  </ul>
67
67
  </div>
68
68
 
69
69
  <div class="document">
70
70
  <div class="documentwrapper">
71
71
  <div class="bodywrapper">
72
- <div class="body">
72
+ <div class="body" role="main">
73
73
 
74
74
  <div class="section" id="token-filters">
75
75
  <h1>7.9. トークンフィルター<a class="headerlink" href="#token-filters" title="このヘッドラインへのパーマリンク">¶</a></h1>
@@ -78,8 +78,8 @@
78
78
  <p>Groongaにはトークナイズされたトークンに所定の処理を行うトークンフィルターモジュールがあります。</p>
79
79
  <p>トークンフィルターモジュールはプラグインとして追加できます。</p>
80
80
  <p>トークンフィルタープラグインをGroongaに追加することでトークナイズされたトークンをカスタマイズできます。</p>
81
- <p>テーブルは0個以上のトークンフィルターを持てます。テーブルにトークンフィルターを付けるには <a class="reference internal" href="commands/table_create.html"><em>table_create</em></a> の <a class="reference internal" href="commands/table_create.html#table-create-token-filters"><em>token_filters</em></a> オプションを使います。</p>
82
- <p>以下は <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> トークンフィルターモジュールを使う <tt class="docutils literal"><span class="pre">table_create</span></tt> の例です。</p>
81
+ <p>テーブルは0個以上のトークンフィルターを持てます。テーブルにトークンフィルターを付けるには <a class="reference internal" href="commands/table_create.html"><em>table_create</em></a> の <a class="reference internal" href="commands/table_create.html#table-create-token-filters"><span>token_filters</span></a> オプションを使います。</p>
82
+ <p>以下は <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> トークンフィルターモジュールを使う <code class="docutils literal"><span class="pre">table_create</span></code> の例です。</p>
83
83
  <p>実行例:</p>
84
84
  <div class="highlight-none"><div class="highlight"><pre>register token_filters/stop_word
85
85
  # [[0, 1337566253.89858, 0.000355720520019531], true]
@@ -95,15 +95,15 @@ table_create Terms TABLE_PAT_KEY ShortText \
95
95
  <h2>7.9.2. 利用可能なトークンフィルター<a class="headerlink" href="#available-token-filters" title="このヘッドラインへのパーマリンク">¶</a></h2>
96
96
  <p>以下は組み込みのトークンフィルターのリストです。</p>
97
97
  <ul class="simple">
98
- <li><tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt></li>
99
- <li><tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt></li>
98
+ <li><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code></li>
99
+ <li><code class="docutils literal"><span class="pre">TokenFilterStem</span></code></li>
100
100
  </ul>
101
101
  <div class="section" id="tokenfilterstopword">
102
- <span id="token-filter-stop-word"></span><h3>7.9.2.1. <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt><a class="headerlink" href="#tokenfilterstopword" title="このヘッドラインへのパーマリンク">¶</a></h3>
103
- <p><tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> は、文書を検索する時にトークナイズされたトークンからストップワードを除去します。</p>
104
- <p><tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> は、文書を検索する時のみトークン除去するため、文書を追加した後でストップワードを指定することもできます。</p>
105
- <p>ストップワードは、語彙表の <tt class="docutils literal"><span class="pre">is_stop_word</span></tt> カラムで指定します。</p>
106
- <p>以下は <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt> トークンフィルターを使う例です。</p>
102
+ <span id="token-filter-stop-word"></span><h3>7.9.2.1. <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code><a class="headerlink" href="#tokenfilterstopword" title="このヘッドラインへのパーマリンク">¶</a></h3>
103
+ <p><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> は、文書を検索する時にトークナイズされたトークンからストップワードを除去します。</p>
104
+ <p><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> は、文書を検索する時のみトークン除去するため、文書を追加した後でストップワードを指定することもできます。</p>
105
+ <p>ストップワードは、語彙表の <code class="docutils literal"><span class="pre">is_stop_word</span></code> カラムで指定します。</p>
106
+ <p>以下は <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> トークンフィルターを使う例です。</p>
107
107
  <p>実行例:</p>
108
108
  <div class="highlight-none"><div class="highlight"><pre>register token_filters/stop_word
109
109
  # [[0, 1337566253.89858, 0.000355720520019531], true]
@@ -167,13 +167,13 @@ select Memos --match_columns content --query &quot;Hello and&quot;
167
167
  # ]
168
168
  </pre></div>
169
169
  </div>
170
- <p><tt class="docutils literal"><span class="pre">and</span></tt> というトークンは <tt class="docutils literal"><span class="pre">Terms</span></tt> テーブルでストップワードと指定されています。</p>
171
- <p><tt class="docutils literal"><span class="pre">&quot;Hello&quot;</span></tt> は文書内に <tt class="docutils literal"><span class="pre">and</span></tt> がありませんがマッチしています。なぜなら、 <tt class="docutils literal"><span class="pre">and</span></tt> はストップワードと指定されているため、クエリーから除去されているからです。</p>
170
+ <p><code class="docutils literal"><span class="pre">and</span></code> というトークンは <code class="docutils literal"><span class="pre">Terms</span></code> テーブルでストップワードと指定されています。</p>
171
+ <p><code class="docutils literal"><span class="pre">&quot;Hello&quot;</span></code> は文書内に <code class="docutils literal"><span class="pre">and</span></code> がありませんがマッチしています。なぜなら、 <code class="docutils literal"><span class="pre">and</span></code> はストップワードと指定されているため、クエリーから除去されているからです。</p>
172
172
  </div>
173
173
  <div class="section" id="tokenfilterstem">
174
- <span id="token-filter-stem"></span><h3>7.9.2.2. <tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt><a class="headerlink" href="#tokenfilterstem" title="このヘッドラインへのパーマリンク">¶</a></h3>
175
- <p><tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt> は、トークナイズされたトークンをステミングします。</p>
176
- <p>以下は <tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt> トークンフィルターを使う例です。</p>
174
+ <span id="token-filter-stem"></span><h3>7.9.2.2. <code class="docutils literal"><span class="pre">TokenFilterStem</span></code><a class="headerlink" href="#tokenfilterstem" title="このヘッドラインへのパーマリンク">¶</a></h3>
175
+ <p><code class="docutils literal"><span class="pre">TokenFilterStem</span></code> は、トークナイズされたトークンをステミングします。</p>
176
+ <p>以下は <code class="docutils literal"><span class="pre">TokenFilterStem</span></code> トークンフィルターを使う例です。</p>
177
177
  <p>実行例:</p>
178
178
  <div class="highlight-none"><div class="highlight"><pre>register token_filters/stem
179
179
  # [[0, 1337566253.89858, 0.000355720520019531], true]
@@ -234,7 +234,7 @@ select Memos --match_columns content --query &quot;develops&quot;
234
234
  # ]
235
235
  </pre></div>
236
236
  </div>
237
- <p><tt class="docutils literal"><span class="pre">develop</span></tt> も <tt class="docutils literal"><span class="pre">developing</span></tt> も <tt class="docutils literal"><span class="pre">developed</span></tt> も <tt class="docutils literal"><span class="pre">develops</span></tt> も、すべてステミングすると <tt class="docutils literal"><span class="pre">develop</span></tt> になります。そのため、 <tt class="docutils literal"><span class="pre">develops</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">develop</span></tt> も <tt class="docutils literal"><span class="pre">developing</span></tt> も <tt class="docutils literal"><span class="pre">developed</span></tt> も検索できます。</p>
237
+ <p><code class="docutils literal"><span class="pre">develop</span></code> も <code class="docutils literal"><span class="pre">developing</span></code> も <code class="docutils literal"><span class="pre">developed</span></code> も <code class="docutils literal"><span class="pre">develops</span></code> も、すべてステミングすると <code class="docutils literal"><span class="pre">develop</span></code> になります。そのため、 <code class="docutils literal"><span class="pre">develops</span></code> というクエリーで <code class="docutils literal"><span class="pre">develop</span></code> も <code class="docutils literal"><span class="pre">developing</span></code> も <code class="docutils literal"><span class="pre">developed</span></code> も検索できます。</p>
238
238
  </div>
239
239
  </div>
240
240
  <div class="section" id="see-also">
@@ -249,15 +249,15 @@ select Memos --match_columns content --query &quot;develops&quot;
249
249
  </div>
250
250
  </div>
251
251
  </div>
252
- <div class="sphinxsidebar">
252
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
253
253
  <div class="sphinxsidebarwrapper">
254
254
  <h3><a href="../index.html">目次</a></h3>
255
255
  <ul>
256
256
  <li><a class="reference internal" href="#">7.9. トークンフィルター</a><ul>
257
257
  <li><a class="reference internal" href="#summary">7.9.1. 概要</a></li>
258
258
  <li><a class="reference internal" href="#available-token-filters">7.9.2. 利用可能なトークンフィルター</a><ul>
259
- <li><a class="reference internal" href="#tokenfilterstopword">7.9.2.1. <tt class="docutils literal"><span class="pre">TokenFilterStopWord</span></tt></a></li>
260
- <li><a class="reference internal" href="#tokenfilterstem">7.9.2.2. <tt class="docutils literal"><span class="pre">TokenFilterStem</span></tt></a></li>
259
+ <li><a class="reference internal" href="#tokenfilterstopword">7.9.2.1. <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code></a></li>
260
+ <li><a class="reference internal" href="#tokenfilterstem">7.9.2.2. <code class="docutils literal"><span class="pre">TokenFilterStem</span></code></a></li>
261
261
  </ul>
262
262
  </li>
263
263
  <li><a class="reference internal" href="#see-also">7.9.3. 参考</a></li>
@@ -271,12 +271,14 @@ select Memos --match_columns content --query &quot;develops&quot;
271
271
  <h4>次のトピックへ</h4>
272
272
  <p class="topless"><a href="query_expanders.html"
273
273
  title="次の章へ">7.10. クエリー展開オブジェクト一覧</a></p>
274
- <h3>このページ</h3>
275
- <ul class="this-page-menu">
276
- <li><a href="../_sources/reference/token_filters.txt"
277
- rel="nofollow">ソースコードを表示</a></li>
278
- </ul>
279
- <div id="searchbox" style="display: none">
274
+ <div role="note" aria-label="source link">
275
+ <h3>このページ</h3>
276
+ <ul class="this-page-menu">
277
+ <li><a href="../_sources/reference/token_filters.txt"
278
+ rel="nofollow">ソースコードを表示</a></li>
279
+ </ul>
280
+ </div>
281
+ <div id="searchbox" style="display: none" role="search">
280
282
  <h3>クイック検索</h3>
281
283
  <form class="search" action="../search.html" method="get">
282
284
  <input type="text" name="q" />
@@ -293,7 +295,7 @@ select Memos --match_columns content --query &quot;develops&quot;
293
295
  </div>
294
296
  <div class="clearer"></div>
295
297
  </div>
296
- <div class="related">
298
+ <div class="related" role="navigation" aria-label="related navigation">
297
299
  <h3>ナビゲーション</h3>
298
300
  <ul>
299
301
  <li class="right" style="margin-right: 10px">
@@ -305,11 +307,11 @@ select Memos --match_columns content --query &quot;develops&quot;
305
307
  <li class="right" >
306
308
  <a href="tokenizers.html" title="7.8. トークナイザー"
307
309
  >前へ</a> |</li>
308
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adbドキュメント</a> &raquo;</li>
309
- <li><a href="../reference.html" >7. リファレンスマニュアル</a> &raquo;</li>
310
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7ドキュメント</a> &raquo;</li>
311
+ <li class="nav-item nav-item-1"><a href="../reference.html" >7. リファレンスマニュアル</a> &raquo;</li>
310
312
  </ul>
311
313
  </div>
312
- <div class="footer">
314
+ <div class="footer" role="contentinfo">
313
315
  &copy; Copyright 2009-2015, Brazil, Inc.
314
316
  </div>
315
317
  </body>
@@ -7,7 +7,7 @@
7
7
  <head>
8
8
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9
9
 
10
- <title>7.8. トークナイザー &mdash; Groonga v5.0.4-139-g6629adbドキュメント</title>
10
+ <title>7.8. トークナイザー &mdash; Groonga v5.0.6-226-gd7da7e7ドキュメント</title>
11
11
 
12
12
  <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
13
13
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
@@ -15,7 +15,7 @@
15
15
  <script type="text/javascript">
16
16
  var DOCUMENTATION_OPTIONS = {
17
17
  URL_ROOT: '../',
18
- VERSION: '5.0.4-139-g6629adb',
18
+ VERSION: '5.0.6-226-gd7da7e7',
19
19
  COLLAPSE_INDEX: false,
20
20
  FILE_SUFFIX: '.html',
21
21
  HAS_SOURCE: true
@@ -26,12 +26,12 @@
26
26
  <script type="text/javascript" src="../_static/doctools.js"></script>
27
27
  <script type="text/javascript" src="../_static/translations.js"></script>
28
28
  <link rel="shortcut icon" href="../_static/favicon.ico"/>
29
- <link rel="top" title="Groonga v5.0.4-139-g6629adbドキュメント" href="../index.html" />
29
+ <link rel="top" title="Groonga v5.0.6-226-gd7da7e7ドキュメント" href="../index.html" />
30
30
  <link rel="up" title="7. リファレンスマニュアル" href="../reference.html" />
31
31
  <link rel="next" title="7.9. トークンフィルター" href="token_filters.html" />
32
32
  <link rel="prev" title="7.7. ノーマライザー" href="normalizers.html" />
33
33
  </head>
34
- <body>
34
+ <body role="document">
35
35
  <div class="header">
36
36
  <h1 class="title">
37
37
  <a id="top-link" href="../index.html">
@@ -49,7 +49,7 @@
49
49
  </div>
50
50
 
51
51
 
52
- <div class="related">
52
+ <div class="related" role="navigation" aria-label="related navigation">
53
53
  <h3>ナビゲーション</h3>
54
54
  <ul>
55
55
  <li class="right" style="margin-right: 10px">
@@ -61,15 +61,15 @@
61
61
  <li class="right" >
62
62
  <a href="normalizers.html" title="7.7. ノーマライザー"
63
63
  accesskey="P">前へ</a> |</li>
64
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adbドキュメント</a> &raquo;</li>
65
- <li><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> &raquo;</li>
64
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7ドキュメント</a> &raquo;</li>
65
+ <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> &raquo;</li>
66
66
  </ul>
67
67
  </div>
68
68
 
69
69
  <div class="document">
70
70
  <div class="documentwrapper">
71
71
  <div class="bodywrapper">
72
- <div class="body">
72
+ <div class="body" role="main">
73
73
 
74
74
  <div class="section" id="tokenizers">
75
75
  <h1>7.8. トークナイザー<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
@@ -79,22 +79,22 @@
79
79
  <blockquote>
80
80
  <div><ul>
81
81
  <li><p class="first">テキストのインデックスを構築するとき</p>
82
- <div class="figure align-center">
82
+ <div class="figure align-center" id="id1">
83
83
  <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
84
- <p class="caption">テキストのインデックスを構築するときにトークナイザーを使います。</p>
84
+ <p class="caption"><span class="caption-text">テキストのインデックスを構築するときにトークナイザーを使います。</span></p>
85
85
  </div>
86
86
  </li>
87
87
  <li><p class="first">クエリーで検索するとき</p>
88
- <div class="figure align-center">
88
+ <div class="figure align-center" id="id2">
89
89
  <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
90
- <p class="caption">クエリーで検索するときにトークナイザーを使います。</p>
90
+ <p class="caption"><span class="caption-text">クエリーで検索するときにトークナイザーを使います。</span></p>
91
91
  </div>
92
92
  </li>
93
93
  </ul>
94
94
  </div></blockquote>
95
95
  <p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
96
- <p>一般的に <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> を使うことをオススメします。</p>
97
- <p><a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドを使って <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> トークナイザーを試す例を以下に示します。</p>
96
+ <p>一般的に <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> を使うことをオススメします。</p>
97
+ <p><a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドを使って <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
98
98
  <p>実行例:</p>
99
99
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
100
100
  # [
@@ -106,46 +106,57 @@
106
106
  # [
107
107
  # {
108
108
  # &quot;position&quot;: 0,
109
+ # &quot;force_prefix&quot;: false,
109
110
  # &quot;value&quot;: &quot;He&quot;
110
111
  # },
111
112
  # {
112
113
  # &quot;position&quot;: 1,
114
+ # &quot;force_prefix&quot;: false,
113
115
  # &quot;value&quot;: &quot;el&quot;
114
116
  # },
115
117
  # {
116
118
  # &quot;position&quot;: 2,
119
+ # &quot;force_prefix&quot;: false,
117
120
  # &quot;value&quot;: &quot;ll&quot;
118
121
  # },
119
122
  # {
120
123
  # &quot;position&quot;: 3,
124
+ # &quot;force_prefix&quot;: false,
121
125
  # &quot;value&quot;: &quot;lo&quot;
122
126
  # },
123
127
  # {
124
128
  # &quot;position&quot;: 4,
129
+ # &quot;force_prefix&quot;: false,
125
130
  # &quot;value&quot;: &quot;o &quot;
126
131
  # },
127
132
  # {
128
133
  # &quot;position&quot;: 5,
134
+ # &quot;force_prefix&quot;: false,
129
135
  # &quot;value&quot;: &quot; W&quot;
130
136
  # },
131
137
  # {
132
138
  # &quot;position&quot;: 6,
139
+ # &quot;force_prefix&quot;: false,
133
140
  # &quot;value&quot;: &quot;Wo&quot;
134
141
  # },
135
142
  # {
136
143
  # &quot;position&quot;: 7,
144
+ # &quot;force_prefix&quot;: false,
137
145
  # &quot;value&quot;: &quot;or&quot;
138
146
  # },
139
147
  # {
140
148
  # &quot;position&quot;: 8,
149
+ # &quot;force_prefix&quot;: false,
141
150
  # &quot;value&quot;: &quot;rl&quot;
142
151
  # },
143
152
  # {
144
153
  # &quot;position&quot;: 9,
154
+ # &quot;force_prefix&quot;: false,
145
155
  # &quot;value&quot;: &quot;ld&quot;
146
156
  # },
147
157
  # {
148
158
  # &quot;position&quot;: 10,
159
+ # &quot;force_prefix&quot;: false,
149
160
  # &quot;value&quot;: &quot;d&quot;
150
161
  # }
151
162
  # ]
@@ -156,75 +167,75 @@
156
167
  <div class="section" id="what-is-tokenize">
157
168
  <h2>7.8.2. 「トークナイズ」とはなにか<a class="headerlink" href="#what-is-tokenize" title="このヘッドラインへのパーマリンク">¶</a></h2>
158
169
  <p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
159
- <p>例えば、バイグラムというトークナイズ方法では <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は次のトークンにトークナイズされます。</p>
170
+ <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
160
171
  <blockquote>
161
172
  <div><ul class="simple">
162
- <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
163
- <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
164
- <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
165
- <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
166
- <li><p class="first"><tt class="docutils literal"><span class="pre">o_</span></tt> ( <tt class="docutils literal"><span class="pre">_</span></tt> は空白文字という意味)</p>
173
+ <li><code class="docutils literal"><span class="pre">He</span></code></li>
174
+ <li><code class="docutils literal"><span class="pre">el</span></code></li>
175
+ <li><code class="docutils literal"><span class="pre">ll</span></code></li>
176
+ <li><code class="docutils literal"><span class="pre">lo</span></code></li>
177
+ <li><p class="first"><code class="docutils literal"><span class="pre">o_</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</p>
167
178
  </li>
168
- <li><p class="first"><tt class="docutils literal"><span class="pre">_W</span></tt> ( <tt class="docutils literal"><span class="pre">_</span></tt> は空白文字という意味)</p>
179
+ <li><p class="first"><code class="docutils literal"><span class="pre">_W</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</p>
169
180
  </li>
170
- <li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
171
- <li><tt class="docutils literal"><span class="pre">or</span></tt></li>
172
- <li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
173
- <li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
181
+ <li><code class="docutils literal"><span class="pre">Wo</span></code></li>
182
+ <li><code class="docutils literal"><span class="pre">or</span></code></li>
183
+ <li><code class="docutils literal"><span class="pre">rl</span></code></li>
184
+ <li><code class="docutils literal"><span class="pre">ld</span></code></li>
174
185
  </ul>
175
186
  </div></blockquote>
176
- <p>上記の例では、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> から10個のトークンを抽出しました。</p>
177
- <p>例えば、空白区切りのトークナイズ方法では <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は次のトークンにトークナイズされます。</p>
187
+ <p>上記の例では、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
188
+ <p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
178
189
  <blockquote>
179
190
  <div><ul class="simple">
180
- <li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
181
- <li><tt class="docutils literal"><span class="pre">World</span></tt></li>
191
+ <li><code class="docutils literal"><span class="pre">Hello</span></code></li>
192
+ <li><code class="docutils literal"><span class="pre">World</span></code></li>
182
193
  </ul>
183
194
  </div></blockquote>
184
- <p>上記の例では、<tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> から2つのトークンを抽出しました。</p>
185
- <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <tt class="docutils literal"><span class="pre">ll</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <tt class="docutils literal"><span class="pre">ll</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <tt class="docutils literal"><span class="pre">ll</span></tt> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <tt class="docutils literal"><span class="pre">Hello</span></tt> というトークンと <tt class="docutils literal"><span class="pre">World</span></tt> というトークンしか抽出していません。</p>
195
+ <p>上記の例では、<code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
196
+ <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
186
197
  <p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
187
- <p>例えば、バイグラムというトークナイズ方法では <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> と <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> を検索できます。しかし、「論理和」を検索したい人にとっては <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
188
- <p>空白区切りのトークナイズ方法を使った場合は <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <tt class="docutils literal"><span class="pre">World</span></tt> は <tt class="docutils literal"><span class="pre">World</span></tt> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が挙がっています。しかし、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> も <tt class="docutils literal"><span class="pre">or</span></tt> を含んでいるのに見つかっていないので再現率が下がっています。</p>
198
+ <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
199
+ <p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が挙がっています。しかし、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
189
200
  </div>
190
201
  <div class="section" id="built-in-tokenizsers">
191
202
  <h2>7.8.3. 組み込みトークナイザー<a class="headerlink" href="#built-in-tokenizsers" title="このヘッドラインへのパーマリンク">¶</a></h2>
192
203
  <p>以下は組み込みのトークナイザーのリストです。</p>
193
204
  <blockquote>
194
205
  <div><ul class="simple">
195
- <li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
196
- <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
197
- <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
198
- <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
199
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
200
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
201
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></tt></li>
202
- <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></tt></li>
203
- <li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
204
- <li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
205
- <li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
206
- <li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
207
- <li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
208
- <li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
206
+ <li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
207
+ <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
208
+ <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
209
+ <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
210
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
211
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
212
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></code></li>
213
+ <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></code></li>
214
+ <li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
215
+ <li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
216
+ <li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
217
+ <li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
218
+ <li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
219
+ <li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
209
220
  </ul>
210
221
  </div></blockquote>
211
222
  <div class="section" id="tokenbigram">
212
- <span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
213
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
214
- <p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <tt class="docutils literal"><span class="pre">Hello</span></tt> は次のトークンにトークナイズします。</p>
223
+ <span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
224
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
225
+ <p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <code class="docutils literal"><span class="pre">Hello</span></code> は次のトークンにトークナイズします。</p>
215
226
  <blockquote>
216
227
  <div><ul class="simple">
217
- <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
218
- <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
219
- <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
220
- <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
228
+ <li><code class="docutils literal"><span class="pre">He</span></code></li>
229
+ <li><code class="docutils literal"><span class="pre">el</span></code></li>
230
+ <li><code class="docutils literal"><span class="pre">ll</span></code></li>
231
+ <li><code class="docutils literal"><span class="pre">lo</span></code></li>
221
232
  </ul>
222
233
  </div></blockquote>
223
234
  <p>バイグラムというトークナイズ方法は再現性に優れています。なぜなら、2文字以上の文字のクエリーに対してはすべてのテキストを見つけることができるからです。</p>
224
- <p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <tt class="docutils literal"><span class="pre">l</span></tt> というクエリーから <tt class="docutils literal"><span class="pre">ll</span></tt> というトークンと <tt class="docutils literal"><span class="pre">lo</span></tt> というトークンを見つけることができます。</p>
225
- <p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">world</span></tt> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はこの問題を解決しています。</p>
226
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> の挙動は <a class="reference internal" href="normalizers.html"><em>ノーマライザー</em></a> を使うかどうかで変わります。</p>
227
- <p>ノーマライザーを使っていない場合は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
235
+ <p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <code class="docutils literal"><span class="pre">l</span></code> というクエリーから <code class="docutils literal"><span class="pre">ll</span></code> というトークンと <code class="docutils literal"><span class="pre">lo</span></code> というトークンを見つけることができます。</p>
236
+ <p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">world</span></code> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <code class="docutils literal"><span class="pre">TokenBigram</span></code> はこの問題を解決しています。</p>
237
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動は <a class="reference internal" href="normalizers.html"><em>ノーマライザー</em></a> を使うかどうかで変わります。</p>
238
+ <p>ノーマライザーを使っていない場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
228
239
  <p>実行例:</p>
229
240
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot;
230
241
  # [
@@ -236,60 +247,71 @@
236
247
  # [
237
248
  # {
238
249
  # &quot;position&quot;: 0,
250
+ # &quot;force_prefix&quot;: false,
239
251
  # &quot;value&quot;: &quot;He&quot;
240
252
  # },
241
253
  # {
242
254
  # &quot;position&quot;: 1,
255
+ # &quot;force_prefix&quot;: false,
243
256
  # &quot;value&quot;: &quot;el&quot;
244
257
  # },
245
258
  # {
246
259
  # &quot;position&quot;: 2,
260
+ # &quot;force_prefix&quot;: false,
247
261
  # &quot;value&quot;: &quot;ll&quot;
248
262
  # },
249
263
  # {
250
264
  # &quot;position&quot;: 3,
265
+ # &quot;force_prefix&quot;: false,
251
266
  # &quot;value&quot;: &quot;lo&quot;
252
267
  # },
253
268
  # {
254
269
  # &quot;position&quot;: 4,
270
+ # &quot;force_prefix&quot;: false,
255
271
  # &quot;value&quot;: &quot;o &quot;
256
272
  # },
257
273
  # {
258
274
  # &quot;position&quot;: 5,
275
+ # &quot;force_prefix&quot;: false,
259
276
  # &quot;value&quot;: &quot; W&quot;
260
277
  # },
261
278
  # {
262
279
  # &quot;position&quot;: 6,
280
+ # &quot;force_prefix&quot;: false,
263
281
  # &quot;value&quot;: &quot;Wo&quot;
264
282
  # },
265
283
  # {
266
284
  # &quot;position&quot;: 7,
285
+ # &quot;force_prefix&quot;: false,
267
286
  # &quot;value&quot;: &quot;or&quot;
268
287
  # },
269
288
  # {
270
289
  # &quot;position&quot;: 8,
290
+ # &quot;force_prefix&quot;: false,
271
291
  # &quot;value&quot;: &quot;rl&quot;
272
292
  # },
273
293
  # {
274
294
  # &quot;position&quot;: 9,
295
+ # &quot;force_prefix&quot;: false,
275
296
  # &quot;value&quot;: &quot;ld&quot;
276
297
  # },
277
298
  # {
278
299
  # &quot;position&quot;: 10,
300
+ # &quot;force_prefix&quot;: false,
279
301
  # &quot;value&quot;: &quot;d&quot;
280
302
  # }
281
303
  # ]
282
304
  # ]
283
305
  </pre></div>
284
306
  </div>
285
- <p>ノーマライザーを使っている場合は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
307
+ <p>ノーマライザーを使っている場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
286
308
  <p>もしかしたら、複数の方法が混ざったこの挙動はわかりにくいかもしれません。しかし、英語のテキスト(ASCII文字列のみ)や日本語テキスト(ASCII文字列と非ASCII文字列が混ざっている)のような多くのユースケースでは合理的な方法です。</p>
287
309
  <p>ASCII文字しか使わない多くの言語は単語の区切りに空白文字を使っています。このようなケースに空白区切りのトークナイズ方法は適切です。</p>
288
310
  <p>非ASCII文字を使う言語では単語の区切りに空白文字を使いません。このケースにはバイグラムなトークナイズ方法は適切です。</p>
289
311
  <p>複数の言語が混ざっている場合は、複数の方法を組み合わせたトークナイズ方法が適切です。</p>
290
- <p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a> のような <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> というトークナイザーを参照してください。</p>
291
- <p>例を使いながら <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> の挙動を確認しましょう。</p>
292
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
312
+ <p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span>TokenBigramSplitSymbolAlpha</span></a> のような <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> というトークナイザーを参照してください。</p>
313
+ <p>例を使いながら <code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動を確認しましょう。</p>
314
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
293
315
  <p>実行例:</p>
294
316
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello World&quot; NormalizerAuto
295
317
  # [
@@ -301,24 +323,26 @@
301
323
  # [
302
324
  # {
303
325
  # &quot;position&quot;: 0,
326
+ # &quot;force_prefix&quot;: false,
304
327
  # &quot;value&quot;: &quot;hello&quot;
305
328
  # },
306
329
  # {
307
330
  # &quot;position&quot;: 1,
331
+ # &quot;force_prefix&quot;: false,
308
332
  # &quot;value&quot;: &quot;world&quot;
309
333
  # }
310
334
  # ]
311
335
  # ]
312
336
  </pre></div>
313
337
  </div>
314
- <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
338
+ <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
315
339
  <blockquote>
316
340
  <div><ul class="simple">
317
341
  <li><p class="first">アルファベット</p>
318
342
  </li>
319
343
  <li><p class="first">数字</p>
320
344
  </li>
321
- <li><p class="first">記号(たとえば <tt class="docutils literal"><span class="pre">(</span></tt> 、 <tt class="docutils literal"><span class="pre">)</span></tt> 、 <tt class="docutils literal"><span class="pre">!</span></tt> など)</p>
345
+ <li><p class="first">記号(たとえば <code class="docutils literal"><span class="pre">(</span></code> 、 <code class="docutils literal"><span class="pre">)</span></code> 、 <code class="docutils literal"><span class="pre">!</span></code> など)</p>
322
346
  </li>
323
347
  <li><p class="first">ひらがな</p>
324
348
  </li>
@@ -333,9 +357,9 @@
333
357
  <p>次の例は2つのトークン区切りを示しています。</p>
334
358
  <blockquote>
335
359
  <div><ul class="simple">
336
- <li><p class="first"><tt class="docutils literal"><span class="pre">100</span></tt> (数字)と <tt class="docutils literal"><span class="pre">cents</span></tt> (アルファベット)の間のところ</p>
360
+ <li><p class="first"><code class="docutils literal"><span class="pre">100</span></code> (数字)と <code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)の間のところ</p>
337
361
  </li>
338
- <li><p class="first"><tt class="docutils literal"><span class="pre">cents</span></tt> (アルファベット)と <tt class="docutils literal"><span class="pre">!!!</span></tt> (記号)の間のところ</p>
362
+ <li><p class="first"><code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)と <code class="docutils literal"><span class="pre">!!!</span></code> (記号)の間のところ</p>
339
363
  </li>
340
364
  </ul>
341
365
  </div></blockquote>
@@ -350,21 +374,24 @@
350
374
  # [
351
375
  # {
352
376
  # &quot;position&quot;: 0,
377
+ # &quot;force_prefix&quot;: false,
353
378
  # &quot;value&quot;: &quot;100&quot;
354
379
  # },
355
380
  # {
356
381
  # &quot;position&quot;: 1,
382
+ # &quot;force_prefix&quot;: false,
357
383
  # &quot;value&quot;: &quot;cents&quot;
358
384
  # },
359
385
  # {
360
386
  # &quot;position&quot;: 2,
387
+ # &quot;force_prefix&quot;: false,
361
388
  # &quot;value&quot;: &quot;!!!&quot;
362
389
  # }
363
390
  # ]
364
391
  # ]
365
392
  </pre></div>
366
393
  </div>
367
- <p>以下は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
394
+ <p>以下は <code class="docutils literal"><span class="pre">TokenBigram</span></code> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
368
395
  <p>実行例:</p>
369
396
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日本語の勉強&quot; NormalizerAuto
370
397
  # [
@@ -376,26 +403,32 @@
376
403
  # [
377
404
  # {
378
405
  # &quot;position&quot;: 0,
406
+ # &quot;force_prefix&quot;: false,
379
407
  # &quot;value&quot;: &quot;日本&quot;
380
408
  # },
381
409
  # {
382
410
  # &quot;position&quot;: 1,
411
+ # &quot;force_prefix&quot;: false,
383
412
  # &quot;value&quot;: &quot;本語&quot;
384
413
  # },
385
414
  # {
386
415
  # &quot;position&quot;: 2,
416
+ # &quot;force_prefix&quot;: false,
387
417
  # &quot;value&quot;: &quot;語の&quot;
388
418
  # },
389
419
  # {
390
420
  # &quot;position&quot;: 3,
421
+ # &quot;force_prefix&quot;: false,
391
422
  # &quot;value&quot;: &quot;の勉&quot;
392
423
  # },
393
424
  # {
394
425
  # &quot;position&quot;: 4,
426
+ # &quot;force_prefix&quot;: false,
395
427
  # &quot;value&quot;: &quot;勉強&quot;
396
428
  # },
397
429
  # {
398
430
  # &quot;position&quot;: 5,
431
+ # &quot;force_prefix&quot;: false,
399
432
  # &quot;value&quot;: &quot;強&quot;
400
433
  # }
401
434
  # ]
@@ -404,8 +437,8 @@
404
437
  </div>
405
438
  </div>
406
439
  <div class="section" id="tokenbigramsplitsymbol">
407
- <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
408
- <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> は記号のトークナイズ方法にバイグラムを使います。</p>
440
+ <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
441
+ <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは記号の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は記号のトークナイズ方法にバイグラムを使います。</p>
409
442
  <p>実行例:</p>
410
443
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol &quot;100cents!!!&quot; NormalizerAuto
411
444
  # [
@@ -417,22 +450,27 @@
417
450
  # [
418
451
  # {
419
452
  # &quot;position&quot;: 0,
453
+ # &quot;force_prefix&quot;: false,
420
454
  # &quot;value&quot;: &quot;100&quot;
421
455
  # },
422
456
  # {
423
457
  # &quot;position&quot;: 1,
458
+ # &quot;force_prefix&quot;: false,
424
459
  # &quot;value&quot;: &quot;cents&quot;
425
460
  # },
426
461
  # {
427
462
  # &quot;position&quot;: 2,
463
+ # &quot;force_prefix&quot;: false,
428
464
  # &quot;value&quot;: &quot;!!&quot;
429
465
  # },
430
466
  # {
431
467
  # &quot;position&quot;: 3,
468
+ # &quot;force_prefix&quot;: false,
432
469
  # &quot;value&quot;: &quot;!!&quot;
433
470
  # },
434
471
  # {
435
472
  # &quot;position&quot;: 4,
473
+ # &quot;force_prefix&quot;: false,
436
474
  # &quot;value&quot;: &quot;!&quot;
437
475
  # }
438
476
  # ]
@@ -441,8 +479,8 @@
441
479
  </div>
442
480
  </div>
443
481
  <div class="section" id="tokenbigramsplitsymbolalpha">
444
- <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
445
- <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号とアルファベットの扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
482
+ <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
483
+ <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは記号とアルファベットの扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
446
484
  <p>実行例:</p>
447
485
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha &quot;100cents!!!&quot; NormalizerAuto
448
486
  # [
@@ -454,38 +492,47 @@
454
492
  # [
455
493
  # {
456
494
  # &quot;position&quot;: 0,
495
+ # &quot;force_prefix&quot;: false,
457
496
  # &quot;value&quot;: &quot;100&quot;
458
497
  # },
459
498
  # {
460
499
  # &quot;position&quot;: 1,
500
+ # &quot;force_prefix&quot;: false,
461
501
  # &quot;value&quot;: &quot;ce&quot;
462
502
  # },
463
503
  # {
464
504
  # &quot;position&quot;: 2,
505
+ # &quot;force_prefix&quot;: false,
465
506
  # &quot;value&quot;: &quot;en&quot;
466
507
  # },
467
508
  # {
468
509
  # &quot;position&quot;: 3,
510
+ # &quot;force_prefix&quot;: false,
469
511
  # &quot;value&quot;: &quot;nt&quot;
470
512
  # },
471
513
  # {
472
514
  # &quot;position&quot;: 4,
515
+ # &quot;force_prefix&quot;: false,
473
516
  # &quot;value&quot;: &quot;ts&quot;
474
517
  # },
475
518
  # {
476
519
  # &quot;position&quot;: 5,
520
+ # &quot;force_prefix&quot;: false,
477
521
  # &quot;value&quot;: &quot;s!&quot;
478
522
  # },
479
523
  # {
480
524
  # &quot;position&quot;: 6,
525
+ # &quot;force_prefix&quot;: false,
481
526
  # &quot;value&quot;: &quot;!!&quot;
482
527
  # },
483
528
  # {
484
529
  # &quot;position&quot;: 7,
530
+ # &quot;force_prefix&quot;: false,
485
531
  # &quot;value&quot;: &quot;!!&quot;
486
532
  # },
487
533
  # {
488
534
  # &quot;position&quot;: 8,
535
+ # &quot;force_prefix&quot;: false,
489
536
  # &quot;value&quot;: &quot;!&quot;
490
537
  # }
491
538
  # ]
@@ -494,8 +541,8 @@
494
541
  </div>
495
542
  </div>
496
543
  <div class="section" id="tokenbigramsplitsymbolalphadigit">
497
- <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
498
- <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
544
+ <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
545
+ <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
499
546
  <p>実行例:</p>
500
547
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit &quot;100cents!!!&quot; NormalizerAuto
501
548
  # [
@@ -507,46 +554,57 @@
507
554
  # [
508
555
  # {
509
556
  # &quot;position&quot;: 0,
557
+ # &quot;force_prefix&quot;: false,
510
558
  # &quot;value&quot;: &quot;10&quot;
511
559
  # },
512
560
  # {
513
561
  # &quot;position&quot;: 1,
562
+ # &quot;force_prefix&quot;: false,
514
563
  # &quot;value&quot;: &quot;00&quot;
515
564
  # },
516
565
  # {
517
566
  # &quot;position&quot;: 2,
567
+ # &quot;force_prefix&quot;: false,
518
568
  # &quot;value&quot;: &quot;0c&quot;
519
569
  # },
520
570
  # {
521
571
  # &quot;position&quot;: 3,
572
+ # &quot;force_prefix&quot;: false,
522
573
  # &quot;value&quot;: &quot;ce&quot;
523
574
  # },
524
575
  # {
525
576
  # &quot;position&quot;: 4,
577
+ # &quot;force_prefix&quot;: false,
526
578
  # &quot;value&quot;: &quot;en&quot;
527
579
  # },
528
580
  # {
529
581
  # &quot;position&quot;: 5,
582
+ # &quot;force_prefix&quot;: false,
530
583
  # &quot;value&quot;: &quot;nt&quot;
531
584
  # },
532
585
  # {
533
586
  # &quot;position&quot;: 6,
587
+ # &quot;force_prefix&quot;: false,
534
588
  # &quot;value&quot;: &quot;ts&quot;
535
589
  # },
536
590
  # {
537
591
  # &quot;position&quot;: 7,
592
+ # &quot;force_prefix&quot;: false,
538
593
  # &quot;value&quot;: &quot;s!&quot;
539
594
  # },
540
595
  # {
541
596
  # &quot;position&quot;: 8,
597
+ # &quot;force_prefix&quot;: false,
542
598
  # &quot;value&quot;: &quot;!!&quot;
543
599
  # },
544
600
  # {
545
601
  # &quot;position&quot;: 9,
602
+ # &quot;force_prefix&quot;: false,
546
603
  # &quot;value&quot;: &quot;!!&quot;
547
604
  # },
548
605
  # {
549
606
  # &quot;position&quot;: 10,
607
+ # &quot;force_prefix&quot;: false,
550
608
  # &quot;value&quot;: &quot;!&quot;
551
609
  # }
552
610
  # ]
@@ -555,10 +613,10 @@
555
613
  </div>
556
614
  </div>
557
615
  <div class="section" id="tokenbigramignoreblank">
558
- <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
559
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは空白文字の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
560
- <p><tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
561
- <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
616
+ <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
617
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは空白文字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
618
+ <p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
619
+ <p><a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> での実行結果です。</p>
562
620
  <p>実行例:</p>
563
621
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
564
622
  # [
@@ -570,33 +628,39 @@
570
628
  # [
571
629
  # {
572
630
  # &quot;position&quot;: 0,
631
+ # &quot;force_prefix&quot;: false,
573
632
  # &quot;value&quot;: &quot;日&quot;
574
633
  # },
575
634
  # {
576
635
  # &quot;position&quot;: 1,
636
+ # &quot;force_prefix&quot;: false,
577
637
  # &quot;value&quot;: &quot;本&quot;
578
638
  # },
579
639
  # {
580
640
  # &quot;position&quot;: 2,
641
+ # &quot;force_prefix&quot;: false,
581
642
  # &quot;value&quot;: &quot;語&quot;
582
643
  # },
583
644
  # {
584
645
  # &quot;position&quot;: 3,
646
+ # &quot;force_prefix&quot;: false,
585
647
  # &quot;value&quot;: &quot;!&quot;
586
648
  # },
587
649
  # {
588
650
  # &quot;position&quot;: 4,
651
+ # &quot;force_prefix&quot;: false,
589
652
  # &quot;value&quot;: &quot;!&quot;
590
653
  # },
591
654
  # {
592
655
  # &quot;position&quot;: 5,
656
+ # &quot;force_prefix&quot;: false,
593
657
  # &quot;value&quot;: &quot;!&quot;
594
658
  # }
595
659
  # ]
596
660
  # ]
597
661
  </pre></div>
598
662
  </div>
599
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> での実行結果です。</p>
663
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> での実行結果です。</p>
600
664
  <p>実行例:</p>
601
665
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank &quot;日 本 語 ! ! !&quot; NormalizerAuto
602
666
  # [
@@ -608,18 +672,22 @@
608
672
  # [
609
673
  # {
610
674
  # &quot;position&quot;: 0,
675
+ # &quot;force_prefix&quot;: false,
611
676
  # &quot;value&quot;: &quot;日本&quot;
612
677
  # },
613
678
  # {
614
679
  # &quot;position&quot;: 1,
680
+ # &quot;force_prefix&quot;: false,
615
681
  # &quot;value&quot;: &quot;本語&quot;
616
682
  # },
617
683
  # {
618
684
  # &quot;position&quot;: 2,
685
+ # &quot;force_prefix&quot;: false,
619
686
  # &quot;value&quot;: &quot;語&quot;
620
687
  # },
621
688
  # {
622
689
  # &quot;position&quot;: 3,
690
+ # &quot;force_prefix&quot;: false,
623
691
  # &quot;value&quot;: &quot;!!!&quot;
624
692
  # }
625
693
  # ]
@@ -628,8 +696,8 @@
628
696
  </div>
629
697
  </div>
630
698
  <div class="section" id="tokenbigramignoreblanksplitsymbol">
631
- <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
632
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
699
+ <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
700
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは次の通りです。</p>
633
701
  <blockquote>
634
702
  <div><ul class="simple">
635
703
  <li><p class="first">空白文字の扱い</p>
@@ -638,10 +706,10 @@
638
706
  </li>
639
707
  </ul>
640
708
  </div></blockquote>
641
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
642
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は記号をバイグラムでトークナイズします。</p>
643
- <p><tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
644
- <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
709
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
710
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は記号をバイグラムでトークナイズします。</p>
711
+ <p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
712
+ <p><a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> での実行結果です。</p>
645
713
  <p>実行例:</p>
646
714
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;日 本 語 ! ! !&quot; NormalizerAuto
647
715
  # [
@@ -653,33 +721,39 @@
653
721
  # [
654
722
  # {
655
723
  # &quot;position&quot;: 0,
724
+ # &quot;force_prefix&quot;: false,
656
725
  # &quot;value&quot;: &quot;日&quot;
657
726
  # },
658
727
  # {
659
728
  # &quot;position&quot;: 1,
729
+ # &quot;force_prefix&quot;: false,
660
730
  # &quot;value&quot;: &quot;本&quot;
661
731
  # },
662
732
  # {
663
733
  # &quot;position&quot;: 2,
734
+ # &quot;force_prefix&quot;: false,
664
735
  # &quot;value&quot;: &quot;語&quot;
665
736
  # },
666
737
  # {
667
738
  # &quot;position&quot;: 3,
739
+ # &quot;force_prefix&quot;: false,
668
740
  # &quot;value&quot;: &quot;!&quot;
669
741
  # },
670
742
  # {
671
743
  # &quot;position&quot;: 4,
744
+ # &quot;force_prefix&quot;: false,
672
745
  # &quot;value&quot;: &quot;!&quot;
673
746
  # },
674
747
  # {
675
748
  # &quot;position&quot;: 5,
749
+ # &quot;force_prefix&quot;: false,
676
750
  # &quot;value&quot;: &quot;!&quot;
677
751
  # }
678
752
  # ]
679
753
  # ]
680
754
  </pre></div>
681
755
  </div>
682
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> の実行結果です。</p>
756
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> の実行結果です。</p>
683
757
  <p>実行例:</p>
684
758
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol &quot;日 本 語 ! ! !&quot; NormalizerAuto
685
759
  # [
@@ -691,26 +765,32 @@
691
765
  # [
692
766
  # {
693
767
  # &quot;position&quot;: 0,
768
+ # &quot;force_prefix&quot;: false,
694
769
  # &quot;value&quot;: &quot;日本&quot;
695
770
  # },
696
771
  # {
697
772
  # &quot;position&quot;: 1,
773
+ # &quot;force_prefix&quot;: false,
698
774
  # &quot;value&quot;: &quot;本語&quot;
699
775
  # },
700
776
  # {
701
777
  # &quot;position&quot;: 2,
778
+ # &quot;force_prefix&quot;: false,
702
779
  # &quot;value&quot;: &quot;語!&quot;
703
780
  # },
704
781
  # {
705
782
  # &quot;position&quot;: 3,
783
+ # &quot;force_prefix&quot;: false,
706
784
  # &quot;value&quot;: &quot;!!&quot;
707
785
  # },
708
786
  # {
709
787
  # &quot;position&quot;: 4,
788
+ # &quot;force_prefix&quot;: false,
710
789
  # &quot;value&quot;: &quot;!!&quot;
711
790
  # },
712
791
  # {
713
792
  # &quot;position&quot;: 5,
793
+ # &quot;force_prefix&quot;: false,
714
794
  # &quot;value&quot;: &quot;!&quot;
715
795
  # }
716
796
  # ]
@@ -719,8 +799,8 @@
719
799
  </div>
720
800
  </div>
721
801
  <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
722
- <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
723
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
802
+ <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
803
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは次の通りです。</p>
724
804
  <blockquote>
725
805
  <div><ul class="simple">
726
806
  <li><p class="first">空白文字の扱い</p>
@@ -729,10 +809,10 @@
729
809
  </li>
730
810
  </ul>
731
811
  </div></blockquote>
732
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
733
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は記号とアルファベットをバイグラムでトークナイズします。</p>
734
- <p><tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
735
- <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
812
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
813
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は記号とアルファベットをバイグラムでトークナイズします。</p>
814
+ <p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
815
+ <p><a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> での実行結果です。</p>
736
816
  <p>実行例:</p>
737
817
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
738
818
  # [
@@ -744,37 +824,44 @@
744
824
  # [
745
825
  # {
746
826
  # &quot;position&quot;: 0,
827
+ # &quot;force_prefix&quot;: false,
747
828
  # &quot;value&quot;: &quot;hello&quot;
748
829
  # },
749
830
  # {
750
831
  # &quot;position&quot;: 1,
832
+ # &quot;force_prefix&quot;: false,
751
833
  # &quot;value&quot;: &quot;日&quot;
752
834
  # },
753
835
  # {
754
836
  # &quot;position&quot;: 2,
837
+ # &quot;force_prefix&quot;: false,
755
838
  # &quot;value&quot;: &quot;本&quot;
756
839
  # },
757
840
  # {
758
841
  # &quot;position&quot;: 3,
842
+ # &quot;force_prefix&quot;: false,
759
843
  # &quot;value&quot;: &quot;語&quot;
760
844
  # },
761
845
  # {
762
846
  # &quot;position&quot;: 4,
847
+ # &quot;force_prefix&quot;: false,
763
848
  # &quot;value&quot;: &quot;!&quot;
764
849
  # },
765
850
  # {
766
851
  # &quot;position&quot;: 5,
852
+ # &quot;force_prefix&quot;: false,
767
853
  # &quot;value&quot;: &quot;!&quot;
768
854
  # },
769
855
  # {
770
856
  # &quot;position&quot;: 6,
857
+ # &quot;force_prefix&quot;: false,
771
858
  # &quot;value&quot;: &quot;!&quot;
772
859
  # }
773
860
  # ]
774
861
  # ]
775
862
  </pre></div>
776
863
  </div>
777
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> の実行結果です。</p>
864
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> の実行結果です。</p>
778
865
  <p>実行例:</p>
779
866
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha &quot;Hello 日 本 語 ! ! !&quot; NormalizerAuto
780
867
  # [
@@ -786,46 +873,57 @@
786
873
  # [
787
874
  # {
788
875
  # &quot;position&quot;: 0,
876
+ # &quot;force_prefix&quot;: false,
789
877
  # &quot;value&quot;: &quot;he&quot;
790
878
  # },
791
879
  # {
792
880
  # &quot;position&quot;: 1,
881
+ # &quot;force_prefix&quot;: false,
793
882
  # &quot;value&quot;: &quot;el&quot;
794
883
  # },
795
884
  # {
796
885
  # &quot;position&quot;: 2,
886
+ # &quot;force_prefix&quot;: false,
797
887
  # &quot;value&quot;: &quot;ll&quot;
798
888
  # },
799
889
  # {
800
890
  # &quot;position&quot;: 3,
891
+ # &quot;force_prefix&quot;: false,
801
892
  # &quot;value&quot;: &quot;lo&quot;
802
893
  # },
803
894
  # {
804
895
  # &quot;position&quot;: 4,
896
+ # &quot;force_prefix&quot;: false,
805
897
  # &quot;value&quot;: &quot;o日&quot;
806
898
  # },
807
899
  # {
808
900
  # &quot;position&quot;: 5,
901
+ # &quot;force_prefix&quot;: false,
809
902
  # &quot;value&quot;: &quot;日本&quot;
810
903
  # },
811
904
  # {
812
905
  # &quot;position&quot;: 6,
906
+ # &quot;force_prefix&quot;: false,
813
907
  # &quot;value&quot;: &quot;本語&quot;
814
908
  # },
815
909
  # {
816
910
  # &quot;position&quot;: 7,
911
+ # &quot;force_prefix&quot;: false,
817
912
  # &quot;value&quot;: &quot;語!&quot;
818
913
  # },
819
914
  # {
820
915
  # &quot;position&quot;: 8,
916
+ # &quot;force_prefix&quot;: false,
821
917
  # &quot;value&quot;: &quot;!!&quot;
822
918
  # },
823
919
  # {
824
920
  # &quot;position&quot;: 9,
921
+ # &quot;force_prefix&quot;: false,
825
922
  # &quot;value&quot;: &quot;!!&quot;
826
923
  # },
827
924
  # {
828
925
  # &quot;position&quot;: 10,
926
+ # &quot;force_prefix&quot;: false,
829
927
  # &quot;value&quot;: &quot;!&quot;
830
928
  # }
831
929
  # ]
@@ -834,8 +932,8 @@
834
932
  </div>
835
933
  </div>
836
934
  <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
837
- <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
838
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
935
+ <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
936
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> と似ています。違いは次の通りです。</p>
839
937
  <blockquote>
840
938
  <div><ul class="simple">
841
939
  <li><p class="first">空白文字の扱い</p>
@@ -844,10 +942,10 @@
844
942
  </li>
845
943
  </ul>
846
944
  </div></blockquote>
847
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
848
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
849
- <p><tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
850
- <p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
945
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
946
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
947
+ <p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
948
+ <p><a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> での実行結果です。</p>
851
949
  <p>実行例:</p>
852
950
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
853
951
  # [
@@ -859,41 +957,49 @@
859
957
  # [
860
958
  # {
861
959
  # &quot;position&quot;: 0,
960
+ # &quot;force_prefix&quot;: false,
862
961
  # &quot;value&quot;: &quot;hello&quot;
863
962
  # },
864
963
  # {
865
964
  # &quot;position&quot;: 1,
965
+ # &quot;force_prefix&quot;: false,
866
966
  # &quot;value&quot;: &quot;日&quot;
867
967
  # },
868
968
  # {
869
969
  # &quot;position&quot;: 2,
970
+ # &quot;force_prefix&quot;: false,
870
971
  # &quot;value&quot;: &quot;本&quot;
871
972
  # },
872
973
  # {
873
974
  # &quot;position&quot;: 3,
975
+ # &quot;force_prefix&quot;: false,
874
976
  # &quot;value&quot;: &quot;語&quot;
875
977
  # },
876
978
  # {
877
979
  # &quot;position&quot;: 4,
980
+ # &quot;force_prefix&quot;: false,
878
981
  # &quot;value&quot;: &quot;!&quot;
879
982
  # },
880
983
  # {
881
984
  # &quot;position&quot;: 5,
985
+ # &quot;force_prefix&quot;: false,
882
986
  # &quot;value&quot;: &quot;!&quot;
883
987
  # },
884
988
  # {
885
989
  # &quot;position&quot;: 6,
990
+ # &quot;force_prefix&quot;: false,
886
991
  # &quot;value&quot;: &quot;!&quot;
887
992
  # },
888
993
  # {
889
994
  # &quot;position&quot;: 7,
995
+ # &quot;force_prefix&quot;: false,
890
996
  # &quot;value&quot;: &quot;777&quot;
891
997
  # }
892
998
  # ]
893
999
  # ]
894
1000
  </pre></div>
895
1001
  </div>
896
- <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> の実行結果です。</p>
1002
+ <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> の実行結果です。</p>
897
1003
  <p>実行例:</p>
898
1004
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit &quot;Hello 日 本 語 ! ! ! 777&quot; NormalizerAuto
899
1005
  # [
@@ -905,58 +1011,72 @@
905
1011
  # [
906
1012
  # {
907
1013
  # &quot;position&quot;: 0,
1014
+ # &quot;force_prefix&quot;: false,
908
1015
  # &quot;value&quot;: &quot;he&quot;
909
1016
  # },
910
1017
  # {
911
1018
  # &quot;position&quot;: 1,
1019
+ # &quot;force_prefix&quot;: false,
912
1020
  # &quot;value&quot;: &quot;el&quot;
913
1021
  # },
914
1022
  # {
915
1023
  # &quot;position&quot;: 2,
1024
+ # &quot;force_prefix&quot;: false,
916
1025
  # &quot;value&quot;: &quot;ll&quot;
917
1026
  # },
918
1027
  # {
919
1028
  # &quot;position&quot;: 3,
1029
+ # &quot;force_prefix&quot;: false,
920
1030
  # &quot;value&quot;: &quot;lo&quot;
921
1031
  # },
922
1032
  # {
923
1033
  # &quot;position&quot;: 4,
1034
+ # &quot;force_prefix&quot;: false,
924
1035
  # &quot;value&quot;: &quot;o日&quot;
925
1036
  # },
926
1037
  # {
927
1038
  # &quot;position&quot;: 5,
1039
+ # &quot;force_prefix&quot;: false,
928
1040
  # &quot;value&quot;: &quot;日本&quot;
929
1041
  # },
930
1042
  # {
931
1043
  # &quot;position&quot;: 6,
1044
+ # &quot;force_prefix&quot;: false,
932
1045
  # &quot;value&quot;: &quot;本語&quot;
933
1046
  # },
934
1047
  # {
935
1048
  # &quot;position&quot;: 7,
1049
+ # &quot;force_prefix&quot;: false,
936
1050
  # &quot;value&quot;: &quot;語!&quot;
937
1051
  # },
938
1052
  # {
939
1053
  # &quot;position&quot;: 8,
1054
+ # &quot;force_prefix&quot;: false,
940
1055
  # &quot;value&quot;: &quot;!!&quot;
941
1056
  # },
942
1057
  # {
943
1058
  # &quot;position&quot;: 9,
1059
+ # &quot;force_prefix&quot;: false,
944
1060
  # &quot;value&quot;: &quot;!!&quot;
945
1061
  # },
946
1062
  # {
947
1063
  # &quot;position&quot;: 10,
1064
+ # &quot;force_prefix&quot;: false,
948
1065
  # &quot;value&quot;: &quot;!7&quot;
949
1066
  # },
950
1067
  # {
951
1068
  # &quot;position&quot;: 11,
1069
+ # &quot;force_prefix&quot;: false,
952
1070
  # &quot;value&quot;: &quot;77&quot;
953
1071
  # },
954
1072
  # {
955
1073
  # &quot;position&quot;: 12,
1074
+ # &quot;force_prefix&quot;: false,
956
1075
  # &quot;value&quot;: &quot;77&quot;
957
1076
  # },
958
1077
  # {
959
1078
  # &quot;position&quot;: 13,
1079
+ # &quot;force_prefix&quot;: false,
960
1080
  # &quot;value&quot;: &quot;7&quot;
961
1081
  # }
962
1082
  # ]
@@ -965,8 +1085,8 @@
965
1085
  </div>
966
1086
  </div>
967
1087
  <div class="section" id="tokenunigram">
968
- <span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
969
- <p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は各トークンが2文字ですが、 <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> は各トークンが1文字です。</p>
1088
+ <span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
1089
+ <p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenUnigram</span></code> は各トークンが1文字です。</p>
970
1090
  <p>実行例:</p>
971
1091
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram &quot;100cents!!!&quot; NormalizerAuto
972
1092
  # [
@@ -978,14 +1098,17 @@
978
1098
  # [
979
1099
  # {
980
1100
  # &quot;position&quot;: 0,
1101
+ # &quot;force_prefix&quot;: false,
981
1102
  # &quot;value&quot;: &quot;100&quot;
982
1103
  # },
983
1104
  # {
984
1105
  # &quot;position&quot;: 1,
1106
+ # &quot;force_prefix&quot;: false,
985
1107
  # &quot;value&quot;: &quot;cents&quot;
986
1108
  # },
987
1109
  # {
988
1110
  # &quot;position&quot;: 2,
1111
+ # &quot;force_prefix&quot;: false,
989
1112
  # &quot;value&quot;: &quot;!!!&quot;
990
1113
  # }
991
1114
  # ]
@@ -994,8 +1117,8 @@
994
1117
  </div>
995
1118
  </div>
996
1119
  <div class="section" id="tokentrigram">
997
- <span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
998
- <p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は各トークンが2文字ですが、 <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> は各トークンが3文字です。</p>
1120
+ <span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
1121
+ <p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> は <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenTrigram</span></code> は各トークンが3文字です。</p>
999
1122
  <p>実行例:</p>
1000
1123
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram &quot;10000cents!!!!!&quot; NormalizerAuto
1001
1124
  # [
@@ -1007,14 +1130,17 @@
1007
1130
  # [
1008
1131
  # {
1009
1132
  # &quot;position&quot;: 0,
1133
+ # &quot;force_prefix&quot;: false,
1010
1134
  # &quot;value&quot;: &quot;10000&quot;
1011
1135
  # },
1012
1136
  # {
1013
1137
  # &quot;position&quot;: 1,
1138
+ # &quot;force_prefix&quot;: false,
1014
1139
  # &quot;value&quot;: &quot;cents&quot;
1015
1140
  # },
1016
1141
  # {
1017
1142
  # &quot;position&quot;: 2,
1143
+ # &quot;force_prefix&quot;: false,
1018
1144
  # &quot;value&quot;: &quot;!!!!!&quot;
1019
1145
  # }
1020
1146
  # ]
@@ -1023,10 +1149,10 @@
1023
1149
  </div>
1024
1150
  </div>
1025
1151
  <div class="section" id="tokendelimit">
1026
- <span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
1027
- <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> は1つ以上の空白文字( <tt class="docutils literal"><span class="pre">U+0020</span></tt> )で分割してトークンを抽出します。たとえば、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は <tt class="docutils literal"><span class="pre">Hello</span></tt> と <tt class="docutils literal"><span class="pre">World</span></tt> にトークナイズされます。</p>
1028
- <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> はタグテキストに適切です。 <tt class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></tt> というテキストから <tt class="docutils literal"><span class="pre">groonga</span></tt> 、 <tt class="docutils literal"><span class="pre">full-text-search</span></tt> 、 <tt class="docutils literal"><span class="pre">http</span></tt> を抽出します。</p>
1029
- <p>以下は <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> の例です。</p>
1152
+ <span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
1153
+ <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> は1つ以上の空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )で分割してトークンを抽出します。たとえば、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">Hello</span></code> と <code class="docutils literal"><span class="pre">World</span></code> にトークナイズされます。</p>
1154
+ <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> はタグテキストに適切です。 <code class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></code> というテキストから <code class="docutils literal"><span class="pre">groonga</span></code> 、 <code class="docutils literal"><span class="pre">full-text-search</span></code> 、 <code class="docutils literal"><span class="pre">http</span></code> を抽出します。</p>
1155
+ <p>以下は <code class="docutils literal"><span class="pre">TokenDelimit</span></code> の例です。</p>
1030
1156
  <p>実行例:</p>
1031
1157
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit &quot;Groonga full-text-search HTTP&quot; NormalizerAuto
1032
1158
  # [
@@ -1038,14 +1164,17 @@
1038
1164
  # [
1039
1165
  # {
1040
1166
  # &quot;position&quot;: 0,
1167
+ # &quot;force_prefix&quot;: false,
1041
1168
  # &quot;value&quot;: &quot;groonga&quot;
1042
1169
  # },
1043
1170
  # {
1044
1171
  # &quot;position&quot;: 1,
1172
+ # &quot;force_prefix&quot;: false,
1045
1173
  # &quot;value&quot;: &quot;full-text-search&quot;
1046
1174
  # },
1047
1175
  # {
1048
1176
  # &quot;position&quot;: 2,
1177
+ # &quot;force_prefix&quot;: false,
1049
1178
  # &quot;value&quot;: &quot;http&quot;
1050
1179
  # }
1051
1180
  # ]
@@ -1054,10 +1183,10 @@
1054
1183
  </div>
1055
1184
  </div>
1056
1185
  <div class="section" id="tokendelimitnull">
1057
- <span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
1058
- <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> は <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a> は空白文字( <tt class="docutils literal"><span class="pre">U+0020</span></tt> )を使いますが、 <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> はNUL文字( <tt class="docutils literal"><span class="pre">U+0000</span></tt> )を使います。</p>
1059
- <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> もタグテキストに適切です。</p>
1060
- <p>以下は <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> の例です。</p>
1186
+ <span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
1187
+ <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> は <a class="reference internal" href="#token-delimit"><span>TokenDelimit</span></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><span>TokenDelimit</span></a> は空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )を使いますが、 <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> はNUL文字( <code class="docutils literal"><span class="pre">U+0000</span></code> )を使います。</p>
1188
+ <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> もタグテキストに適切です。</p>
1189
+ <p>以下は <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> の例です。</p>
1061
1190
  <p>実行例:</p>
1062
1191
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull &quot;Groonga\u0000full-text-search\u0000HTTP&quot; NormalizerAuto
1063
1192
  # [
@@ -1069,6 +1198,7 @@
1069
1198
  # [
1070
1199
  # {
1071
1200
  # &quot;position&quot;: 0,
1201
+ # &quot;force_prefix&quot;: false,
1072
1202
  # &quot;value&quot;: &quot;groongau0000full-text-searchu0000http&quot;
1073
1203
  # }
1074
1204
  # ]
@@ -1077,12 +1207,12 @@
1077
1207
  </div>
1078
1208
  </div>
1079
1209
  <div class="section" id="tokenmecab">
1080
- <span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
1081
- <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> は <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
1210
+ <span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
1211
+ <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
1082
1212
  <p>MeCabは日本語に依存していません。その言語用の辞書を用意すれば日本語以外でもMeCabを使えます。日本語用の辞書には <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST Japanese Dictionary</a> を使えます。</p>
1083
- <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> では <tt class="docutils literal"><span class="pre">京都</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">東京都</span></tt> も <tt class="docutils literal"><span class="pre">京都</span></tt> も見つかりますが、この場合は <tt class="docutils literal"><span class="pre">東京都</span></tt> は期待した結果ではありません。 <tt class="docutils literal"><span class="pre">TokenMecab</span></tt> を使うと <tt class="docutils literal"><span class="pre">京都</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">京都</span></tt> だけを見つけられます。</p>
1084
- <p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
1085
- <p>以下は <tt class="docutils literal"><span class="pre">TokenMeCab</span></tt> の例です。 <tt class="docutils literal"><span class="pre">東京都</span></tt> は <tt class="docutils literal"><span class="pre">東京</span></tt> と <tt class="docutils literal"><span class="pre">都</span></tt> にトークナイズされています。 <tt class="docutils literal"><span class="pre">京都</span></tt> というトークンはありません。</p>
1213
+ <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> では <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">東京都</span></code> も <code class="docutils literal"><span class="pre">京都</span></code> も見つかりますが、この場合は <code class="docutils literal"><span class="pre">東京都</span></code> は期待した結果ではありません。 <code class="docutils literal"><span class="pre">TokenMecab</span></code> を使うと <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">京都</span></code> だけを見つけられます。</p>
1214
+ <p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
1215
+ <p>以下は <code class="docutils literal"><span class="pre">TokenMecab</span></code> の例です。 <code class="docutils literal"><span class="pre">東京都</span></code> は <code class="docutils literal"><span class="pre">東京</span></code> と <code class="docutils literal"><span class="pre">都</span></code> にトークナイズされています。 <code class="docutils literal"><span class="pre">京都</span></code> というトークンはありません。</p>
1086
1216
  <p>実行例:</p>
1087
1217
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab &quot;東京都&quot;
1088
1218
  # [
@@ -1094,10 +1224,12 @@
1094
1224
  # [
1095
1225
  # {
1096
1226
  # &quot;position&quot;: 0,
1227
+ # &quot;force_prefix&quot;: false,
1097
1228
  # &quot;value&quot;: &quot;東京&quot;
1098
1229
  # },
1099
1230
  # {
1100
1231
  # &quot;position&quot;: 1,
1232
+ # &quot;force_prefix&quot;: false,
1101
1233
  # &quot;value&quot;: &quot;都&quot;
1102
1234
  # }
1103
1235
  # ]
@@ -1106,7 +1238,7 @@
1106
1238
  </div>
1107
1239
  </div>
1108
1240
  <div class="section" id="tokenregexp">
1109
- <span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
1241
+ <span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
1110
1242
  <div class="versionadded">
1111
1243
  <p><span class="versionmodified">バージョン 5.0.1 で追加.</span></p>
1112
1244
  </div>
@@ -1118,20 +1250,20 @@
1118
1250
  <p class="first admonition-title">ご用心</p>
1119
1251
  <p class="last">このトークナイザーはUTF-8でしか使えません。EUC-JPやShift_JISなどと一緒には使えません。</p>
1120
1252
  </div>
1121
- <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
1253
+ <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
1122
1254
  <p>一般的に、正規表現検索は逐次検索で実行します。しかし、次のケースはインデックスを使って検索できます。</p>
1123
1255
  <blockquote>
1124
1256
  <div><ul class="simple">
1125
- <li><p class="first"><tt class="docutils literal"><span class="pre">hello</span></tt> のようにリテラルしかないケース</p>
1257
+ <li><p class="first"><code class="docutils literal"><span class="pre">hello</span></code> のようにリテラルしかないケース</p>
1126
1258
  </li>
1127
- <li><p class="first"><tt class="docutils literal"><span class="pre">\A/home/alice</span></tt> のようにテキストの最初でのマッチとリテラルのみのケース</p>
1259
+ <li><p class="first"><code class="docutils literal"><span class="pre">\A/home/alice</span></code> のようにテキストの最初でのマッチとリテラルのみのケース</p>
1128
1260
  </li>
1129
- <li><p class="first"><tt class="docutils literal"><span class="pre">\.txt\z</span></tt> のようにテキストの最後でのマッチとリテラルのみのケース</p>
1261
+ <li><p class="first"><code class="docutils literal"><span class="pre">\.txt\z</span></code> のようにテキストの最後でのマッチとリテラルのみのケース</p>
1130
1262
  </li>
1131
1263
  </ul>
1132
1264
  </div></blockquote>
1133
1265
  <p>多くのケースでは、逐次検索よりもインデックスを使った検索の方が高速です。</p>
1134
- <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> はベースはバイグラムを使います。 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <tt class="docutils literal"><span class="pre">U+FFEF</span></tt> )を入れ、テキストの最後にテキストの最後であるというマーク( <tt class="docutils literal"><span class="pre">U+FFF0</span></tt> )を入れます。</p>
1266
+ <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はベースとしてバイグラムを使います。 <code class="docutils literal"><span class="pre">TokenRegexp</span></code> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <code class="docutils literal"><span class="pre">U+FFEF</span></code> )を入れ、テキストの最後にテキストの最後であるというマーク( <code class="docutils literal"><span class="pre">U+FFF0</span></code> )を入れます。</p>
1135
1267
  <p>実行例:</p>
1136
1268
  <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;/home/alice/test.txt&quot; NormalizerAuto --mode ADD
1137
1269
  # [
@@ -1143,186 +1275,112 @@
1143
1275
  # [
1144
1276
  # {
1145
1277
  # &quot;position&quot;: 0,
1278
+ # &quot;force_prefix&quot;: false,
1146
1279
  # &quot;value&quot;: &quot;￯&quot;
1147
1280
  # },
1148
1281
  # {
1149
1282
  # &quot;position&quot;: 1,
1283
+ # &quot;force_prefix&quot;: false,
1150
1284
  # &quot;value&quot;: &quot;/h&quot;
1151
1285
  # },
1152
1286
  # {
1153
1287
  # &quot;position&quot;: 2,
1288
+ # &quot;force_prefix&quot;: false,
1154
1289
  # &quot;value&quot;: &quot;ho&quot;
1155
1290
  # },
1156
1291
  # {
1157
1292
  # &quot;position&quot;: 3,
1293
+ # &quot;force_prefix&quot;: false,
1158
1294
  # &quot;value&quot;: &quot;om&quot;
1159
1295
  # },
1160
1296
  # {
1161
1297
  # &quot;position&quot;: 4,
1298
+ # &quot;force_prefix&quot;: false,
1162
1299
  # &quot;value&quot;: &quot;me&quot;
1163
1300
  # },
1164
1301
  # {
1165
1302
  # &quot;position&quot;: 5,
1303
+ # &quot;force_prefix&quot;: false,
1166
1304
  # &quot;value&quot;: &quot;e/&quot;
1167
1305
  # },
1168
1306
  # {
1169
1307
  # &quot;position&quot;: 6,
1308
+ # &quot;force_prefix&quot;: false,
1170
1309
  # &quot;value&quot;: &quot;/a&quot;
1171
1310
  # },
1172
1311
  # {
1173
1312
  # &quot;position&quot;: 7,
1313
+ # &quot;force_prefix&quot;: false,
1174
1314
  # &quot;value&quot;: &quot;al&quot;
1175
1315
  # },
1176
1316
  # {
1177
1317
  # &quot;position&quot;: 8,
1318
+ # &quot;force_prefix&quot;: false,
1178
1319
  # &quot;value&quot;: &quot;li&quot;
1179
1320
  # },
1180
1321
  # {
1181
1322
  # &quot;position&quot;: 9,
1323
+ # &quot;force_prefix&quot;: false,
1182
1324
  # &quot;value&quot;: &quot;ic&quot;
1183
1325
  # },
1184
1326
  # {
1185
1327
  # &quot;position&quot;: 10,
1328
+ # &quot;force_prefix&quot;: false,
1186
1329
  # &quot;value&quot;: &quot;ce&quot;
1187
1330
  # },
1188
1331
  # {
1189
1332
  # &quot;position&quot;: 11,
1333
+ # &quot;force_prefix&quot;: false,
1190
1334
  # &quot;value&quot;: &quot;e/&quot;
1191
1335
  # },
1192
1336
  # {
1193
1337
  # &quot;position&quot;: 12,
1338
+ # &quot;force_prefix&quot;: false,
1194
1339
  # &quot;value&quot;: &quot;/t&quot;
1195
1340
  # },
1196
1341
  # {
1197
1342
  # &quot;position&quot;: 13,
1343
+ # &quot;force_prefix&quot;: false,
1198
1344
  # &quot;value&quot;: &quot;te&quot;
1199
1345
  # },
1200
1346
  # {
1201
1347
  # &quot;position&quot;: 14,
1348
+ # &quot;force_prefix&quot;: false,
1202
1349
  # &quot;value&quot;: &quot;es&quot;
1203
1350
  # },
1204
1351
  # {
1205
1352
  # &quot;position&quot;: 15,
1353
+ # &quot;force_prefix&quot;: false,
1206
1354
  # &quot;value&quot;: &quot;st&quot;
1207
1355
  # },
1208
1356
  # {
1209
1357
  # &quot;position&quot;: 16,
1358
+ # &quot;force_prefix&quot;: false,
1210
1359
  # &quot;value&quot;: &quot;t.&quot;
1211
1360
  # },
1212
1361
  # {
1213
1362
  # &quot;position&quot;: 17,
1363
+ # &quot;force_prefix&quot;: false,
1214
1364
  # &quot;value&quot;: &quot;.t&quot;
1215
1365
  # },
1216
1366
  # {
1217
1367
  # &quot;position&quot;: 18,
1368
+ # &quot;force_prefix&quot;: false,
1218
1369
  # &quot;value&quot;: &quot;tx&quot;
1219
1370
  # },
1220
1371
  # {
1221
1372
  # &quot;position&quot;: 19,
1373
+ # &quot;force_prefix&quot;: false,
1222
1374
  # &quot;value&quot;: &quot;xt&quot;
1223
1375
  # },
1224
1376
  # {
1225
1377
  # &quot;position&quot;: 20,
1378
+ # &quot;force_prefix&quot;: false,
1226
1379
  # &quot;value&quot;: &quot;t&quot;
1227
1380
  # },
1228
1381
  # {
1229
1382
  # &quot;position&quot;: 21,
1230
- # &quot;value&quot;: &quot;￰&quot;
1231
- # }
1232
- # ]
1233
- # ]
1234
- </pre></div>
1235
- </div>
1236
- <p><tt class="docutils literal"><span class="pre">\A</span></tt> で検索したとき、テキストの先頭であるというマークを使います。クエリーをトークナイズするために <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> を使うときは、 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は最初のトークンとしてテキストの先頭であるというマーク( <tt class="docutils literal"><span class="pre">U+FFEF</span></tt> )を追加します。テキストの先頭であるというマークは先頭にしか存在しないはずなので、テキストの先頭であるという検索結果を得ることができます。</p>
1237
- <p>実行例:</p>
1238
- <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\A/home/alice/&quot; NormalizerAuto --mode GET
1239
- # [
1240
- # [
1241
- # 0,
1242
- # 1337566253.89858,
1243
- # 0.000355720520019531
1244
- # ],
1245
- # [
1246
- # {
1247
- # &quot;position&quot;: 0,
1248
- # &quot;value&quot;: &quot;￯&quot;
1249
- # },
1250
- # {
1251
- # &quot;position&quot;: 1,
1252
- # &quot;value&quot;: &quot;/h&quot;
1253
- # },
1254
- # {
1255
- # &quot;position&quot;: 2,
1256
- # &quot;value&quot;: &quot;ho&quot;
1257
- # },
1258
- # {
1259
- # &quot;position&quot;: 3,
1260
- # &quot;value&quot;: &quot;om&quot;
1261
- # },
1262
- # {
1263
- # &quot;position&quot;: 4,
1264
- # &quot;value&quot;: &quot;me&quot;
1265
- # },
1266
- # {
1267
- # &quot;position&quot;: 5,
1268
- # &quot;value&quot;: &quot;e/&quot;
1269
- # },
1270
- # {
1271
- # &quot;position&quot;: 6,
1272
- # &quot;value&quot;: &quot;/a&quot;
1273
- # },
1274
- # {
1275
- # &quot;position&quot;: 7,
1276
- # &quot;value&quot;: &quot;al&quot;
1277
- # },
1278
- # {
1279
- # &quot;position&quot;: 8,
1280
- # &quot;value&quot;: &quot;li&quot;
1281
- # },
1282
- # {
1283
- # &quot;position&quot;: 9,
1284
- # &quot;value&quot;: &quot;ic&quot;
1285
- # },
1286
- # {
1287
- # &quot;position&quot;: 10,
1288
- # &quot;value&quot;: &quot;ce&quot;
1289
- # },
1290
- # {
1291
- # &quot;position&quot;: 11,
1292
- # &quot;value&quot;: &quot;e/&quot;
1293
- # }
1294
- # ]
1295
- # ]
1296
- </pre></div>
1297
- </div>
1298
- <p><tt class="docutils literal"><span class="pre">\z</span></tt> で検索したとき、テキストの最後であるというマークを使います。クエリーをトークナイズするために <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> を使うときは、 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は最後のトークンとしてテキストの最後であるというマーク( <tt class="docutils literal"><span class="pre">U+FFF0</span></tt> )を追加します。テキストの最後であるというマークは最後にしか存在しないはずなので、テキストの最後であるという検索結果を得ることができます。</p>
1299
- <p>実行例:</p>
1300
- <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp &quot;\\.txt\\z&quot; NormalizerAuto --mode GET
1301
- # [
1302
- # [
1303
- # 0,
1304
- # 1337566253.89858,
1305
- # 0.000355720520019531
1306
- # ],
1307
- # [
1308
- # {
1309
- # &quot;position&quot;: 0,
1310
- # &quot;value&quot;: &quot;\\.&quot;
1311
- # },
1312
- # {
1313
- # &quot;position&quot;: 1,
1314
- # &quot;value&quot;: &quot;.t&quot;
1315
- # },
1316
- # {
1317
- # &quot;position&quot;: 2,
1318
- # &quot;value&quot;: &quot;tx&quot;
1319
- # },
1320
- # {
1321
- # &quot;position&quot;: 3,
1322
- # &quot;value&quot;: &quot;xt&quot;
1323
- # },
1324
- # {
1325
- # &quot;position&quot;: 5,
1383
+ # &quot;force_prefix&quot;: false,
1326
1384
  # &quot;value&quot;: &quot;￰&quot;
1327
1385
  # }
1328
1386
  # ]
@@ -1337,7 +1395,7 @@
1337
1395
  </div>
1338
1396
  </div>
1339
1397
  </div>
1340
- <div class="sphinxsidebar">
1398
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
1341
1399
  <div class="sphinxsidebarwrapper">
1342
1400
  <h3><a href="../index.html">目次</a></h3>
1343
1401
  <ul>
@@ -1345,20 +1403,20 @@
1345
1403
  <li><a class="reference internal" href="#summary">7.8.1. 概要</a></li>
1346
1404
  <li><a class="reference internal" href="#what-is-tokenize">7.8.2. 「トークナイズ」とはなにか</a></li>
1347
1405
  <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. 組み込みトークナイザー</a><ul>
1348
- <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
1349
- <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
1350
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
1351
- <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
1352
- <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
1353
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
1354
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
1355
- <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
1356
- <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
1357
- <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
1358
- <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
1359
- <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
1360
- <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
1361
- <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
1406
+ <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
1407
+ <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
1408
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
1409
+ <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
1410
+ <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
1411
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
1412
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
1413
+ <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
1414
+ <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
1415
+ <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
1416
+ <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
1417
+ <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
1418
+ <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
1419
+ <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
1362
1420
  </ul>
1363
1421
  </li>
1364
1422
  </ul>
@@ -1371,12 +1429,14 @@
1371
1429
  <h4>次のトピックへ</h4>
1372
1430
  <p class="topless"><a href="token_filters.html"
1373
1431
  title="次の章へ">7.9. トークンフィルター</a></p>
1374
- <h3>このページ</h3>
1375
- <ul class="this-page-menu">
1376
- <li><a href="../_sources/reference/tokenizers.txt"
1377
- rel="nofollow">ソースコードを表示</a></li>
1378
- </ul>
1379
- <div id="searchbox" style="display: none">
1432
+ <div role="note" aria-label="source link">
1433
+ <h3>このページ</h3>
1434
+ <ul class="this-page-menu">
1435
+ <li><a href="../_sources/reference/tokenizers.txt"
1436
+ rel="nofollow">ソースコードを表示</a></li>
1437
+ </ul>
1438
+ </div>
1439
+ <div id="searchbox" style="display: none" role="search">
1380
1440
  <h3>クイック検索</h3>
1381
1441
  <form class="search" action="../search.html" method="get">
1382
1442
  <input type="text" name="q" />
@@ -1393,7 +1453,7 @@
1393
1453
  </div>
1394
1454
  <div class="clearer"></div>
1395
1455
  </div>
1396
- <div class="related">
1456
+ <div class="related" role="navigation" aria-label="related navigation">
1397
1457
  <h3>ナビゲーション</h3>
1398
1458
  <ul>
1399
1459
  <li class="right" style="margin-right: 10px">
@@ -1405,11 +1465,11 @@
1405
1465
  <li class="right" >
1406
1466
  <a href="normalizers.html" title="7.7. ノーマライザー"
1407
1467
  >前へ</a> |</li>
1408
- <li><a href="../index.html">Groonga v5.0.4-139-g6629adbドキュメント</a> &raquo;</li>
1409
- <li><a href="../reference.html" >7. リファレンスマニュアル</a> &raquo;</li>
1468
+ <li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7ドキュメント</a> &raquo;</li>
1469
+ <li class="nav-item nav-item-1"><a href="../reference.html" >7. リファレンスマニュアル</a> &raquo;</li>
1410
1470
  </ul>
1411
1471
  </div>
1412
- <div class="footer">
1472
+ <div class="footer" role="contentinfo">
1413
1473
  &copy; Copyright 2009-2015, Brazil, Inc.
1414
1474
  </div>
1415
1475
  </body>