rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (733)
  1. checksums.yaml +8 -8
  2. data/.yardopts +1 -0
  3. data/Rakefile +1 -16
  4. data/example/bookmark.rb +1 -6
  5. data/example/index-html.rb +0 -1
  6. data/ext/groonga/extconf.rb +4 -7
  7. data/ext/groonga/rb-grn-array.c +1 -1
  8. data/ext/groonga/rb-grn-column.c +33 -67
  9. data/ext/groonga/rb-grn-context.c +5 -5
  10. data/ext/groonga/rb-grn-database.c +2 -2
  11. data/ext/groonga/rb-grn-double-array-trie.c +4 -2
  12. data/ext/groonga/rb-grn-encoding-support.c +7 -1
  13. data/ext/groonga/rb-grn-equal-operator.c +85 -0
  14. data/ext/groonga/rb-grn-exception.c +17 -0
  15. data/ext/groonga/rb-grn-expression.c +85 -43
  16. data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
  17. data/ext/groonga/rb-grn-greater-operator.c +85 -0
  18. data/ext/groonga/rb-grn-hash.c +1 -1
  19. data/ext/groonga/rb-grn-index-column.c +150 -11
  20. data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
  21. data/ext/groonga/rb-grn-less-operator.c +85 -0
  22. data/ext/groonga/rb-grn-logger.c +5 -5
  23. data/ext/groonga/rb-grn-match-operator.c +86 -0
  24. data/ext/groonga/rb-grn-normalizer.c +8 -1
  25. data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
  26. data/ext/groonga/rb-grn-object.c +170 -36
  27. data/ext/groonga/rb-grn-operator.c +395 -172
  28. data/ext/groonga/rb-grn-patricia-trie.c +10 -8
  29. data/ext/groonga/rb-grn-plugin.c +51 -3
  30. data/ext/groonga/rb-grn-prefix-operator.c +86 -0
  31. data/ext/groonga/rb-grn-procedure-type.c +4 -0
  32. data/ext/groonga/rb-grn-query-logger.c +4 -4
  33. data/ext/groonga/rb-grn-regexp-operator.c +85 -0
  34. data/ext/groonga/rb-grn-snippet.c +1 -1
  35. data/ext/groonga/rb-grn-table-key-support.c +9 -5
  36. data/ext/groonga/rb-grn-table.c +52 -66
  37. data/ext/groonga/rb-grn-type.c +1 -1
  38. data/ext/groonga/rb-grn-utils.c +22 -3
  39. data/ext/groonga/rb-grn.h +31 -4
  40. data/ext/groonga/rb-groonga.c +9 -9
  41. data/lib/1.9/groonga.so +0 -0
  42. data/lib/2.0/groonga.so +0 -0
  43. data/lib/2.1/groonga.so +0 -0
  44. data/lib/2.2/groonga.so +0 -0
  45. data/lib/groonga/context.rb +31 -0
  46. data/lib/groonga/expression-builder.rb +14 -1
  47. data/lib/groonga/record.rb +10 -8
  48. data/lib/groonga/schema.rb +3 -1
  49. data/rroonga-build.rb +2 -2
  50. data/rroonga.gemspec +3 -3
  51. data/test/groonga-test-utils.rb +4 -0
  52. data/test/test-column.rb +28 -26
  53. data/test/test-exception.rb +1 -0
  54. data/test/test-expression-builder.rb +83 -1
  55. data/test/test-expression.rb +80 -48
  56. data/test/test-index-column.rb +102 -29
  57. data/test/test-normalizer.rb +35 -29
  58. data/test/test-operator.rb +214 -0
  59. data/test/test-plugin.rb +24 -6
  60. data/test/test-procedure.rb +29 -0
  61. data/test/test-schema-type.rb +14 -0
  62. data/test/test-table-select-mecab.rb +1 -4
  63. data/test/test-table.rb +7 -0
  64. data/test/test-token-regexp.rb +30 -0
  65. data/test/test-type.rb +24 -0
  66. data/vendor/local/bin/grndb.exe +0 -0
  67. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  68. data/vendor/local/bin/groonga.exe +0 -0
  69. data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
  70. data/vendor/local/bin/libgroonga-0.dll +0 -0
  71. data/vendor/local/bin/libmecab-1.dll +0 -0
  72. data/vendor/local/bin/libmsgpack-3.dll +0 -0
  73. data/vendor/local/bin/libmsgpackc-2.dll +0 -0
  74. data/vendor/local/bin/libonig-5.dll +0 -0
  75. data/vendor/local/bin/libstdc++-6.dll +0 -0
  76. data/vendor/local/bin/lz4.exe +0 -0
  77. data/vendor/local/bin/lz4c.exe +0 -0
  78. data/vendor/local/bin/lz4cat +0 -0
  79. data/vendor/local/bin/mecab-config +2 -2
  80. data/vendor/local/bin/mecab.exe +0 -0
  81. data/vendor/local/bin/onig-config +1 -1
  82. data/vendor/local/bin/zlib1.dll +0 -0
  83. data/vendor/local/etc/groonga/groonga.conf +1 -1
  84. data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
  85. data/vendor/local/include/groonga/groonga.h +1 -0
  86. data/vendor/local/include/groonga/groonga/expr.h +2 -0
  87. data/vendor/local/include/groonga/groonga/groonga.h +32 -5
  88. data/vendor/local/include/groonga/groonga/ii.h +7 -0
  89. data/vendor/local/include/groonga/groonga/obj.h +37 -0
  90. data/vendor/local/include/groonga/groonga/scorer.h +95 -0
  91. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  92. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  93. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  94. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
  95. data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
  96. data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
  97. data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
  98. data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
  99. data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
  100. data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
  101. data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
  102. data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
  103. data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
  104. data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
  105. data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
  106. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  107. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  108. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  109. data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
  110. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  111. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  112. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  113. data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
  114. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  115. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  116. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  117. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
  118. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  119. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  120. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  121. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
  122. data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
  123. data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
  124. data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
  125. data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
  126. data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
  127. data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
  128. data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
  129. data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
  130. data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
  131. data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
  132. data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
  133. data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
  134. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
  135. data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
  136. data/vendor/local/lib/libgroonga.a +0 -0
  137. data/vendor/local/lib/libgroonga.dll.a +0 -0
  138. data/vendor/local/lib/libgroonga.la +2 -2
  139. data/vendor/local/lib/liblz4.a +0 -0
  140. data/vendor/local/lib/liblz4.dll +0 -0
  141. data/vendor/local/lib/liblz4.dll.1 +0 -0
  142. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  143. data/vendor/local/lib/libmecab.a +0 -0
  144. data/vendor/local/lib/libmecab.dll.a +0 -0
  145. data/vendor/local/lib/libmecab.la +2 -2
  146. data/vendor/local/lib/libmsgpack.a +0 -0
  147. data/vendor/local/lib/libmsgpack.dll.a +0 -0
  148. data/vendor/local/lib/libmsgpack.la +2 -2
  149. data/vendor/local/lib/libmsgpackc.a +0 -0
  150. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  151. data/vendor/local/lib/libmsgpackc.la +2 -2
  152. data/vendor/local/lib/libonig.a +0 -0
  153. data/vendor/local/lib/libonig.dll.a +0 -0
  154. data/vendor/local/lib/libonig.la +2 -2
  155. data/vendor/local/lib/libz.a +0 -0
  156. data/vendor/local/lib/libz.dll.a +0 -0
  157. data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
  158. data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
  159. data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
  160. data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
  161. data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
  162. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  163. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  164. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  165. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  166. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  167. data/vendor/local/sbin/groonga-httpd-restart +1 -1
  168. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  169. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  170. data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
  171. data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
  172. data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
  173. data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
  174. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  175. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
  176. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
  177. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  178. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
  179. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  180. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
  181. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  182. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
  183. data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
  184. data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
  185. data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
  186. data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
  187. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
  188. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
  189. data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
  190. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
  191. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
  192. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
  193. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
  194. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
  195. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
  196. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
  197. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
  198. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  199. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
  200. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
  201. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  202. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
  203. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
  204. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
  205. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
  206. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
  207. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
  208. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
  209. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
  210. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
  211. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
  212. data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
  213. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
  214. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
  215. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
  216. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
  217. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
  218. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
  219. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
  220. data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
  221. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  222. data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  223. data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
  224. data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
  225. data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
  226. data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
  227. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
  228. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  229. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  230. data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
  231. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
  232. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
  233. data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
  234. data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
  235. data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
  236. data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  237. data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
  238. data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
  239. data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
  240. data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
  241. data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
  242. data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
  243. data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
  244. data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
  245. data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
  246. data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
  247. data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
  248. data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
  249. data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
  250. data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
  251. data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
  252. data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
  253. data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
  254. data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
  255. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
  256. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
  257. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
  258. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
  259. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
  260. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
  261. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
  262. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
  263. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
  264. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
  265. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
  266. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
  267. data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
  268. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
  269. data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
  270. data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
  271. data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
  272. data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
  273. data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
  274. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
  275. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
  276. data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
  277. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
  278. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
  279. data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
  280. data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
  281. data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
  282. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
  283. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
  284. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
  285. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
  286. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
  287. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
  288. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
  289. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
  290. data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
  291. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  292. data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
  293. data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
  294. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
  295. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
  296. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
  297. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
  298. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
  299. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
  300. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
  301. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
  302. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
  303. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
  304. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
  305. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
  306. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
  307. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
  308. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
  309. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
  310. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
  311. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
  312. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
  313. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
  314. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
  315. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
  316. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
  317. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
  318. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
  319. data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
  320. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
  321. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
  322. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
  323. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
  324. data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
  325. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
  326. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
  327. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
  328. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
  329. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
  330. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
  331. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
  332. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
  333. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
  334. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
  335. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
  336. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
  337. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
  338. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
  339. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
  340. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
  341. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
  342. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
  343. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
  344. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
  345. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
  346. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
  347. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
  348. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
  349. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
  350. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
  351. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
  352. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
  353. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
  354. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
  355. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
  356. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
  357. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
  358. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
  359. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
  360. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
  361. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
  362. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
  363. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
  364. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
  365. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
  366. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
  367. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
  368. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
  369. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
  370. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
  371. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
  372. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
  373. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
  374. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  375. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
  376. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
  377. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
  378. data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
  379. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
  380. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
  381. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
  382. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
  383. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
  384. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
  385. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
  386. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
  387. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
  388. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
  389. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
  390. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
  391. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
  392. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
  393. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
  394. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
  395. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
  396. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
  397. data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
  398. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
  399. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
  400. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
  401. data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
  402. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
  403. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
  404. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
  405. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
  406. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
  407. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
  408. data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
  409. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
  410. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
  411. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
  412. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
  413. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
  414. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
  415. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
  416. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
  417. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
  418. data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
  419. data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
  420. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  421. data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
  422. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
  423. data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
  424. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
  425. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
  426. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
  427. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
  428. data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
  429. data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
  430. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
  431. data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
  432. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
  433. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  434. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  435. data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
  436. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
  437. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
  438. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
  439. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
  440. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
  441. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
  442. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
  443. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
  444. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
  445. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
  446. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
  447. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  448. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
  449. data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
  450. data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
  451. data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
  452. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  453. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
  454. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
  455. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  456. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
  457. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  458. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
  459. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  460. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
  461. data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
  462. data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
  463. data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
  464. data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
  465. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
  466. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
  467. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
  468. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
  469. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
  470. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
  471. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
  472. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
  473. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
  474. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
  475. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
  476. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
  477. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
  478. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
  479. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
  480. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
  481. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
  482. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
  483. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
  484. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
  485. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
  486. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
  487. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
  488. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
  489. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
  490. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
  491. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
  492. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
  493. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
  494. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
  495. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
  496. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
  497. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
  498. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
  499. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
  500. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
  501. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
  502. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
  503. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
  504. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
  505. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
  506. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
  507. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
  508. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
  509. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
  510. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
  511. data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
  512. data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
  513. data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
  514. data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
  515. data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
  516. data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
  517. data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
  518. data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
  519. data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
  520. data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
  521. data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
  522. data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
  523. data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
  524. data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
  525. data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
  526. data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
  527. data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
  528. data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
  529. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
  530. data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
  531. data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
  532. data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
  533. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
  534. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
  535. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
  536. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
  537. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
  538. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
  539. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
  540. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
  541. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
  542. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
  543. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
  544. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
  545. data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
  546. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
  547. data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
  548. data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
  549. data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
  550. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
  551. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
  552. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
  553. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
  554. data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
  555. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
  556. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
  557. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
  558. data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
  559. data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
  560. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
  561. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
  562. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
  563. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
  564. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
  565. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
  566. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
  567. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
  568. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
  569. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  570. data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
  571. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
  572. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
  573. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
  574. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
  575. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
  576. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
  577. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
  578. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
  579. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
  580. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
  581. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
  582. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
  583. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
  584. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
  585. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
  586. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
  587. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
  588. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
  589. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
  590. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
  591. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
  592. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
  593. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
  594. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
  595. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
  596. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
  597. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
  598. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
  599. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
  600. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
  601. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
  602. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
  603. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
  604. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
  605. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
  606. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
  607. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
  608. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
  609. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
  610. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
  611. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
  612. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
  613. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
  614. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
  615. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
  616. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
  617. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
  618. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
  619. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
  620. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
  621. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
  622. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
  623. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
  624. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
  625. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
  626. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
  627. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
  628. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
  629. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
  630. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
  631. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
  632. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
  633. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
  634. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
  635. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
  636. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
  637. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
  638. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
  639. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
  640. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
  641. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
  642. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
  643. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
  644. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
  645. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
  646. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
  647. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
  648. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
  649. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
  650. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
  651. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
  652. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
  653. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
  654. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
  655. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
  656. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
  657. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
  658. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
  659. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
  660. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
  661. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
  662. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
  663. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
  664. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
  665. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
  666. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
  667. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
  668. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
  669. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
  670. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
  671. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
  672. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
  673. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
  674. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
  675. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
  676. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
  677. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
  678. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
  679. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
  680. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
  681. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
  682. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
  683. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
  684. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
  685. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
  686. data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
  687. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
  688. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
  689. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
  690. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
  691. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
  692. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
  693. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
  694. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
  695. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
  696. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
  697. data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
  698. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  699. data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
  700. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
  701. data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
  702. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
  703. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
  704. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
  705. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
  706. data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
  707. data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
  708. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
  709. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
  710. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
  711. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
  712. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
  713. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
  714. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
  715. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
  716. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
  717. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
  718. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
  719. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
  720. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
  721. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
  722. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
  723. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
  724. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
  725. data/vendor/local/share/license/mruby/README.md +2 -2
  726. data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
  727. data/vendor/local/share/man/man1/groonga.1 +7210 -3029
  728. metadata +75 -11
  729. data/doc/text/news.textile +0 -1217
  730. data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
  731. data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
  732. data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
  733. data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
@@ -0,0 +1,217 @@
1
+ .. -*- rst -*-
2
+
3
+ .. highlightlang:: none
4
+
5
+ .. groonga-command
6
+ .. database: scorer
7
+
8
+ Scorer
9
+ ======
10
+
11
+ Summary
12
+ -------
13
+
14
+ Groonga has a scorer module that customizes the score function. Score
15
+ function computes score of matched record. The default scorer function
16
+ uses the number of appeared terms. It is also known as TF (term
17
+ frequency).
18
+
19
+ TF is a fast score function but it's not suitable for the following
20
+ cases:
21
+
22
+ * Search query contains one or more frequently-appearing words such
23
+ as "the" and "a".
24
+ * Document contains many same keywords such as "They are keyword,
25
+ keyword, keyword ... and keyword". Search engine spammer may use
26
+ the technique.
27
+
28
+ Score function can solve these cases. For example, `TF-IDF
29
+ <http://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_ (term
30
+ frequency-inverse document frequency) can solve the first case.
31
+ `Okapi BM25 <http://en.wikipedia.org/wiki/Okapi_BM25>`_ can solve the
32
+ second case. But they are slower than TF.
33
+
34
+ Groonga provides TF-IDF based scorer as
35
+ :doc:`/reference/scorers/scorer_tf_idf` but doesn't provide Okapi BM25
36
+ based scorer yet.
37
+
38
+ .. include:: scoring_note.rst
39
+
40
+ Usage
41
+ -----
42
+
43
+ This section describes how to use scorer.
44
+
45
+ Here are a schema definition and sample data to show usage.
46
+
47
+ Sample schema:
48
+
49
+ .. groonga-command
50
+ .. include:: ../example/reference/scorer/usage_setup_schema.log
51
+ .. table_create Memos TABLE_HASH_KEY ShortText
52
+ .. column_create Memos title COLUMN_SCALAR ShortText
53
+ .. column_create Memos content COLUMN_SCALAR Text
54
+ ..
55
+ .. table_create Terms TABLE_PAT_KEY ShortText \
56
+ .. --default_tokenizer TokenBigram \
57
+ .. --normalizer NormalizerAuto
58
+ .. column_create Terms title_index COLUMN_INDEX|WITH_POSITION Memos title
59
+ .. column_create Terms content_index COLUMN_INDEX|WITH_POSITION Memos content
60
+
61
+ Sample data:
62
+
63
+ .. groonga-command
64
+ .. include:: ../example/reference/scorer/usage_setup_data.log
65
+ .. load --table Memos
66
+ .. [
67
+ .. {
68
+ .. "_key": "memo1",
69
+ .. "title": "Groonga is easy",
70
+ .. "content": "Groonga is very easy full text search engine!"
71
+ .. },
72
+ .. {
73
+ .. "_key": "memo2",
74
+ .. "title": "Mroonga is easy",
75
+ .. "content": "Mroonga is more easier full text search engine!"
76
+ .. },
77
+ .. {
78
+ .. "_key": "memo3",
79
+ .. "title": "Rroonga is easy",
80
+ .. "content": "Ruby is very helpful."
81
+ .. },
82
+ .. {
83
+ .. "_key": "memo4",
84
+ .. "title": "Groonga is fast",
85
+ .. "content": "Groonga! Groonga! Groonga! Groonga is very fast!"
86
+ .. },
87
+ .. {
88
+ .. "_key": "memo5",
89
+ .. "title": "PGroonga is fast",
90
+ .. "content": "PGroonga is very fast!"
91
+ .. },
92
+ .. {
93
+ .. "_key": "memo6",
94
+ .. "title": "PGroonga is useful",
95
+ .. "content": "SQL is easy because many client libraries exist."
96
+ .. },
97
+ .. {
98
+ .. "_key": "memo7",
99
+ .. "title": "Mroonga is also useful",
100
+ .. "content": "MySQL has replication feature. Mroonga can use it."
101
+ .. }
102
+ .. ]
103
+
104
+ You can specify custom score function in :ref:`select-match-columns`.
105
+ There are some syntaxes.
106
+
107
+ For score function that doesn't require any parameter such as
108
+ :doc:`/reference/scorers/scorer_tf_idf`::
109
+
110
+ SCORE_FUNCTION(COLUMN)
111
+
112
+ You can specify weight::
113
+
114
+ SCORE_FUNCTION(COLUMN) * WEIGHT
115
+
116
+ For score function that requires one or more parameters such as
117
+ :doc:`/reference/scorers/scorer_tf_at_most`::
118
+
119
+ SCORE_FUNCTION(COLUMN, ARGUMENT1, ARGUMENT2, ...)
120
+
121
+ You can specify weight::
122
+
123
+ SCORE_FUNCTION(COLUMN, ARGUMENT1, ARGUMENT2, ...) * WEIGHT
124
+
125
+ You can use different score function for each match column::
126
+
127
+ SCORE_FUNCTION1(COLUMN1) ||
128
+ SCORE_FUNCTION2(COLUMN2) * WEIGHT ||
129
+ SCORE_FUNCTION3(COLUMN3, ARGUMENT1) ||
130
+ ...
131
+
132
+ Here is the simplest example:
133
+
134
+ .. groonga-command
135
+ .. include:: ../example/reference/scorer/usage_one_no_argument_no_weight.log
136
+ .. select Memos \
137
+ .. --match_columns "scorer_tf_idf(content)" \
138
+ .. --query "Groonga" \
139
+ .. --output_columns "content, _score" \
140
+ .. --sortby "-_score"
141
+
142
+ ``Groonga! Groonga! Groonga! Groonga is very fast!`` contains 4
143
+ ``Groonga``. If you use TF based scorer that is the default scorer,
144
+ ``_score`` is ``4``. But the actual ``_score`` is ``2``. Because the
145
+ ``select`` command uses TF-IDF based scorer ``scorer_tf_idf()``.
146
+
147
+ Here is an example that uses weight:
148
+
149
+ .. groonga-command
150
+ .. include:: ../example/reference/scorer/usage_one_no_argument_weight.log
151
+ .. select Memos \
152
+ .. --match_columns "scorer_tf_idf(content) * 10" \
153
+ .. --query "Groonga" \
154
+ .. --output_columns "content, _score" \
155
+ .. --sortby "-_score"
156
+
157
+ ``Groonga! Groonga! Groonga! Groonga is very fast!`` has ``22`` as
158
+ ``_score``. It had ``2`` as ``_score`` in the previous example that
159
+ doesn't specify weight.
160
+
161
+ Here is an example that uses scorer that requires one
162
+ argument. :doc:`/reference/scorers/scorer_tf_at_most` scorer requires
163
+ one argument. You can limit TF score by the scorer.
164
+
165
+ .. groonga-command
166
+ .. include:: ../example/reference/scorer/usage_one_one_argument_no_weight.log
167
+ .. select Memos \
168
+ .. --match_columns "scorer_tf_at_most(content, 2.0)" \
169
+ .. --query "Groonga" \
170
+ .. --output_columns "content, _score" \
171
+ .. --sortby "-_score"
172
+
173
+ ``Groonga! Groonga! Groonga! Groonga is very fast!`` contains 4
174
+ ``Groonga``. If you use normal TF based scorer that is the default
175
+ scorer, ``_score`` is ``4``. But the actual ``_score`` is ``2``.
176
+ Because the scorer used in the ``select`` command limits the maximum
177
+ score value to ``2``.
178
+
179
+ Here is an example that uses multiple scorers:
180
+
181
+ .. groonga-command
182
+ .. include:: ../example/reference/scorer/usage_multiple_scorers.log
183
+ .. select Memos \
184
+ .. --match_columns "scorer_tf_idf(title) || scorer_tf_at_most(content, 2.0)" \
185
+ .. --query "Groonga" \
186
+ .. --output_columns "title, content, _score" \
187
+ .. --sortby "-_score"
188
+
189
+ The ``--match_columns`` uses ``scorer_tf_idf(title)`` and
190
+ ``scorer_tf_at_most(content, 2.0)``. ``_score`` value is sum of them.
191
+
192
+ You can use the default scorer and custom scorer in the same
193
+ ``--match_columns``. You can use the default scorer by just specifying
194
+ a match column:
195
+
196
+ .. groonga-command
197
+ .. include:: ../example/reference/scorer/usage_default_and_custom_scorers.log
198
+ .. select Memos \
199
+ .. --match_columns "title || scorer_tf_at_most(content, 2.0)" \
200
+ .. --query "Groonga" \
201
+ .. --output_columns "title, content, _score" \
202
+ .. --sortby "-_score"
203
+
204
+ The ``--match_columns`` uses the default scorer (TF) for ``title`` and
205
+ :doc:`/reference/scorers/scorer_tf_at_most` for
206
+ ``content``. ``_score`` value is sum of them.
207
+
208
+ Built-in scorers
209
+ ----------------
210
+
211
+ Here are built-in scorers:
212
+
213
+ .. toctree::
214
+ :maxdepth: 1
215
+ :glob:
216
+
217
+ scorers/*
@@ -0,0 +1,22 @@
1
+ .. -*- rst -*-
2
+
3
+ .. highlightlang:: none
4
+
5
+ .. groonga-command
6
+ .. database: scorer_tf_at_most
7
+
8
+ ``scorer_tf_at_most``
9
+ =====================
10
+
11
+ .. note::
12
+
13
+ This scorer is an experimental feature.
14
+
15
+ .. versionadded:: 5.0.1
16
+
17
+ Summary
18
+ -------
19
+
20
+ ``scorer_tf_at_most`` is a scorer based on TF (term frequency).
21
+
22
+ TODO
@@ -0,0 +1,110 @@
1
+ .. -*- rst -*-
2
+
3
+ .. highlightlang:: none
4
+
5
+ .. groonga-command
6
+ .. database: scorer_tf_idf
7
+
8
+ ``scorer_tf_idf``
9
+ =================
10
+
11
+ .. note::
12
+
13
+ This scorer is an experimental feature.
14
+
15
+ .. versionadded:: 5.0.1
16
+
17
+ Summary
18
+ -------
19
+
20
+ ``scorer_tf_idf`` is a scorer based on `TF-IDF
21
+ <http://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_ (term
22
+ frequency-inverse document frequency) score function.
23
+
24
+ To put it simply, TF (term frequency) divided by DF (document
25
+ frequency) is TF-IDF. "TF" means that "the number of occurrences is
26
+ more important". "TF divided by DF" means that "the number of
27
+ occurrences of important term is more important".
28
+
29
+ The default score function in Groonga is TF (term frequency). It
30
+ doesn't care about term importance but is fast.
31
+
32
+ TF-IDF cares about term importance but is slower than TF.
33
+
34
+ TF-IDF computes a more suitable score than TF in many cases.
35
+ But it's not perfect.
36
+
37
+ If document contains many same keywords such as "They are keyword,
38
+ keyword, keyword ... and keyword". It increases score by TF and
39
+ TF-IDF. Search engine spammer may use the technique. But TF-IDF
40
+ doesn't guard from the technique.
41
+
42
+ `Okapi BM25 <http://en.wikipedia.org/wiki/Okapi_BM25>`_ can solve the
43
+ case. But it's slower than TF-IDF and not implemented yet in
44
+ Groonga.
45
+
46
+ .. include:: ../scoring_note.rst
47
+
48
+ Usage
49
+ -----
50
+
51
+ This section describes how to use this scorer.
52
+
53
+ Here are a schema definition and sample data to show usage.
54
+
55
+ Sample schema:
56
+
57
+ .. groonga-command
58
+ .. include:: ../example/reference/scorers/scorer_tf_idf/usage_setup_schema.log
59
+ .. table_create Logs TABLE_NO_KEY
60
+ .. column_create Logs message COLUMN_SCALAR Text
61
+ ..
62
+ .. table_create Terms TABLE_PAT_KEY ShortText \
63
+ .. --default_tokenizer TokenBigram \
64
+ .. --normalizer NormalizerAuto
65
+ .. column_create Terms message_index COLUMN_INDEX|WITH_POSITION Logs message
66
+
67
+ Sample data:
68
+
69
+ .. groonga-command
70
+ .. include:: ../example/reference/scorers/scorer_tf_idf/usage_setup_data.log
71
+ .. load --table Logs
72
+ .. [
73
+ .. {"message": "Error"},
74
+ .. {"message": "Warning"},
75
+ .. {"message": "Warning Warning"},
76
+ .. {"message": "Warning Warning Warning"},
77
+ .. {"message": "Info"},
78
+ .. {"message": "Info Info"},
79
+ .. {"message": "Info Info Info"},
80
+ .. {"message": "Info Info Info Info"},
81
+ .. {"message": "Notice"},
82
+ .. {"message": "Notice Notice"},
83
+ .. {"message": "Notice Notice Notice"},
84
+ .. {"message": "Notice Notice Notice Notice"},
85
+ .. {"message": "Notice Notice Notice Notice Notice"}
86
+ .. ]
87
+
88
+ You specify ``scorer_tf_idf`` in :ref:`select-match-columns` like the
89
+ following:
90
+
91
+ .. groonga-command
92
+ .. include:: ../example/reference/scorers/scorer_tf_idf/usage_no_weight.log
93
+ .. select Logs \
94
+ .. --match_columns "scorer_tf_idf(message)" \
95
+ .. --query "Error OR Info" \
96
+ .. --output_columns "message, _score" \
97
+ .. --sortby "-_score"
98
+
99
+ Both the score of ``Info Info Info`` and the score of ``Error`` are
100
+ ``2`` even though ``Info Info Info`` includes three ``Info`` terms. Because
101
+ ``Error`` is a more important term than ``Info``. The number of
102
+ documents that include ``Info`` is ``4``. The number of documents that
103
+ include ``Error`` is ``1``. A term that is included in fewer documents
104
+ means that the term is more characteristic term. Characteristic term
105
+ is important term.
106
+
107
+ See also
108
+ --------
109
+
110
+ * :doc:`../scorer`
@@ -0,0 +1,13 @@
1
+ .. _note:
2
+
3
+ You don't need to resolve scoring only by score function. Score
4
+ function is highly depends on search query. You may be able to use
5
+ metadata of matched record.
6
+
7
+ For example, Google uses `PageRank
8
+ <http://en.wikipedia.org/wiki/PageRank>`_ for scoring. You may be
9
+ able to use data type ("title" data are important rather than
10
+ "memo" data), tag, geolocation and so on.
11
+
12
+ Please stop to think about only score function for scoring.
13
+
@@ -69,6 +69,8 @@ prefix is omitted in the table.)
69
69
  | search | | | | |
70
70
  +--------------+------------+--------------+-------------+-------------+
71
71
 
72
+ .. _table-no-key:
73
+
72
74
  ``TABLE_NO_KEY``
73
75
  ^^^^^^^^^^^^^^^^
74
76
 
@@ -79,6 +81,8 @@ You cannot use ``TABLE_NO_KEY`` for lexicon for fulltext search
79
81
  because lexicon stores tokens as key. ``TABLE_NO_KEY`` is useful for
80
82
  no key records such as log.
81
83
 
84
+ .. _table-hash-key:
85
+
82
86
  ``TABLE_HASH_KEY``
83
87
  ^^^^^^^^^^^^^^^^^^
84
88
 
@@ -88,6 +92,8 @@ functions such as common prefix search and predictive search.
88
92
  ``TABLE_HASH_KEY`` is useful for index for exact search such as tag
89
93
  search.
90
94
 
95
+ .. _table-pat-key:
96
+
91
97
  ``TABLE_PAT_KEY``
92
98
  ^^^^^^^^^^^^^^^^^
93
99
 
@@ -96,6 +102,8 @@ search.
96
102
  ``TABLE_PAT_KEY`` is useful for lexicon for fulltext search and
97
103
  index for range search.
98
104
 
105
+ .. _table-dat-key:
106
+
99
107
  ``TABLE_DAT_KEY``
100
108
  ^^^^^^^^^^^^^^^^^
101
109
 
@@ -2,23 +2,537 @@
2
2
 
3
3
  .. highlightlang:: none
4
4
 
5
+ .. groonga-command
6
+ .. database: tokenizers
7
+
5
8
  Tokenizers
6
9
  ==========
7
10
 
8
- TODO: Write me.
9
-
10
- Here are the list of built-in tokenizers:
11
-
12
- * TokenBigram
13
- * TokenBigramSplitSymbol
14
- * TokenBigramSplitSymbolAlpha
15
- * TokenBigramSplitSymbolAlphaDigit
16
- * TokenBigramIgnoreBlank
17
- * TokenBigramIgnoreBlankSplitSymbol
18
- * TokenBigramIgnoreBlankSplitAlpha
19
- * TokenBigramIgnoreBlankSplitAlphaDigit
20
- * TokenDelimit
21
- * TokenDelimitNull
22
- * TokenTrigram
23
- * TokenUnigram
11
+ Summary
12
+ -------
13
+
14
+ Groonga has a tokenizer module that tokenizes text. It is used in
15
+ the following cases:
16
+
17
+ * Indexing text
18
+
19
+ .. figure:: /images/reference/tokenizers/used-when-indexing.png
20
+ :align: center
21
+ :width: 80%
22
+
23
+ Tokenizer is used when indexing text.
24
+
25
+ * Searching by query
26
+
27
+ .. figure:: /images/reference/tokenizers/used-when-searching.png
28
+ :align: center
29
+ :width: 80%
30
+
31
+ Tokenizer is used when searching by query.
32
+
33
+ Tokenizer is an important module for full-text search. You can change
34
+ trade-off between `precision and recall
35
+ <http://en.wikipedia.org/wiki/Precision_and_recall>`_ by changing
36
+ tokenizer.
37
+
38
+ Normally, :ref:`token-bigram` is a suitable tokenizer. If you don't
39
+ know much about tokenizer, it's recommended that you choose
40
+ :ref:`token-bigram`.
41
+
42
+ You can try a tokenizer by :doc:`/reference/commands/tokenize` and
43
+ :doc:`/reference/commands/table_tokenize`. Here is an example to
44
+ try :ref:`token-bigram` tokenizer by
45
+ :doc:`/reference/commands/tokenize`:
46
+
47
+ .. groonga-command
48
+ .. include:: ../example/reference/tokenizers/tokenize-example.log
49
+ .. tokenize TokenBigram "Hello World"
50
+
51
+ What is "tokenize"?
52
+ -------------------
53
+
54
+ "tokenize" is the process that extracts zero or more tokens from a
55
+ text. There are some "tokenize" methods.
56
+
57
+ For example, ``Hello World`` is tokenized to the following tokens by
58
+ bigram tokenize method:
59
+
60
+ * ``He``
61
+ * ``el``
62
+ * ``ll``
63
+ * ``lo``
64
+ * ``o_`` (``_`` means a white-space)
65
+ * ``_W`` (``_`` means a white-space)
66
+ * ``Wo``
67
+ * ``or``
68
+ * ``rl``
69
+ * ``ld``
70
+
71
+ In the above example, 10 tokens are extracted from one text ``Hello
72
+ World``.
73
+
74
+ For example, ``Hello World`` is tokenized to the following tokens by
75
+ white-space-separate tokenize method:
76
+
77
+ * ``Hello``
78
+ * ``World``
79
+
80
+ In the above example, 2 tokens are extracted from one text ``Hello
81
+ World``.
82
+
83
+ Token is used as search key. You can find indexed documents only by
84
+ tokens that are extracted by used tokenize method. For example, you
85
+ can find ``Hello World`` by ``ll`` with bigram tokenize method but you
86
+ can't find ``Hello World`` by ``ll`` with white-space-separate tokenize
87
+ method. Because white-space-separate tokenize method doesn't extract
88
+ ``ll`` token. It just extracts ``Hello`` and ``World`` tokens.
89
+
90
+ In general, tokenize method that generates small tokens increases
91
+ recall but decreases precision. Tokenize method that generates large
92
+ tokens increases precision but decreases recall.
93
+
94
+ For example, we can find ``Hello World`` and ``A or B`` by ``or`` with
95
+ bigram tokenize method. ``Hello World`` is a noise for people who
96
+ want to search "logical and". It means that precision is
97
+ decreased. But recall is increased.
98
+
99
+ We can find only ``A or B`` by ``or`` with white-space-separate
100
+ tokenize method. Because ``World`` is tokenized to one token ``World``
101
+ with white-space-separate tokenize method. It means that precision is
102
+ increased for people who want to search "logical and". But recall is
103
+ decreased because ``Hello World`` that contains ``or`` isn't found.
104
+
105
+ Built-in tokenizers
106
+ --------------------
107
+
108
+ Here is a list of built-in tokenizers:
109
+
110
+ * ``TokenBigram``
111
+ * ``TokenBigramSplitSymbol``
112
+ * ``TokenBigramSplitSymbolAlpha``
113
+ * ``TokenBigramSplitSymbolAlphaDigit``
114
+ * ``TokenBigramIgnoreBlank``
115
+ * ``TokenBigramIgnoreBlankSplitSymbol``
116
+ * ``TokenBigramIgnoreBlankSplitSymbolAlpha``
117
+ * ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit``
118
+ * ``TokenUnigram``
119
+ * ``TokenTrigram``
120
+ * ``TokenDelimit``
121
+ * ``TokenDelimitNull``
122
+ * ``TokenMecab``
123
+ * ``TokenRegexp``
124
+
125
+ .. _token-bigram:
126
+
127
+ ``TokenBigram``
128
+ ^^^^^^^^^^^^^^^
129
+
130
+ ``TokenBigram`` is a bigram based tokenizer. It's recommended to use
131
+ this tokenizer for most cases.
132
+
133
+ Bigram tokenize method tokenizes a text to two adjacent characters
134
+ tokens. For example, ``Hello`` is tokenized to the following tokens:
135
+
136
+ * ``He``
137
+ * ``el``
138
+ * ``ll``
139
+ * ``lo``
140
+
141
+ Bigram tokenize method is good for recall because you can find all
142
+ texts by a query that consists of two or more characters.
143
+
144
+ In general, you can't find all texts by a query that consists of one
145
+ character because one character token doesn't exist. But you can find
146
+ all texts by a query that consists of one character in Groonga. Because
147
+ Groonga finds tokens that start with the query by predictive search. For
148
+ example, Groonga can find ``ll`` and ``lo`` tokens by ``l`` query.
149
+
150
+ Bigram tokenize method isn't good for precision because you can find
151
+ texts that include the query inside a word. For example, you can find ``world``
152
+ by ``or``. This is more sensitive for ASCII only languages rather than
153
+ non-ASCII languages. ``TokenBigram`` has solution for this problem
154
+ described below.
155
+
156
+ ``TokenBigram`` behavior is different when it works with any
157
+ :doc:`/reference/normalizers`.
158
+
159
+ If no normalizer is used, ``TokenBigram`` uses pure bigram (all tokens
160
+ except the last token have two characters) tokenize method:
161
+
162
+ .. groonga-command
163
+ .. include:: ../example/reference/tokenizers/token-bigram-no-normalizer.log
164
+ .. tokenize TokenBigram "Hello World"
165
+
166
+ If normalizer is used, ``TokenBigram`` uses white-space-separate like
167
+ tokenize method for ASCII characters. ``TokenBigram`` uses bigram
168
+ tokenize method for non-ASCII characters.
169
+
170
+ You may be confused with this combined behavior. But it's reasonable
171
+ for most use cases such as English text (only ASCII characters) and
172
+ Japanese text (ASCII and non-ASCII characters are mixed).
173
+
174
+ Most languages that consist of only ASCII characters use white-space for
175
+ word separator. White-space-separate tokenize method is suitable for
176
+ the case.
177
+
178
+ Languages that consist of non-ASCII characters don't use white-space for
179
+ word separator. Bigram tokenize method is suitable for the case.
180
+
181
+ Mixed tokenize method is suitable for mixed language case.
182
+
183
+ If you want to use bigram tokenize method for ASCII character, see
184
+ ``TokenBigramSplitXXX`` type tokenizers such as
185
+ :ref:`token-bigram-split-symbol-alpha`.
186
+
187
+ Let's confirm ``TokenBigram`` behavior by example.
188
+
189
+ ``TokenBigram`` uses one or more white-spaces as token delimiter for
190
+ ASCII characters:
191
+
192
+ .. groonga-command
193
+ .. include:: ../example/reference/tokenizers/token-bigram-ascii-and-white-space-with-normalizer.log
194
+ .. tokenize TokenBigram "Hello World" NormalizerAuto
195
+
196
+ ``TokenBigram`` uses character type change as token delimiter for
197
+ ASCII characters. Character type is one of the following:
198
+
199
+ * Alphabet
200
+ * Digit
201
+ * Symbol (such as ``(``, ``)`` and ``!``)
202
+ * Hiragana
203
+ * Katakana
204
+ * Kanji
205
+ * Others
206
+
207
+ The following example shows two token delimiters:
208
+
209
+ * at between ``100`` (digits) and ``cents`` (alphabets)
210
+ * at between ``cents`` (alphabets) and ``!!!`` (symbols)
211
+
212
+ .. groonga-command
213
+ .. include:: ../example/reference/tokenizers/token-bigram-ascii-and-character-type-change-with-normalizer.log
214
+ .. tokenize TokenBigram "100cents!!!" NormalizerAuto
215
+
216
+ Here is an example that ``TokenBigram`` uses bigram tokenize method
217
+ for non-ASCII characters.
218
+
219
+ .. groonga-command
220
+ .. include:: ../example/reference/tokenizers/token-bigram-non-ascii-with-normalizer.log
221
+ .. tokenize TokenBigram "日本語の勉強" NormalizerAuto
222
+
223
+ .. _token-bigram-split-symbol:
224
+
225
+ ``TokenBigramSplitSymbol``
226
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
227
+
228
+ ``TokenBigramSplitSymbol`` is similar to :ref:`token-bigram`. The
229
+ difference between them is symbol handling. ``TokenBigramSplitSymbol``
230
+ tokenizes symbols by bigram tokenize method:
231
+
232
+ .. groonga-command
233
+ .. include:: ../example/reference/tokenizers/token-bigram-split-symbol-with-normalizer.log
234
+ .. tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
235
+
236
+ .. _token-bigram-split-symbol-alpha:
237
+
238
+ ``TokenBigramSplitSymbolAlpha``
239
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
240
+
241
+ ``TokenBigramSplitSymbolAlpha`` is similar to :ref:`token-bigram`. The
242
+ difference between them is symbol and alphabet
243
+ handling. ``TokenBigramSplitSymbolAlpha`` tokenizes symbols and
244
+ alphabets by bigram tokenize method:
245
+
246
+ .. groonga-command
247
+ .. include:: ../example/reference/tokenizers/token-bigram-split-symbol-alpha-with-normalizer.log
248
+ .. tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
249
+
250
+ .. _token-bigram-split-symbol-alpha-digit:
251
+
252
+ ``TokenBigramSplitSymbolAlphaDigit``
253
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
254
+
255
+ ``TokenBigramSplitSymbolAlphaDigit`` is similar to
256
+ :ref:`token-bigram`. The difference between them is symbol, alphabet
257
+ and digit handling. ``TokenBigramSplitSymbolAlphaDigit`` tokenizes
258
+ symbols, alphabets and digits by bigram tokenize method. It means that
259
+ all characters are tokenized by bigram tokenize method:
260
+
261
+ .. groonga-command
262
+ .. include:: ../example/reference/tokenizers/token-bigram-split-symbol-alpha-digit-with-normalizer.log
263
+ .. tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
264
+
265
+ .. _token-bigram-ignore-blank:
266
+
267
+ ``TokenBigramIgnoreBlank``
268
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
269
+
270
+ ``TokenBigramIgnoreBlank`` is similar to :ref:`token-bigram`. The
271
+ difference between them is blank handling. ``TokenBigramIgnoreBlank``
272
+ ignores white-spaces in continuous symbols and non-ASCII characters.
273
+
274
+ You can find difference of them by ``日 本 語 ! ! !`` text because it
275
+ has symbols and non-ASCII characters.
276
+
277
+ Here is a result by :ref:`token-bigram` :
278
+
279
+ .. groonga-command
280
+ .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces.log
281
+ .. tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
282
+
283
+ Here is a result by ``TokenBigramIgnoreBlank``:
284
+
285
+ .. groonga-command
286
+ .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-with-white-spaces.log
287
+ .. tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
288
+
289
+ .. _token-bigram-ignore-blank-split-symbol:
290
+
291
+ ``TokenBigramIgnoreBlankSplitSymbol``
292
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
293
+
294
+ ``TokenBigramIgnoreBlankSplitSymbol`` is similar to
295
+ :ref:`token-bigram`. The differences between them are the following:
296
+
297
+ * Blank handling
298
+ * Symbol handling
299
+
300
+ ``TokenBigramIgnoreBlankSplitSymbol`` ignores white-spaces in
301
+ continuous symbols and non-ASCII characters.
302
+
303
+ ``TokenBigramIgnoreBlankSplitSymbol`` tokenizes symbols by bigram
304
+ tokenize method.
305
+
306
+ You can find difference of them by ``日 本 語 ! ! !`` text because it
307
+ has symbols and non-ASCII characters.
308
+
309
+ Here is a result by :ref:`token-bigram` :
310
+
311
+ .. groonga-command
312
+ .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces-and-symbol.log
313
+ .. tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
314
+
315
+ Here is a result by ``TokenBigramIgnoreBlankSplitSymbol``:
316
+
317
+ .. groonga-command
318
+ .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-split-symbol-with-white-spaces-and-symbol.log
319
+ .. tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
320
+
321
+ .. _token-bigram-ignore-blank-split-symbol-alpha:
322
+
323
+ ``TokenBigramIgnoreBlankSplitSymbolAlpha``
324
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
325
+
326
+ ``TokenBigramIgnoreBlankSplitSymbolAlpha`` is similar to
327
+ :ref:`token-bigram`. The differences between them are the following:
328
+
329
+ * Blank handling
330
+ * Symbol and alphabet handling
331
+
332
+ ``TokenBigramIgnoreBlankSplitSymbolAlpha`` ignores white-spaces in
333
+ continuous symbols and non-ASCII characters.
334
+
335
+ ``TokenBigramIgnoreBlankSplitSymbolAlpha`` tokenizes symbols and
336
+ alphabets by bigram tokenize method.
337
+
338
+ You can find difference of them by ``Hello 日 本 語 ! ! !`` text because it
339
+ has symbols and non-ASCII characters with white spaces and alphabets.
340
+
341
+ Here is a result by :ref:`token-bigram` :
342
+
343
+ .. groonga-command
344
+ .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces-and-symbol-and-alphabet.log
345
+ .. tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
346
+
347
+ Here is a result by ``TokenBigramIgnoreBlankSplitSymbolAlpha``:
348
+
349
+ .. groonga-command
350
+ .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-split-symbol-with-white-spaces-and-symbol-and-alphabet.log
351
+ .. tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
352
+
353
+ .. _token-bigram-ignore-blank-split-symbol-alpha-digit:
354
+
355
+ ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit``
356
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
357
+
358
+ ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit`` is similar to
359
+ :ref:`token-bigram`. The differences between them are the following:
360
+
361
+ * Blank handling
362
+ * Symbol, alphabet and digit handling
363
+
364
+ ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit`` ignores white-spaces
365
+ in continuous symbols and non-ASCII characters.
366
+
367
+ ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit`` tokenizes symbols,
368
+ alphabets and digits by bigram tokenize method. It means that all
369
+ characters are tokenized by bigram tokenize method.
370
+
371
+ You can find difference of them by ``Hello 日 本 語 ! ! ! 777`` text
372
+ because it has symbols and non-ASCII characters with white spaces,
373
+ alphabets and digits.
374
+
375
+ Here is a result by :ref:`token-bigram` :
376
+
377
+ .. groonga-command
378
+ .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces-and-symbol-and-alphabet-and-digit.log
379
+ .. tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
380
+
381
+ Here is a result by ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit``:
382
+
383
+ .. groonga-command
384
+ .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-split-symbol-with-white-spaces-and-symbol-and-alphabet-digit.log
385
+ .. tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
386
+
387
+ .. _token-unigram:
388
+
389
+ ``TokenUnigram``
390
+ ^^^^^^^^^^^^^^^^
391
+
392
+ ``TokenUnigram`` is similar to :ref:`token-bigram`. The difference
393
+ between them is token unit. :ref:`token-bigram` uses 2 characters per
394
+ token. ``TokenUnigram`` uses 1 character per token.
395
+
396
+ .. groonga-command
397
+ .. include:: ../example/reference/tokenizers/token-unigram.log
398
+ .. tokenize TokenUnigram "100cents!!!" NormalizerAuto
399
+
400
+ .. _token-trigram:
401
+
402
+ ``TokenTrigram``
403
+ ^^^^^^^^^^^^^^^^
404
+
405
+ ``TokenTrigram`` is similar to :ref:`token-bigram`. The difference
406
+ between them is token unit. :ref:`token-bigram` uses 2 characters per
407
+ token. ``TokenTrigram`` uses 3 characters per token.
408
+
409
+ .. groonga-command
410
+ .. include:: ../example/reference/tokenizers/token-trigram.log
411
+ .. tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
412
+
413
+ .. _token-delimit:
414
+
415
+ ``TokenDelimit``
416
+ ^^^^^^^^^^^^^^^^
417
+
418
+ ``TokenDelimit`` extracts token by splitting one or more space
419
+ characters (``U+0020``). For example, ``Hello World`` is tokenized to
420
+ ``Hello`` and ``World``.
421
+
422
+ ``TokenDelimit`` is suitable for tag text. You can extract ``groonga``
423
+ and ``full-text-search`` and ``http`` as tags from ``groonga
424
+ full-text-search http``.
425
+
426
+ Here is an example of ``TokenDelimit``:
427
+
428
+ .. groonga-command
429
+ .. include:: ../example/reference/tokenizers/token-delimit.log
430
+ .. tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
431
+
432
+ .. _token-delimit-null:
433
+
434
+ ``TokenDelimitNull``
435
+ ^^^^^^^^^^^^^^^^^^^^
436
+
437
+ ``TokenDelimitNull`` is similar to :ref:`token-delimit`. The
438
+ difference between them is separator character. :ref:`token-delimit`
439
+ uses space character (``U+0020``) but ``TokenDelimitNull`` uses NUL
440
+ character (``U+0000``).
441
+
442
+ ``TokenDelimitNull`` is also suitable for tag text.
443
+
444
+ Here is an example of ``TokenDelimitNull``:
445
+
446
+ .. groonga-command
447
+ .. include:: ../example/reference/tokenizers/token-delimit-null.log
448
+ .. tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
449
+
450
+ .. _token-mecab:
451
+
452
+ ``TokenMecab``
453
+ ^^^^^^^^^^^^^^
454
+
455
+ ``TokenMecab`` is a tokenizer based on `MeCab
456
+ <http://mecab.sourceforge.net/>`_ part-of-speech and
457
+ morphological analyzer.
458
+
459
+ MeCab doesn't depend on Japanese. You can use MeCab for other
460
+ languages by creating dictionary for the languages. You can use `NAIST
461
+ Japanese Dictionary <http://sourceforge.jp/projects/naist-jdic/>`_
462
+ for Japanese.
463
+
464
+ ``TokenMecab`` is good for precision rather than recall. You can find
465
+ ``東京都`` and ``京都`` texts by ``京都`` query with
466
+ :ref:`token-bigram` but ``東京都`` isn't expected. You can find only
467
+ ``京都`` text by ``京都`` query with ``TokenMecab``.
468
+
469
+ If you want to support neologisms, you need to keep updating your
470
+ MeCab dictionary. It has a maintenance cost. (:ref:`token-bigram` doesn't
471
+ require dictionary maintenance because :ref:`token-bigram` doesn't use
472
+ dictionary.) `mecab-ipadic-NEologd : Neologism dictionary for MeCab
473
+ <https://github.com/neologd/mecab-ipadic-neologd>`_ may help you.
474
+
475
+ Here is an example of ``TokenMecab``. ``東京都`` is tokenized to ``東京``
476
+ and ``都``. They don't include ``京都``:
477
+
478
+ .. groonga-command
479
+ .. include:: ../example/reference/tokenizers/token-mecab.log
480
+ .. tokenize TokenMecab "東京都"
481
+
482
+ .. _token-regexp:
483
+
484
+ ``TokenRegexp``
485
+ ^^^^^^^^^^^^^^^
486
+
487
+ .. versionadded:: 5.0.1
488
+
489
+ .. caution::
490
+
491
+ This tokenizer is experimental. Specification may be changed.
492
+
493
+ .. caution::
494
+
495
+ This tokenizer can be used only with UTF-8. You can't use this
496
+ tokenizer with EUC-JP, Shift_JIS and so on.
497
+
498
+ ``TokenRegexp`` is a tokenizer for supporting regular expression
499
+ search by index.
500
+
501
+ In general, regular expression search is evaluated as sequential
502
+ search. But the following cases can be evaluated as index search:
503
+
504
+ * Literal only case such as ``hello``
505
+ * The beginning of text and literal case such as ``\A/home/alice``
506
+ * The end of text and literal case such as ``\.txt\z``
507
+
508
+ In most cases, index search is faster than sequential search.
509
+
510
+ ``TokenRegexp`` is based on bigram tokenize method. ``TokenRegexp``
511
+ adds the beginning of text mark (``U+FFEF``) at the beginning of text
512
+ and the end of text mark (``U+FFF0``) to the end of text when you
513
+ index text:
514
+
515
+ .. groonga-command
516
+ .. include:: ../example/reference/tokenizers/token-regexp-add.log
517
+ .. tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
518
+
519
+ The beginning of text mark is used for the beginning of text search by
520
+ ``\A``. If you use ``TokenRegexp`` for tokenizing query,
521
+ ``TokenRegexp`` adds the beginning of text mark (``U+FFEF``) as the
522
+ first token. The beginning of text mark must appear at the first, so
523
+ you can get results of the beginning of text search.
524
+
525
+ .. groonga-command
526
+ .. include:: ../example/reference/tokenizers/token-regexp-get-beginning-of-text.log
527
+ .. tokenize TokenRegexp "\\A/home/alice/" NormalizerAuto --mode GET
528
+
529
+ The end of text mark is used for the end of text search by ``\z``.
530
+ If you use ``TokenRegexp`` for tokenizing query, ``TokenRegexp`` adds
531
+ the end of text mark (``U+FFF0``) as the last token. The end of text
532
+ mark must appear at the end, so you can get results of the end of
533
+ text search.
534
+
535
+ .. groonga-command
536
+ .. include:: ../example/reference/tokenizers/token-regexp-get-end-of-text.log
537
+ .. tokenize TokenRegexp "\\.txt\\z" NormalizerAuto --mode GET
24
538