rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
 - data/.yardopts +1 -0
 - data/Rakefile +1 -16
 - data/example/bookmark.rb +1 -6
 - data/example/index-html.rb +0 -1
 - data/ext/groonga/extconf.rb +4 -7
 - data/ext/groonga/rb-grn-array.c +1 -1
 - data/ext/groonga/rb-grn-column.c +33 -67
 - data/ext/groonga/rb-grn-context.c +5 -5
 - data/ext/groonga/rb-grn-database.c +2 -2
 - data/ext/groonga/rb-grn-double-array-trie.c +4 -2
 - data/ext/groonga/rb-grn-encoding-support.c +7 -1
 - data/ext/groonga/rb-grn-equal-operator.c +85 -0
 - data/ext/groonga/rb-grn-exception.c +17 -0
 - data/ext/groonga/rb-grn-expression.c +85 -43
 - data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
 - data/ext/groonga/rb-grn-greater-operator.c +85 -0
 - data/ext/groonga/rb-grn-hash.c +1 -1
 - data/ext/groonga/rb-grn-index-column.c +150 -11
 - data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
 - data/ext/groonga/rb-grn-less-operator.c +85 -0
 - data/ext/groonga/rb-grn-logger.c +5 -5
 - data/ext/groonga/rb-grn-match-operator.c +86 -0
 - data/ext/groonga/rb-grn-normalizer.c +8 -1
 - data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
 - data/ext/groonga/rb-grn-object.c +170 -36
 - data/ext/groonga/rb-grn-operator.c +395 -172
 - data/ext/groonga/rb-grn-patricia-trie.c +10 -8
 - data/ext/groonga/rb-grn-plugin.c +51 -3
 - data/ext/groonga/rb-grn-prefix-operator.c +86 -0
 - data/ext/groonga/rb-grn-procedure-type.c +4 -0
 - data/ext/groonga/rb-grn-query-logger.c +4 -4
 - data/ext/groonga/rb-grn-regexp-operator.c +85 -0
 - data/ext/groonga/rb-grn-snippet.c +1 -1
 - data/ext/groonga/rb-grn-table-key-support.c +9 -5
 - data/ext/groonga/rb-grn-table.c +52 -66
 - data/ext/groonga/rb-grn-type.c +1 -1
 - data/ext/groonga/rb-grn-utils.c +22 -3
 - data/ext/groonga/rb-grn.h +31 -4
 - data/ext/groonga/rb-groonga.c +9 -9
 - data/lib/1.9/groonga.so +0 -0
 - data/lib/2.0/groonga.so +0 -0
 - data/lib/2.1/groonga.so +0 -0
 - data/lib/2.2/groonga.so +0 -0
 - data/lib/groonga/context.rb +31 -0
 - data/lib/groonga/expression-builder.rb +14 -1
 - data/lib/groonga/record.rb +10 -8
 - data/lib/groonga/schema.rb +3 -1
 - data/rroonga-build.rb +2 -2
 - data/rroonga.gemspec +3 -3
 - data/test/groonga-test-utils.rb +4 -0
 - data/test/test-column.rb +28 -26
 - data/test/test-exception.rb +1 -0
 - data/test/test-expression-builder.rb +83 -1
 - data/test/test-expression.rb +80 -48
 - data/test/test-index-column.rb +102 -29
 - data/test/test-normalizer.rb +35 -29
 - data/test/test-operator.rb +214 -0
 - data/test/test-plugin.rb +24 -6
 - data/test/test-procedure.rb +29 -0
 - data/test/test-schema-type.rb +14 -0
 - data/test/test-table-select-mecab.rb +1 -4
 - data/test/test-table.rb +7 -0
 - data/test/test-token-regexp.rb +30 -0
 - data/test/test-type.rb +24 -0
 - data/vendor/local/bin/grndb.exe +0 -0
 - data/vendor/local/bin/groonga-benchmark.exe +0 -0
 - data/vendor/local/bin/groonga.exe +0 -0
 - data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
 - data/vendor/local/bin/libgroonga-0.dll +0 -0
 - data/vendor/local/bin/libmecab-1.dll +0 -0
 - data/vendor/local/bin/libmsgpack-3.dll +0 -0
 - data/vendor/local/bin/libmsgpackc-2.dll +0 -0
 - data/vendor/local/bin/libonig-5.dll +0 -0
 - data/vendor/local/bin/libstdc++-6.dll +0 -0
 - data/vendor/local/bin/lz4.exe +0 -0
 - data/vendor/local/bin/lz4c.exe +0 -0
 - data/vendor/local/bin/lz4cat +0 -0
 - data/vendor/local/bin/mecab-config +2 -2
 - data/vendor/local/bin/mecab.exe +0 -0
 - data/vendor/local/bin/onig-config +1 -1
 - data/vendor/local/bin/zlib1.dll +0 -0
 - data/vendor/local/etc/groonga/groonga.conf +1 -1
 - data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
 - data/vendor/local/include/groonga/groonga.h +1 -0
 - data/vendor/local/include/groonga/groonga/expr.h +2 -0
 - data/vendor/local/include/groonga/groonga/groonga.h +32 -5
 - data/vendor/local/include/groonga/groonga/ii.h +7 -0
 - data/vendor/local/include/groonga/groonga/obj.h +37 -0
 - data/vendor/local/include/groonga/groonga/scorer.h +95 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
 - data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
 - data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
 - data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
 - data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
 - data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
 - data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
 - data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
 - data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
 - data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
 - data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
 - data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
 - data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
 - data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
 - data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
 - data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
 - data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
 - data/vendor/local/lib/libgroonga.a +0 -0
 - data/vendor/local/lib/libgroonga.dll.a +0 -0
 - data/vendor/local/lib/libgroonga.la +2 -2
 - data/vendor/local/lib/liblz4.a +0 -0
 - data/vendor/local/lib/liblz4.dll +0 -0
 - data/vendor/local/lib/liblz4.dll.1 +0 -0
 - data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
 - data/vendor/local/lib/libmecab.a +0 -0
 - data/vendor/local/lib/libmecab.dll.a +0 -0
 - data/vendor/local/lib/libmecab.la +2 -2
 - data/vendor/local/lib/libmsgpack.a +0 -0
 - data/vendor/local/lib/libmsgpack.dll.a +0 -0
 - data/vendor/local/lib/libmsgpack.la +2 -2
 - data/vendor/local/lib/libmsgpackc.a +0 -0
 - data/vendor/local/lib/libmsgpackc.dll.a +0 -0
 - data/vendor/local/lib/libmsgpackc.la +2 -2
 - data/vendor/local/lib/libonig.a +0 -0
 - data/vendor/local/lib/libonig.dll.a +0 -0
 - data/vendor/local/lib/libonig.la +2 -2
 - data/vendor/local/lib/libz.a +0 -0
 - data/vendor/local/lib/libz.dll.a +0 -0
 - data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
 - data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
 - data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
 - data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
 - data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
 - data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
 - data/vendor/local/sbin/groonga-httpd-restart +1 -1
 - data/vendor/local/sbin/groonga-httpd.exe +0 -0
 - data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
 - data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
 - data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
 - data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
 - data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
 - data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
 - data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
 - data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
 - data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
 - data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
 - data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
 - data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
 - data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
 - data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
 - data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
 - data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
 - data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
 - data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
 - data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
 - data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
 - data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
 - data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
 - data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
 - data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
 - data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
 - data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
 - data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
 - data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
 - data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
 - data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
 - data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
 - data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
 - data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
 - data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
 - data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
 - data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
 - data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
 - data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
 - data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
 - data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
 - data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
 - data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
 - data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
 - data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
 - data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
 - data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
 - data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
 - data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
 - data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
 - data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
 - data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
 - data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
 - data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
 - data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
 - data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
 - data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
 - data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
 - data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
 - data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
 - data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
 - data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
 - data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
 - data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
 - data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
 - data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
 - data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
 - data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
 - data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
 - data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
 - data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
 - data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
 - data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
 - data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
 - data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
 - data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
 - data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
 - data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
 - data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
 - data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
 - data/vendor/local/share/license/mruby/README.md +2 -2
 - data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
 - data/vendor/local/share/man/man1/groonga.1 +7210 -3029
 - metadata +75 -11
 - data/doc/text/news.textile +0 -1217
 - data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
 - data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
 - data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
 - data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
 
| 
         @@ -0,0 +1,217 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            .. -*- rst -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            .. highlightlang:: none
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 6 
     | 
    
         
            +
            .. database: scorer
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            Scorer
         
     | 
| 
      
 9 
     | 
    
         
            +
            ======
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            Summary
         
     | 
| 
      
 12 
     | 
    
         
            +
            -------
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            Groonga has a scorer module that customizes the score function. Score
         
     | 
| 
      
 15 
     | 
    
         
            +
            function computes score of matched record. The default scorer function
         
     | 
| 
      
 16 
     | 
    
         
            +
            uses the number of times terms appear. It is also known as TF (term
         
     | 
| 
      
 17 
     | 
    
         
            +
            frequency).
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            TF is a fast score function but it's not suitable for the following
         
     | 
| 
      
 20 
     | 
    
         
            +
            cases:
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
              * Search query contains one or more frequently-appearing words such
         
     | 
| 
      
 23 
     | 
    
         
            +
                as "the" and "a".
         
     | 
| 
      
 24 
     | 
    
         
            +
              * Document contains the same keyword many times such as "They are keyword,
         
     | 
| 
      
 25 
     | 
    
         
            +
                keyword, keyword ... and keyword". Search engine spammers may use
         
     | 
| 
      
 26 
     | 
    
         
            +
                the technique.
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
            Score function can solve these cases. For example, `TF-IDF
         
     | 
| 
      
 29 
     | 
    
         
            +
            <http://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_ (term
         
     | 
| 
      
 30 
     | 
    
         
            +
            frequency-inverse document frequency) can solve the first case.
         
     | 
| 
      
 31 
     | 
    
         
            +
            `Okapi BM25 <http://en.wikipedia.org/wiki/Okapi_BM25>`_ can solve the
         
     | 
| 
      
 32 
     | 
    
         
            +
            second case. But they are slower than TF.
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
            Groonga provides TF-IDF based scorer as
         
     | 
| 
      
 35 
     | 
    
         
            +
            :doc:`/reference/scorers/scorer_tf_idf` but doesn't provide Okapi BM25
         
     | 
| 
      
 36 
     | 
    
         
            +
            based scorer yet.
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            .. include:: scoring_note.rst
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
            Usage
         
     | 
| 
      
 41 
     | 
    
         
            +
            -----
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            This section describes how to use a scorer.
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            Here are a schema definition and sample data to show usage.
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            Sample schema:
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 50 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_setup_schema.log
         
     | 
| 
      
 51 
     | 
    
         
            +
            .. table_create Memos TABLE_HASH_KEY ShortText
         
     | 
| 
      
 52 
     | 
    
         
            +
            .. column_create Memos title COLUMN_SCALAR ShortText
         
     | 
| 
      
 53 
     | 
    
         
            +
            .. column_create Memos content COLUMN_SCALAR Text
         
     | 
| 
      
 54 
     | 
    
         
            +
            ..
         
     | 
| 
      
 55 
     | 
    
         
            +
            .. table_create Terms TABLE_PAT_KEY ShortText \
         
     | 
| 
      
 56 
     | 
    
         
            +
            ..   --default_tokenizer TokenBigram \
         
     | 
| 
      
 57 
     | 
    
         
            +
            ..   --normalizer NormalizerAuto
         
     | 
| 
      
 58 
     | 
    
         
            +
            .. column_create Terms title_index COLUMN_INDEX|WITH_POSITION Memos title
         
     | 
| 
      
 59 
     | 
    
         
            +
            .. column_create Terms content_index COLUMN_INDEX|WITH_POSITION Memos content
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
            Sample data:
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 64 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_setup_data.log
         
     | 
| 
      
 65 
     | 
    
         
            +
            .. load --table Memos
         
     | 
| 
      
 66 
     | 
    
         
            +
            .. [
         
     | 
| 
      
 67 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 68 
     | 
    
         
            +
            ..   "_key": "memo1",
         
     | 
| 
      
 69 
     | 
    
         
            +
            ..   "title": "Groonga is easy",
         
     | 
| 
      
 70 
     | 
    
         
            +
            ..   "content": "Groonga is very easy full text search engine!"
         
     | 
| 
      
 71 
     | 
    
         
            +
            .. },
         
     | 
| 
      
 72 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 73 
     | 
    
         
            +
            ..   "_key": "memo2",
         
     | 
| 
      
 74 
     | 
    
         
            +
            ..   "title": "Mroonga is easy",
         
     | 
| 
      
 75 
     | 
    
         
            +
            ..   "content": "Mroonga is more easier full text search engine!"
         
     | 
| 
      
 76 
     | 
    
         
            +
            .. },
         
     | 
| 
      
 77 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 78 
     | 
    
         
            +
            ..   "_key": "memo3",
         
     | 
| 
      
 79 
     | 
    
         
            +
            ..   "title": "Rroonga is easy",
         
     | 
| 
      
 80 
     | 
    
         
            +
            ..   "content": "Ruby is very helpful."
         
     | 
| 
      
 81 
     | 
    
         
            +
            .. },
         
     | 
| 
      
 82 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 83 
     | 
    
         
            +
            ..   "_key": "memo4",
         
     | 
| 
      
 84 
     | 
    
         
            +
            ..   "title": "Groonga is fast",
         
     | 
| 
      
 85 
     | 
    
         
            +
            ..   "content": "Groonga! Groonga! Groonga! Groonga is very fast!"
         
     | 
| 
      
 86 
     | 
    
         
            +
            .. },
         
     | 
| 
      
 87 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 88 
     | 
    
         
            +
            ..   "_key": "memo5",
         
     | 
| 
      
 89 
     | 
    
         
            +
            ..   "title": "PGroonga is fast",
         
     | 
| 
      
 90 
     | 
    
         
            +
            ..   "content": "PGroonga is very fast!"
         
     | 
| 
      
 91 
     | 
    
         
            +
            .. },
         
     | 
| 
      
 92 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 93 
     | 
    
         
            +
            ..   "_key": "memo6",
         
     | 
| 
      
 94 
     | 
    
         
            +
            ..   "title": "PGroonga is useful",
         
     | 
| 
      
 95 
     | 
    
         
            +
            ..   "content": "SQL is easy because many client libraries exist."
         
     | 
| 
      
 96 
     | 
    
         
            +
            .. },
         
     | 
| 
      
 97 
     | 
    
         
            +
            .. {
         
     | 
| 
      
 98 
     | 
    
         
            +
            ..   "_key": "memo7",
         
     | 
| 
      
 99 
     | 
    
         
            +
            ..   "title": "Mroonga is also useful",
         
     | 
| 
      
 100 
     | 
    
         
            +
            ..   "content": "MySQL has replication feature. Mroonga can use it."
         
     | 
| 
      
 101 
     | 
    
         
            +
            .. }
         
     | 
| 
      
 102 
     | 
    
         
            +
            .. ]
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
            You can specify a custom score function in :ref:`select-match-columns`.
         
     | 
| 
      
 105 
     | 
    
         
            +
            There are some syntaxes.
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
      
 107 
     | 
    
         
            +
            For score function that doesn't require any parameter such as
         
     | 
| 
      
 108 
     | 
    
         
            +
            :doc:`/reference/scorers/scorer_tf_idf`::
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
              SCORE_FUNCTION(COLUMN)
         
     | 
| 
      
 111 
     | 
    
         
            +
             
     | 
| 
      
 112 
     | 
    
         
            +
            You can specify weight::
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
              SCORE_FUNCTION(COLUMN) * WEIGHT
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
            For score function that requires one or more parameters such as
         
     | 
| 
      
 117 
     | 
    
         
            +
            :doc:`/reference/scorers/scorer_tf_at_most`::
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
              SCORE_FUNCTION(COLUMN, ARGUMENT1, ARGUMENT2, ...)
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
            You can specify weight::
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
              SCORE_FUNCTION(COLUMN, ARGUMENT1, ARGUMENT2, ...) * WEIGHT
         
     | 
| 
      
 124 
     | 
    
         
            +
             
     | 
| 
      
 125 
     | 
    
         
            +
            You can use different score function for each match column::
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
              SCORE_FUNCTION1(COLUMN1) ||
         
     | 
| 
      
 128 
     | 
    
         
            +
                SCORE_FUNCTION2(COLUMN2) * WEIGHT ||
         
     | 
| 
      
 129 
     | 
    
         
            +
                SCORE_FUNCTION3(COLUMN3, ARGUMENT1) ||
         
     | 
| 
      
 130 
     | 
    
         
            +
                ...
         
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
      
 132 
     | 
    
         
            +
            Here is the simplest example:
         
     | 
| 
      
 133 
     | 
    
         
            +
             
     | 
| 
      
 134 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 135 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_one_no_argument_no_weight.log
         
     | 
| 
      
 136 
     | 
    
         
            +
            .. select Memos \
         
     | 
| 
      
 137 
     | 
    
         
            +
            ..   --match_columns "scorer_tf_idf(content)" \
         
     | 
| 
      
 138 
     | 
    
         
            +
            ..   --query "Groonga" \
         
     | 
| 
      
 139 
     | 
    
         
            +
            ..   --output_columns "content, _score" \
         
     | 
| 
      
 140 
     | 
    
         
            +
            ..   --sortby "-_score"
         
     | 
| 
      
 141 
     | 
    
         
            +
             
     | 
| 
      
 142 
     | 
    
         
            +
            ``Groonga! Groonga! Groonga! Groonga is very fast!`` contains 4
         
     | 
| 
      
 143 
     | 
    
         
            +
            ``Groonga``. If you use TF based scorer that is the default scorer,
         
     | 
| 
      
 144 
     | 
    
         
            +
            ``_score`` is ``4``. But the actual ``_score`` is ``2``. Because the
         
     | 
| 
      
 145 
     | 
    
         
            +
            ``select`` command uses TF-IDF based scorer ``scorer_tf_idf()``.
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
      
 147 
     | 
    
         
            +
            Here is an example that uses weight:
         
     | 
| 
      
 148 
     | 
    
         
            +
             
     | 
| 
      
 149 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 150 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_one_no_argument_weight.log
         
     | 
| 
      
 151 
     | 
    
         
            +
            .. select Memos \
         
     | 
| 
      
 152 
     | 
    
         
            +
            ..   --match_columns "scorer_tf_idf(content) * 10" \
         
     | 
| 
      
 153 
     | 
    
         
            +
            ..   --query "Groonga" \
         
     | 
| 
      
 154 
     | 
    
         
            +
            ..   --output_columns "content, _score" \
         
     | 
| 
      
 155 
     | 
    
         
            +
            ..   --sortby "-_score"
         
     | 
| 
      
 156 
     | 
    
         
            +
             
     | 
| 
      
 157 
     | 
    
         
            +
            ``Groonga! Groonga! Groonga! Groonga is very fast!`` has ``22`` as
         
     | 
| 
      
 158 
     | 
    
         
            +
            ``_score``. It had ``2`` as ``_score`` in the previous example that
         
     | 
| 
      
 159 
     | 
    
         
            +
            doesn't specify weight.
         
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
            Here is an example that uses scorer that requires one
         
     | 
| 
      
 162 
     | 
    
         
            +
            argument. :doc:`/reference/scorers/scorer_tf_at_most` scorer requires
         
     | 
| 
      
 163 
     | 
    
         
            +
            one argument. You can limit TF score by the scorer.
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 166 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_one_one_argument_no_weight.log
         
     | 
| 
      
 167 
     | 
    
         
            +
            .. select Memos \
         
     | 
| 
      
 168 
     | 
    
         
            +
            ..   --match_columns "scorer_tf_at_most(content, 2.0)" \
         
     | 
| 
      
 169 
     | 
    
         
            +
            ..   --query "Groonga" \
         
     | 
| 
      
 170 
     | 
    
         
            +
            ..   --output_columns "content, _score" \
         
     | 
| 
      
 171 
     | 
    
         
            +
            ..   --sortby "-_score"
         
     | 
| 
      
 172 
     | 
    
         
            +
             
     | 
| 
      
 173 
     | 
    
         
            +
            ``Groonga! Groonga! Groonga! Groonga is very fast!`` contains 4
         
     | 
| 
      
 174 
     | 
    
         
            +
            ``Groonga``. If you use normal TF based scorer that is the default
         
     | 
| 
      
 175 
     | 
    
         
            +
            scorer, ``_score`` is ``4``. But the actual ``_score`` is ``2``.
         
     | 
| 
      
 176 
     | 
    
         
            +
            Because the scorer used in the ``select`` command limits the maximum
         
     | 
| 
      
 177 
     | 
    
         
            +
            score value to ``2``.
         
     | 
| 
      
 178 
     | 
    
         
            +
             
     | 
| 
      
 179 
     | 
    
         
            +
            Here is an example that uses multiple scorers:
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 182 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_multiple_scorers.log
         
     | 
| 
      
 183 
     | 
    
         
            +
            .. select Memos \
         
     | 
| 
      
 184 
     | 
    
         
            +
            ..   --match_columns "scorer_tf_idf(title) || scorer_tf_at_most(content, 2.0)" \
         
     | 
| 
      
 185 
     | 
    
         
            +
            ..   --query "Groonga" \
         
     | 
| 
      
 186 
     | 
    
         
            +
            ..   --output_columns "title, content, _score" \
         
     | 
| 
      
 187 
     | 
    
         
            +
            ..   --sortby "-_score"
         
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
            The ``--match_columns`` uses ``scorer_tf_idf(title)`` and
         
     | 
| 
      
 190 
     | 
    
         
            +
            ``scorer_tf_at_most(content, 2.0)``. ``_score`` value is sum of them.
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
            You can use the default scorer and custom scorer in the same
         
     | 
| 
      
 193 
     | 
    
         
            +
            ``--match_columns``. You can use the default scorer by just specifying
         
     | 
| 
      
 194 
     | 
    
         
            +
            a match column:
         
     | 
| 
      
 195 
     | 
    
         
            +
             
     | 
| 
      
 196 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 197 
     | 
    
         
            +
            .. include:: ../example/reference/scorer/usage_default_and_custom_scorers.log
         
     | 
| 
      
 198 
     | 
    
         
            +
            .. select Memos \
         
     | 
| 
      
 199 
     | 
    
         
            +
            ..   --match_columns "title || scorer_tf_at_most(content, 2.0)" \
         
     | 
| 
      
 200 
     | 
    
         
            +
            ..   --query "Groonga" \
         
     | 
| 
      
 201 
     | 
    
         
            +
            ..   --output_columns "title, content, _score" \
         
     | 
| 
      
 202 
     | 
    
         
            +
            ..   --sortby "-_score"
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
            The ``--match_columns`` uses the default scorer (TF) for ``title`` and
         
     | 
| 
      
 205 
     | 
    
         
            +
            :doc:`/reference/scorers/scorer_tf_at_most` for
         
     | 
| 
      
 206 
     | 
    
         
            +
            ``content``. ``_score`` value is sum of them.
         
     | 
| 
      
 207 
     | 
    
         
            +
             
     | 
| 
      
 208 
     | 
    
         
            +
            Built-in scorers
         
     | 
| 
      
 209 
     | 
    
         
            +
            ----------------
         
     | 
| 
      
 210 
     | 
    
         
            +
             
     | 
| 
      
 211 
     | 
    
         
            +
            Here are built-in scorers:
         
     | 
| 
      
 212 
     | 
    
         
            +
             
     | 
| 
      
 213 
     | 
    
         
            +
            .. toctree::
         
     | 
| 
      
 214 
     | 
    
         
            +
               :maxdepth: 1
         
     | 
| 
      
 215 
     | 
    
         
            +
               :glob:
         
     | 
| 
      
 216 
     | 
    
         
            +
             
     | 
| 
      
 217 
     | 
    
         
            +
               scorers/*
         
     | 
| 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            .. -*- rst -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            .. highlightlang:: none
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 6 
     | 
    
         
            +
            .. database: scorer_tf_at_most
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ``scorer_tf_at_most``
         
     | 
| 
      
 9 
     | 
    
         
            +
            =====================
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            .. note::
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
               This scorer is an experimental feature.
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            .. versionadded:: 5.0.1
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            Summary
         
     | 
| 
      
 18 
     | 
    
         
            +
            -------
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            ``scorer_tf_at_most`` is a scorer based on TF (term frequency).
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            TODO
         
     | 
| 
         @@ -0,0 +1,110 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            .. -*- rst -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            .. highlightlang:: none
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 6 
     | 
    
         
            +
            .. database: scorer_tf_idf
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ``scorer_tf_idf``
         
     | 
| 
      
 9 
     | 
    
         
            +
            =================
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            .. note::
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
               This scorer is an experimental feature.
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            .. versionadded:: 5.0.1
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            Summary
         
     | 
| 
      
 18 
     | 
    
         
            +
            -------
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            ``scorer_tf_idf`` is a scorer based on `TF-IDF
         
     | 
| 
      
 21 
     | 
    
         
            +
            <http://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_ (term
         
     | 
| 
      
 22 
     | 
    
         
            +
            frequency-inverse document frequency) score function.
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            To put it simply, TF (term frequency) divided by DF (document
         
     | 
| 
      
 25 
     | 
    
         
            +
            frequency) is TF-IDF. "TF" means that "the number of occurrences is
         
     | 
| 
      
 26 
     | 
    
         
            +
            more important". "TF divided by DF" means that "the number of
         
     | 
| 
      
 27 
     | 
    
         
            +
            occurrences of important term is more important".
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            The default score function in Groonga is TF (term frequency). It
         
     | 
| 
      
 30 
     | 
    
         
            +
            doesn't care about term importance but is fast.
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            TF-IDF cares about term importance but is slower than TF.
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
            TF-IDF will compute a more suitable score than TF in many cases.
         
     | 
| 
      
 35 
     | 
    
         
            +
            But it's not perfect.
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
            If a document repeats the same keyword many times, such as "They are keyword,
         
     | 
| 
      
 38 
     | 
    
         
            +
            keyword, keyword ... and keyword", it increases the score by TF and
         
     | 
| 
      
 39 
     | 
    
         
            +
            TF-IDF. Search engine spammer may use the technique. But TF-IDF
         
     | 
| 
      
 40 
     | 
    
         
            +
            doesn't guard from the technique.
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            `Okapi BM25 <http://en.wikipedia.org/wiki/Okapi_BM25>`_ can solve the
         
     | 
| 
      
 43 
     | 
    
         
            +
            case. But it's slower than TF-IDF and not implemented yet in
         
     | 
| 
      
 44 
     | 
    
         
            +
            Groonga.
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            .. include:: ../scoring_note.rst
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
            Usage
         
     | 
| 
      
 49 
     | 
    
         
            +
            -----
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            This section describes how to use this scorer.
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            Here are a schema definition and sample data to show usage.
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            Sample schema:
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 58 
     | 
    
         
            +
            .. include:: ../example/reference/scorers/scorer_tf_idf/usage_setup_schema.log
         
     | 
| 
      
 59 
     | 
    
         
            +
            .. table_create Logs TABLE_NO_KEY
         
     | 
| 
      
 60 
     | 
    
         
            +
            .. column_create Logs message COLUMN_SCALAR Text
         
     | 
| 
      
 61 
     | 
    
         
            +
            ..
         
     | 
| 
      
 62 
     | 
    
         
            +
            .. table_create Terms TABLE_PAT_KEY ShortText \
         
     | 
| 
      
 63 
     | 
    
         
            +
            ..   --default_tokenizer TokenBigram \
         
     | 
| 
      
 64 
     | 
    
         
            +
            ..   --normalizer NormalizerAuto
         
     | 
| 
      
 65 
     | 
    
         
            +
            .. column_create Terms message_index COLUMN_INDEX|WITH_POSITION Logs message
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            Sample data:
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 70 
     | 
    
         
            +
            .. include:: ../example/reference/scorers/scorer_tf_idf/usage_setup_data.log
         
     | 
| 
      
 71 
     | 
    
         
            +
            .. load --table Logs
         
     | 
| 
      
 72 
     | 
    
         
            +
            .. [
         
     | 
| 
      
 73 
     | 
    
         
            +
            .. {"message": "Error"},
         
     | 
| 
      
 74 
     | 
    
         
            +
            .. {"message": "Warning"},
         
     | 
| 
      
 75 
     | 
    
         
            +
            .. {"message": "Warning Warning"},
         
     | 
| 
      
 76 
     | 
    
         
            +
            .. {"message": "Warning Warning Warning"},
         
     | 
| 
      
 77 
     | 
    
         
            +
            .. {"message": "Info"},
         
     | 
| 
      
 78 
     | 
    
         
            +
            .. {"message": "Info Info"},
         
     | 
| 
      
 79 
     | 
    
         
            +
            .. {"message": "Info Info Info"},
         
     | 
| 
      
 80 
     | 
    
         
            +
            .. {"message": "Info Info Info Info"},
         
     | 
| 
      
 81 
     | 
    
         
            +
            .. {"message": "Notice"},
         
     | 
| 
      
 82 
     | 
    
         
            +
            .. {"message": "Notice Notice"},
         
     | 
| 
      
 83 
     | 
    
         
            +
            .. {"message": "Notice Notice Notice"},
         
     | 
| 
      
 84 
     | 
    
         
            +
            .. {"message": "Notice Notice Notice Notice"},
         
     | 
| 
      
 85 
     | 
    
         
            +
            .. {"message": "Notice Notice Notice Notice Notice"}
         
     | 
| 
      
 86 
     | 
    
         
            +
            .. ]
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
            You specify ``scorer_tf_idf`` in :ref:`select-match-columns` like the
         
     | 
| 
      
 89 
     | 
    
         
            +
            following:
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 92 
     | 
    
         
            +
            .. include:: ../example/reference/scorers/scorer_tf_idf/usage_no_weight.log
         
     | 
| 
      
 93 
     | 
    
         
            +
            .. select Logs \
         
     | 
| 
      
 94 
     | 
    
         
            +
            ..   --match_columns "scorer_tf_idf(message)" \
         
     | 
| 
      
 95 
     | 
    
         
            +
            ..   --query "Error OR Info" \
         
     | 
| 
      
 96 
     | 
    
         
            +
            ..   --output_columns "message, _score" \
         
     | 
| 
      
 97 
     | 
    
         
            +
            ..   --sortby "-_score"
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
            Both the score of ``Info Info Info`` and the score of ``Error`` are
         
     | 
| 
      
 100 
     | 
    
         
            +
            ``2`` even though ``Info Info Info`` includes three ``Info`` terms. Because
         
     | 
| 
      
 101 
     | 
    
         
            +
            ``Error`` is a more important term than ``Info``. The number of
         
     | 
| 
      
 102 
     | 
    
         
            +
            documents that include ``Info`` is ``4``. The number of documents that
         
     | 
| 
      
 103 
     | 
    
         
            +
            include ``Error`` is ``1``. A term that is included in fewer documents
         
     | 
| 
      
 104 
     | 
    
         
            +
            means that the term is a more characteristic term. A characteristic term
         
     | 
| 
      
 105 
     | 
    
         
            +
            is an important term.
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
      
 107 
     | 
    
         
            +
            See also
         
     | 
| 
      
 108 
     | 
    
         
            +
            --------
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
            * :doc:`../scorer`
         
     | 
| 
         @@ -0,0 +1,13 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            .. note::
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
               You don't need to resolve scoring only by score function. Score
         
     | 
| 
      
 4 
     | 
    
         
            +
               function highly depends on the search query. You may be able to use
         
     | 
| 
      
 5 
     | 
    
         
            +
               metadata of matched record.
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
               For example, Google uses `PageRank
         
     | 
| 
      
 8 
     | 
    
         
            +
               <http://en.wikipedia.org/wiki/PageRank>`_ for scoring. You may be
         
     | 
| 
      
 9 
     | 
    
         
            +
               able to use data type ("title" data are more important than
         
     | 
| 
      
 10 
     | 
    
         
            +
               "memo" data), tag, geolocation and so on.
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
               Please stop thinking only about score function for scoring.
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
         @@ -69,6 +69,8 @@ prefix is omitted in the table.) 
     | 
|
| 
       69 
69 
     | 
    
         
             
            | search       |            |              |             |             |
         
     | 
| 
       70 
70 
     | 
    
         
             
            +--------------+------------+--------------+-------------+-------------+
         
     | 
| 
       71 
71 
     | 
    
         | 
| 
      
 72 
     | 
    
         
            +
            .. _table-no-key:
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
       72 
74 
     | 
    
         
             
            ``TABLE_NO_KEY``
         
     | 
| 
       73 
75 
     | 
    
         
             
            ^^^^^^^^^^^^^^^^
         
     | 
| 
       74 
76 
     | 
    
         | 
| 
         @@ -79,6 +81,8 @@ You cannot use ``TABLE_NO_KEY`` for lexicon for fulltext search 
     | 
|
| 
       79 
81 
     | 
    
         
             
            because lexicon stores tokens as key. ``TABLE_NO_KEY`` is useful for
         
     | 
| 
       80 
82 
     | 
    
         
             
            no key records such as log.
         
     | 
| 
       81 
83 
     | 
    
         | 
| 
      
 84 
     | 
    
         
            +
            .. _table-hash-key:
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
       82 
86 
     | 
    
         
             
            ``TABLE_HASH_KEY``
         
     | 
| 
       83 
87 
     | 
    
         
             
            ^^^^^^^^^^^^^^^^^^
         
     | 
| 
       84 
88 
     | 
    
         | 
| 
         @@ -88,6 +92,8 @@ functions such as common prefix search and predictive search. 
     | 
|
| 
       88 
92 
     | 
    
         
             
            ``TABLE_HASH_KEY`` is useful for index for exact search such as tag
         
     | 
| 
       89 
93 
     | 
    
         
             
            search.
         
     | 
| 
       90 
94 
     | 
    
         | 
| 
      
 95 
     | 
    
         
            +
            .. _table-pat-key:
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
       91 
97 
     | 
    
         
             
            ``TABLE_PAT_KEY``
         
     | 
| 
       92 
98 
     | 
    
         
             
            ^^^^^^^^^^^^^^^^^
         
     | 
| 
       93 
99 
     | 
    
         | 
| 
         @@ -96,6 +102,8 @@ search. 
     | 
|
| 
       96 
102 
     | 
    
         
             
            ``TABLE_PAT_KEY`` is useful for lexicon for fulltext search and
         
     | 
| 
       97 
103 
     | 
    
         
             
            index for range search.
         
     | 
| 
       98 
104 
     | 
    
         | 
| 
      
 105 
     | 
    
         
            +
            .. _table-dat-key:
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
       99 
107 
     | 
    
         
             
            ``TABLE_DAT_KEY``
         
     | 
| 
       100 
108 
     | 
    
         
             
            ^^^^^^^^^^^^^^^^^
         
     | 
| 
       101 
109 
     | 
    
         | 
| 
         @@ -2,23 +2,537 @@ 
     | 
|
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            .. highlightlang:: none
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
      
 5 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 6 
     | 
    
         
            +
            .. database: tokenizers
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
       5 
8 
     | 
    
         
             
            Tokenizers
         
     | 
| 
       6 
9 
     | 
    
         
             
            ==========
         
     | 
| 
       7 
10 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
            *  
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
            *  
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
            Summary
         
     | 
| 
      
 12 
     | 
    
         
            +
            -------
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            Groonga has a tokenizer module that tokenizes text. It is used in
         
     | 
| 
      
 15 
     | 
    
         
            +
            the following cases:
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
              * Indexing text
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                .. figure:: /images/reference/tokenizers/used-when-indexing.png
         
     | 
| 
      
 20 
     | 
    
         
            +
                   :align: center
         
     | 
| 
      
 21 
     | 
    
         
            +
                   :width: 80%
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                   Tokenizer is used when indexing text.
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
              * Searching by query
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                .. figure:: /images/reference/tokenizers/used-when-searching.png
         
     | 
| 
      
 28 
     | 
    
         
            +
                   :align: center
         
     | 
| 
      
 29 
     | 
    
         
            +
                   :width: 80%
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                   Tokenizer is used when searching by query.
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            Tokenizer is an important module for full-text search. You can change
         
     | 
| 
      
 34 
     | 
    
         
            +
            trade-off between `precision and recall
         
     | 
| 
      
 35 
     | 
    
         
            +
            <http://en.wikipedia.org/wiki/Precision_and_recall>`_ by changing
         
     | 
| 
      
 36 
     | 
    
         
            +
            tokenizer.
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            Normally, :ref:`token-bigram` is a suitable tokenizer. If you don't
         
     | 
| 
      
 39 
     | 
    
         
            +
            know much about tokenizer, it's recommended that you choose
         
     | 
| 
      
 40 
     | 
    
         
            +
            :ref:`token-bigram`.
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            You can try a tokenizer by :doc:`/reference/commands/tokenize` and
         
     | 
| 
      
 43 
     | 
    
         
            +
            :doc:`/reference/commands/table_tokenize`. Here is an example to
         
     | 
| 
      
 44 
     | 
    
         
            +
            try :ref:`token-bigram` tokenizer by
         
     | 
| 
      
 45 
     | 
    
         
            +
            :doc:`/reference/commands/tokenize`:
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 48 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/tokenize-example.log
         
     | 
| 
      
 49 
     | 
    
         
            +
            .. tokenize TokenBigram "Hello World"
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            What is "tokenize"?
         
     | 
| 
      
 52 
     | 
    
         
            +
            -------------------
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
            "tokenize" is the process that extracts zero or more tokens from a
         
     | 
| 
      
 55 
     | 
    
         
            +
            text. There are some "tokenize" methods.
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            For example, ``Hello World`` is tokenized to the following tokens by
         
     | 
| 
      
 58 
     | 
    
         
            +
            bigram tokenize method:
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
              * ``He``
         
     | 
| 
      
 61 
     | 
    
         
            +
              * ``el``
         
     | 
| 
      
 62 
     | 
    
         
            +
              * ``ll``
         
     | 
| 
      
 63 
     | 
    
         
            +
              * ``lo``
         
     | 
| 
      
 64 
     | 
    
         
            +
              * ``o_`` (``_`` means a white-space)
         
     | 
| 
      
 65 
     | 
    
         
            +
              * ``_W`` (``_`` means a white-space)
         
     | 
| 
      
 66 
     | 
    
         
            +
              * ``Wo``
         
     | 
| 
      
 67 
     | 
    
         
            +
              * ``or``
         
     | 
| 
      
 68 
     | 
    
         
            +
              * ``rl``
         
     | 
| 
      
 69 
     | 
    
         
            +
              * ``ld``
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
            In the above example, 10 tokens are extracted from one text ``Hello
         
     | 
| 
      
 72 
     | 
    
         
            +
            World``.
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
            For example, ``Hello World`` is tokenized to the following tokens by
         
     | 
| 
      
 75 
     | 
    
         
            +
            white-space-separate tokenize method:
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
              * ``Hello``
         
     | 
| 
      
 78 
     | 
    
         
            +
              * ``World``
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
            In the above example, 2 tokens are extracted from one text ``Hello
         
     | 
| 
      
 81 
     | 
    
         
            +
            World``.
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
            Token is used as search key. You can find indexed documents only by
         
     | 
| 
      
 84 
     | 
    
         
            +
            tokens that are extracted by the tokenize method in use. For example, you
         
     | 
| 
      
 85 
     | 
    
         
            +
            can find ``Hello World`` by ``ll`` with bigram tokenize method but you
         
     | 
| 
      
 86 
     | 
    
         
            +
            can't find ``Hello World`` by ``ll`` with white-space-separate tokenize
         
     | 
| 
      
 87 
     | 
    
         
            +
            method. Because white-space-separate tokenize method doesn't extract
         
     | 
| 
      
 88 
     | 
    
         
            +
            ``ll`` token. It just extracts ``Hello`` and ``World`` tokens.
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
            In general, tokenize method that generates small tokens increases
         
     | 
| 
      
 91 
     | 
    
         
            +
            recall but decreases precision. Tokenize method that generates large
         
     | 
| 
      
 92 
     | 
    
         
            +
            tokens increases precision but decreases recall.
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
            For example, we can find ``Hello World`` and ``A or B`` by ``or`` with
         
     | 
| 
      
 95 
     | 
    
         
            +
            bigram tokenize method. ``Hello World`` is a noise for people who
         
     | 
| 
      
 96 
     | 
    
         
            +
            want to search "logical and". It means that precision is
         
     | 
| 
      
 97 
     | 
    
         
            +
            decreased. But recall is increased.
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
            We can find only ``A or B`` by ``or`` with white-space-separate
         
     | 
| 
      
 100 
     | 
    
         
            +
            tokenize method. Because ``World`` is tokenized to one token ``World``
         
     | 
| 
      
 101 
     | 
    
         
            +
            with white-space-separate tokenize method. It means that precision is
         
     | 
| 
      
 102 
     | 
    
         
            +
            increased for people who want to search "logical and". But recall is
         
     | 
| 
      
 103 
     | 
    
         
            +
            decreased because ``Hello World`` that contains ``or`` isn't found.
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
            Built-in tokenizers
         
     | 
| 
      
 106 
     | 
    
         
            +
            --------------------
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
            Here is a list of built-in tokenizers:
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
              * ``TokenBigram``
         
     | 
| 
      
 111 
     | 
    
         
            +
              * ``TokenBigramSplitSymbol``
         
     | 
| 
      
 112 
     | 
    
         
            +
              * ``TokenBigramSplitSymbolAlpha``
         
     | 
| 
      
 113 
     | 
    
         
            +
              * ``TokenBigramSplitSymbolAlphaDigit``
         
     | 
| 
      
 114 
     | 
    
         
            +
              * ``TokenBigramIgnoreBlank``
         
     | 
| 
      
 115 
     | 
    
         
            +
              * ``TokenBigramIgnoreBlankSplitSymbol``
         
     | 
| 
      
 116 
     | 
    
         
            +
              * ``TokenBigramIgnoreBlankSplitSymbolAlpha``
         
     | 
| 
      
 117 
     | 
    
         
            +
              * ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit``
         
     | 
| 
      
 118 
     | 
    
         
            +
              * ``TokenUnigram``
         
     | 
| 
      
 119 
     | 
    
         
            +
              * ``TokenTrigram``
         
     | 
| 
      
 120 
     | 
    
         
            +
              * ``TokenDelimit``
         
     | 
| 
      
 121 
     | 
    
         
            +
              * ``TokenDelimitNull``
         
     | 
| 
      
 122 
     | 
    
         
            +
              * ``TokenMecab``
         
     | 
| 
      
 123 
     | 
    
         
            +
              * ``TokenRegexp``
         
     | 
| 
      
 124 
     | 
    
         
            +
             
     | 
| 
      
 125 
     | 
    
         
            +
            .. _token-bigram:
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
            ``TokenBigram``
         
     | 
| 
      
 128 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
            ``TokenBigram`` is a bigram based tokenizer. It's recommended to use
         
     | 
| 
      
 131 
     | 
    
         
            +
            this tokenizer for most cases.
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
            Bigram tokenize method tokenizes a text to two adjacent characters
         
     | 
| 
      
 134 
     | 
    
         
            +
            tokens. For example, ``Hello`` is tokenized to the following tokens:
         
     | 
| 
      
 135 
     | 
    
         
            +
             
     | 
| 
      
 136 
     | 
    
         
            +
              * ``He``
         
     | 
| 
      
 137 
     | 
    
         
            +
              * ``el``
         
     | 
| 
      
 138 
     | 
    
         
            +
              * ``ll``
         
     | 
| 
      
 139 
     | 
    
         
            +
              * ``lo``
         
     | 
| 
      
 140 
     | 
    
         
            +
             
     | 
| 
      
 141 
     | 
    
         
            +
            Bigram tokenize method is good for recall because you can find all
         
     | 
| 
      
 142 
     | 
    
         
            +
            texts by a query that consists of two or more characters.
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
            In general, you can't find all texts by a query that consists of one
         
     | 
| 
      
 145 
     | 
    
         
            +
            character because one character token doesn't exist. But you can find
         
     | 
| 
      
 146 
     | 
    
         
            +
            all texts by a query that consists of one character in Groonga. Because
         
     | 
| 
      
 147 
     | 
    
         
            +
            Groonga finds tokens that start with query by predictive search. For
         
     | 
| 
      
 148 
     | 
    
         
            +
            example, Groonga can find ``ll`` and ``lo`` tokens by ``l`` query.
         
     | 
| 
      
 149 
     | 
    
         
            +
             
     | 
| 
      
 150 
     | 
    
         
            +
            Bigram tokenize method isn't good for precision because you can find
         
     | 
| 
      
 151 
     | 
    
         
            +
            texts that include the query inside a word. For example, you can find ``world``
         
     | 
| 
      
 152 
     | 
    
         
            +
            by ``or``. This is more sensitive for ASCII only languages rather than
         
     | 
| 
      
 153 
     | 
    
         
            +
            non-ASCII languages. ``TokenBigram`` has solution for this problem
         
     | 
| 
      
 154 
     | 
    
         
            +
            described below.
         
     | 
| 
      
 155 
     | 
    
         
            +
             
     | 
| 
      
 156 
     | 
    
         
            +
            ``TokenBigram`` behavior is different when it works with any
         
     | 
| 
      
 157 
     | 
    
         
            +
            :doc:`/reference/normalizers`.
         
     | 
| 
      
 158 
     | 
    
         
            +
             
     | 
| 
      
 159 
     | 
    
         
            +
            If no normalizer is used, ``TokenBigram`` uses pure bigram (all tokens
         
     | 
| 
      
 160 
     | 
    
         
            +
            except the last token have two characters) tokenize method:
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 163 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-no-normalizer.log
         
     | 
| 
      
 164 
     | 
    
         
            +
            .. tokenize TokenBigram "Hello World"
         
     | 
| 
      
 165 
     | 
    
         
            +
             
     | 
| 
      
 166 
     | 
    
         
            +
            If normalizer is used, ``TokenBigram`` uses white-space-separate like
         
     | 
| 
      
 167 
     | 
    
         
            +
            tokenize method for ASCII characters. ``TokenBigram`` uses bigram
         
     | 
| 
      
 168 
     | 
    
         
            +
            tokenize method for non-ASCII characters.
         
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
            You may be confused with this combined behavior. But it's reasonable
         
     | 
| 
      
 171 
     | 
    
         
            +
            for most use cases such as English text (only ASCII characters) and
         
     | 
| 
      
 172 
     | 
    
         
            +
            Japanese text (ASCII and non-ASCII characters are mixed).
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
      
 174 
     | 
    
         
            +
            Most languages that consist of only ASCII characters use white-space for
         
     | 
| 
      
 175 
     | 
    
         
            +
            word separator. White-space-separate tokenize method is suitable for
         
     | 
| 
      
 176 
     | 
    
         
            +
            the case.
         
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
      
 178 
     | 
    
         
            +
            Languages that consist of non-ASCII characters don't use white-space for
         
     | 
| 
      
 179 
     | 
    
         
            +
            word separator. Bigram tokenize method is suitable for the case.
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
            Mixed tokenize method is suitable for mixed language case.
         
     | 
| 
      
 182 
     | 
    
         
            +
             
     | 
| 
      
 183 
     | 
    
         
            +
            If you want to use bigram tokenize method for ASCII character, see
         
     | 
| 
      
 184 
     | 
    
         
            +
            ``TokenBigramSplitXXX`` type tokenizers such as
         
     | 
| 
      
 185 
     | 
    
         
            +
            :ref:`token-bigram-split-symbol-alpha`.
         
     | 
| 
      
 186 
     | 
    
         
            +
             
     | 
| 
      
 187 
     | 
    
         
            +
            Let's confirm ``TokenBigram`` behavior by example.
         
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
            ``TokenBigram`` uses one or more white-spaces as token delimiter for
         
     | 
| 
      
 190 
     | 
    
         
            +
            ASCII characters:
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 193 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-ascii-and-white-space-with-normalizer.log
         
     | 
| 
      
 194 
     | 
    
         
            +
            .. tokenize TokenBigram "Hello World" NormalizerAuto
         
     | 
| 
      
 195 
     | 
    
         
            +
             
     | 
| 
      
 196 
     | 
    
         
            +
            ``TokenBigram`` uses character type change as token delimiter for
         
     | 
| 
      
 197 
     | 
    
         
            +
            ASCII characters. Character type is one of the following:
         
     | 
| 
      
 198 
     | 
    
         
            +
             
     | 
| 
      
 199 
     | 
    
         
            +
              * Alphabet
         
     | 
| 
      
 200 
     | 
    
         
            +
              * Digit
         
     | 
| 
      
 201 
     | 
    
         
            +
              * Symbol (such as ``(``, ``)`` and ``!``)
         
     | 
| 
      
 202 
     | 
    
         
            +
              * Hiragana
         
     | 
| 
      
 203 
     | 
    
         
            +
              * Katakana
         
     | 
| 
      
 204 
     | 
    
         
            +
              * Kanji
         
     | 
| 
      
 205 
     | 
    
         
            +
              * Others
         
     | 
| 
      
 206 
     | 
    
         
            +
             
     | 
| 
      
 207 
     | 
    
         
            +
            The following example shows two token delimiters:
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
              * between ``100`` (digits) and ``cents`` (alphabets)
         
     | 
| 
      
 210 
     | 
    
         
            +
              * between ``cents`` (alphabets) and ``!!!`` (symbols)
         
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 213 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-ascii-and-character-type-change-with-normalizer.log
         
     | 
| 
      
 214 
     | 
    
         
            +
            .. tokenize TokenBigram "100cents!!!" NormalizerAuto
         
     | 
| 
      
 215 
     | 
    
         
            +
             
     | 
| 
      
 216 
     | 
    
         
            +
            Here is an example that ``TokenBigram`` uses bigram tokenize method
         
     | 
| 
      
 217 
     | 
    
         
            +
            for non-ASCII characters.
         
     | 
| 
      
 218 
     | 
    
         
            +
             
     | 
| 
      
 219 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 220 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-non-ascii-with-normalizer.log
         
     | 
| 
      
 221 
     | 
    
         
            +
            .. tokenize TokenBigram "日本語の勉強" NormalizerAuto
         
     | 
| 
      
 222 
     | 
    
         
            +
             
     | 
| 
      
 223 
     | 
    
         
            +
            .. _token-bigram-split-symbol:
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
            ``TokenBigramSplitSymbol``
         
     | 
| 
      
 226 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 227 
     | 
    
         
            +
             
     | 
| 
      
 228 
     | 
    
         
            +
            ``TokenBigramSplitSymbol`` is similar to :ref:`token-bigram`. The
         
     | 
| 
      
 229 
     | 
    
         
            +
            difference between them is symbol handling. ``TokenBigramSplitSymbol``
         
     | 
| 
      
 230 
     | 
    
         
            +
            tokenizes symbols by bigram tokenize method:
         
     | 
| 
      
 231 
     | 
    
         
            +
             
     | 
| 
      
 232 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 233 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-split-symbol-with-normalizer.log
         
     | 
| 
      
 234 
     | 
    
         
            +
            .. tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
         
     | 
| 
      
 235 
     | 
    
         
            +
             
     | 
| 
      
 236 
     | 
    
         
            +
            .. _token-bigram-split-symbol-alpha:
         
     | 
| 
      
 237 
     | 
    
         
            +
             
     | 
| 
      
 238 
     | 
    
         
            +
            ``TokenBigramSplitSymbolAlpha``
         
     | 
| 
      
 239 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 240 
     | 
    
         
            +
             
     | 
| 
      
 241 
     | 
    
         
            +
            ``TokenBigramSplitSymbolAlpha`` is similar to :ref:`token-bigram`. The
         
     | 
| 
      
 242 
     | 
    
         
            +
            difference between them is symbol and alphabet
         
     | 
| 
      
 243 
     | 
    
         
            +
            handling. ``TokenBigramSplitSymbolAlpha`` tokenizes symbols and
         
     | 
| 
      
 244 
     | 
    
         
            +
            alphabets by bigram tokenize method:
         
     | 
| 
      
 245 
     | 
    
         
            +
             
     | 
| 
      
 246 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 247 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-split-symbol-alpha-with-normalizer.log
         
     | 
| 
      
 248 
     | 
    
         
            +
            .. tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
         
     | 
| 
      
 249 
     | 
    
         
            +
             
     | 
| 
      
 250 
     | 
    
         
            +
            .. _token-bigram-split-symbol-alpha-digit:
         
     | 
| 
      
 251 
     | 
    
         
            +
             
     | 
| 
      
 252 
     | 
    
         
            +
            ``TokenBigramSplitSymbolAlphaDigit``
         
     | 
| 
      
 253 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 254 
     | 
    
         
            +
             
     | 
| 
      
 255 
     | 
    
         
            +
            ``TokenBigramSplitSymbolAlphaDigit`` is similar to
         
     | 
| 
      
 256 
     | 
    
         
            +
            :ref:`token-bigram`. The difference between them is symbol, alphabet
         
     | 
| 
      
 257 
     | 
    
         
            +
            and digit handling. ``TokenBigramSplitSymbolAlphaDigit`` tokenizes
         
     | 
| 
      
 258 
     | 
    
         
            +
            symbols, alphabets and digits by bigram tokenize method. It means that
         
     | 
| 
      
 259 
     | 
    
         
            +
            all characters are tokenized by bigram tokenize method:
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 262 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-split-symbol-alpha-digit-with-normalizer.log
         
     | 
| 
      
 263 
     | 
    
         
            +
            .. tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
         
     | 
| 
      
 264 
     | 
    
         
            +
             
     | 
| 
      
 265 
     | 
    
         
            +
            .. _token-bigram-ignore-blank:
         
     | 
| 
      
 266 
     | 
    
         
            +
             
     | 
| 
      
 267 
     | 
    
         
            +
            ``TokenBigramIgnoreBlank``
         
     | 
| 
      
 268 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
            ``TokenBigramIgnoreBlank`` is similar to :ref:`token-bigram`. The
         
     | 
| 
      
 271 
     | 
    
         
            +
            difference between them is blank handling. ``TokenBigramIgnoreBlank``
         
     | 
| 
      
 272 
     | 
    
         
            +
            ignores white-spaces in continuous symbols and non-ASCII characters.
         
     | 
| 
      
 273 
     | 
    
         
            +
             
     | 
| 
      
 274 
     | 
    
         
            +
            You can see the difference between them with ``日 本 語 ! ! !`` text because it
         
     | 
| 
      
 275 
     | 
    
         
            +
            has symbols and non-ASCII characters.
         
     | 
| 
      
 276 
     | 
    
         
            +
             
     | 
| 
      
 277 
     | 
    
         
            +
            Here is a result by :ref:`token-bigram` :
         
     | 
| 
      
 278 
     | 
    
         
            +
             
     | 
| 
      
 279 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 280 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces.log
         
     | 
| 
      
 281 
     | 
    
         
            +
            .. tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 282 
     | 
    
         
            +
             
     | 
| 
      
 283 
     | 
    
         
            +
            Here is a result by ``TokenBigramIgnoreBlank``:
         
     | 
| 
      
 284 
     | 
    
         
            +
             
     | 
| 
      
 285 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 286 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-with-white-spaces.log
         
     | 
| 
      
 287 
     | 
    
         
            +
            .. tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 288 
     | 
    
         
            +
             
     | 
| 
      
 289 
     | 
    
         
            +
            .. _token-bigram-ignore-blank-split-symbol:
         
     | 
| 
      
 290 
     | 
    
         
            +
             
     | 
| 
      
 291 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbol``
         
     | 
| 
      
 292 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 293 
     | 
    
         
            +
             
     | 
| 
      
 294 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbol`` is similar to
         
     | 
| 
      
 295 
     | 
    
         
            +
            :ref:`token-bigram`. The differences between them are the following:
         
     | 
| 
      
 296 
     | 
    
         
            +
             
     | 
| 
      
 297 
     | 
    
         
            +
              * Blank handling
         
     | 
| 
      
 298 
     | 
    
         
            +
              * Symbol handling
         
     | 
| 
      
 299 
     | 
    
         
            +
             
     | 
| 
      
 300 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbol`` ignores white-spaces in
         
     | 
| 
      
 301 
     | 
    
         
            +
            continuous symbols and non-ASCII characters.
         
     | 
| 
      
 302 
     | 
    
         
            +
             
     | 
| 
      
 303 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbol`` tokenizes symbols by bigram
         
     | 
| 
      
 304 
     | 
    
         
            +
            tokenize method.
         
     | 
| 
      
 305 
     | 
    
         
            +
             
     | 
| 
      
 306 
     | 
    
         
            +
            You can see the difference between them with ``日 本 語 ! ! !`` text because it
         
     | 
| 
      
 307 
     | 
    
         
            +
            has symbols and non-ASCII characters.
         
     | 
| 
      
 308 
     | 
    
         
            +
             
     | 
| 
      
 309 
     | 
    
         
            +
            Here is a result by :ref:`token-bigram` :
         
     | 
| 
      
 310 
     | 
    
         
            +
             
     | 
| 
      
 311 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 312 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces-and-symbol.log
         
     | 
| 
      
 313 
     | 
    
         
            +
            .. tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 314 
     | 
    
         
            +
             
     | 
| 
      
 315 
     | 
    
         
            +
            Here is a result by ``TokenBigramIgnoreBlankSplitSymbol``:
         
     | 
| 
      
 316 
     | 
    
         
            +
             
     | 
| 
      
 317 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 318 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-split-symbol-with-white-spaces-and-symbol.log
         
     | 
| 
      
 319 
     | 
    
         
            +
            .. tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
      
 321 
     | 
    
         
            +
            .. _token-bigram-ignore-blank-split-symbol-alpha:
         
     | 
| 
      
 322 
     | 
    
         
            +
             
     | 
| 
      
 323 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlpha``
         
     | 
| 
      
 324 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 325 
     | 
    
         
            +
             
     | 
| 
      
 326 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlpha`` is similar to
         
     | 
| 
      
 327 
     | 
    
         
            +
            :ref:`token-bigram`. The differences between them are the following:
         
     | 
| 
      
 328 
     | 
    
         
            +
             
     | 
| 
      
 329 
     | 
    
         
            +
              * Blank handling
         
     | 
| 
      
 330 
     | 
    
         
            +
              * Symbol and alphabet handling
         
     | 
| 
      
 331 
     | 
    
         
            +
             
     | 
| 
      
 332 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlpha`` ignores white-spaces in
         
     | 
| 
      
 333 
     | 
    
         
            +
            continuous symbols and non-ASCII characters.
         
     | 
| 
      
 334 
     | 
    
         
            +
             
     | 
| 
      
 335 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlpha`` tokenizes symbols and
         
     | 
| 
      
 336 
     | 
    
         
            +
            alphabets by bigram tokenize method.
         
     | 
| 
      
 337 
     | 
    
         
            +
             
     | 
| 
      
 338 
     | 
    
         
            +
            You can see the difference between them with ``Hello 日 本 語 ! ! !`` text because it
         
     | 
| 
      
 339 
     | 
    
         
            +
            has symbols and non-ASCII characters with white spaces and alphabets.
         
     | 
| 
      
 340 
     | 
    
         
            +
             
     | 
| 
      
 341 
     | 
    
         
            +
            Here is a result by :ref:`token-bigram` :
         
     | 
| 
      
 342 
     | 
    
         
            +
             
     | 
| 
      
 343 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 344 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces-and-symbol-and-alphabet.log
         
     | 
| 
      
 345 
     | 
    
         
            +
            .. tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 346 
     | 
    
         
            +
             
     | 
| 
      
 347 
     | 
    
         
            +
            Here is a result by ``TokenBigramIgnoreBlankSplitSymbolAlpha``:
         
     | 
| 
      
 348 
     | 
    
         
            +
             
     | 
| 
      
 349 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 350 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-split-symbol-with-white-spaces-and-symbol-and-alphabet.log
         
     | 
| 
      
 351 
     | 
    
         
            +
            .. tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 352 
     | 
    
         
            +
             
     | 
| 
      
 353 
     | 
    
         
            +
            .. _token-bigram-ignore-blank-split-symbol-alpha-digit:
         
     | 
| 
      
 354 
     | 
    
         
            +
             
     | 
| 
      
 355 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit``
         
     | 
| 
      
 356 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 357 
     | 
    
         
            +
             
     | 
| 
      
 358 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit`` is similar to
         
     | 
| 
      
 359 
     | 
    
         
            +
            :ref:`token-bigram`. The differences between them are the following:
         
     | 
| 
      
 360 
     | 
    
         
            +
             
     | 
| 
      
 361 
     | 
    
         
            +
              * Blank handling
         
     | 
| 
      
 362 
     | 
    
         
            +
              * Symbol, alphabet and digit handling
         
     | 
| 
      
 363 
     | 
    
         
            +
             
     | 
| 
      
 364 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit`` ignores white-spaces
         
     | 
| 
      
 365 
     | 
    
         
            +
            in continuous symbols and non-ASCII characters.
         
     | 
| 
      
 366 
     | 
    
         
            +
             
     | 
| 
      
 367 
     | 
    
         
            +
            ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit`` tokenizes symbols,
         
     | 
| 
      
 368 
     | 
    
         
            +
            alphabets and digits by bigram tokenize method. It means that all
         
     | 
| 
      
 369 
     | 
    
         
            +
            characters are tokenized by bigram tokenize method.
         
     | 
| 
      
 370 
     | 
    
         
            +
             
     | 
| 
      
 371 
     | 
    
         
            +
            You can see the difference between them with ``Hello 日 本 語 ! ! ! 777`` text
         
     | 
| 
      
 372 
     | 
    
         
            +
            because it has symbols and non-ASCII characters with white spaces,
         
     | 
| 
      
 373 
     | 
    
         
            +
            alphabets and digits.
         
     | 
| 
      
 374 
     | 
    
         
            +
             
     | 
| 
      
 375 
     | 
    
         
            +
            Here is a result by :ref:`token-bigram` :
         
     | 
| 
      
 376 
     | 
    
         
            +
             
     | 
| 
      
 377 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 378 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-with-white-spaces-and-symbol-and-alphabet-and-digit.log
         
     | 
| 
      
 379 
     | 
    
         
            +
            .. tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
         
     | 
| 
      
 380 
     | 
    
         
            +
             
     | 
| 
      
 381 
     | 
    
         
            +
            Here is a result by ``TokenBigramIgnoreBlankSplitSymbolAlphaDigit``:
         
     | 
| 
      
 382 
     | 
    
         
            +
             
     | 
| 
      
 383 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 384 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-bigram-ignore-blank-split-symbol-with-white-spaces-and-symbol-and-alphabet-digit.log
         
     | 
| 
      
 385 
     | 
    
         
            +
            .. tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
         
     | 
| 
      
 386 
     | 
    
         
            +
             
     | 
| 
      
 387 
     | 
    
         
            +
            .. _token-unigram:
         
     | 
| 
      
 388 
     | 
    
         
            +
             
     | 
| 
      
 389 
     | 
    
         
            +
            ``TokenUnigram``
         
     | 
| 
      
 390 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^
         
     | 
| 
      
 391 
     | 
    
         
            +
             
     | 
| 
      
 392 
     | 
    
         
            +
            ``TokenUnigram`` is similar to :ref:`token-bigram`. The difference
         
     | 
| 
      
 393 
     | 
    
         
            +
            between them is token unit. :ref:`token-bigram` uses 2 characters per
         
     | 
| 
      
 394 
     | 
    
         
            +
            token. ``TokenUnigram`` uses 1 character per token.
         
     | 
| 
      
 395 
     | 
    
         
            +
             
     | 
| 
      
 396 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 397 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-unigram.log
         
     | 
| 
      
 398 
     | 
    
         
            +
            .. tokenize TokenUnigram "100cents!!!" NormalizerAuto
         
     | 
| 
      
 399 
     | 
    
         
            +
             
     | 
| 
      
 400 
     | 
    
         
            +
            .. _token-trigram:
         
     | 
| 
      
 401 
     | 
    
         
            +
             
     | 
| 
      
 402 
     | 
    
         
            +
            ``TokenTrigram``
         
     | 
| 
      
 403 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^
         
     | 
| 
      
 404 
     | 
    
         
            +
             
     | 
| 
      
 405 
     | 
    
         
            +
            ``TokenTrigram`` is similar to :ref:`token-bigram`. The difference
         
     | 
| 
      
 406 
     | 
    
         
            +
            between them is token unit. :ref:`token-bigram` uses 2 characters per
         
     | 
| 
      
 407 
     | 
    
         
            +
            token. ``TokenTrigram`` uses 3 characters per token.
         
     | 
| 
      
 408 
     | 
    
         
            +
             
     | 
| 
      
 409 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 410 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-trigram.log
         
     | 
| 
      
 411 
     | 
    
         
            +
            .. tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
         
     | 
| 
      
 412 
     | 
    
         
            +
             
     | 
| 
      
 413 
     | 
    
         
            +
            .. _token-delimit:
         
     | 
| 
      
 414 
     | 
    
         
            +
             
     | 
| 
      
 415 
     | 
    
         
            +
            ``TokenDelimit``
         
     | 
| 
      
 416 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^
         
     | 
| 
      
 417 
     | 
    
         
            +
             
     | 
| 
      
 418 
     | 
    
         
            +
            ``TokenDelimit`` extracts tokens by splitting on one or more space
         
     | 
| 
      
 419 
     | 
    
         
            +
            characters (``U+0020``). For example, ``Hello World`` is tokenized to
         
     | 
| 
      
 420 
     | 
    
         
            +
            ``Hello`` and ``World``.
         
     | 
| 
      
 421 
     | 
    
         
            +
             
     | 
| 
      
 422 
     | 
    
         
            +
            ``TokenDelimit`` is suitable for tag text. You can extract ``groonga``
         
     | 
| 
      
 423 
     | 
    
         
            +
            and ``full-text-search`` and ``http`` as tags from ``groonga
         
     | 
| 
      
 424 
     | 
    
         
            +
            full-text-search http``.
         
     | 
| 
      
 425 
     | 
    
         
            +
             
     | 
| 
      
 426 
     | 
    
         
            +
            Here is an example of ``TokenDelimit``:
         
     | 
| 
      
 427 
     | 
    
         
            +
             
     | 
| 
      
 428 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 429 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-delimit.log
         
     | 
| 
      
 430 
     | 
    
         
            +
            .. tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
         
     | 
| 
      
 431 
     | 
    
         
            +
             
     | 
| 
      
 432 
     | 
    
         
            +
            .. _token-delimit-null:
         
     | 
| 
      
 433 
     | 
    
         
            +
             
     | 
| 
      
 434 
     | 
    
         
            +
            ``TokenDelimitNull``
         
     | 
| 
      
 435 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^^^^^^
         
     | 
| 
      
 436 
     | 
    
         
            +
             
     | 
| 
      
 437 
     | 
    
         
            +
            ``TokenDelimitNull`` is similar to :ref:`token-delimit`. The
         
     | 
| 
      
 438 
     | 
    
         
            +
            difference between them is separator character. :ref:`token-delimit`
         
     | 
| 
      
 439 
     | 
    
         
            +
            uses space character (``U+0020``) but ``TokenDelimitNull`` uses NUL
         
     | 
| 
      
 440 
     | 
    
         
            +
            character (``U+0000``).
         
     | 
| 
      
 441 
     | 
    
         
            +
             
     | 
| 
      
 442 
     | 
    
         
            +
            ``TokenDelimitNull`` is also suitable for tag text.
         
     | 
| 
      
 443 
     | 
    
         
            +
             
     | 
| 
      
 444 
     | 
    
         
            +
            Here is an example of ``TokenDelimitNull``:
         
     | 
| 
      
 445 
     | 
    
         
            +
             
     | 
| 
      
 446 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 447 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-delimit-null.log
         
     | 
| 
      
 448 
     | 
    
         
            +
            .. tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
         
     | 
| 
      
 449 
     | 
    
         
            +
             
     | 
| 
      
 450 
     | 
    
         
            +
            .. _token-mecab:
         
     | 
| 
      
 451 
     | 
    
         
            +
             
     | 
| 
      
 452 
     | 
    
         
            +
            ``TokenMecab``
         
     | 
| 
      
 453 
     | 
    
         
            +
            ^^^^^^^^^^^^^^
         
     | 
| 
      
 454 
     | 
    
         
            +
             
     | 
| 
      
 455 
     | 
    
         
            +
            ``TokenMecab`` is a tokenizer based on `MeCab
         
     | 
| 
      
 456 
     | 
    
         
            +
            <http://mecab.sourceforge.net/>`_ part-of-speech and
         
     | 
| 
      
 457 
     | 
    
         
            +
            morphological analyzer.
         
     | 
| 
      
 458 
     | 
    
         
            +
             
     | 
| 
      
 459 
     | 
    
         
            +
            MeCab doesn't depend on Japanese. You can use MeCab for other
         
     | 
| 
      
 460 
     | 
    
         
            +
            languages by creating dictionary for the languages. You can use `NAIST
         
     | 
| 
      
 461 
     | 
    
         
            +
            Japanese Dictionary <http://sourceforge.jp/projects/naist-jdic/>`_
         
     | 
| 
      
 462 
     | 
    
         
            +
            for Japanese.
         
     | 
| 
      
 463 
     | 
    
         
            +
             
     | 
| 
      
 464 
     | 
    
         
            +
            ``TokenMecab`` is good for precision rather than recall. You can find
         
     | 
| 
      
 465 
     | 
    
         
            +
            ``東京都`` and ``京都`` texts by ``京都`` query with
         
     | 
| 
      
 466 
     | 
    
         
            +
            :ref:`token-bigram` but ``東京都`` isn't expected. You can find only
         
     | 
| 
      
 467 
     | 
    
         
            +
            ``京都`` text by ``京都`` query with ``TokenMecab``.
         
     | 
| 
      
 468 
     | 
    
         
            +
             
     | 
| 
      
 469 
     | 
    
         
            +
            If you want to support neologisms, you need to keep updating your
         
     | 
| 
      
 470 
     | 
    
         
            +
            MeCab dictionary. This has a maintenance cost. (:ref:`token-bigram` doesn't
         
     | 
| 
      
 471 
     | 
    
         
            +
            require dictionary maintenance because :ref:`token-bigram` doesn't use
         
     | 
| 
      
 472 
     | 
    
         
            +
            dictionary.) `mecab-ipadic-NEologd : Neologism dictionary for MeCab
         
     | 
| 
      
 473 
     | 
    
         
            +
            <https://github.com/neologd/mecab-ipadic-neologd>`_ may help you.
         
     | 
| 
      
 474 
     | 
    
         
            +
             
     | 
| 
      
 475 
     | 
    
         
            +
            Here is an example of ``TokenMecab``. ``東京都`` is tokenized to ``東京``
         
     | 
| 
      
 476 
     | 
    
         
            +
            and ``都``. They don't include ``京都``:
         
     | 
| 
      
 477 
     | 
    
         
            +
             
     | 
| 
      
 478 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 479 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-mecab.log
         
     | 
| 
      
 480 
     | 
    
         
            +
            .. tokenize TokenMecab "東京都"
         
     | 
| 
      
 481 
     | 
    
         
            +
             
     | 
| 
      
 482 
     | 
    
         
            +
            .. _token-regexp:
         
     | 
| 
      
 483 
     | 
    
         
            +
             
     | 
| 
      
 484 
     | 
    
         
            +
            ``TokenRegexp``
         
     | 
| 
      
 485 
     | 
    
         
            +
            ^^^^^^^^^^^^^^^
         
     | 
| 
      
 486 
     | 
    
         
            +
             
     | 
| 
      
 487 
     | 
    
         
            +
            .. versionadded:: 5.0.1
         
     | 
| 
      
 488 
     | 
    
         
            +
             
     | 
| 
      
 489 
     | 
    
         
            +
            .. caution::
         
     | 
| 
      
 490 
     | 
    
         
            +
             
     | 
| 
      
 491 
     | 
    
         
            +
               This tokenizer is experimental. Specification may be changed.
         
     | 
| 
      
 492 
     | 
    
         
            +
             
     | 
| 
      
 493 
     | 
    
         
            +
            .. caution::
         
     | 
| 
      
 494 
     | 
    
         
            +
             
     | 
| 
      
 495 
     | 
    
         
            +
               This tokenizer can be used only with UTF-8. You can't use this
         
     | 
| 
      
 496 
     | 
    
         
            +
               tokenizer with EUC-JP, Shift_JIS and so on.
         
     | 
| 
      
 497 
     | 
    
         
            +
             
     | 
| 
      
 498 
     | 
    
         
            +
            ``TokenRegexp`` is a tokenizer for supporting regular expression
         
     | 
| 
      
 499 
     | 
    
         
            +
            search by index.
         
     | 
| 
      
 500 
     | 
    
         
            +
             
     | 
| 
      
 501 
     | 
    
         
            +
            In general, regular expression search is evaluated as sequential
         
     | 
| 
      
 502 
     | 
    
         
            +
            search. But the following cases can be evaluated as index search:
         
     | 
| 
      
 503 
     | 
    
         
            +
             
     | 
| 
      
 504 
     | 
    
         
            +
              * Literal only case such as ``hello``
         
     | 
| 
      
 505 
     | 
    
         
            +
              * The beginning of text and literal case such as ``\A/home/alice``
         
     | 
| 
      
 506 
     | 
    
         
            +
              * The end of text and literal case such as ``\.txt\z``
         
     | 
| 
      
 507 
     | 
    
         
            +
             
     | 
| 
      
 508 
     | 
    
         
            +
            In most cases, index search is faster than sequential search.
         
     | 
| 
      
 509 
     | 
    
         
            +
             
     | 
| 
      
 510 
     | 
    
         
            +
            ``TokenRegexp`` is based on bigram tokenize method. ``TokenRegexp``
         
     | 
| 
      
 511 
     | 
    
         
            +
            adds the beginning of text mark (``U+FFEF``) at the beginning of text
         
     | 
| 
      
 512 
     | 
    
         
            +
            and the end of text mark (``U+FFF0``) to the end of text when you
         
     | 
| 
      
 513 
     | 
    
         
            +
            index text:
         
     | 
| 
      
 514 
     | 
    
         
            +
             
     | 
| 
      
 515 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 516 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-regexp-add.log
         
     | 
| 
      
 517 
     | 
    
         
            +
            .. tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
         
     | 
| 
      
 518 
     | 
    
         
            +
             
     | 
| 
      
 519 
     | 
    
         
            +
            The beginning of text mark is used for the beginning of text search by
         
     | 
| 
      
 520 
     | 
    
         
            +
            ``\A``. If you use ``TokenRegexp`` for tokenizing query,
         
     | 
| 
      
 521 
     | 
    
         
            +
            ``TokenRegexp`` adds the beginning of text mark (``U+FFEF``) as the
         
     | 
| 
      
 522 
     | 
    
         
            +
            first token. Since the beginning of text mark must appear first,
         
     | 
| 
      
 523 
     | 
    
         
            +
            you can get results of the beginning of text search.
         
     | 
| 
      
 524 
     | 
    
         
            +
             
     | 
| 
      
 525 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 526 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-regexp-get-beginning-of-text.log
         
     | 
| 
      
 527 
     | 
    
         
            +
            .. tokenize TokenRegexp "\\A/home/alice/" NormalizerAuto --mode GET
         
     | 
| 
      
 528 
     | 
    
         
            +
             
     | 
| 
      
 529 
     | 
    
         
            +
            The end of text mark is used for the end of text search by ``\z``.
         
     | 
| 
      
 530 
     | 
    
         
            +
            If you use ``TokenRegexp`` for tokenizing query, ``TokenRegexp`` adds
         
     | 
| 
      
 531 
     | 
    
         
            +
            the end of text mark (``U+FFF0``) as the last token. The end of text
         
     | 
| 
      
 532 
     | 
    
         
            +
            mark must appear at the end, so you can get results of the end of
         
     | 
| 
      
 533 
     | 
    
         
            +
            text search.
         
     | 
| 
      
 534 
     | 
    
         
            +
             
     | 
| 
      
 535 
     | 
    
         
            +
            .. groonga-command
         
     | 
| 
      
 536 
     | 
    
         
            +
            .. include:: ../example/reference/tokenizers/token-regexp-get-end-of-text.log
         
     | 
| 
      
 537 
     | 
    
         
            +
            .. tokenize TokenRegexp "\\.txt\\z" NormalizerAuto --mode GET
         
     | 
| 
       24 
538 
     | 
    
         |