rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
 - data/.yardopts +1 -0
 - data/Rakefile +1 -16
 - data/example/bookmark.rb +1 -6
 - data/example/index-html.rb +0 -1
 - data/ext/groonga/extconf.rb +4 -7
 - data/ext/groonga/rb-grn-array.c +1 -1
 - data/ext/groonga/rb-grn-column.c +33 -67
 - data/ext/groonga/rb-grn-context.c +5 -5
 - data/ext/groonga/rb-grn-database.c +2 -2
 - data/ext/groonga/rb-grn-double-array-trie.c +4 -2
 - data/ext/groonga/rb-grn-encoding-support.c +7 -1
 - data/ext/groonga/rb-grn-equal-operator.c +85 -0
 - data/ext/groonga/rb-grn-exception.c +17 -0
 - data/ext/groonga/rb-grn-expression.c +85 -43
 - data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
 - data/ext/groonga/rb-grn-greater-operator.c +85 -0
 - data/ext/groonga/rb-grn-hash.c +1 -1
 - data/ext/groonga/rb-grn-index-column.c +150 -11
 - data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
 - data/ext/groonga/rb-grn-less-operator.c +85 -0
 - data/ext/groonga/rb-grn-logger.c +5 -5
 - data/ext/groonga/rb-grn-match-operator.c +86 -0
 - data/ext/groonga/rb-grn-normalizer.c +8 -1
 - data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
 - data/ext/groonga/rb-grn-object.c +170 -36
 - data/ext/groonga/rb-grn-operator.c +395 -172
 - data/ext/groonga/rb-grn-patricia-trie.c +10 -8
 - data/ext/groonga/rb-grn-plugin.c +51 -3
 - data/ext/groonga/rb-grn-prefix-operator.c +86 -0
 - data/ext/groonga/rb-grn-procedure-type.c +4 -0
 - data/ext/groonga/rb-grn-query-logger.c +4 -4
 - data/ext/groonga/rb-grn-regexp-operator.c +85 -0
 - data/ext/groonga/rb-grn-snippet.c +1 -1
 - data/ext/groonga/rb-grn-table-key-support.c +9 -5
 - data/ext/groonga/rb-grn-table.c +52 -66
 - data/ext/groonga/rb-grn-type.c +1 -1
 - data/ext/groonga/rb-grn-utils.c +22 -3
 - data/ext/groonga/rb-grn.h +31 -4
 - data/ext/groonga/rb-groonga.c +9 -9
 - data/lib/1.9/groonga.so +0 -0
 - data/lib/2.0/groonga.so +0 -0
 - data/lib/2.1/groonga.so +0 -0
 - data/lib/2.2/groonga.so +0 -0
 - data/lib/groonga/context.rb +31 -0
 - data/lib/groonga/expression-builder.rb +14 -1
 - data/lib/groonga/record.rb +10 -8
 - data/lib/groonga/schema.rb +3 -1
 - data/rroonga-build.rb +2 -2
 - data/rroonga.gemspec +3 -3
 - data/test/groonga-test-utils.rb +4 -0
 - data/test/test-column.rb +28 -26
 - data/test/test-exception.rb +1 -0
 - data/test/test-expression-builder.rb +83 -1
 - data/test/test-expression.rb +80 -48
 - data/test/test-index-column.rb +102 -29
 - data/test/test-normalizer.rb +35 -29
 - data/test/test-operator.rb +214 -0
 - data/test/test-plugin.rb +24 -6
 - data/test/test-procedure.rb +29 -0
 - data/test/test-schema-type.rb +14 -0
 - data/test/test-table-select-mecab.rb +1 -4
 - data/test/test-table.rb +7 -0
 - data/test/test-token-regexp.rb +30 -0
 - data/test/test-type.rb +24 -0
 - data/vendor/local/bin/grndb.exe +0 -0
 - data/vendor/local/bin/groonga-benchmark.exe +0 -0
 - data/vendor/local/bin/groonga.exe +0 -0
 - data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
 - data/vendor/local/bin/libgroonga-0.dll +0 -0
 - data/vendor/local/bin/libmecab-1.dll +0 -0
 - data/vendor/local/bin/libmsgpack-3.dll +0 -0
 - data/vendor/local/bin/libmsgpackc-2.dll +0 -0
 - data/vendor/local/bin/libonig-5.dll +0 -0
 - data/vendor/local/bin/libstdc++-6.dll +0 -0
 - data/vendor/local/bin/lz4.exe +0 -0
 - data/vendor/local/bin/lz4c.exe +0 -0
 - data/vendor/local/bin/lz4cat +0 -0
 - data/vendor/local/bin/mecab-config +2 -2
 - data/vendor/local/bin/mecab.exe +0 -0
 - data/vendor/local/bin/onig-config +1 -1
 - data/vendor/local/bin/zlib1.dll +0 -0
 - data/vendor/local/etc/groonga/groonga.conf +1 -1
 - data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
 - data/vendor/local/include/groonga/groonga.h +1 -0
 - data/vendor/local/include/groonga/groonga/expr.h +2 -0
 - data/vendor/local/include/groonga/groonga/groonga.h +32 -5
 - data/vendor/local/include/groonga/groonga/ii.h +7 -0
 - data/vendor/local/include/groonga/groonga/obj.h +37 -0
 - data/vendor/local/include/groonga/groonga/scorer.h +95 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
 - data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
 - data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
 - data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
 - data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
 - data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
 - data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
 - data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
 - data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
 - data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
 - data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
 - data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
 - data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
 - data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
 - data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
 - data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
 - data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
 - data/vendor/local/lib/libgroonga.a +0 -0
 - data/vendor/local/lib/libgroonga.dll.a +0 -0
 - data/vendor/local/lib/libgroonga.la +2 -2
 - data/vendor/local/lib/liblz4.a +0 -0
 - data/vendor/local/lib/liblz4.dll +0 -0
 - data/vendor/local/lib/liblz4.dll.1 +0 -0
 - data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
 - data/vendor/local/lib/libmecab.a +0 -0
 - data/vendor/local/lib/libmecab.dll.a +0 -0
 - data/vendor/local/lib/libmecab.la +2 -2
 - data/vendor/local/lib/libmsgpack.a +0 -0
 - data/vendor/local/lib/libmsgpack.dll.a +0 -0
 - data/vendor/local/lib/libmsgpack.la +2 -2
 - data/vendor/local/lib/libmsgpackc.a +0 -0
 - data/vendor/local/lib/libmsgpackc.dll.a +0 -0
 - data/vendor/local/lib/libmsgpackc.la +2 -2
 - data/vendor/local/lib/libonig.a +0 -0
 - data/vendor/local/lib/libonig.dll.a +0 -0
 - data/vendor/local/lib/libonig.la +2 -2
 - data/vendor/local/lib/libz.a +0 -0
 - data/vendor/local/lib/libz.dll.a +0 -0
 - data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
 - data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
 - data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
 - data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
 - data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
 - data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
 - data/vendor/local/sbin/groonga-httpd-restart +1 -1
 - data/vendor/local/sbin/groonga-httpd.exe +0 -0
 - data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
 - data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
 - data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
 - data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
 - data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
 - data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
 - data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
 - data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
 - data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
 - data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
 - data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
 - data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
 - data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
 - data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
 - data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
 - data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
 - data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
 - data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
 - data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
 - data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
 - data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
 - data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
 - data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
 - data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
 - data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
 - data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
 - data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
 - data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
 - data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
 - data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
 - data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
 - data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
 - data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
 - data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
 - data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
 - data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
 - data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
 - data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
 - data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
 - data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
 - data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
 - data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
 - data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
 - data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
 - data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
 - data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
 - data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
 - data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
 - data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
 - data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
 - data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
 - data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
 - data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
 - data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
 - data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
 - data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
 - data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
 - data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
 - data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
 - data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
 - data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
 - data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
 - data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
 - data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
 - data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
 - data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
 - data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
 - data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
 - data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
 - data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
 - data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
 - data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
 - data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
 - data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
 - data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
 - data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
 - data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
 - data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
 - data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
 - data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
 - data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
 - data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
 - data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
 - data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
 - data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
 - data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
 - data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
 - data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
 - data/vendor/local/share/license/mruby/README.md +2 -2
 - data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
 - data/vendor/local/share/man/man1/groonga.1 +7210 -3029
 - metadata +75 -11
 - data/doc/text/news.textile +0 -1217
 - data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
 - data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
 - data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
 - data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
 
| 
         @@ -7,7 +7,7 @@ 
     | 
|
| 
       7 
7 
     | 
    
         
             
              <head>
         
     | 
| 
       8 
8 
     | 
    
         
             
                <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
                <title>7.8. Tokenizers — Groonga v5.0. 
     | 
| 
      
 10 
     | 
    
         
            +
                <title>7.8. Tokenizers — Groonga v5.0.1-42-g4d10df1 documentation</title>
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
12 
     | 
    
         
             
                <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
         
     | 
| 
       13 
13 
     | 
    
         
             
                <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
         
     | 
| 
         @@ -15,7 +15,7 @@ 
     | 
|
| 
       15 
15 
     | 
    
         
             
                <script type="text/javascript">
         
     | 
| 
       16 
16 
     | 
    
         
             
                  var DOCUMENTATION_OPTIONS = {
         
     | 
| 
       17 
17 
     | 
    
         
             
                    URL_ROOT:    '../',
         
     | 
| 
       18 
     | 
    
         
            -
                    VERSION:     '5.0. 
     | 
| 
      
 18 
     | 
    
         
            +
                    VERSION:     '5.0.1-42-g4d10df1',
         
     | 
| 
       19 
19 
     | 
    
         
             
                    COLLAPSE_INDEX: false,
         
     | 
| 
       20 
20 
     | 
    
         
             
                    FILE_SUFFIX: '.html',
         
     | 
| 
       21 
21 
     | 
    
         
             
                    HAS_SOURCE:  true
         
     | 
| 
         @@ -25,12 +25,12 @@ 
     | 
|
| 
       25 
25 
     | 
    
         
             
                <script type="text/javascript" src="../_static/underscore.js"></script>
         
     | 
| 
       26 
26 
     | 
    
         
             
                <script type="text/javascript" src="../_static/doctools.js"></script>
         
     | 
| 
       27 
27 
     | 
    
         
             
                <link rel="shortcut icon" href="../_static/favicon.ico"/>
         
     | 
| 
       28 
     | 
    
         
            -
                <link rel="top" title="Groonga v5.0. 
     | 
| 
      
 28 
     | 
    
         
            +
                <link rel="top" title="Groonga v5.0.1-42-g4d10df1 documentation" href="../index.html" />
         
     | 
| 
       29 
29 
     | 
    
         
             
                <link rel="up" title="7. Reference manual" href="../reference.html" />
         
     | 
| 
       30 
30 
     | 
    
         
             
                <link rel="next" title="7.9. Token filters" href="token_filters.html" />
         
     | 
| 
       31 
31 
     | 
    
         
             
                <link rel="prev" title="7.7. Normalizers" href="normalizers.html" /> 
         
     | 
| 
       32 
32 
     | 
    
         
             
              </head>
         
     | 
| 
       33 
     | 
    
         
            -
              <body 
     | 
| 
      
 33 
     | 
    
         
            +
              <body>
         
     | 
| 
       34 
34 
     | 
    
         
             
            <div class="header">
         
     | 
| 
       35 
35 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       36 
36 
     | 
    
         
             
                <a id="top-link" href="../index.html">
         
     | 
| 
         @@ -48,7 +48,7 @@ 
     | 
|
| 
       48 
48 
     | 
    
         
             
            </div>
         
     | 
| 
       49 
49 
     | 
    
         | 
| 
       50 
50 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
                <div class="related" 
     | 
| 
      
 51 
     | 
    
         
            +
                <div class="related">
         
     | 
| 
       52 
52 
     | 
    
         
             
                  <h3>Navigation</h3>
         
     | 
| 
       53 
53 
     | 
    
         
             
                  <ul>
         
     | 
| 
       54 
54 
     | 
    
         
             
                    <li class="right" style="margin-right: 10px">
         
     | 
| 
         @@ -60,7 +60,7 @@ 
     | 
|
| 
       60 
60 
     | 
    
         
             
                    <li class="right" >
         
     | 
| 
       61 
61 
     | 
    
         
             
                      <a href="normalizers.html" title="7.7. Normalizers"
         
     | 
| 
       62 
62 
     | 
    
         
             
                         accesskey="P">previous</a> |</li>
         
     | 
| 
       63 
     | 
    
         
            -
                    <li><a href="../index.html">Groonga v5.0. 
     | 
| 
      
 63 
     | 
    
         
            +
                    <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> »</li>
         
     | 
| 
       64 
64 
     | 
    
         
             
                      <li><a href="../reference.html" accesskey="U">7. Reference manual</a> »</li> 
         
     | 
| 
       65 
65 
     | 
    
         
             
                  </ul>
         
     | 
| 
       66 
66 
     | 
    
         
             
                </div>  
         
     | 
| 
         @@ -68,48 +68,1408 @@ 
     | 
|
| 
       68 
68 
     | 
    
         
             
                <div class="document">
         
     | 
| 
       69 
69 
     | 
    
         
             
                  <div class="documentwrapper">
         
     | 
| 
       70 
70 
     | 
    
         
             
                    <div class="bodywrapper">
         
     | 
| 
       71 
     | 
    
         
            -
                      <div class="body" 
     | 
| 
      
 71 
     | 
    
         
            +
                      <div class="body">
         
     | 
| 
       72 
72 
     | 
    
         | 
| 
       73 
73 
     | 
    
         
             
              <div class="section" id="tokenizers">
         
     | 
| 
       74 
74 
     | 
    
         
             
            <h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
         
     | 
| 
       75 
     | 
    
         
            -
            < 
     | 
| 
       76 
     | 
    
         
            -
            < 
     | 
| 
       77 
     | 
    
         
            -
            < 
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
            < 
     | 
| 
       80 
     | 
    
         
            -
            < 
     | 
| 
       81 
     | 
    
         
            -
            <li> 
     | 
| 
       82 
     | 
    
         
            -
            < 
     | 
| 
       83 
     | 
    
         
            -
            < 
     | 
| 
       84 
     | 
    
         
            -
            < 
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
            <li> 
     | 
| 
       88 
     | 
    
         
            -
            < 
     | 
| 
       89 
     | 
    
         
            -
            < 
     | 
| 
      
 75 
     | 
    
         
            +
            <div class="section" id="summary">
         
     | 
| 
      
 76 
     | 
    
         
            +
            <h2>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h2>
         
     | 
| 
      
 77 
     | 
    
         
            +
            <p>Groonga has a tokenizer module that tokenizes text. It is used in
         
     | 
| 
      
 78 
     | 
    
         
            +
            the following cases:</p>
         
     | 
| 
      
 79 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 80 
     | 
    
         
            +
            <div><ul>
         
     | 
| 
      
 81 
     | 
    
         
            +
            <li><p class="first">Indexing text</p>
         
     | 
| 
      
 82 
     | 
    
         
            +
            <div class="figure align-center">
         
     | 
| 
      
 83 
     | 
    
         
            +
            <a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
         
     | 
| 
      
 84 
     | 
    
         
            +
            <p class="caption">Tokenizer is used when indexing text.</p>
         
     | 
| 
      
 85 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 86 
     | 
    
         
            +
            </li>
         
     | 
| 
      
 87 
     | 
    
         
            +
            <li><p class="first">Searching by query</p>
         
     | 
| 
      
 88 
     | 
    
         
            +
            <div class="figure align-center">
         
     | 
| 
      
 89 
     | 
    
         
            +
            <a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
         
     | 
| 
      
 90 
     | 
    
         
            +
            <p class="caption">Tokenizer is used when searching by query.</p>
         
     | 
| 
      
 91 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 92 
     | 
    
         
            +
            </li>
         
     | 
| 
      
 93 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 94 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 95 
     | 
    
         
            +
            <p>The tokenizer is an important module for full-text search. You can change the
         
     | 
| 
      
 96 
     | 
    
         
            +
            trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
         
     | 
| 
      
 97 
     | 
    
         
            +
            tokenizer.</p>
         
     | 
| 
      
 98 
     | 
    
         
            +
            <p>Normally, <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> is a suitable tokenizer. If you don't
         
     | 
| 
      
 99 
     | 
    
         
            +
            know much about tokenizers, it's recommended that you choose
         
     | 
| 
      
 100 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>.</p>
         
     | 
| 
      
 101 
     | 
    
         
            +
            <p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> and
         
     | 
| 
      
 102 
     | 
    
         
            +
            <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a>. Here is an example to
         
     | 
| 
      
 103 
     | 
    
         
            +
            try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> tokenizer by
         
     | 
| 
      
 104 
     | 
    
         
            +
            <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a>:</p>
         
     | 
| 
      
 105 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 106 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
         
     | 
| 
      
 107 
     | 
    
         
            +
            # [
         
     | 
| 
      
 108 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 109 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 110 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 111 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 112 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 113 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 114 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 115 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 116 
     | 
    
         
            +
            #       "value": "He"
         
     | 
| 
      
 117 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 118 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 119 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 120 
     | 
    
         
            +
            #       "value": "el"
         
     | 
| 
      
 121 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 122 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 123 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 124 
     | 
    
         
            +
            #       "value": "ll"
         
     | 
| 
      
 125 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 126 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 127 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 128 
     | 
    
         
            +
            #       "value": "lo"
         
     | 
| 
      
 129 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 130 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 131 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 132 
     | 
    
         
            +
            #       "value": "o "
         
     | 
| 
      
 133 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 134 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 135 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 136 
     | 
    
         
            +
            #       "value": " W"
         
     | 
| 
      
 137 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 138 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 139 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 140 
     | 
    
         
            +
            #       "value": "Wo"
         
     | 
| 
      
 141 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 142 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 143 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 144 
     | 
    
         
            +
            #       "value": "or"
         
     | 
| 
      
 145 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 146 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 147 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 148 
     | 
    
         
            +
            #       "value": "rl"
         
     | 
| 
      
 149 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 150 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 151 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 152 
     | 
    
         
            +
            #       "value": "ld"
         
     | 
| 
      
 153 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 154 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 155 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 156 
     | 
    
         
            +
            #       "value": "d"
         
     | 
| 
      
 157 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 158 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 159 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 160 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 161 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 162 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 163 
     | 
    
         
            +
            <div class="section" id="what-is-tokenize">
         
     | 
| 
      
 164 
     | 
    
         
            +
            <h2>7.8.2. What is "tokenize"?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
         
     | 
| 
      
 165 
     | 
    
         
            +
            <p>"tokenize" is the process that extracts zero or more tokens from a
         
     | 
| 
      
 166 
     | 
    
         
            +
            text. There are some "tokenize" methods.</p>
         
     | 
| 
      
 167 
     | 
    
         
            +
            <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
         
     | 
| 
      
 168 
     | 
    
         
            +
            bigram tokenize method:</p>
         
     | 
| 
      
 169 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 170 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 171 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
         
     | 
| 
      
 172 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
         
     | 
| 
      
 173 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
         
     | 
| 
      
 174 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
         
     | 
| 
      
 175 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">o_</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
         
     | 
| 
      
 176 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">_W</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
         
     | 
| 
      
 177 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
         
     | 
| 
      
 178 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">or</span></tt></li>
         
     | 
| 
      
 179 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
         
     | 
| 
      
 180 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
         
     | 
| 
      
 181 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 182 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 183 
     | 
    
         
            +
            <p>In the above example, 10 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
         
     | 
| 
      
 184 
     | 
    
         
            +
            <span class="pre">World</span></tt>.</p>
         
     | 
| 
      
 185 
     | 
    
         
            +
            <p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
         
     | 
| 
      
 186 
     | 
    
         
            +
            white-space-separate tokenize method:</p>
         
     | 
| 
      
 187 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 188 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 189 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
         
     | 
| 
      
 190 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">World</span></tt></li>
         
     | 
| 
      
 191 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 192 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 193 
     | 
    
         
            +
            <p>In the above example, 2 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
         
     | 
| 
      
 194 
     | 
    
         
            +
            <span class="pre">World</span></tt>.</p>
         
     | 
| 
      
 195 
     | 
    
         
            +
            <p>A token is used as a search key. You can find indexed documents only by
         
     | 
| 
      
 196 
     | 
    
         
            +
            tokens that are extracted by the tokenize method in use. For example, you
         
     | 
| 
      
 197 
     | 
    
         
            +
            can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with bigram tokenize method but you
         
     | 
| 
      
 198 
     | 
    
         
            +
            can't find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with white-space-separate tokenize
         
     | 
| 
      
 199 
     | 
    
         
            +
            method, because the white-space-separate tokenize method doesn't extract
         
     | 
| 
      
 200 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">ll</span></tt> token. It just extracts <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt> tokens.</p>
         
     | 
| 
      
 201 
     | 
    
         
            +
            <p>In general, a tokenize method that generates small tokens increases
         
     | 
| 
      
 202 
     | 
    
         
            +
            recall but decreases precision. A tokenize method that generates large
         
     | 
| 
      
 203 
     | 
    
         
            +
            tokens increases precision but decreases recall.</p>
         
     | 
| 
      
 204 
     | 
    
         
            +
            <p>For example, we can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> and <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with
         
     | 
| 
      
 205 
     | 
    
         
            +
            bigram tokenize method. <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is noise for people who
         
     | 
| 
      
 206 
     | 
    
         
            +
            want to search "logical and". It means that precision is
         
     | 
| 
      
 207 
     | 
    
         
            +
            decreased. But recall is increased.</p>
         
     | 
| 
      
 208 
     | 
    
         
            +
            <p>We can find only <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with white-space-separate
         
     | 
| 
      
 209 
     | 
    
         
            +
            tokenize method. Because <tt class="docutils literal"><span class="pre">World</span></tt> is tokenized to one token <tt class="docutils literal"><span class="pre">World</span></tt>
         
     | 
| 
      
 210 
     | 
    
         
            +
            with white-space-separate tokenize method. It means that precision is
         
     | 
| 
      
 211 
     | 
    
         
            +
            increased for people who want to search "logical and". But recall is
         
     | 
| 
      
 212 
     | 
    
         
            +
            decreased because <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> that contains <tt class="docutils literal"><span class="pre">or</span></tt> isn't found.</p>
         
     | 
| 
      
 213 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 214 
     | 
    
         
            +
            <div class="section" id="built-in-tokenizsers">
         
     | 
| 
      
 215 
     | 
    
         
            +
            <h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
         
     | 
| 
      
 216 
     | 
    
         
            +
            <p>Here is a list of built-in tokenizers:</p>
         
     | 
| 
      
 217 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 218 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 219 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
         
     | 
| 
      
 220 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
         
     | 
| 
      
 221 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
         
     | 
| 
      
 222 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
         
     | 
| 
      
 223 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
         
     | 
| 
      
 224 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
         
     | 
| 
      
 225 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></tt></li>
         
     | 
| 
      
 226 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></tt></li>
         
     | 
| 
      
 227 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
         
     | 
| 
      
 228 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
         
     | 
| 
      
 229 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
         
     | 
| 
      
 230 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
         
     | 
| 
      
 231 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
         
     | 
| 
      
 232 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
         
     | 
| 
       90 
233 
     | 
    
         
             
            </ul>
         
     | 
| 
      
 234 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 235 
     | 
    
         
            +
            <div class="section" id="tokenbigram">
         
     | 
| 
      
 236 
     | 
    
         
            +
            <span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 237 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> is a bigram based tokenizer. It's recommended to use
         
     | 
| 
      
 238 
     | 
    
         
            +
            this tokenizer for most cases.</p>
         
     | 
| 
      
 239 
     | 
    
         
            +
            <p>Bigram tokenize method tokenizes a text to two adjacent characters
         
     | 
| 
      
 240 
     | 
    
         
            +
            tokens. For example, <tt class="docutils literal"><span class="pre">Hello</span></tt> is tokenized to the following tokens:</p>
         
     | 
| 
      
 241 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 242 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 243 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">He</span></tt></li>
         
     | 
| 
      
 244 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">el</span></tt></li>
         
     | 
| 
      
 245 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
         
     | 
| 
      
 246 
     | 
    
         
            +
            <li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
         
     | 
| 
      
 247 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 248 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 249 
     | 
    
         
            +
            <p>Bigram tokenize method is good for recall because you can find all
         
     | 
| 
      
 250 
     | 
    
         
            +
            texts by a query that consists of two or more characters.</p>
         
     | 
| 
      
 251 
     | 
    
         
            +
            <p>In general, you can't find all texts by a query that consists of one
         
     | 
| 
      
 252 
     | 
    
         
            +
            character because one character token doesn't exist. But you can find
         
     | 
| 
      
 253 
     | 
    
         
            +
            all texts by a query that consists of one character in Groonga, because
         
     | 
| 
      
 254 
     | 
    
         
            +
            Groonga finds tokens that start with the query by predictive search. For
         
     | 
| 
      
 255 
     | 
    
         
            +
            example, Groonga can find <tt class="docutils literal"><span class="pre">ll</span></tt> and <tt class="docutils literal"><span class="pre">lo</span></tt> tokens by <tt class="docutils literal"><span class="pre">l</span></tt> query.</p>
         
     | 
| 
      
 256 
     | 
    
         
            +
            <p>Bigram tokenize method isn't good for precision because you can find
         
     | 
| 
      
 257 
     | 
    
         
            +
            texts that include the query inside a word. For example, you can find <tt class="docutils literal"><span class="pre">world</span></tt>
         
     | 
| 
      
 258 
     | 
    
         
            +
            by <tt class="docutils literal"><span class="pre">or</span></tt>. This is more of a problem for ASCII-only languages than for
         
     | 
| 
      
 259 
     | 
    
         
            +
            non-ASCII languages. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> has a solution for this problem
         
     | 
| 
      
 260 
     | 
    
         
            +
            described below.</p>
         
     | 
| 
      
 261 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior is different when it works with any
         
     | 
| 
      
 262 
     | 
    
         
            +
            <a class="reference internal" href="normalizers.html"><em>Normalizers</em></a>.</p>
         
     | 
| 
      
 263 
     | 
    
         
            +
            <p>If no normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses pure bigram (all tokens
         
     | 
| 
      
 264 
     | 
    
         
            +
            except the last token have two characters) tokenize method:</p>
         
     | 
| 
      
 265 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 266 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
         
     | 
| 
      
 267 
     | 
    
         
            +
            # [
         
     | 
| 
      
 268 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 269 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 270 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 271 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 272 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 273 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 274 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 275 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 276 
     | 
    
         
            +
            #       "value": "He"
         
     | 
| 
      
 277 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 278 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 279 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 280 
     | 
    
         
            +
            #       "value": "el"
         
     | 
| 
      
 281 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 282 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 283 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 284 
     | 
    
         
            +
            #       "value": "ll"
         
     | 
| 
      
 285 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 286 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 287 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 288 
     | 
    
         
            +
            #       "value": "lo"
         
     | 
| 
      
 289 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 290 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 291 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 292 
     | 
    
         
            +
            #       "value": "o "
         
     | 
| 
      
 293 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 294 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 295 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 296 
     | 
    
         
            +
            #       "value": " W"
         
     | 
| 
      
 297 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 298 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 299 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 300 
     | 
    
         
            +
            #       "value": "Wo"
         
     | 
| 
      
 301 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 302 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 303 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 304 
     | 
    
         
            +
            #       "value": "or"
         
     | 
| 
      
 305 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 306 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 307 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 308 
     | 
    
         
            +
            #       "value": "rl"
         
     | 
| 
      
 309 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 310 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 311 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 312 
     | 
    
         
            +
            #       "value": "ld"
         
     | 
| 
      
 313 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 314 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 315 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 316 
     | 
    
         
            +
            #       "value": "d"
         
     | 
| 
      
 317 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 318 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 319 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 320 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 321 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 322 
     | 
    
         
            +
            <p>If normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses white-space-separate like
         
     | 
| 
      
 323 
     | 
    
         
            +
            tokenize method for ASCII characters. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram
         
     | 
| 
      
 324 
     | 
    
         
            +
            tokenize method for non-ASCII characters.</p>
         
     | 
| 
      
 325 
     | 
    
         
            +
            <p>You may be confused by this combined behavior. But it's reasonable
         
     | 
| 
      
 326 
     | 
    
         
            +
            for most use cases such as English text (only ASCII characters) and
         
     | 
| 
      
 327 
     | 
    
         
            +
            Japanese text (ASCII and non-ASCII characters are mixed).</p>
         
     | 
| 
      
 328 
     | 
    
         
            +
            <p>Most languages that consist of only ASCII characters use white-space for
         
     | 
| 
      
 329 
     | 
    
         
            +
            word separator. White-space-separate tokenize method is suitable for
         
     | 
| 
      
 330 
     | 
    
         
            +
            the case.</p>
         
     | 
| 
      
 331 
     | 
    
         
            +
            <p>Languages that consist of non-ASCII characters don't use white-space for
         
     | 
| 
      
 332 
     | 
    
         
            +
            word separator. Bigram tokenize method is suitable for the case.</p>
         
     | 
| 
      
 333 
     | 
    
         
            +
            <p>Mixed tokenize method is suitable for mixed language case.</p>
         
     | 
| 
      
 334 
     | 
    
         
            +
            <p>If you want to use bigram tokenize method for ASCII characters, see
         
     | 
| 
      
 335 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> type tokenizers such as
         
     | 
| 
      
 336 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a>.</p>
         
     | 
| 
      
 337 
     | 
    
         
            +
            <p>Let's confirm <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior by example.</p>
         
     | 
| 
      
 338 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses one or more white-spaces as token delimiter for
         
     | 
| 
      
 339 
     | 
    
         
            +
            ASCII characters:</p>
         
     | 
| 
      
 340 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 341 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World" NormalizerAuto
         
     | 
| 
      
 342 
     | 
    
         
            +
            # [
         
     | 
| 
      
 343 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 344 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 345 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 346 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 347 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 348 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 349 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 350 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 351 
     | 
    
         
            +
            #       "value": "hello"
         
     | 
| 
      
 352 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 353 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 354 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 355 
     | 
    
         
            +
            #       "value": "world"
         
     | 
| 
      
 356 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 357 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 358 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 359 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 360 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 361 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses character type change as token delimiter for
         
     | 
| 
      
 362 
     | 
    
         
            +
            ASCII characters. A character type is one of the following:</p>
         
     | 
| 
      
 363 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 364 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 365 
     | 
    
         
            +
            <li>Alphabet</li>
         
     | 
| 
      
 366 
     | 
    
         
            +
            <li>Digit</li>
         
     | 
| 
      
 367 
     | 
    
         
            +
            <li>Symbol (such as <tt class="docutils literal"><span class="pre">(</span></tt>, <tt class="docutils literal"><span class="pre">)</span></tt> and <tt class="docutils literal"><span class="pre">!</span></tt>)</li>
         
     | 
| 
      
 368 
     | 
    
         
            +
            <li>Hiragana</li>
         
     | 
| 
      
 369 
     | 
    
         
            +
            <li>Katakana</li>
         
     | 
| 
      
 370 
     | 
    
         
            +
            <li>Kanji</li>
         
     | 
| 
      
 371 
     | 
    
         
            +
            <li>Others</li>
         
     | 
| 
      
 372 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 373 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 374 
     | 
    
         
            +
            <p>The following example shows two token delimiters:</p>
         
     | 
| 
      
 375 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 376 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 377 
     | 
    
         
            +
            <li>between <tt class="docutils literal"><span class="pre">100</span></tt> (digits) and <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets)</li>
         
     | 
| 
      
 378 
     | 
    
         
            +
            <li>between <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets) and <tt class="docutils literal"><span class="pre">!!!</span></tt> (symbols)</li>
         
     | 
| 
      
 379 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 380 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 381 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 382 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "100cents!!!" NormalizerAuto
         
     | 
| 
      
 383 
     | 
    
         
            +
            # [
         
     | 
| 
      
 384 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 385 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 386 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 387 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 388 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 389 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 390 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 391 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 392 
     | 
    
         
            +
            #       "value": "100"
         
     | 
| 
      
 393 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 394 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 395 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 396 
     | 
    
         
            +
            #       "value": "cents"
         
     | 
| 
      
 397 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 398 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 399 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 400 
     | 
    
         
            +
            #       "value": "!!!"
         
     | 
| 
      
 401 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 402 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 403 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 404 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 405 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 406 
     | 
    
         
            +
            <p>Here is an example in which <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram tokenize method
         
     | 
| 
      
 407 
     | 
    
         
            +
            for non-ASCII characters.</p>
         
     | 
| 
      
 408 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 409 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日本語の勉強" NormalizerAuto
         
     | 
| 
      
 410 
     | 
    
         
            +
            # [
         
     | 
| 
      
 411 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 412 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 413 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 414 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 415 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 416 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 417 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 418 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 419 
     | 
    
         
            +
            #       "value": "日本"
         
     | 
| 
      
 420 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 421 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 422 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 423 
     | 
    
         
            +
            #       "value": "本語"
         
     | 
| 
      
 424 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 425 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 426 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 427 
     | 
    
         
            +
            #       "value": "語の"
         
     | 
| 
      
 428 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 429 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 430 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 431 
     | 
    
         
            +
            #       "value": "の勉"
         
     | 
| 
      
 432 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 433 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 434 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 435 
     | 
    
         
            +
            #       "value": "勉強"
         
     | 
| 
      
 436 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 437 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 438 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 439 
     | 
    
         
            +
            #       "value": "強"
         
     | 
| 
      
 440 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 441 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 442 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 443 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 444 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 445 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 446 
     | 
    
         
            +
            <div class="section" id="tokenbigramsplitsymbol">
         
     | 
| 
      
 447 
     | 
    
         
            +
            <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 448 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
         
     | 
| 
      
 449 
     | 
    
         
            +
            difference between them is symbol handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt>
         
     | 
| 
      
 450 
     | 
    
         
            +
            tokenizes symbols by bigram tokenize method:</p>
         
     | 
| 
      
 451 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 452 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
         
     | 
| 
      
 453 
     | 
    
         
            +
            # [
         
     | 
| 
      
 454 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 455 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 456 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 457 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 458 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 459 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 460 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 461 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 462 
     | 
    
         
            +
            #       "value": "100"
         
     | 
| 
      
 463 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 464 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 465 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 466 
     | 
    
         
            +
            #       "value": "cents"
         
     | 
| 
      
 467 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 468 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 469 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 470 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 471 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 472 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 473 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 474 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 475 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 476 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 477 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 478 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 479 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 480 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 481 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 482 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 483 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 484 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 485 
     | 
    
         
            +
            <div class="section" id="tokenbigramsplitsymbolalpha">
         
     | 
| 
      
 486 
     | 
    
         
            +
            <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 487 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
         
     | 
| 
      
 488 
     | 
    
         
            +
            difference between them is symbol and alphabet
         
     | 
| 
      
 489 
     | 
    
         
            +
            handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> tokenizes symbols and
         
     | 
| 
      
 490 
     | 
    
         
            +
            alphabets by bigram tokenize method:</p>
         
     | 
| 
      
 491 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 492 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
         
     | 
| 
      
 493 
     | 
    
         
            +
            # [
         
     | 
| 
      
 494 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 495 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 496 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 497 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 498 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 499 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 500 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 501 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 502 
     | 
    
         
            +
            #       "value": "100"
         
     | 
| 
      
 503 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 504 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 505 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 506 
     | 
    
         
            +
            #       "value": "ce"
         
     | 
| 
      
 507 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 508 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 509 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 510 
     | 
    
         
            +
            #       "value": "en"
         
     | 
| 
      
 511 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 512 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 513 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 514 
     | 
    
         
            +
            #       "value": "nt"
         
     | 
| 
      
 515 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 516 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 517 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 518 
     | 
    
         
            +
            #       "value": "ts"
         
     | 
| 
      
 519 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 520 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 521 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 522 
     | 
    
         
            +
            #       "value": "s!"
         
     | 
| 
      
 523 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 524 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 525 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 526 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 527 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 528 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 529 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 530 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 531 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 532 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 533 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 534 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 535 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 536 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 537 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 538 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 539 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 540 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 541 
     | 
    
         
            +
            <div class="section" id="tokenbigramsplitsymbolalphadigit">
         
     | 
| 
      
 542 
     | 
    
         
            +
            <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 543 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> is similar to
         
     | 
| 
      
 544 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference between them is symbol, alphabet
         
     | 
| 
      
 545 
     | 
    
         
            +
            and digit handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> tokenizes
         
     | 
| 
      
 546 
     | 
    
         
            +
            symbols, alphabets and digits by bigram tokenize method. It means that
         
     | 
| 
      
 547 
     | 
    
         
            +
            all characters are tokenized by bigram tokenize method:</p>
         
     | 
| 
      
 548 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 549 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
         
     | 
| 
      
 550 
     | 
    
         
            +
            # [
         
     | 
| 
      
 551 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 552 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 553 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 554 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 555 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 556 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 557 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 558 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 559 
     | 
    
         
            +
            #       "value": "10"
         
     | 
| 
      
 560 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 561 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 562 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 563 
     | 
    
         
            +
            #       "value": "00"
         
     | 
| 
      
 564 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 565 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 566 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 567 
     | 
    
         
            +
            #       "value": "0c"
         
     | 
| 
      
 568 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 569 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 570 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 571 
     | 
    
         
            +
            #       "value": "ce"
         
     | 
| 
      
 572 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 573 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 574 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 575 
     | 
    
         
            +
            #       "value": "en"
         
     | 
| 
      
 576 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 577 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 578 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 579 
     | 
    
         
            +
            #       "value": "nt"
         
     | 
| 
      
 580 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 581 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 582 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 583 
     | 
    
         
            +
            #       "value": "ts"
         
     | 
| 
      
 584 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 585 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 586 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 587 
     | 
    
         
            +
            #       "value": "s!"
         
     | 
| 
      
 588 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 589 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 590 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 591 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 592 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 593 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 594 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 595 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 596 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 597 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 598 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 599 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 600 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 601 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 602 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 603 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 604 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 605 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 606 
     | 
    
         
            +
            <div class="section" id="tokenbigramignoreblank">
         
     | 
| 
      
 607 
     | 
    
         
            +
            <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 608 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
         
     | 
| 
      
 609 
     | 
    
         
            +
            difference between them is blank handling. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>
         
     | 
| 
      
 610 
     | 
    
         
            +
            ignores white-spaces in continuous symbols and non-ASCII characters.</p>
         
     | 
| 
      
 611 
     | 
    
         
            +
            <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
         
     | 
| 
      
 612 
     | 
    
         
            +
            has symbols and non-ASCII characters.</p>
         
     | 
| 
      
 613 
     | 
    
         
            +
            <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
         
     | 
| 
      
 614 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 615 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 616 
     | 
    
         
            +
            # [
         
     | 
| 
      
 617 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 618 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 619 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 620 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 621 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 622 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 623 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 624 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 625 
     | 
    
         
            +
            #       "value": "日"
         
     | 
| 
      
 626 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 627 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 628 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 629 
     | 
    
         
            +
            #       "value": "本"
         
     | 
| 
      
 630 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 631 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 632 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 633 
     | 
    
         
            +
            #       "value": "語"
         
     | 
| 
      
 634 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 635 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 636 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 637 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 638 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 639 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 640 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 641 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 642 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 643 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 644 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 645 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 646 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 647 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 648 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 649 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 650 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 651 
     | 
    
         
            +
            <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>:</p>
         
     | 
| 
      
 652 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 653 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 654 
     | 
    
         
            +
            # [
         
     | 
| 
      
 655 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 656 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 657 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 658 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 659 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 660 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 661 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 662 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 663 
     | 
    
         
            +
            #       "value": "日本"
         
     | 
| 
      
 664 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 665 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 666 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 667 
     | 
    
         
            +
            #       "value": "本語"
         
     | 
| 
      
 668 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 669 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 670 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 671 
     | 
    
         
            +
            #       "value": "語"
         
     | 
| 
      
 672 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 673 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 674 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 675 
     | 
    
         
            +
            #       "value": "!!!"
         
     | 
| 
      
 676 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 677 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 678 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 679 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 680 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 681 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 682 
     | 
    
         
            +
            <div class="section" id="tokenbigramignoreblanksplitsymbol">
         
     | 
| 
      
 683 
     | 
    
         
            +
            <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 684 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> is similar to
         
     | 
| 
      
 685 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
         
     | 
| 
      
 686 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 687 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 688 
     | 
    
         
            +
            <li>Blank handling</li>
         
     | 
| 
      
 689 
     | 
    
         
            +
            <li>Symbol handling</li>
         
     | 
| 
      
 690 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 691 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 692 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> ignores white-spaces in
         
     | 
| 
      
 693 
     | 
    
         
            +
            continuous symbols and non-ASCII characters.</p>
         
     | 
| 
      
 694 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> tokenizes symbols by bigram
         
     | 
| 
      
 695 
     | 
    
         
            +
            tokenization method.</p>
         
     | 
| 
      
 696 
     | 
    
         
            +
            <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
         
     | 
| 
      
 697 
     | 
    
         
            +
            has symbols and non-ASCII characters.</p>
         
     | 
| 
      
 698 
     | 
    
         
            +
            <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
         
     | 
| 
      
 699 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 700 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 701 
     | 
    
         
            +
            # [
         
     | 
| 
      
 702 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 703 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 704 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 705 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 706 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 707 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 708 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 709 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 710 
     | 
    
         
            +
            #       "value": "日"
         
     | 
| 
      
 711 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 712 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 713 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 714 
     | 
    
         
            +
            #       "value": "本"
         
     | 
| 
      
 715 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 716 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 717 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 718 
     | 
    
         
            +
            #       "value": "語"
         
     | 
| 
      
 719 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 720 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 721 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 722 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 723 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 724 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 725 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 726 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 727 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 728 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 729 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 730 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 731 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 732 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 733 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 734 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 735 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 736 
     | 
    
         
            +
            <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt>:</p>
         
     | 
| 
      
 737 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 738 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 739 
     | 
    
         
            +
            # [
         
     | 
| 
      
 740 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 741 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 742 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 743 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 744 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 745 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 746 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 747 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 748 
     | 
    
         
            +
            #       "value": "日本"
         
     | 
| 
      
 749 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 750 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 751 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 752 
     | 
    
         
            +
            #       "value": "本語"
         
     | 
| 
      
 753 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 754 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 755 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 756 
     | 
    
         
            +
            #       "value": "語!"
         
     | 
| 
      
 757 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 758 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 759 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 760 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 761 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 762 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 763 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 764 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 765 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 766 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 767 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 768 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 769 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 770 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 771 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 772 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 773 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 774 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 775 
     | 
    
         
            +
            <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
         
     | 
| 
      
 776 
     | 
    
         
            +
            <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 777 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> is similar to
         
     | 
| 
      
 778 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
         
     | 
| 
      
 779 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 780 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 781 
     | 
    
         
            +
            <li>Blank handling</li>
         
     | 
| 
      
 782 
     | 
    
         
            +
            <li>Symbol and alphabet handling</li>
         
     | 
| 
      
 783 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 784 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 785 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> ignores white-spaces in
         
     | 
| 
      
 786 
     | 
    
         
            +
            continuous symbols and non-ASCII characters.</p>
         
     | 
| 
      
 787 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> tokenizes symbols and
         
     | 
| 
      
 788 
     | 
    
         
            +
            alphabets by bigram tokenization method.</p>
         
     | 
| 
      
 789 
     | 
    
         
            +
            <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
         
     | 
| 
      
 790 
     | 
    
         
            +
            has symbols and non-ASCII characters with white spaces and alphabets.</p>
         
     | 
| 
      
 791 
     | 
    
         
            +
            <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
         
     | 
| 
      
 792 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 793 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 794 
     | 
    
         
            +
            # [
         
     | 
| 
      
 795 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 796 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 797 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 798 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 799 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 800 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 801 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 802 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 803 
     | 
    
         
            +
            #       "value": "hello"
         
     | 
| 
      
 804 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 805 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 806 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 807 
     | 
    
         
            +
            #       "value": "日"
         
     | 
| 
      
 808 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 809 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 810 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 811 
     | 
    
         
            +
            #       "value": "本"
         
     | 
| 
      
 812 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 813 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 814 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 815 
     | 
    
         
            +
            #       "value": "語"
         
     | 
| 
      
 816 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 817 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 818 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 819 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 820 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 821 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 822 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 823 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 824 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 825 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 826 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 827 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 828 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 829 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 830 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 831 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 832 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 833 
     | 
    
         
            +
            <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt>:</p>
         
     | 
| 
      
 834 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 835 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
      
 836 
     | 
    
         
            +
            # [
         
     | 
| 
      
 837 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 838 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 839 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 840 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 841 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 842 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 843 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 844 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 845 
     | 
    
         
            +
            #       "value": "he"
         
     | 
| 
      
 846 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 847 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 848 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 849 
     | 
    
         
            +
            #       "value": "el"
         
     | 
| 
      
 850 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 851 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 852 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 853 
     | 
    
         
            +
            #       "value": "ll"
         
     | 
| 
      
 854 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 855 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 856 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 857 
     | 
    
         
            +
            #       "value": "lo"
         
     | 
| 
      
 858 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 859 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 860 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 861 
     | 
    
         
            +
            #       "value": "o日"
         
     | 
| 
      
 862 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 863 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 864 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 865 
     | 
    
         
            +
            #       "value": "日本"
         
     | 
| 
      
 866 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 867 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 868 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 869 
     | 
    
         
            +
            #       "value": "本語"
         
     | 
| 
      
 870 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 871 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 872 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 873 
     | 
    
         
            +
            #       "value": "語!"
         
     | 
| 
      
 874 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 875 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 876 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 877 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 878 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 879 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 880 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 881 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 882 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 883 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 884 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 885 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 886 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 887 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 888 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 889 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 890 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 891 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 892 
     | 
    
         
            +
            <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
         
     | 
| 
      
 893 
     | 
    
         
            +
            <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 894 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> is similar to
         
     | 
| 
      
 895 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are the following:</p>
         
     | 
| 
      
 896 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 897 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 898 
     | 
    
         
            +
            <li>Blank handling</li>
         
     | 
| 
      
 899 
     | 
    
         
            +
            <li>Symbol, alphabet and digit handling</li>
         
     | 
| 
      
 900 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 901 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 902 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> ignores white-spaces
         
     | 
| 
      
 903 
     | 
    
         
            +
            in continuous symbols and non-ASCII characters.</p>
         
     | 
| 
      
 904 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> tokenizes symbols,
         
     | 
| 
      
 905 
     | 
    
         
            +
            alphabets and digits by bigram tokenize method. It means that all
         
     | 
| 
      
 906 
     | 
    
         
            +
            characters are tokenized by bigram tokenize method.</p>
         
     | 
| 
      
 907 
     | 
    
         
            +
            <p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> text
         
     | 
| 
      
 908 
     | 
    
         
            +
            because it has symbols and non-ASCII characters with white spaces,
         
     | 
| 
      
 909 
     | 
    
         
            +
            alphabets and digits.</p>
         
     | 
| 
      
 910 
     | 
    
         
            +
            <p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
         
     | 
| 
      
 911 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 912 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
         
     | 
| 
      
 913 
     | 
    
         
            +
            # [
         
     | 
| 
      
 914 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 915 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 916 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 917 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 918 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 919 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 920 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 921 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 922 
     | 
    
         
            +
            #       "value": "hello"
         
     | 
| 
      
 923 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 924 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 925 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 926 
     | 
    
         
            +
            #       "value": "日"
         
     | 
| 
      
 927 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 928 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 929 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 930 
     | 
    
         
            +
            #       "value": "本"
         
     | 
| 
      
 931 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 932 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 933 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 934 
     | 
    
         
            +
            #       "value": "語"
         
     | 
| 
      
 935 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 936 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 937 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 938 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 939 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 940 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 941 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 942 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 943 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 944 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 945 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 946 
     | 
    
         
            +
            #       "value": "!"
         
     | 
| 
      
 947 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 948 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 949 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 950 
     | 
    
         
            +
            #       "value": "777"
         
     | 
| 
      
 951 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 952 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 953 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 954 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 955 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 956 
     | 
    
         
            +
            <p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt>:</p>
         
     | 
| 
      
 957 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 958 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
         
     | 
| 
      
 959 
     | 
    
         
            +
            # [
         
     | 
| 
      
 960 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 961 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 962 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 963 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 964 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 965 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 966 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 967 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 968 
     | 
    
         
            +
            #       "value": "he"
         
     | 
| 
      
 969 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 970 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 971 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 972 
     | 
    
         
            +
            #       "value": "el"
         
     | 
| 
      
 973 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 974 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 975 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 976 
     | 
    
         
            +
            #       "value": "ll"
         
     | 
| 
      
 977 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 978 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 979 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 980 
     | 
    
         
            +
            #       "value": "lo"
         
     | 
| 
      
 981 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 982 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 983 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 984 
     | 
    
         
            +
            #       "value": "o日"
         
     | 
| 
      
 985 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 986 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 987 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 988 
     | 
    
         
            +
            #       "value": "日本"
         
     | 
| 
      
 989 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 990 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 991 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 992 
     | 
    
         
            +
            #       "value": "本語"
         
     | 
| 
      
 993 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 994 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 995 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 996 
     | 
    
         
            +
            #       "value": "語!"
         
     | 
| 
      
 997 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 998 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 999 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 1000 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 1001 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1002 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1003 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 1004 
     | 
    
         
            +
            #       "value": "!!"
         
     | 
| 
      
 1005 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1006 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1007 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 1008 
     | 
    
         
            +
            #       "value": "!7"
         
     | 
| 
      
 1009 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1010 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1011 
     | 
    
         
            +
            #       "position": 11,
         
     | 
| 
      
 1012 
     | 
    
         
            +
            #       "value": "77"
         
     | 
| 
      
 1013 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1014 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1015 
     | 
    
         
            +
            #       "position": 12,
         
     | 
| 
      
 1016 
     | 
    
         
            +
            #       "value": "77"
         
     | 
| 
      
 1017 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1018 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1019 
     | 
    
         
            +
            #       "position": 13,
         
     | 
| 
      
 1020 
     | 
    
         
            +
            #       "value": "7"
         
     | 
| 
      
 1021 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1022 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1023 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1024 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1025 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1026 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1027 
     | 
    
         
            +
            <div class="section" id="tokenunigram">
         
     | 
| 
      
 1028 
     | 
    
         
            +
            <span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 1029 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
         
     | 
| 
      
 1030 
     | 
    
         
            +
            between them is token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
         
     | 
| 
      
 1031 
     | 
    
         
            +
            token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> uses 1 character per token.</p>
         
     | 
| 
      
 1032 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1033 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram "100cents!!!" NormalizerAuto
         
     | 
| 
      
 1034 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1035 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1036 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1037 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1038 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1039 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1040 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1041 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1042 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1043 
     | 
    
         
            +
            #       "value": "100"
         
     | 
| 
      
 1044 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1045 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1046 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1047 
     | 
    
         
            +
            #       "value": "cents"
         
     | 
| 
      
 1048 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1049 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1050 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 1051 
     | 
    
         
            +
            #       "value": "!!!"
         
     | 
| 
      
 1052 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1053 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1054 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1055 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1056 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1057 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1058 
     | 
    
         
            +
            <div class="section" id="tokentrigram">
         
     | 
| 
      
 1059 
     | 
    
         
            +
            <span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 1060 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
         
     | 
| 
      
 1061 
     | 
    
         
            +
            between them is token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
         
     | 
| 
      
 1062 
     | 
    
         
            +
            token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> uses 3 characters per token.</p>
         
     | 
| 
      
 1063 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1064 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
         
     | 
| 
      
 1065 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1066 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1067 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1068 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1069 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1070 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1071 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1072 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1073 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1074 
     | 
    
         
            +
            #       "value": "10000"
         
     | 
| 
      
 1075 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1076 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1077 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1078 
     | 
    
         
            +
            #       "value": "cents"
         
     | 
| 
      
 1079 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1080 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1081 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 1082 
     | 
    
         
            +
            #       "value": "!!!!!"
         
     | 
| 
      
 1083 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1084 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1085 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1086 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1087 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1088 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1089 
     | 
    
         
            +
            <div class="section" id="tokendelimit">
         
     | 
| 
      
 1090 
     | 
    
         
            +
            <span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 1091 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> extracts tokens by splitting text on one or more space
         
     | 
| 
      
 1092 
     | 
    
         
            +
            characters (<tt class="docutils literal"><span class="pre">U+0020</span></tt>). For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to
         
     | 
| 
      
 1093 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt>.</p>
         
     | 
| 
      
 1094 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> is suitable for tag text. You can extract <tt class="docutils literal"><span class="pre">groonga</span></tt>
         
     | 
| 
      
 1095 
     | 
    
         
            +
            and <tt class="docutils literal"><span class="pre">full-text-search</span></tt> and <tt class="docutils literal"><span class="pre">http</span></tt> as tags from <tt class="docutils literal"><span class="pre">groonga</span>
         
     | 
| 
      
 1096 
     | 
    
         
            +
            <span class="pre">full-text-search</span> <span class="pre">http</span></tt>.</p>
         
     | 
| 
      
 1097 
     | 
    
         
            +
            <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt>:</p>
         
     | 
| 
      
 1098 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1099 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
         
     | 
| 
      
 1100 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1101 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1102 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1103 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1104 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1105 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1106 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1107 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1108 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1109 
     | 
    
         
            +
            #       "value": "groonga"
         
     | 
| 
      
 1110 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1111 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1112 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1113 
     | 
    
         
            +
            #       "value": "full-text-search"
         
     | 
| 
      
 1114 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1115 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1116 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 1117 
     | 
    
         
            +
            #       "value": "http"
         
     | 
| 
      
 1118 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1119 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1120 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1121 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1122 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1123 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1124 
     | 
    
         
            +
            <div class="section" id="tokendelimitnull">
         
     | 
| 
      
 1125 
     | 
    
         
            +
            <span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 1126 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is similar to <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>. The
         
     | 
| 
      
 1127 
     | 
    
         
            +
            difference between them is the separator character. <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>
         
     | 
| 
      
 1128 
     | 
    
         
            +
            uses space character (<tt class="docutils literal"><span class="pre">U+0020</span></tt>) but <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> uses NUL
         
     | 
| 
      
 1129 
     | 
    
         
            +
            character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</p>
         
     | 
| 
      
 1130 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is also suitable for tag text.</p>
         
     | 
| 
      
 1131 
     | 
    
         
            +
            <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt>:</p>
         
     | 
| 
      
 1132 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1133 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
         
     | 
| 
      
 1134 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1135 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1136 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1137 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1138 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1139 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1140 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1141 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1142 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1143 
     | 
    
         
            +
            #       "value": "groongau0000full-text-searchu0000http"
         
     | 
| 
      
 1144 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1145 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1146 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1147 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1148 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1149 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1150 
     | 
    
         
            +
            <div class="section" id="tokenmecab">
         
     | 
| 
      
 1151 
     | 
    
         
            +
            <span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 1152 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
         
     | 
| 
      
 1153 
     | 
    
         
            +
            morphological analyzer.</p>
         
     | 
| 
      
 1154 
     | 
    
         
            +
            <p>MeCab doesn't depend on Japanese. You can use MeCab for other
         
     | 
| 
      
 1155 
     | 
    
         
            +
            languages by creating dictionary for the languages. You can use <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST
         
     | 
| 
      
 1156 
     | 
    
         
            +
            Japanese Dictionary</a>
         
     | 
| 
      
 1157 
     | 
    
         
            +
            for Japanese.</p>
         
     | 
| 
      
 1158 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is good for precision rather than recall. You can find
         
     | 
| 
      
 1159 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">東京都</span></tt> and <tt class="docutils literal"><span class="pre">京都</span></tt> texts by <tt class="docutils literal"><span class="pre">京都</span></tt> query with
         
     | 
| 
      
 1160 
     | 
    
         
            +
            <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> but <tt class="docutils literal"><span class="pre">東京都</span></tt> isn't expected. You can find only
         
     | 
| 
      
 1161 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">京都</span></tt> text by <tt class="docutils literal"><span class="pre">京都</span></tt> query with <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>.</p>
         
     | 
| 
      
 1162 
     | 
    
         
            +
            <p>If you want to support neologisms, you need to keep updating your
         
     | 
| 
      
 1163 
     | 
    
         
            +
            MeCab dictionary. It incurs a maintenance cost. (<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't
         
     | 
| 
      
 1164 
     | 
    
         
            +
            require dictionary maintenance because <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't use
         
     | 
| 
      
 1165 
     | 
    
         
            +
            dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
         
     | 
| 
      
 1166 
     | 
    
         
            +
            <p>Here is an example of <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>. <tt class="docutils literal"><span class="pre">東京都</span></tt> is tokenized to <tt class="docutils literal"><span class="pre">東京</span></tt>
         
     | 
| 
      
 1167 
     | 
    
         
            +
            and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't include <tt class="docutils literal"><span class="pre">京都</span></tt>:</p>
         
     | 
| 
      
 1168 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1169 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab "東京都"
         
     | 
| 
      
 1170 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1171 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1172 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1173 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1174 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1175 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1176 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1177 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1178 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1179 
     | 
    
         
            +
            #       "value": "東京"
         
     | 
| 
      
 1180 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1181 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1182 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1183 
     | 
    
         
            +
            #       "value": "都"
         
     | 
| 
      
 1184 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1185 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1186 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1187 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1188 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1189 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1190 
     | 
    
         
            +
            <div class="section" id="tokenregexp">
         
     | 
| 
      
 1191 
     | 
    
         
            +
            <span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
         
     | 
| 
      
 1192 
     | 
    
         
            +
            <div class="versionadded">
         
     | 
| 
      
 1193 
     | 
    
         
            +
            <p><span class="versionmodified">New in version 5.0.1.</span></p>
         
     | 
| 
      
 1194 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1195 
     | 
    
         
            +
            <div class="admonition caution">
         
     | 
| 
      
 1196 
     | 
    
         
            +
            <p class="first admonition-title">Caution</p>
         
     | 
| 
      
 1197 
     | 
    
         
            +
            <p class="last">This tokenizer is experimental. Specification may be changed.</p>
         
     | 
| 
      
 1198 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1199 
     | 
    
         
            +
            <div class="admonition caution">
         
     | 
| 
      
 1200 
     | 
    
         
            +
            <p class="first admonition-title">Caution</p>
         
     | 
| 
      
 1201 
     | 
    
         
            +
            <p class="last">This tokenizer can be used only with UTF-8. You can't use this
         
     | 
| 
      
 1202 
     | 
    
         
            +
            tokenizer with EUC-JP, Shift_JIS and so on.</p>
         
     | 
| 
      
 1203 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1204 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is a tokenizer for supporting regular expression
         
     | 
| 
      
 1205 
     | 
    
         
            +
            search by index.</p>
         
     | 
| 
      
 1206 
     | 
    
         
            +
            <p>In general, regular expression search is evaluated as sequential
         
     | 
| 
      
 1207 
     | 
    
         
            +
            search. But the following cases can be evaluated as index search:</p>
         
     | 
| 
      
 1208 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 1209 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 1210 
     | 
    
         
            +
            <li>Literal only case such as <tt class="docutils literal"><span class="pre">hello</span></tt></li>
         
     | 
| 
      
 1211 
     | 
    
         
            +
            <li>The beginning of text and literal case such as <tt class="docutils literal"><span class="pre">\A/home/alice</span></tt></li>
         
     | 
| 
      
 1212 
     | 
    
         
            +
            <li>The end of text and literal case such as <tt class="docutils literal"><span class="pre">\.txt\z</span></tt></li>
         
     | 
| 
      
 1213 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 1214 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 1215 
     | 
    
         
            +
            <p>In most cases, index search is faster than sequential search.</p>
         
     | 
| 
      
 1216 
     | 
    
         
            +
            <p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is based on bigram tokenize method. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt>
         
     | 
| 
      
 1217 
     | 
    
         
            +
            adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) at the beginning of text
         
     | 
| 
      
 1218 
     | 
    
         
            +
            and the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) to the end of text when you
         
     | 
| 
      
 1219 
     | 
    
         
            +
            index text:</p>
         
     | 
| 
      
 1220 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1221 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
         
     | 
| 
      
 1222 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1223 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1224 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1225 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1226 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1227 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1228 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1229 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1230 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1231 
     | 
    
         
            +
            #       "value": ""
         
     | 
| 
      
 1232 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1233 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1234 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1235 
     | 
    
         
            +
            #       "value": "/h"
         
     | 
| 
      
 1236 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1237 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1238 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 1239 
     | 
    
         
            +
            #       "value": "ho"
         
     | 
| 
      
 1240 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1241 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1242 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 1243 
     | 
    
         
            +
            #       "value": "om"
         
     | 
| 
      
 1244 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1245 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1246 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 1247 
     | 
    
         
            +
            #       "value": "me"
         
     | 
| 
      
 1248 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1249 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1250 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 1251 
     | 
    
         
            +
            #       "value": "e/"
         
     | 
| 
      
 1252 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1253 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1254 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 1255 
     | 
    
         
            +
            #       "value": "/a"
         
     | 
| 
      
 1256 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1257 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1258 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 1259 
     | 
    
         
            +
            #       "value": "al"
         
     | 
| 
      
 1260 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1261 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1262 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 1263 
     | 
    
         
            +
            #       "value": "li"
         
     | 
| 
      
 1264 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1265 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1266 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 1267 
     | 
    
         
            +
            #       "value": "ic"
         
     | 
| 
      
 1268 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1269 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1270 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 1271 
     | 
    
         
            +
            #       "value": "ce"
         
     | 
| 
      
 1272 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1273 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1274 
     | 
    
         
            +
            #       "position": 11,
         
     | 
| 
      
 1275 
     | 
    
         
            +
            #       "value": "e/"
         
     | 
| 
      
 1276 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1277 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1278 
     | 
    
         
            +
            #       "position": 12,
         
     | 
| 
      
 1279 
     | 
    
         
            +
            #       "value": "/t"
         
     | 
| 
      
 1280 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1281 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1282 
     | 
    
         
            +
            #       "position": 13,
         
     | 
| 
      
 1283 
     | 
    
         
            +
            #       "value": "te"
         
     | 
| 
      
 1284 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1285 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1286 
     | 
    
         
            +
            #       "position": 14,
         
     | 
| 
      
 1287 
     | 
    
         
            +
            #       "value": "es"
         
     | 
| 
      
 1288 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1289 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1290 
     | 
    
         
            +
            #       "position": 15,
         
     | 
| 
      
 1291 
     | 
    
         
            +
            #       "value": "st"
         
     | 
| 
      
 1292 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1293 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1294 
     | 
    
         
            +
            #       "position": 16,
         
     | 
| 
      
 1295 
     | 
    
         
            +
            #       "value": "t."
         
     | 
| 
      
 1296 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1297 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1298 
     | 
    
         
            +
            #       "position": 17,
         
     | 
| 
      
 1299 
     | 
    
         
            +
            #       "value": ".t"
         
     | 
| 
      
 1300 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1301 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1302 
     | 
    
         
            +
            #       "position": 18,
         
     | 
| 
      
 1303 
     | 
    
         
            +
            #       "value": "tx"
         
     | 
| 
      
 1304 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1305 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1306 
     | 
    
         
            +
            #       "position": 19,
         
     | 
| 
      
 1307 
     | 
    
         
            +
            #       "value": "xt"
         
     | 
| 
      
 1308 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1309 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1310 
     | 
    
         
            +
            #       "position": 20,
         
     | 
| 
      
 1311 
     | 
    
         
            +
            #       "value": "t"
         
     | 
| 
      
 1312 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1313 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1314 
     | 
    
         
            +
            #       "position": 21,
         
     | 
| 
      
 1315 
     | 
    
         
            +
            #       "value": ""
         
     | 
| 
      
 1316 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1317 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1318 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1319 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1320 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1321 
     | 
    
         
            +
            <p>The beginning of text mark is used for the beginning of text search by
         
     | 
| 
      
 1322 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">\A</span></tt>. If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query,
         
     | 
| 
      
 1323 
     | 
    
         
            +
            <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) as the
         
     | 
| 
      
 1324 
     | 
    
         
            +
            first token. The beginning of text mark must appear first, so
         
     | 
| 
      
 1325 
     | 
    
         
            +
            you can get results of the beginning of text search.</p>
         
     | 
| 
      
 1326 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1327 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\A/home/alice/" NormalizerAuto --mode GET
         
     | 
| 
      
 1328 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1329 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1330 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1331 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1332 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1333 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1334 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1335 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1336 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1337 
     | 
    
         
            +
            #       "value": ""
         
     | 
| 
      
 1338 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1339 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1340 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1341 
     | 
    
         
            +
            #       "value": "/h"
         
     | 
| 
      
 1342 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1343 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1344 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 1345 
     | 
    
         
            +
            #       "value": "ho"
         
     | 
| 
      
 1346 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1347 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1348 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 1349 
     | 
    
         
            +
            #       "value": "om"
         
     | 
| 
      
 1350 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1351 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1352 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 1353 
     | 
    
         
            +
            #       "value": "me"
         
     | 
| 
      
 1354 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1355 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1356 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 1357 
     | 
    
         
            +
            #       "value": "e/"
         
     | 
| 
      
 1358 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1359 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1360 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 1361 
     | 
    
         
            +
            #       "value": "/a"
         
     | 
| 
      
 1362 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1363 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1364 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 1365 
     | 
    
         
            +
            #       "value": "al"
         
     | 
| 
      
 1366 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1367 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1368 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 1369 
     | 
    
         
            +
            #       "value": "li"
         
     | 
| 
      
 1370 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1371 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1372 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 1373 
     | 
    
         
            +
            #       "value": "ic"
         
     | 
| 
      
 1374 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1375 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1376 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 1377 
     | 
    
         
            +
            #       "value": "ce"
         
     | 
| 
      
 1378 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1379 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1380 
     | 
    
         
            +
            #       "position": 11,
         
     | 
| 
      
 1381 
     | 
    
         
            +
            #       "value": "e/"
         
     | 
| 
      
 1382 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1383 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1384 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1385 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1386 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1387 
     | 
    
         
            +
            <p>The end of text mark is used for the end of text search by <tt class="docutils literal"><span class="pre">\z</span></tt>.
         
     | 
| 
      
 1388 
     | 
    
         
            +
            If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query, <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds
         
     | 
| 
      
 1389 
     | 
    
         
            +
            the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) as the last token. The end of text
         
     | 
| 
      
 1390 
     | 
    
         
            +
            mark must appear at the end, so you can get results of the end of
         
     | 
| 
      
 1391 
     | 
    
         
            +
            text search.</p>
         
     | 
| 
      
 1392 
     | 
    
         
            +
            <p>Execution example:</p>
         
     | 
| 
      
 1393 
     | 
    
         
            +
            <div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\.txt\\z" NormalizerAuto --mode GET
         
     | 
| 
      
 1394 
     | 
    
         
            +
            # [
         
     | 
| 
      
 1395 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1396 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 1397 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 1398 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 1399 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 1400 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 1401 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1402 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 1403 
     | 
    
         
            +
            #       "value": "\\."
         
     | 
| 
      
 1404 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1405 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1406 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 1407 
     | 
    
         
            +
            #       "value": ".t"
         
     | 
| 
      
 1408 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1409 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1410 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 1411 
     | 
    
         
            +
            #       "value": "tx"
         
     | 
| 
      
 1412 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1413 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1414 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 1415 
     | 
    
         
            +
            #       "value": "xt"
         
     | 
| 
      
 1416 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 1417 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 1418 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 1419 
     | 
    
         
            +
            #       "value": ""
         
     | 
| 
      
 1420 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 1421 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 1422 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 1423 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 1424 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1425 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 1426 
     | 
    
         
            +
            </div>
         
     | 
| 
       91 
1427 
     | 
    
         
             
            </div>
         
     | 
| 
       92 
1428 
     | 
    
         | 
| 
       93 
1429 
     | 
    
         | 
| 
       94 
1430 
     | 
    
         
             
                      </div>
         
     | 
| 
       95 
1431 
     | 
    
         
             
                    </div>
         
     | 
| 
       96 
1432 
     | 
    
         
             
                  </div>
         
     | 
| 
       97 
     | 
    
         
            -
                  <div class="sphinxsidebar" 
     | 
| 
      
 1433 
     | 
    
         
            +
                  <div class="sphinxsidebar">
         
     | 
| 
       98 
1434 
     | 
    
         
             
                    <div class="sphinxsidebarwrapper">
         
     | 
| 
      
 1435 
     | 
    
         
            +
              <h3><a href="../index.html">Table Of Contents</a></h3>
         
     | 
| 
      
 1436 
     | 
    
         
            +
              <ul>
         
     | 
| 
      
 1437 
     | 
    
         
            +
            <li><a class="reference internal" href="#">7.8. Tokenizers</a><ul>
         
     | 
| 
      
 1438 
     | 
    
         
            +
            <li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
         
     | 
| 
      
 1439 
     | 
    
         
            +
            <li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is "tokenize"?</a></li>
         
     | 
| 
      
 1440 
     | 
    
         
            +
            <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizers</a><ul>
         
     | 
| 
      
 1441 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
         
     | 
| 
      
 1442 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
         
     | 
| 
      
 1443 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
         
     | 
| 
      
 1444 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
         
     | 
| 
      
 1445 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
         
     | 
| 
      
 1446 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
         
     | 
| 
      
 1447 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
         
     | 
| 
      
 1448 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
         
     | 
| 
      
 1449 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
         
     | 
| 
      
 1450 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
         
     | 
| 
      
 1451 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
         
     | 
| 
      
 1452 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
         
     | 
| 
      
 1453 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
         
     | 
| 
      
 1454 
     | 
    
         
            +
            <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
         
     | 
| 
      
 1455 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 1456 
     | 
    
         
            +
            </li>
         
     | 
| 
      
 1457 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 1458 
     | 
    
         
            +
            </li>
         
     | 
| 
      
 1459 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 1460 
     | 
    
         
            +
             
     | 
| 
       99 
1461 
     | 
    
         
             
              <h4>Previous topic</h4>
         
     | 
| 
       100 
1462 
     | 
    
         
             
              <p class="topless"><a href="normalizers.html"
         
     | 
| 
       101 
1463 
     | 
    
         
             
                                    title="previous chapter">7.7. Normalizers</a></p>
         
     | 
| 
       102 
1464 
     | 
    
         
             
              <h4>Next topic</h4>
         
     | 
| 
       103 
1465 
     | 
    
         
             
              <p class="topless"><a href="token_filters.html"
         
     | 
| 
       104 
1466 
     | 
    
         
             
                                    title="next chapter">7.9. Token filters</a></p>
         
     | 
| 
       105 
     | 
    
         
            -
              < 
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
       107 
     | 
    
         
            -
                < 
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
             
     | 
| 
       111 
     | 
    
         
            -
               </div>
         
     | 
| 
       112 
     | 
    
         
            -
            <div id="searchbox" style="display: none" role="search">
         
     | 
| 
      
 1467 
     | 
    
         
            +
              <h3>This Page</h3>
         
     | 
| 
      
 1468 
     | 
    
         
            +
              <ul class="this-page-menu">
         
     | 
| 
      
 1469 
     | 
    
         
            +
                <li><a href="../_sources/reference/tokenizers.txt"
         
     | 
| 
      
 1470 
     | 
    
         
            +
                       rel="nofollow">Show Source</a></li>
         
     | 
| 
      
 1471 
     | 
    
         
            +
              </ul>
         
     | 
| 
      
 1472 
     | 
    
         
            +
            <div id="searchbox" style="display: none">
         
     | 
| 
       113 
1473 
     | 
    
         
             
              <h3>Quick search</h3>
         
     | 
| 
       114 
1474 
     | 
    
         
             
                <form class="search" action="../search.html" method="get">
         
     | 
| 
       115 
1475 
     | 
    
         
             
                  <input type="text" name="q" />
         
     | 
| 
         @@ -126,7 +1486,7 @@ 
     | 
|
| 
       126 
1486 
     | 
    
         
             
                  </div>
         
     | 
| 
       127 
1487 
     | 
    
         
             
                  <div class="clearer"></div>
         
     | 
| 
       128 
1488 
     | 
    
         
             
                </div>
         
     | 
| 
       129 
     | 
    
         
            -
                <div class="related" 
     | 
| 
      
 1489 
     | 
    
         
            +
                <div class="related">
         
     | 
| 
       130 
1490 
     | 
    
         
             
                  <h3>Navigation</h3>
         
     | 
| 
       131 
1491 
     | 
    
         
             
                  <ul>
         
     | 
| 
       132 
1492 
     | 
    
         
             
                    <li class="right" style="margin-right: 10px">
         
     | 
| 
         @@ -138,11 +1498,11 @@ 
     | 
|
| 
       138 
1498 
     | 
    
         
             
                    <li class="right" >
         
     | 
| 
       139 
1499 
     | 
    
         
             
                      <a href="normalizers.html" title="7.7. Normalizers"
         
     | 
| 
       140 
1500 
     | 
    
         
             
                         >previous</a> |</li>
         
     | 
| 
       141 
     | 
    
         
            -
                    <li><a href="../index.html">Groonga v5.0. 
     | 
| 
      
 1501 
     | 
    
         
            +
                    <li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> »</li>
         
     | 
| 
       142 
1502 
     | 
    
         
             
                      <li><a href="../reference.html" >7. Reference manual</a> »</li> 
         
     | 
| 
       143 
1503 
     | 
    
         
             
                  </ul>
         
     | 
| 
       144 
1504 
     | 
    
         
             
                </div>
         
     | 
| 
       145 
     | 
    
         
            -
                <div class="footer" 
     | 
| 
      
 1505 
     | 
    
         
            +
                <div class="footer">
         
     | 
| 
       146 
1506 
     | 
    
         
             
                    © Copyright 2009-2015, Brazil, Inc.
         
     | 
| 
       147 
1507 
     | 
    
         
             
                </div>
         
     | 
| 
       148 
1508 
     | 
    
         
             
              </body>
         
     |