rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.yardopts +1 -0
- data/Rakefile +1 -16
- data/example/bookmark.rb +1 -6
- data/example/index-html.rb +0 -1
- data/ext/groonga/extconf.rb +4 -7
- data/ext/groonga/rb-grn-array.c +1 -1
- data/ext/groonga/rb-grn-column.c +33 -67
- data/ext/groonga/rb-grn-context.c +5 -5
- data/ext/groonga/rb-grn-database.c +2 -2
- data/ext/groonga/rb-grn-double-array-trie.c +4 -2
- data/ext/groonga/rb-grn-encoding-support.c +7 -1
- data/ext/groonga/rb-grn-equal-operator.c +85 -0
- data/ext/groonga/rb-grn-exception.c +17 -0
- data/ext/groonga/rb-grn-expression.c +85 -43
- data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
- data/ext/groonga/rb-grn-greater-operator.c +85 -0
- data/ext/groonga/rb-grn-hash.c +1 -1
- data/ext/groonga/rb-grn-index-column.c +150 -11
- data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
- data/ext/groonga/rb-grn-less-operator.c +85 -0
- data/ext/groonga/rb-grn-logger.c +5 -5
- data/ext/groonga/rb-grn-match-operator.c +86 -0
- data/ext/groonga/rb-grn-normalizer.c +8 -1
- data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
- data/ext/groonga/rb-grn-object.c +170 -36
- data/ext/groonga/rb-grn-operator.c +395 -172
- data/ext/groonga/rb-grn-patricia-trie.c +10 -8
- data/ext/groonga/rb-grn-plugin.c +51 -3
- data/ext/groonga/rb-grn-prefix-operator.c +86 -0
- data/ext/groonga/rb-grn-procedure-type.c +4 -0
- data/ext/groonga/rb-grn-query-logger.c +4 -4
- data/ext/groonga/rb-grn-regexp-operator.c +85 -0
- data/ext/groonga/rb-grn-snippet.c +1 -1
- data/ext/groonga/rb-grn-table-key-support.c +9 -5
- data/ext/groonga/rb-grn-table.c +52 -66
- data/ext/groonga/rb-grn-type.c +1 -1
- data/ext/groonga/rb-grn-utils.c +22 -3
- data/ext/groonga/rb-grn.h +31 -4
- data/ext/groonga/rb-groonga.c +9 -9
- data/lib/1.9/groonga.so +0 -0
- data/lib/2.0/groonga.so +0 -0
- data/lib/2.1/groonga.so +0 -0
- data/lib/2.2/groonga.so +0 -0
- data/lib/groonga/context.rb +31 -0
- data/lib/groonga/expression-builder.rb +14 -1
- data/lib/groonga/record.rb +10 -8
- data/lib/groonga/schema.rb +3 -1
- data/rroonga-build.rb +2 -2
- data/rroonga.gemspec +3 -3
- data/test/groonga-test-utils.rb +4 -0
- data/test/test-column.rb +28 -26
- data/test/test-exception.rb +1 -0
- data/test/test-expression-builder.rb +83 -1
- data/test/test-expression.rb +80 -48
- data/test/test-index-column.rb +102 -29
- data/test/test-normalizer.rb +35 -29
- data/test/test-operator.rb +214 -0
- data/test/test-plugin.rb +24 -6
- data/test/test-procedure.rb +29 -0
- data/test/test-schema-type.rb +14 -0
- data/test/test-table-select-mecab.rb +1 -4
- data/test/test-table.rb +7 -0
- data/test/test-token-regexp.rb +30 -0
- data/test/test-type.rb +24 -0
- data/vendor/local/bin/grndb.exe +0 -0
- data/vendor/local/bin/groonga-benchmark.exe +0 -0
- data/vendor/local/bin/groonga.exe +0 -0
- data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
- data/vendor/local/bin/libgroonga-0.dll +0 -0
- data/vendor/local/bin/libmecab-1.dll +0 -0
- data/vendor/local/bin/libmsgpack-3.dll +0 -0
- data/vendor/local/bin/libmsgpackc-2.dll +0 -0
- data/vendor/local/bin/libonig-5.dll +0 -0
- data/vendor/local/bin/libstdc++-6.dll +0 -0
- data/vendor/local/bin/lz4.exe +0 -0
- data/vendor/local/bin/lz4c.exe +0 -0
- data/vendor/local/bin/lz4cat +0 -0
- data/vendor/local/bin/mecab-config +2 -2
- data/vendor/local/bin/mecab.exe +0 -0
- data/vendor/local/bin/onig-config +1 -1
- data/vendor/local/bin/zlib1.dll +0 -0
- data/vendor/local/etc/groonga/groonga.conf +1 -1
- data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
- data/vendor/local/include/groonga/groonga.h +1 -0
- data/vendor/local/include/groonga/groonga/expr.h +2 -0
- data/vendor/local/include/groonga/groonga/groonga.h +32 -5
- data/vendor/local/include/groonga/groonga/ii.h +7 -0
- data/vendor/local/include/groonga/groonga/obj.h +37 -0
- data/vendor/local/include/groonga/groonga/scorer.h +95 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
- data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
- data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
- data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
- data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
- data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
- data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
- data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
- data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
- data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
- data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
- data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
- data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
- data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
- data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
- data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
- data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
- data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
- data/vendor/local/lib/libgroonga.a +0 -0
- data/vendor/local/lib/libgroonga.dll.a +0 -0
- data/vendor/local/lib/libgroonga.la +2 -2
- data/vendor/local/lib/liblz4.a +0 -0
- data/vendor/local/lib/liblz4.dll +0 -0
- data/vendor/local/lib/liblz4.dll.1 +0 -0
- data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
- data/vendor/local/lib/libmecab.a +0 -0
- data/vendor/local/lib/libmecab.dll.a +0 -0
- data/vendor/local/lib/libmecab.la +2 -2
- data/vendor/local/lib/libmsgpack.a +0 -0
- data/vendor/local/lib/libmsgpack.dll.a +0 -0
- data/vendor/local/lib/libmsgpack.la +2 -2
- data/vendor/local/lib/libmsgpackc.a +0 -0
- data/vendor/local/lib/libmsgpackc.dll.a +0 -0
- data/vendor/local/lib/libmsgpackc.la +2 -2
- data/vendor/local/lib/libonig.a +0 -0
- data/vendor/local/lib/libonig.dll.a +0 -0
- data/vendor/local/lib/libonig.la +2 -2
- data/vendor/local/lib/libz.a +0 -0
- data/vendor/local/lib/libz.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
- data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
- data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
- data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
- data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
- data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
- data/vendor/local/sbin/groonga-httpd-restart +1 -1
- data/vendor/local/sbin/groonga-httpd.exe +0 -0
- data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
- data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
- data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
- data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
- data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
- data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
- data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
- data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
- data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
- data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
- data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
- data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
- data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
- data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
- data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
- data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
- data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
- data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
- data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
- data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
- data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
- data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
- data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
- data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
- data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
- data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
- data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
- data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
- data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
- data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
- data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
- data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
- data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
- data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
- data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
- data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
- data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
- data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
- data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
- data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
- data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
- data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
- data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
- data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
- data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
- data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
- data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
- data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
- data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
- data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
- data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
- data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
- data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
- data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
- data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
- data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
- data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
- data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
- data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
- data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
- data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
- data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
- data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
- data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
- data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
- data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
- data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
- data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
- data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
- data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
- data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
- data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
- data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
- data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
- data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
- data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
- data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
- data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
- data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
- data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
- data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
- data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
- data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
- data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
- data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
- data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
- data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
- data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
- data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
- data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
- data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
- data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
- data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
- data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
- data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
- data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
- data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
- data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
- data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
- data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
- data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
- data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
- data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
- data/vendor/local/share/license/mruby/README.md +2 -2
- data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
- data/vendor/local/share/man/man1/groonga.1 +7210 -3029
- metadata +75 -11
- data/doc/text/news.textile +0 -1217
- data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
- data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
- data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
@@ -7,7 +7,7 @@
|
|
7
7
|
<head>
|
8
8
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
9
9
|
|
10
|
-
<title>7.8. Tokenizers — Groonga v5.0.
|
10
|
+
<title>7.8. Tokenizers — Groonga v5.0.1-42-g4d10df1 documentation</title>
|
11
11
|
|
12
12
|
<link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
|
13
13
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
@@ -15,7 +15,7 @@
|
|
15
15
|
<script type="text/javascript">
|
16
16
|
var DOCUMENTATION_OPTIONS = {
|
17
17
|
URL_ROOT: '../',
|
18
|
-
VERSION: '5.0.
|
18
|
+
VERSION: '5.0.1-42-g4d10df1',
|
19
19
|
COLLAPSE_INDEX: false,
|
20
20
|
FILE_SUFFIX: '.html',
|
21
21
|
HAS_SOURCE: true
|
@@ -25,12 +25,12 @@
|
|
25
25
|
<script type="text/javascript" src="../_static/underscore.js"></script>
|
26
26
|
<script type="text/javascript" src="../_static/doctools.js"></script>
|
27
27
|
<link rel="shortcut icon" href="../_static/favicon.ico"/>
|
28
|
-
<link rel="top" title="Groonga v5.0.
|
28
|
+
<link rel="top" title="Groonga v5.0.1-42-g4d10df1 documentation" href="../index.html" />
|
29
29
|
<link rel="up" title="7. Reference manual" href="../reference.html" />
|
30
30
|
<link rel="next" title="7.9. Token filters" href="token_filters.html" />
|
31
31
|
<link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
|
32
32
|
</head>
|
33
|
-
<body
|
33
|
+
<body>
|
34
34
|
<div class="header">
|
35
35
|
<h1 class="title">
|
36
36
|
<a id="top-link" href="../index.html">
|
@@ -48,7 +48,7 @@
|
|
48
48
|
</div>
|
49
49
|
|
50
50
|
|
51
|
-
<div class="related"
|
51
|
+
<div class="related">
|
52
52
|
<h3>Navigation</h3>
|
53
53
|
<ul>
|
54
54
|
<li class="right" style="margin-right: 10px">
|
@@ -60,7 +60,7 @@
|
|
60
60
|
<li class="right" >
|
61
61
|
<a href="normalizers.html" title="7.7. Normalizers"
|
62
62
|
accesskey="P">previous</a> |</li>
|
63
|
-
<li><a href="../index.html">Groonga v5.0.
|
63
|
+
<li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> »</li>
|
64
64
|
<li><a href="../reference.html" accesskey="U">7. Reference manual</a> »</li>
|
65
65
|
</ul>
|
66
66
|
</div>
|
@@ -68,48 +68,1408 @@
|
|
68
68
|
<div class="document">
|
69
69
|
<div class="documentwrapper">
|
70
70
|
<div class="bodywrapper">
|
71
|
-
<div class="body"
|
71
|
+
<div class="body">
|
72
72
|
|
73
73
|
<div class="section" id="tokenizers">
|
74
74
|
<h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
|
75
|
-
<
|
76
|
-
<
|
77
|
-
<
|
78
|
-
|
79
|
-
<
|
80
|
-
<
|
81
|
-
<li>
|
82
|
-
<
|
83
|
-
<
|
84
|
-
<
|
85
|
-
|
86
|
-
|
87
|
-
<li>
|
88
|
-
<
|
89
|
-
<
|
75
|
+
<div class="section" id="summary">
|
76
|
+
<h2>7.8.1. Summary<a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h2>
|
77
|
+
<p>Groonga has tokenizer module that tokenizes text. It is used when
|
78
|
+
the following cases:</p>
|
79
|
+
<blockquote>
|
80
|
+
<div><ul>
|
81
|
+
<li><p class="first">Indexing text</p>
|
82
|
+
<div class="figure align-center">
|
83
|
+
<a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
|
84
|
+
<p class="caption">Tokenizer is used when indexing text.</p>
|
85
|
+
</div>
|
86
|
+
</li>
|
87
|
+
<li><p class="first">Searching by query</p>
|
88
|
+
<div class="figure align-center">
|
89
|
+
<a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
|
90
|
+
<p class="caption">Tokenizer is used when searching by query.</p>
|
91
|
+
</div>
|
92
|
+
</li>
|
93
|
+
</ul>
|
94
|
+
</div></blockquote>
|
95
|
+
<p>Tokenizer is an important module for full-text search. You can change
|
96
|
+
trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
|
97
|
+
tokenizer.</p>
|
98
|
+
<p>Normally, <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> is a suitable tokenizer. If you don't
|
99
|
+
know much about tokenizer, it's recommended that you choose
|
100
|
+
<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>.</p>
|
101
|
+
<p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> and
|
102
|
+
<a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a>. Here is an example to
|
103
|
+
try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> tokenizer by
|
104
|
+
<a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a>:</p>
|
105
|
+
<p>Execution example:</p>
|
106
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
|
107
|
+
# [
|
108
|
+
# [
|
109
|
+
# 0,
|
110
|
+
# 1337566253.89858,
|
111
|
+
# 0.000355720520019531
|
112
|
+
# ],
|
113
|
+
# [
|
114
|
+
# {
|
115
|
+
# "position": 0,
|
116
|
+
# "value": "He"
|
117
|
+
# },
|
118
|
+
# {
|
119
|
+
# "position": 1,
|
120
|
+
# "value": "el"
|
121
|
+
# },
|
122
|
+
# {
|
123
|
+
# "position": 2,
|
124
|
+
# "value": "ll"
|
125
|
+
# },
|
126
|
+
# {
|
127
|
+
# "position": 3,
|
128
|
+
# "value": "lo"
|
129
|
+
# },
|
130
|
+
# {
|
131
|
+
# "position": 4,
|
132
|
+
# "value": "o "
|
133
|
+
# },
|
134
|
+
# {
|
135
|
+
# "position": 5,
|
136
|
+
# "value": " W"
|
137
|
+
# },
|
138
|
+
# {
|
139
|
+
# "position": 6,
|
140
|
+
# "value": "Wo"
|
141
|
+
# },
|
142
|
+
# {
|
143
|
+
# "position": 7,
|
144
|
+
# "value": "or"
|
145
|
+
# },
|
146
|
+
# {
|
147
|
+
# "position": 8,
|
148
|
+
# "value": "rl"
|
149
|
+
# },
|
150
|
+
# {
|
151
|
+
# "position": 9,
|
152
|
+
# "value": "ld"
|
153
|
+
# },
|
154
|
+
# {
|
155
|
+
# "position": 10,
|
156
|
+
# "value": "d"
|
157
|
+
# }
|
158
|
+
# ]
|
159
|
+
# ]
|
160
|
+
</pre></div>
|
161
|
+
</div>
|
162
|
+
</div>
|
163
|
+
<div class="section" id="what-is-tokenize">
|
164
|
+
<h2>7.8.2. What is "tokenize"?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
|
165
|
+
<p>"tokenize" is the process that extracts zero or more tokens from a
|
166
|
+
text. There are some "tokenize" methods.</p>
|
167
|
+
<p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
|
168
|
+
bigram tokenize method:</p>
|
169
|
+
<blockquote>
|
170
|
+
<div><ul class="simple">
|
171
|
+
<li><tt class="docutils literal"><span class="pre">He</span></tt></li>
|
172
|
+
<li><tt class="docutils literal"><span class="pre">el</span></tt></li>
|
173
|
+
<li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
|
174
|
+
<li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
|
175
|
+
<li><tt class="docutils literal"><span class="pre">o_</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
|
176
|
+
<li><tt class="docutils literal"><span class="pre">_W</span></tt> (<tt class="docutils literal"><span class="pre">_</span></tt> means a white-space)</li>
|
177
|
+
<li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
|
178
|
+
<li><tt class="docutils literal"><span class="pre">or</span></tt></li>
|
179
|
+
<li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
|
180
|
+
<li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
|
181
|
+
</ul>
|
182
|
+
</div></blockquote>
|
183
|
+
<p>In the above example, 10 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
|
184
|
+
<span class="pre">World</span></tt>.</p>
|
185
|
+
<p>For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to the following tokens by
|
186
|
+
white-space-separate tokenize method:</p>
|
187
|
+
<blockquote>
|
188
|
+
<div><ul class="simple">
|
189
|
+
<li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
|
190
|
+
<li><tt class="docutils literal"><span class="pre">World</span></tt></li>
|
191
|
+
</ul>
|
192
|
+
</div></blockquote>
|
193
|
+
<p>In the above example, 2 tokens are extracted from one text <tt class="docutils literal"><span class="pre">Hello</span>
|
194
|
+
<span class="pre">World</span></tt>.</p>
|
195
|
+
<p>Token is used as search key. You can find indexed documents only by
|
196
|
+
tokens that are extracted by the tokenize method in use. For example, you
|
197
|
+
can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with bigram tokenize method but you
|
198
|
+
can't find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> by <tt class="docutils literal"><span class="pre">ll</span></tt> with white-space-separate tokenize
|
199
|
+
method. Because white-space-separate tokenize method doesn't extract
|
200
|
+
<tt class="docutils literal"><span class="pre">ll</span></tt> token. It just extracts <tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt> tokens.</p>
|
201
|
+
<p>In general, tokenize method that generates small tokens increases
|
202
|
+
recall but decreases precision. Tokenize method that generates large
|
203
|
+
tokens increases precision but decreases recall.</p>
|
204
|
+
<p>For example, we can find <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> and <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with
|
205
|
+
bigram tokenize method. <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is a noise for people who
|
206
|
+
want to search "logical and". It means that precision is
|
207
|
+
decreased. But recall is increased.</p>
|
208
|
+
<p>We can find only <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> by <tt class="docutils literal"><span class="pre">or</span></tt> with white-space-separate
|
209
|
+
tokenize method. Because <tt class="docutils literal"><span class="pre">World</span></tt> is tokenized to one token <tt class="docutils literal"><span class="pre">World</span></tt>
|
210
|
+
with white-space-separate tokenize method. It means that precision is
|
211
|
+
increased for people who want to search "logical and". But recall is
|
212
|
+
decreased because <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> that contains <tt class="docutils literal"><span class="pre">or</span></tt> isn't found.</p>
|
213
|
+
</div>
|
214
|
+
<div class="section" id="built-in-tokenizsers">
|
215
|
+
<h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
|
216
|
+
<p>Here is a list of built-in tokenizers:</p>
|
217
|
+
<blockquote>
|
218
|
+
<div><ul class="simple">
|
219
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
|
220
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
|
221
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
|
222
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
|
223
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
|
224
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
|
225
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></li>
|
226
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></li>
|
227
|
+
<li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
|
228
|
+
<li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
|
229
|
+
<li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
|
230
|
+
<li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
|
231
|
+
<li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
|
232
|
+
<li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
|
90
233
|
</ul>
|
234
|
+
</div></blockquote>
|
235
|
+
<div class="section" id="tokenbigram">
|
236
|
+
<span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
|
237
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> is a bigram based tokenizer. It's recommended to use
|
238
|
+
this tokenizer for most cases.</p>
|
239
|
+
<p>Bigram tokenize method tokenizes a text to two adjacent characters
|
240
|
+
tokens. For example, <tt class="docutils literal"><span class="pre">Hello</span></tt> is tokenized to the following tokens:</p>
|
241
|
+
<blockquote>
|
242
|
+
<div><ul class="simple">
|
243
|
+
<li><tt class="docutils literal"><span class="pre">He</span></tt></li>
|
244
|
+
<li><tt class="docutils literal"><span class="pre">el</span></tt></li>
|
245
|
+
<li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
|
246
|
+
<li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
|
247
|
+
</ul>
|
248
|
+
</div></blockquote>
|
249
|
+
<p>Bigram tokenize method is good for recall because you can find all
|
250
|
+
texts by a query that consists of two or more characters.</p>
|
251
|
+
<p>In general, you can't find all texts by a query that consists of one
|
252
|
+
character because one character token doesn't exist. But you can find
|
253
|
+
all texts by a query that consists of one character in Groonga. Because
|
254
|
+
Groonga finds tokens that start with the query by predictive search. For
|
255
|
+
example, Groonga can find <tt class="docutils literal"><span class="pre">ll</span></tt> and <tt class="docutils literal"><span class="pre">lo</span></tt> tokens by <tt class="docutils literal"><span class="pre">l</span></tt> query.</p>
|
256
|
+
<p>Bigram tokenize method isn't good for precision because you can find
|
257
|
+
texts that include the query inside a word. For example, you can find <tt class="docutils literal"><span class="pre">world</span></tt>
|
258
|
+
by <tt class="docutils literal"><span class="pre">or</span></tt>. This is more sensitive for ASCII only languages rather than
|
259
|
+
non-ASCII languages. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> has a solution for this problem
|
260
|
+
described below.</p>
|
261
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior is different when it's worked with any
|
262
|
+
<a class="reference internal" href="normalizers.html"><em>Normalizers</em></a>.</p>
|
263
|
+
<p>If no normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses pure bigram (all tokens
|
264
|
+
except the last token have two characters) tokenize method:</p>
|
265
|
+
<p>Execution example:</p>
|
266
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
|
267
|
+
# [
|
268
|
+
# [
|
269
|
+
# 0,
|
270
|
+
# 1337566253.89858,
|
271
|
+
# 0.000355720520019531
|
272
|
+
# ],
|
273
|
+
# [
|
274
|
+
# {
|
275
|
+
# "position": 0,
|
276
|
+
# "value": "He"
|
277
|
+
# },
|
278
|
+
# {
|
279
|
+
# "position": 1,
|
280
|
+
# "value": "el"
|
281
|
+
# },
|
282
|
+
# {
|
283
|
+
# "position": 2,
|
284
|
+
# "value": "ll"
|
285
|
+
# },
|
286
|
+
# {
|
287
|
+
# "position": 3,
|
288
|
+
# "value": "lo"
|
289
|
+
# },
|
290
|
+
# {
|
291
|
+
# "position": 4,
|
292
|
+
# "value": "o "
|
293
|
+
# },
|
294
|
+
# {
|
295
|
+
# "position": 5,
|
296
|
+
# "value": " W"
|
297
|
+
# },
|
298
|
+
# {
|
299
|
+
# "position": 6,
|
300
|
+
# "value": "Wo"
|
301
|
+
# },
|
302
|
+
# {
|
303
|
+
# "position": 7,
|
304
|
+
# "value": "or"
|
305
|
+
# },
|
306
|
+
# {
|
307
|
+
# "position": 8,
|
308
|
+
# "value": "rl"
|
309
|
+
# },
|
310
|
+
# {
|
311
|
+
# "position": 9,
|
312
|
+
# "value": "ld"
|
313
|
+
# },
|
314
|
+
# {
|
315
|
+
# "position": 10,
|
316
|
+
# "value": "d"
|
317
|
+
# }
|
318
|
+
# ]
|
319
|
+
# ]
|
320
|
+
</pre></div>
|
321
|
+
</div>
|
322
|
+
<p>If normalizer is used, <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses white-space-separate like
|
323
|
+
tokenize method for ASCII characters. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram
|
324
|
+
tokenize method for non-ASCII characters.</p>
|
325
|
+
<p>You may be confused with this combined behavior. But it's reasonable
|
326
|
+
for most use cases such as English text (only ASCII characters) and
|
327
|
+
Japanese text (ASCII and non-ASCII characters are mixed).</p>
|
328
|
+
<p>Most languages that consist of only ASCII characters use white-space for
|
329
|
+
word separator. White-space-separate tokenize method is suitable for
|
330
|
+
the case.</p>
|
331
|
+
<p>Languages that consist of non-ASCII characters don't use white-space for
|
332
|
+
word separator. Bigram tokenize method is suitable for the case.</p>
|
333
|
+
<p>Mixed tokenize method is suitable for mixed language case.</p>
|
334
|
+
<p>If you want to use bigram tokenize method for ASCII character, see
|
335
|
+
<tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> type tokenizers such as
|
336
|
+
<a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a>.</p>
|
337
|
+
<p>Let's confirm <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> behavior by example.</p>
|
338
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses one or more white-spaces as token delimiter for
|
339
|
+
ASCII characters:</p>
|
340
|
+
<p>Execution example:</p>
|
341
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World" NormalizerAuto
|
342
|
+
# [
|
343
|
+
# [
|
344
|
+
# 0,
|
345
|
+
# 1337566253.89858,
|
346
|
+
# 0.000355720520019531
|
347
|
+
# ],
|
348
|
+
# [
|
349
|
+
# {
|
350
|
+
# "position": 0,
|
351
|
+
# "value": "hello"
|
352
|
+
# },
|
353
|
+
# {
|
354
|
+
# "position": 1,
|
355
|
+
# "value": "world"
|
356
|
+
# }
|
357
|
+
# ]
|
358
|
+
# ]
|
359
|
+
</pre></div>
|
360
|
+
</div>
|
361
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses character type change as token delimiter for
|
362
|
+
ASCII characters. Character type is one of the following:</p>
|
363
|
+
<blockquote>
|
364
|
+
<div><ul class="simple">
|
365
|
+
<li>Alphabet</li>
|
366
|
+
<li>Digit</li>
|
367
|
+
<li>Symbol (such as <tt class="docutils literal"><span class="pre">(</span></tt>, <tt class="docutils literal"><span class="pre">)</span></tt> and <tt class="docutils literal"><span class="pre">!</span></tt>)</li>
|
368
|
+
<li>Hiragana</li>
|
369
|
+
<li>Katakana</li>
|
370
|
+
<li>Kanji</li>
|
371
|
+
<li>Others</li>
|
372
|
+
</ul>
|
373
|
+
</div></blockquote>
|
374
|
+
<p>The following example shows two token delimiters:</p>
|
375
|
+
<blockquote>
|
376
|
+
<div><ul class="simple">
|
377
|
+
<li>between <tt class="docutils literal"><span class="pre">100</span></tt> (digits) and <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets)</li>
|
378
|
+
<li>between <tt class="docutils literal"><span class="pre">cents</span></tt> (alphabets) and <tt class="docutils literal"><span class="pre">!!!</span></tt> (symbols)</li>
|
379
|
+
</ul>
|
380
|
+
</div></blockquote>
|
381
|
+
<p>Execution example:</p>
|
382
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "100cents!!!" NormalizerAuto
|
383
|
+
# [
|
384
|
+
# [
|
385
|
+
# 0,
|
386
|
+
# 1337566253.89858,
|
387
|
+
# 0.000355720520019531
|
388
|
+
# ],
|
389
|
+
# [
|
390
|
+
# {
|
391
|
+
# "position": 0,
|
392
|
+
# "value": "100"
|
393
|
+
# },
|
394
|
+
# {
|
395
|
+
# "position": 1,
|
396
|
+
# "value": "cents"
|
397
|
+
# },
|
398
|
+
# {
|
399
|
+
# "position": 2,
|
400
|
+
# "value": "!!!"
|
401
|
+
# }
|
402
|
+
# ]
|
403
|
+
# ]
|
404
|
+
</pre></div>
|
405
|
+
</div>
|
406
|
+
<p>Here is an example that <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> uses bigram tokenize method
|
407
|
+
for non-ASCII characters.</p>
|
408
|
+
<p>Execution example:</p>
|
409
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日本語の勉強" NormalizerAuto
|
410
|
+
# [
|
411
|
+
# [
|
412
|
+
# 0,
|
413
|
+
# 1337566253.89858,
|
414
|
+
# 0.000355720520019531
|
415
|
+
# ],
|
416
|
+
# [
|
417
|
+
# {
|
418
|
+
# "position": 0,
|
419
|
+
# "value": "日本"
|
420
|
+
# },
|
421
|
+
# {
|
422
|
+
# "position": 1,
|
423
|
+
# "value": "本語"
|
424
|
+
# },
|
425
|
+
# {
|
426
|
+
# "position": 2,
|
427
|
+
# "value": "語の"
|
428
|
+
# },
|
429
|
+
# {
|
430
|
+
# "position": 3,
|
431
|
+
# "value": "の勉"
|
432
|
+
# },
|
433
|
+
# {
|
434
|
+
# "position": 4,
|
435
|
+
# "value": "勉強"
|
436
|
+
# },
|
437
|
+
# {
|
438
|
+
# "position": 5,
|
439
|
+
# "value": "強"
|
440
|
+
# }
|
441
|
+
# ]
|
442
|
+
# ]
|
443
|
+
</pre></div>
|
444
|
+
</div>
|
445
|
+
</div>
|
446
|
+
<div class="section" id="tokenbigramsplitsymbol">
|
447
|
+
<span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
|
448
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
|
449
|
+
difference between them is symbol handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt>
|
450
|
+
tokenizes symbols by bigram tokenize method:</p>
|
451
|
+
<p>Execution example:</p>
|
452
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
|
453
|
+
# [
|
454
|
+
# [
|
455
|
+
# 0,
|
456
|
+
# 1337566253.89858,
|
457
|
+
# 0.000355720520019531
|
458
|
+
# ],
|
459
|
+
# [
|
460
|
+
# {
|
461
|
+
# "position": 0,
|
462
|
+
# "value": "100"
|
463
|
+
# },
|
464
|
+
# {
|
465
|
+
# "position": 1,
|
466
|
+
# "value": "cents"
|
467
|
+
# },
|
468
|
+
# {
|
469
|
+
# "position": 2,
|
470
|
+
# "value": "!!"
|
471
|
+
# },
|
472
|
+
# {
|
473
|
+
# "position": 3,
|
474
|
+
# "value": "!!"
|
475
|
+
# },
|
476
|
+
# {
|
477
|
+
# "position": 4,
|
478
|
+
# "value": "!"
|
479
|
+
# }
|
480
|
+
# ]
|
481
|
+
# ]
|
482
|
+
</pre></div>
|
483
|
+
</div>
|
484
|
+
</div>
|
485
|
+
<div class="section" id="tokenbigramsplitsymbolalpha">
|
486
|
+
<span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
|
487
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
|
488
|
+
difference between them is symbol and alphabet
|
489
|
+
handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> tokenizes symbols and
|
490
|
+
alphabets by bigram tokenize method:</p>
|
491
|
+
<p>Execution example:</p>
|
492
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
|
493
|
+
# [
|
494
|
+
# [
|
495
|
+
# 0,
|
496
|
+
# 1337566253.89858,
|
497
|
+
# 0.000355720520019531
|
498
|
+
# ],
|
499
|
+
# [
|
500
|
+
# {
|
501
|
+
# "position": 0,
|
502
|
+
# "value": "100"
|
503
|
+
# },
|
504
|
+
# {
|
505
|
+
# "position": 1,
|
506
|
+
# "value": "ce"
|
507
|
+
# },
|
508
|
+
# {
|
509
|
+
# "position": 2,
|
510
|
+
# "value": "en"
|
511
|
+
# },
|
512
|
+
# {
|
513
|
+
# "position": 3,
|
514
|
+
# "value": "nt"
|
515
|
+
# },
|
516
|
+
# {
|
517
|
+
# "position": 4,
|
518
|
+
# "value": "ts"
|
519
|
+
# },
|
520
|
+
# {
|
521
|
+
# "position": 5,
|
522
|
+
# "value": "s!"
|
523
|
+
# },
|
524
|
+
# {
|
525
|
+
# "position": 6,
|
526
|
+
# "value": "!!"
|
527
|
+
# },
|
528
|
+
# {
|
529
|
+
# "position": 7,
|
530
|
+
# "value": "!!"
|
531
|
+
# },
|
532
|
+
# {
|
533
|
+
# "position": 8,
|
534
|
+
# "value": "!"
|
535
|
+
# }
|
536
|
+
# ]
|
537
|
+
# ]
|
538
|
+
</pre></div>
|
539
|
+
</div>
|
540
|
+
</div>
|
541
|
+
<div class="section" id="tokenbigramsplitsymbolalphadigit">
|
542
|
+
<span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
|
543
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> is similar to
|
544
|
+
<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference between them is symbol, alphabet
|
545
|
+
and digit handling. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> tokenizes
|
546
|
+
symbols, alphabets and digits by bigram tokenize method. It means that
|
547
|
+
all characters are tokenized by bigram tokenize method:</p>
|
548
|
+
<p>Execution example:</p>
|
549
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
|
550
|
+
# [
|
551
|
+
# [
|
552
|
+
# 0,
|
553
|
+
# 1337566253.89858,
|
554
|
+
# 0.000355720520019531
|
555
|
+
# ],
|
556
|
+
# [
|
557
|
+
# {
|
558
|
+
# "position": 0,
|
559
|
+
# "value": "10"
|
560
|
+
# },
|
561
|
+
# {
|
562
|
+
# "position": 1,
|
563
|
+
# "value": "00"
|
564
|
+
# },
|
565
|
+
# {
|
566
|
+
# "position": 2,
|
567
|
+
# "value": "0c"
|
568
|
+
# },
|
569
|
+
# {
|
570
|
+
# "position": 3,
|
571
|
+
# "value": "ce"
|
572
|
+
# },
|
573
|
+
# {
|
574
|
+
# "position": 4,
|
575
|
+
# "value": "en"
|
576
|
+
# },
|
577
|
+
# {
|
578
|
+
# "position": 5,
|
579
|
+
# "value": "nt"
|
580
|
+
# },
|
581
|
+
# {
|
582
|
+
# "position": 6,
|
583
|
+
# "value": "ts"
|
584
|
+
# },
|
585
|
+
# {
|
586
|
+
# "position": 7,
|
587
|
+
# "value": "s!"
|
588
|
+
# },
|
589
|
+
# {
|
590
|
+
# "position": 8,
|
591
|
+
# "value": "!!"
|
592
|
+
# },
|
593
|
+
# {
|
594
|
+
# "position": 9,
|
595
|
+
# "value": "!!"
|
596
|
+
# },
|
597
|
+
# {
|
598
|
+
# "position": 10,
|
599
|
+
# "value": "!"
|
600
|
+
# }
|
601
|
+
# ]
|
602
|
+
# ]
|
603
|
+
</pre></div>
|
604
|
+
</div>
|
605
|
+
</div>
|
606
|
+
<div class="section" id="tokenbigramignoreblank">
|
607
|
+
<span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
|
608
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The
|
609
|
+
difference between them is blank handling. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>
|
610
|
+
ignores white-spaces in continuous symbols and non-ASCII characters.</p>
|
611
|
+
<p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
|
612
|
+
has symbols and non-ASCII characters.</p>
|
613
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
|
614
|
+
<p>Execution example:</p>
|
615
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
616
|
+
# [
|
617
|
+
# [
|
618
|
+
# 0,
|
619
|
+
# 1337566253.89858,
|
620
|
+
# 0.000355720520019531
|
621
|
+
# ],
|
622
|
+
# [
|
623
|
+
# {
|
624
|
+
# "position": 0,
|
625
|
+
# "value": "日"
|
626
|
+
# },
|
627
|
+
# {
|
628
|
+
# "position": 1,
|
629
|
+
# "value": "本"
|
630
|
+
# },
|
631
|
+
# {
|
632
|
+
# "position": 2,
|
633
|
+
# "value": "語"
|
634
|
+
# },
|
635
|
+
# {
|
636
|
+
# "position": 3,
|
637
|
+
# "value": "!"
|
638
|
+
# },
|
639
|
+
# {
|
640
|
+
# "position": 4,
|
641
|
+
# "value": "!"
|
642
|
+
# },
|
643
|
+
# {
|
644
|
+
# "position": 5,
|
645
|
+
# "value": "!"
|
646
|
+
# }
|
647
|
+
# ]
|
648
|
+
# ]
|
649
|
+
</pre></div>
|
650
|
+
</div>
|
651
|
+
<p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt>:</p>
|
652
|
+
<p>Execution example:</p>
|
653
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
|
654
|
+
# [
|
655
|
+
# [
|
656
|
+
# 0,
|
657
|
+
# 1337566253.89858,
|
658
|
+
# 0.000355720520019531
|
659
|
+
# ],
|
660
|
+
# [
|
661
|
+
# {
|
662
|
+
# "position": 0,
|
663
|
+
# "value": "日本"
|
664
|
+
# },
|
665
|
+
# {
|
666
|
+
# "position": 1,
|
667
|
+
# "value": "本語"
|
668
|
+
# },
|
669
|
+
# {
|
670
|
+
# "position": 2,
|
671
|
+
# "value": "語"
|
672
|
+
# },
|
673
|
+
# {
|
674
|
+
# "position": 3,
|
675
|
+
# "value": "!!!"
|
676
|
+
# }
|
677
|
+
# ]
|
678
|
+
# ]
|
679
|
+
</pre></div>
|
680
|
+
</div>
|
681
|
+
</div>
|
682
|
+
<div class="section" id="tokenbigramignoreblanksplitsymbol">
|
683
|
+
<span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
|
684
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> is similar to
|
685
|
+
<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
|
686
|
+
<blockquote>
|
687
|
+
<div><ul class="simple">
|
688
|
+
<li>Blank handling</li>
|
689
|
+
<li>Symbol handling</li>
|
690
|
+
</ul>
|
691
|
+
</div></blockquote>
|
692
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> ignores white-spaces in
|
693
|
+
continuous symbols and non-ASCII characters.</p>
|
694
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> tokenizes symbols by bigram
|
695
|
+
tokenize method.</p>
|
696
|
+
<p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
|
697
|
+
has symbols and non-ASCII characters.</p>
|
698
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
|
699
|
+
<p>Execution example:</p>
|
700
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
701
|
+
# [
|
702
|
+
# [
|
703
|
+
# 0,
|
704
|
+
# 1337566253.89858,
|
705
|
+
# 0.000355720520019531
|
706
|
+
# ],
|
707
|
+
# [
|
708
|
+
# {
|
709
|
+
# "position": 0,
|
710
|
+
# "value": "日"
|
711
|
+
# },
|
712
|
+
# {
|
713
|
+
# "position": 1,
|
714
|
+
# "value": "本"
|
715
|
+
# },
|
716
|
+
# {
|
717
|
+
# "position": 2,
|
718
|
+
# "value": "語"
|
719
|
+
# },
|
720
|
+
# {
|
721
|
+
# "position": 3,
|
722
|
+
# "value": "!"
|
723
|
+
# },
|
724
|
+
# {
|
725
|
+
# "position": 4,
|
726
|
+
# "value": "!"
|
727
|
+
# },
|
728
|
+
# {
|
729
|
+
# "position": 5,
|
730
|
+
# "value": "!"
|
731
|
+
# }
|
732
|
+
# ]
|
733
|
+
# ]
|
734
|
+
</pre></div>
|
735
|
+
</div>
|
736
|
+
<p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt>:</p>
|
737
|
+
<p>Execution example:</p>
|
738
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
|
739
|
+
# [
|
740
|
+
# [
|
741
|
+
# 0,
|
742
|
+
# 1337566253.89858,
|
743
|
+
# 0.000355720520019531
|
744
|
+
# ],
|
745
|
+
# [
|
746
|
+
# {
|
747
|
+
# "position": 0,
|
748
|
+
# "value": "日本"
|
749
|
+
# },
|
750
|
+
# {
|
751
|
+
# "position": 1,
|
752
|
+
# "value": "本語"
|
753
|
+
# },
|
754
|
+
# {
|
755
|
+
# "position": 2,
|
756
|
+
# "value": "語!"
|
757
|
+
# },
|
758
|
+
# {
|
759
|
+
# "position": 3,
|
760
|
+
# "value": "!!"
|
761
|
+
# },
|
762
|
+
# {
|
763
|
+
# "position": 4,
|
764
|
+
# "value": "!!"
|
765
|
+
# },
|
766
|
+
# {
|
767
|
+
# "position": 5,
|
768
|
+
# "value": "!"
|
769
|
+
# }
|
770
|
+
# ]
|
771
|
+
# ]
|
772
|
+
</pre></div>
|
773
|
+
</div>
|
774
|
+
</div>
|
775
|
+
<div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
|
776
|
+
<span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
|
777
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> is similar to
|
778
|
+
<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
|
779
|
+
<blockquote>
|
780
|
+
<div><ul class="simple">
|
781
|
+
<li>Blank handling</li>
|
782
|
+
<li>Symbol and alphabet handling</li>
|
783
|
+
</ul>
|
784
|
+
</div></blockquote>
|
785
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> ignores white-spaces in
|
786
|
+
continuous symbols and non-ASCII characters.</p>
|
787
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> tokenizes symbols and
|
788
|
+
alphabets by bigram tokenize method.</p>
|
789
|
+
<p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> text because it
|
790
|
+
has symbols and non-ASCII characters with white spaces and alphabets.</p>
|
791
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
|
792
|
+
<p>Execution example:</p>
|
793
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
|
794
|
+
# [
|
795
|
+
# [
|
796
|
+
# 0,
|
797
|
+
# 1337566253.89858,
|
798
|
+
# 0.000355720520019531
|
799
|
+
# ],
|
800
|
+
# [
|
801
|
+
# {
|
802
|
+
# "position": 0,
|
803
|
+
# "value": "hello"
|
804
|
+
# },
|
805
|
+
# {
|
806
|
+
# "position": 1,
|
807
|
+
# "value": "日"
|
808
|
+
# },
|
809
|
+
# {
|
810
|
+
# "position": 2,
|
811
|
+
# "value": "本"
|
812
|
+
# },
|
813
|
+
# {
|
814
|
+
# "position": 3,
|
815
|
+
# "value": "語"
|
816
|
+
# },
|
817
|
+
# {
|
818
|
+
# "position": 4,
|
819
|
+
# "value": "!"
|
820
|
+
# },
|
821
|
+
# {
|
822
|
+
# "position": 5,
|
823
|
+
# "value": "!"
|
824
|
+
# },
|
825
|
+
# {
|
826
|
+
# "position": 6,
|
827
|
+
# "value": "!"
|
828
|
+
# }
|
829
|
+
# ]
|
830
|
+
# ]
|
831
|
+
</pre></div>
|
832
|
+
</div>
|
833
|
+
<p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt>:</p>
|
834
|
+
<p>Execution example:</p>
|
835
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
|
836
|
+
# [
|
837
|
+
# [
|
838
|
+
# 0,
|
839
|
+
# 1337566253.89858,
|
840
|
+
# 0.000355720520019531
|
841
|
+
# ],
|
842
|
+
# [
|
843
|
+
# {
|
844
|
+
# "position": 0,
|
845
|
+
# "value": "he"
|
846
|
+
# },
|
847
|
+
# {
|
848
|
+
# "position": 1,
|
849
|
+
# "value": "el"
|
850
|
+
# },
|
851
|
+
# {
|
852
|
+
# "position": 2,
|
853
|
+
# "value": "ll"
|
854
|
+
# },
|
855
|
+
# {
|
856
|
+
# "position": 3,
|
857
|
+
# "value": "lo"
|
858
|
+
# },
|
859
|
+
# {
|
860
|
+
# "position": 4,
|
861
|
+
# "value": "o日"
|
862
|
+
# },
|
863
|
+
# {
|
864
|
+
# "position": 5,
|
865
|
+
# "value": "日本"
|
866
|
+
# },
|
867
|
+
# {
|
868
|
+
# "position": 6,
|
869
|
+
# "value": "本語"
|
870
|
+
# },
|
871
|
+
# {
|
872
|
+
# "position": 7,
|
873
|
+
# "value": "語!"
|
874
|
+
# },
|
875
|
+
# {
|
876
|
+
# "position": 8,
|
877
|
+
# "value": "!!"
|
878
|
+
# },
|
879
|
+
# {
|
880
|
+
# "position": 9,
|
881
|
+
# "value": "!!"
|
882
|
+
# },
|
883
|
+
# {
|
884
|
+
# "position": 10,
|
885
|
+
# "value": "!"
|
886
|
+
# }
|
887
|
+
# ]
|
888
|
+
# ]
|
889
|
+
</pre></div>
|
890
|
+
</div>
|
891
|
+
</div>
|
892
|
+
<div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
|
893
|
+
<span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
|
894
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> is similar to
|
895
|
+
<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The differences between them are as follows:</p>
|
896
|
+
<blockquote>
|
897
|
+
<div><ul class="simple">
|
898
|
+
<li>Blank handling</li>
|
899
|
+
<li>Symbol, alphabet and digit handling</li>
|
900
|
+
</ul>
|
901
|
+
</div></blockquote>
|
902
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> ignores white-spaces
|
903
|
+
in continuous symbols and non-ASCII characters.</p>
|
904
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> tokenizes symbols,
|
905
|
+
alphabets and digits by bigram tokenize method. It means that all
|
906
|
+
characters are tokenized by bigram tokenize method.</p>
|
907
|
+
<p>You can see the difference between them with the <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> text
|
908
|
+
because it has symbols and non-ASCII characters with white spaces,
|
909
|
+
alphabets and digits.</p>
|
910
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> :</p>
|
911
|
+
<p>Execution example:</p>
|
912
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
913
|
+
# [
|
914
|
+
# [
|
915
|
+
# 0,
|
916
|
+
# 1337566253.89858,
|
917
|
+
# 0.000355720520019531
|
918
|
+
# ],
|
919
|
+
# [
|
920
|
+
# {
|
921
|
+
# "position": 0,
|
922
|
+
# "value": "hello"
|
923
|
+
# },
|
924
|
+
# {
|
925
|
+
# "position": 1,
|
926
|
+
# "value": "日"
|
927
|
+
# },
|
928
|
+
# {
|
929
|
+
# "position": 2,
|
930
|
+
# "value": "本"
|
931
|
+
# },
|
932
|
+
# {
|
933
|
+
# "position": 3,
|
934
|
+
# "value": "語"
|
935
|
+
# },
|
936
|
+
# {
|
937
|
+
# "position": 4,
|
938
|
+
# "value": "!"
|
939
|
+
# },
|
940
|
+
# {
|
941
|
+
# "position": 5,
|
942
|
+
# "value": "!"
|
943
|
+
# },
|
944
|
+
# {
|
945
|
+
# "position": 6,
|
946
|
+
# "value": "!"
|
947
|
+
# },
|
948
|
+
# {
|
949
|
+
# "position": 7,
|
950
|
+
# "value": "777"
|
951
|
+
# }
|
952
|
+
# ]
|
953
|
+
# ]
|
954
|
+
</pre></div>
|
955
|
+
</div>
|
956
|
+
<p>Here is a result by <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt>:</p>
|
957
|
+
<p>Execution example:</p>
|
958
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
959
|
+
# [
|
960
|
+
# [
|
961
|
+
# 0,
|
962
|
+
# 1337566253.89858,
|
963
|
+
# 0.000355720520019531
|
964
|
+
# ],
|
965
|
+
# [
|
966
|
+
# {
|
967
|
+
# "position": 0,
|
968
|
+
# "value": "he"
|
969
|
+
# },
|
970
|
+
# {
|
971
|
+
# "position": 1,
|
972
|
+
# "value": "el"
|
973
|
+
# },
|
974
|
+
# {
|
975
|
+
# "position": 2,
|
976
|
+
# "value": "ll"
|
977
|
+
# },
|
978
|
+
# {
|
979
|
+
# "position": 3,
|
980
|
+
# "value": "lo"
|
981
|
+
# },
|
982
|
+
# {
|
983
|
+
# "position": 4,
|
984
|
+
# "value": "o日"
|
985
|
+
# },
|
986
|
+
# {
|
987
|
+
# "position": 5,
|
988
|
+
# "value": "日本"
|
989
|
+
# },
|
990
|
+
# {
|
991
|
+
# "position": 6,
|
992
|
+
# "value": "本語"
|
993
|
+
# },
|
994
|
+
# {
|
995
|
+
# "position": 7,
|
996
|
+
# "value": "語!"
|
997
|
+
# },
|
998
|
+
# {
|
999
|
+
# "position": 8,
|
1000
|
+
# "value": "!!"
|
1001
|
+
# },
|
1002
|
+
# {
|
1003
|
+
# "position": 9,
|
1004
|
+
# "value": "!!"
|
1005
|
+
# },
|
1006
|
+
# {
|
1007
|
+
# "position": 10,
|
1008
|
+
# "value": "!7"
|
1009
|
+
# },
|
1010
|
+
# {
|
1011
|
+
# "position": 11,
|
1012
|
+
# "value": "77"
|
1013
|
+
# },
|
1014
|
+
# {
|
1015
|
+
# "position": 12,
|
1016
|
+
# "value": "77"
|
1017
|
+
# },
|
1018
|
+
# {
|
1019
|
+
# "position": 13,
|
1020
|
+
# "value": "7"
|
1021
|
+
# }
|
1022
|
+
# ]
|
1023
|
+
# ]
|
1024
|
+
</pre></div>
|
1025
|
+
</div>
|
1026
|
+
</div>
|
1027
|
+
<div class="section" id="tokenunigram">
|
1028
|
+
<span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
|
1029
|
+
<p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
|
1030
|
+
between them is token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
|
1031
|
+
token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> uses 1 character per token.</p>
|
1032
|
+
<p>Execution example:</p>
|
1033
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram "100cents!!!" NormalizerAuto
|
1034
|
+
# [
|
1035
|
+
# [
|
1036
|
+
# 0,
|
1037
|
+
# 1337566253.89858,
|
1038
|
+
# 0.000355720520019531
|
1039
|
+
# ],
|
1040
|
+
# [
|
1041
|
+
# {
|
1042
|
+
# "position": 0,
|
1043
|
+
# "value": "100"
|
1044
|
+
# },
|
1045
|
+
# {
|
1046
|
+
# "position": 1,
|
1047
|
+
# "value": "cents"
|
1048
|
+
# },
|
1049
|
+
# {
|
1050
|
+
# "position": 2,
|
1051
|
+
# "value": "!!!"
|
1052
|
+
# }
|
1053
|
+
# ]
|
1054
|
+
# ]
|
1055
|
+
</pre></div>
|
1056
|
+
</div>
|
1057
|
+
</div>
|
1058
|
+
<div class="section" id="tokentrigram">
|
1059
|
+
<span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
|
1060
|
+
<p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> is similar to <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>. The difference
|
1061
|
+
between them is token unit. <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> uses 2 characters per
|
1062
|
+
token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> uses 3 characters per token.</p>
|
1063
|
+
<p>Execution example:</p>
|
1064
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
|
1065
|
+
# [
|
1066
|
+
# [
|
1067
|
+
# 0,
|
1068
|
+
# 1337566253.89858,
|
1069
|
+
# 0.000355720520019531
|
1070
|
+
# ],
|
1071
|
+
# [
|
1072
|
+
# {
|
1073
|
+
# "position": 0,
|
1074
|
+
# "value": "10000"
|
1075
|
+
# },
|
1076
|
+
# {
|
1077
|
+
# "position": 1,
|
1078
|
+
# "value": "cents"
|
1079
|
+
# },
|
1080
|
+
# {
|
1081
|
+
# "position": 2,
|
1082
|
+
# "value": "!!!!!"
|
1083
|
+
# }
|
1084
|
+
# ]
|
1085
|
+
# ]
|
1086
|
+
</pre></div>
|
1087
|
+
</div>
|
1088
|
+
</div>
|
1089
|
+
<div class="section" id="tokendelimit">
|
1090
|
+
<span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
|
1091
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> extracts tokens by splitting text on one or more space
|
1092
|
+
characters (<tt class="docutils literal"><span class="pre">U+0020</span></tt>). For example, <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> is tokenized to
|
1093
|
+
<tt class="docutils literal"><span class="pre">Hello</span></tt> and <tt class="docutils literal"><span class="pre">World</span></tt>.</p>
|
1094
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> is suitable for tag text. You can extract <tt class="docutils literal"><span class="pre">groonga</span></tt>
|
1095
|
+
and <tt class="docutils literal"><span class="pre">full-text-search</span></tt> and <tt class="docutils literal"><span class="pre">http</span></tt> as tags from <tt class="docutils literal"><span class="pre">groonga</span>
|
1096
|
+
<span class="pre">full-text-search</span> <span class="pre">http</span></tt>.</p>
|
1097
|
+
<p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt>:</p>
|
1098
|
+
<p>Execution example:</p>
|
1099
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
|
1100
|
+
# [
|
1101
|
+
# [
|
1102
|
+
# 0,
|
1103
|
+
# 1337566253.89858,
|
1104
|
+
# 0.000355720520019531
|
1105
|
+
# ],
|
1106
|
+
# [
|
1107
|
+
# {
|
1108
|
+
# "position": 0,
|
1109
|
+
# "value": "groonga"
|
1110
|
+
# },
|
1111
|
+
# {
|
1112
|
+
# "position": 1,
|
1113
|
+
# "value": "full-text-search"
|
1114
|
+
# },
|
1115
|
+
# {
|
1116
|
+
# "position": 2,
|
1117
|
+
# "value": "http"
|
1118
|
+
# }
|
1119
|
+
# ]
|
1120
|
+
# ]
|
1121
|
+
</pre></div>
|
1122
|
+
</div>
|
1123
|
+
</div>
|
1124
|
+
<div class="section" id="tokendelimitnull">
|
1125
|
+
<span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
|
1126
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is similar to <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>. The
|
1127
|
+
difference between them is separator character. <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a>
|
1128
|
+
uses space character (<tt class="docutils literal"><span class="pre">U+0020</span></tt>) but <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> uses NUL
|
1129
|
+
character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</p>
|
1130
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> is also suitable for tag text.</p>
|
1131
|
+
<p>Here is an example of <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt>:</p>
|
1132
|
+
<p>Execution example:</p>
|
1133
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
|
1134
|
+
# [
|
1135
|
+
# [
|
1136
|
+
# 0,
|
1137
|
+
# 1337566253.89858,
|
1138
|
+
# 0.000355720520019531
|
1139
|
+
# ],
|
1140
|
+
# [
|
1141
|
+
# {
|
1142
|
+
# "position": 0,
|
1143
|
+
# "value": "groongau0000full-text-searchu0000http"
|
1144
|
+
# }
|
1145
|
+
# ]
|
1146
|
+
# ]
|
1147
|
+
</pre></div>
|
1148
|
+
</div>
|
1149
|
+
</div>
|
1150
|
+
<div class="section" id="tokenmecab">
|
1151
|
+
<span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
|
1152
|
+
<p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
|
1153
|
+
morphological analyzer.</p>
|
1154
|
+
<p>MeCab doesn't depend on Japanese. You can use MeCab for other
|
1155
|
+
languages by creating dictionary for the languages. You can use <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST
|
1156
|
+
Japanese Dictionary</a>
|
1157
|
+
for Japanese.</p>
|
1158
|
+
<p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> is good for precision rather than recall. You can find
|
1159
|
+
<tt class="docutils literal"><span class="pre">東京都</span></tt> and <tt class="docutils literal"><span class="pre">京都</span></tt> texts by <tt class="docutils literal"><span class="pre">京都</span></tt> query with
|
1160
|
+
<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> but <tt class="docutils literal"><span class="pre">東京都</span></tt> isn't expected. You can find only
|
1161
|
+
<tt class="docutils literal"><span class="pre">京都</span></tt> text by <tt class="docutils literal"><span class="pre">京都</span></tt> query with <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>.</p>
|
1162
|
+
<p>If you want to support neologisms, you need to keep updating your
|
1163
|
+
MeCab dictionary. This incurs a maintenance cost. (<a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't
|
1164
|
+
require dictionary maintenance because <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> doesn't use
|
1165
|
+
dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
|
1166
|
+
<p>Here is an example of <tt class="docutils literal"><span class="pre">TokenMecab</span></tt>. <tt class="docutils literal"><span class="pre">東京都</span></tt> is tokenized to <tt class="docutils literal"><span class="pre">東京</span></tt>
|
1167
|
+
and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't include <tt class="docutils literal"><span class="pre">京都</span></tt>:</p>
|
1168
|
+
<p>Execution example:</p>
|
1169
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab "東京都"
|
1170
|
+
# [
|
1171
|
+
# [
|
1172
|
+
# 0,
|
1173
|
+
# 1337566253.89858,
|
1174
|
+
# 0.000355720520019531
|
1175
|
+
# ],
|
1176
|
+
# [
|
1177
|
+
# {
|
1178
|
+
# "position": 0,
|
1179
|
+
# "value": "東京"
|
1180
|
+
# },
|
1181
|
+
# {
|
1182
|
+
# "position": 1,
|
1183
|
+
# "value": "都"
|
1184
|
+
# }
|
1185
|
+
# ]
|
1186
|
+
# ]
|
1187
|
+
</pre></div>
|
1188
|
+
</div>
|
1189
|
+
</div>
|
1190
|
+
<div class="section" id="tokenregexp">
|
1191
|
+
<span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
|
1192
|
+
<div class="versionadded">
|
1193
|
+
<p><span class="versionmodified">New in version 5.0.1.</span></p>
|
1194
|
+
</div>
|
1195
|
+
<div class="admonition caution">
|
1196
|
+
<p class="first admonition-title">Caution</p>
|
1197
|
+
<p class="last">This tokenizer is experimental. Specification may be changed.</p>
|
1198
|
+
</div>
|
1199
|
+
<div class="admonition caution">
|
1200
|
+
<p class="first admonition-title">Caution</p>
|
1201
|
+
<p class="last">This tokenizer can be used only with UTF-8. You can't use this
|
1202
|
+
tokenizer with EUC-JP, Shift_JIS and so on.</p>
|
1203
|
+
</div>
|
1204
|
+
<p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is a tokenizer for supporting regular expression
|
1205
|
+
search by index.</p>
|
1206
|
+
<p>In general, regular expression search is evaluated as sequential
|
1207
|
+
search. But the following cases can be evaluated as index search:</p>
|
1208
|
+
<blockquote>
|
1209
|
+
<div><ul class="simple">
|
1210
|
+
<li>Literal only case such as <tt class="docutils literal"><span class="pre">hello</span></tt></li>
|
1211
|
+
<li>The beginning of text and literal case such as <tt class="docutils literal"><span class="pre">\A/home/alice</span></tt></li>
|
1212
|
+
<li>The end of text and literal case such as <tt class="docutils literal"><span class="pre">\.txt\z</span></tt></li>
|
1213
|
+
</ul>
|
1214
|
+
</div></blockquote>
|
1215
|
+
<p>In most cases, index search is faster than sequential search.</p>
|
1216
|
+
<p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> is based on bigram tokenize method. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt>
|
1217
|
+
adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) at the beginning of text
|
1218
|
+
and the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) to the end of text when you
|
1219
|
+
index text:</p>
|
1220
|
+
<p>Execution example:</p>
|
1221
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
|
1222
|
+
# [
|
1223
|
+
# [
|
1224
|
+
# 0,
|
1225
|
+
# 1337566253.89858,
|
1226
|
+
# 0.000355720520019531
|
1227
|
+
# ],
|
1228
|
+
# [
|
1229
|
+
# {
|
1230
|
+
# "position": 0,
|
1231
|
+
# "value": ""
|
1232
|
+
# },
|
1233
|
+
# {
|
1234
|
+
# "position": 1,
|
1235
|
+
# "value": "/h"
|
1236
|
+
# },
|
1237
|
+
# {
|
1238
|
+
# "position": 2,
|
1239
|
+
# "value": "ho"
|
1240
|
+
# },
|
1241
|
+
# {
|
1242
|
+
# "position": 3,
|
1243
|
+
# "value": "om"
|
1244
|
+
# },
|
1245
|
+
# {
|
1246
|
+
# "position": 4,
|
1247
|
+
# "value": "me"
|
1248
|
+
# },
|
1249
|
+
# {
|
1250
|
+
# "position": 5,
|
1251
|
+
# "value": "e/"
|
1252
|
+
# },
|
1253
|
+
# {
|
1254
|
+
# "position": 6,
|
1255
|
+
# "value": "/a"
|
1256
|
+
# },
|
1257
|
+
# {
|
1258
|
+
# "position": 7,
|
1259
|
+
# "value": "al"
|
1260
|
+
# },
|
1261
|
+
# {
|
1262
|
+
# "position": 8,
|
1263
|
+
# "value": "li"
|
1264
|
+
# },
|
1265
|
+
# {
|
1266
|
+
# "position": 9,
|
1267
|
+
# "value": "ic"
|
1268
|
+
# },
|
1269
|
+
# {
|
1270
|
+
# "position": 10,
|
1271
|
+
# "value": "ce"
|
1272
|
+
# },
|
1273
|
+
# {
|
1274
|
+
# "position": 11,
|
1275
|
+
# "value": "e/"
|
1276
|
+
# },
|
1277
|
+
# {
|
1278
|
+
# "position": 12,
|
1279
|
+
# "value": "/t"
|
1280
|
+
# },
|
1281
|
+
# {
|
1282
|
+
# "position": 13,
|
1283
|
+
# "value": "te"
|
1284
|
+
# },
|
1285
|
+
# {
|
1286
|
+
# "position": 14,
|
1287
|
+
# "value": "es"
|
1288
|
+
# },
|
1289
|
+
# {
|
1290
|
+
# "position": 15,
|
1291
|
+
# "value": "st"
|
1292
|
+
# },
|
1293
|
+
# {
|
1294
|
+
# "position": 16,
|
1295
|
+
# "value": "t."
|
1296
|
+
# },
|
1297
|
+
# {
|
1298
|
+
# "position": 17,
|
1299
|
+
# "value": ".t"
|
1300
|
+
# },
|
1301
|
+
# {
|
1302
|
+
# "position": 18,
|
1303
|
+
# "value": "tx"
|
1304
|
+
# },
|
1305
|
+
# {
|
1306
|
+
# "position": 19,
|
1307
|
+
# "value": "xt"
|
1308
|
+
# },
|
1309
|
+
# {
|
1310
|
+
# "position": 20,
|
1311
|
+
# "value": "t"
|
1312
|
+
# },
|
1313
|
+
# {
|
1314
|
+
# "position": 21,
|
1315
|
+
# "value": ""
|
1316
|
+
# }
|
1317
|
+
# ]
|
1318
|
+
# ]
|
1319
|
+
</pre></div>
|
1320
|
+
</div>
|
1321
|
+
<p>The beginning of text mark is used for the beginning of text search by
|
1322
|
+
<tt class="docutils literal"><span class="pre">\A</span></tt>. If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query,
|
1323
|
+
<tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) as the
|
1324
|
+
first token. Because the beginning of text mark must appear first,
|
1325
|
+
you can get results of the beginning of text search.</p>
|
1326
|
+
<p>Execution example:</p>
|
1327
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\A/home/alice/" NormalizerAuto --mode GET
|
1328
|
+
# [
|
1329
|
+
# [
|
1330
|
+
# 0,
|
1331
|
+
# 1337566253.89858,
|
1332
|
+
# 0.000355720520019531
|
1333
|
+
# ],
|
1334
|
+
# [
|
1335
|
+
# {
|
1336
|
+
# "position": 0,
|
1337
|
+
# "value": ""
|
1338
|
+
# },
|
1339
|
+
# {
|
1340
|
+
# "position": 1,
|
1341
|
+
# "value": "/h"
|
1342
|
+
# },
|
1343
|
+
# {
|
1344
|
+
# "position": 2,
|
1345
|
+
# "value": "ho"
|
1346
|
+
# },
|
1347
|
+
# {
|
1348
|
+
# "position": 3,
|
1349
|
+
# "value": "om"
|
1350
|
+
# },
|
1351
|
+
# {
|
1352
|
+
# "position": 4,
|
1353
|
+
# "value": "me"
|
1354
|
+
# },
|
1355
|
+
# {
|
1356
|
+
# "position": 5,
|
1357
|
+
# "value": "e/"
|
1358
|
+
# },
|
1359
|
+
# {
|
1360
|
+
# "position": 6,
|
1361
|
+
# "value": "/a"
|
1362
|
+
# },
|
1363
|
+
# {
|
1364
|
+
# "position": 7,
|
1365
|
+
# "value": "al"
|
1366
|
+
# },
|
1367
|
+
# {
|
1368
|
+
# "position": 8,
|
1369
|
+
# "value": "li"
|
1370
|
+
# },
|
1371
|
+
# {
|
1372
|
+
# "position": 9,
|
1373
|
+
# "value": "ic"
|
1374
|
+
# },
|
1375
|
+
# {
|
1376
|
+
# "position": 10,
|
1377
|
+
# "value": "ce"
|
1378
|
+
# },
|
1379
|
+
# {
|
1380
|
+
# "position": 11,
|
1381
|
+
# "value": "e/"
|
1382
|
+
# }
|
1383
|
+
# ]
|
1384
|
+
# ]
|
1385
|
+
</pre></div>
|
1386
|
+
</div>
|
1387
|
+
<p>The end of text mark is used for the end of text search by <tt class="docutils literal"><span class="pre">\z</span></tt>.
|
1388
|
+
If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query, <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds
|
1389
|
+
the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) as the last token. The end of text
|
1390
|
+
mark must appear at the end, so you can get results of the end of
|
1391
|
+
text search.</p>
|
1392
|
+
<p>Execution example:</p>
|
1393
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\.txt\\z" NormalizerAuto --mode GET
|
1394
|
+
# [
|
1395
|
+
# [
|
1396
|
+
# 0,
|
1397
|
+
# 1337566253.89858,
|
1398
|
+
# 0.000355720520019531
|
1399
|
+
# ],
|
1400
|
+
# [
|
1401
|
+
# {
|
1402
|
+
# "position": 0,
|
1403
|
+
# "value": "\\."
|
1404
|
+
# },
|
1405
|
+
# {
|
1406
|
+
# "position": 1,
|
1407
|
+
# "value": ".t"
|
1408
|
+
# },
|
1409
|
+
# {
|
1410
|
+
# "position": 2,
|
1411
|
+
# "value": "tx"
|
1412
|
+
# },
|
1413
|
+
# {
|
1414
|
+
# "position": 3,
|
1415
|
+
# "value": "xt"
|
1416
|
+
# },
|
1417
|
+
# {
|
1418
|
+
# "position": 5,
|
1419
|
+
# "value": ""
|
1420
|
+
# }
|
1421
|
+
# ]
|
1422
|
+
# ]
|
1423
|
+
</pre></div>
|
1424
|
+
</div>
|
1425
|
+
</div>
|
1426
|
+
</div>
|
91
1427
|
</div>
|
92
1428
|
|
93
1429
|
|
94
1430
|
</div>
|
95
1431
|
</div>
|
96
1432
|
</div>
|
97
|
-
<div class="sphinxsidebar"
|
1433
|
+
<div class="sphinxsidebar">
|
98
1434
|
<div class="sphinxsidebarwrapper">
|
1435
|
+
<h3><a href="../index.html">Table Of Contents</a></h3>
|
1436
|
+
<ul>
|
1437
|
+
<li><a class="reference internal" href="#">7.8. Tokenizers</a><ul>
|
1438
|
+
<li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
|
1439
|
+
<li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is "tokenize"?</a></li>
|
1440
|
+
<li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizers</a><ul>
|
1441
|
+
<li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
|
1442
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
|
1443
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
|
1444
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
|
1445
|
+
<li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
|
1446
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
|
1447
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
|
1448
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
|
1449
|
+
<li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
|
1450
|
+
<li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
|
1451
|
+
<li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
|
1452
|
+
<li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
|
1453
|
+
<li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
|
1454
|
+
<li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
|
1455
|
+
</ul>
|
1456
|
+
</li>
|
1457
|
+
</ul>
|
1458
|
+
</li>
|
1459
|
+
</ul>
|
1460
|
+
|
99
1461
|
<h4>Previous topic</h4>
|
100
1462
|
<p class="topless"><a href="normalizers.html"
|
101
1463
|
title="previous chapter">7.7. Normalizers</a></p>
|
102
1464
|
<h4>Next topic</h4>
|
103
1465
|
<p class="topless"><a href="token_filters.html"
|
104
1466
|
title="next chapter">7.9. Token filters</a></p>
|
105
|
-
<
|
106
|
-
|
107
|
-
<
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
</div>
|
112
|
-
<div id="searchbox" style="display: none" role="search">
|
1467
|
+
<h3>This Page</h3>
|
1468
|
+
<ul class="this-page-menu">
|
1469
|
+
<li><a href="../_sources/reference/tokenizers.txt"
|
1470
|
+
rel="nofollow">Show Source</a></li>
|
1471
|
+
</ul>
|
1472
|
+
<div id="searchbox" style="display: none">
|
113
1473
|
<h3>Quick search</h3>
|
114
1474
|
<form class="search" action="../search.html" method="get">
|
115
1475
|
<input type="text" name="q" />
|
@@ -126,7 +1486,7 @@
|
|
126
1486
|
</div>
|
127
1487
|
<div class="clearer"></div>
|
128
1488
|
</div>
|
129
|
-
<div class="related"
|
1489
|
+
<div class="related">
|
130
1490
|
<h3>Navigation</h3>
|
131
1491
|
<ul>
|
132
1492
|
<li class="right" style="margin-right: 10px">
|
@@ -138,11 +1498,11 @@
|
|
138
1498
|
<li class="right" >
|
139
1499
|
<a href="normalizers.html" title="7.7. Normalizers"
|
140
1500
|
>previous</a> |</li>
|
141
|
-
<li><a href="../index.html">Groonga v5.0.
|
1501
|
+
<li><a href="../index.html">Groonga v5.0.1-42-g4d10df1 documentation</a> »</li>
|
142
1502
|
<li><a href="../reference.html" >7. Reference manual</a> »</li>
|
143
1503
|
</ul>
|
144
1504
|
</div>
|
145
|
-
<div class="footer"
|
1505
|
+
<div class="footer">
|
146
1506
|
© Copyright 2009-2015, Brazil, Inc.
|
147
1507
|
</div>
|
148
1508
|
</body>
|