rroonga 5.0.0-x86-mingw32 → 5.0.1-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.yardopts +1 -0
- data/Rakefile +1 -16
- data/example/bookmark.rb +1 -6
- data/example/index-html.rb +0 -1
- data/ext/groonga/extconf.rb +4 -7
- data/ext/groonga/rb-grn-array.c +1 -1
- data/ext/groonga/rb-grn-column.c +33 -67
- data/ext/groonga/rb-grn-context.c +5 -5
- data/ext/groonga/rb-grn-database.c +2 -2
- data/ext/groonga/rb-grn-double-array-trie.c +4 -2
- data/ext/groonga/rb-grn-encoding-support.c +7 -1
- data/ext/groonga/rb-grn-equal-operator.c +85 -0
- data/ext/groonga/rb-grn-exception.c +17 -0
- data/ext/groonga/rb-grn-expression.c +85 -43
- data/ext/groonga/rb-grn-greater-equal-operator.c +88 -0
- data/ext/groonga/rb-grn-greater-operator.c +85 -0
- data/ext/groonga/rb-grn-hash.c +1 -1
- data/ext/groonga/rb-grn-index-column.c +150 -11
- data/ext/groonga/rb-grn-less-equal-operator.c +88 -0
- data/ext/groonga/rb-grn-less-operator.c +85 -0
- data/ext/groonga/rb-grn-logger.c +5 -5
- data/ext/groonga/rb-grn-match-operator.c +86 -0
- data/ext/groonga/rb-grn-normalizer.c +8 -1
- data/ext/groonga/rb-grn-not-equal-operator.c +85 -0
- data/ext/groonga/rb-grn-object.c +170 -36
- data/ext/groonga/rb-grn-operator.c +395 -172
- data/ext/groonga/rb-grn-patricia-trie.c +10 -8
- data/ext/groonga/rb-grn-plugin.c +51 -3
- data/ext/groonga/rb-grn-prefix-operator.c +86 -0
- data/ext/groonga/rb-grn-procedure-type.c +4 -0
- data/ext/groonga/rb-grn-query-logger.c +4 -4
- data/ext/groonga/rb-grn-regexp-operator.c +85 -0
- data/ext/groonga/rb-grn-snippet.c +1 -1
- data/ext/groonga/rb-grn-table-key-support.c +9 -5
- data/ext/groonga/rb-grn-table.c +52 -66
- data/ext/groonga/rb-grn-type.c +1 -1
- data/ext/groonga/rb-grn-utils.c +22 -3
- data/ext/groonga/rb-grn.h +31 -4
- data/ext/groonga/rb-groonga.c +9 -9
- data/lib/1.9/groonga.so +0 -0
- data/lib/2.0/groonga.so +0 -0
- data/lib/2.1/groonga.so +0 -0
- data/lib/2.2/groonga.so +0 -0
- data/lib/groonga/context.rb +31 -0
- data/lib/groonga/expression-builder.rb +14 -1
- data/lib/groonga/record.rb +10 -8
- data/lib/groonga/schema.rb +3 -1
- data/rroonga-build.rb +2 -2
- data/rroonga.gemspec +3 -3
- data/test/groonga-test-utils.rb +4 -0
- data/test/test-column.rb +28 -26
- data/test/test-exception.rb +1 -0
- data/test/test-expression-builder.rb +83 -1
- data/test/test-expression.rb +80 -48
- data/test/test-index-column.rb +102 -29
- data/test/test-normalizer.rb +35 -29
- data/test/test-operator.rb +214 -0
- data/test/test-plugin.rb +24 -6
- data/test/test-procedure.rb +29 -0
- data/test/test-schema-type.rb +14 -0
- data/test/test-table-select-mecab.rb +1 -4
- data/test/test-table.rb +7 -0
- data/test/test-token-regexp.rb +30 -0
- data/test/test-type.rb +24 -0
- data/vendor/local/bin/grndb.exe +0 -0
- data/vendor/local/bin/groonga-benchmark.exe +0 -0
- data/vendor/local/bin/groonga.exe +0 -0
- data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
- data/vendor/local/bin/libgroonga-0.dll +0 -0
- data/vendor/local/bin/libmecab-1.dll +0 -0
- data/vendor/local/bin/libmsgpack-3.dll +0 -0
- data/vendor/local/bin/libmsgpackc-2.dll +0 -0
- data/vendor/local/bin/libonig-5.dll +0 -0
- data/vendor/local/bin/libstdc++-6.dll +0 -0
- data/vendor/local/bin/lz4.exe +0 -0
- data/vendor/local/bin/lz4c.exe +0 -0
- data/vendor/local/bin/lz4cat +0 -0
- data/vendor/local/bin/mecab-config +2 -2
- data/vendor/local/bin/mecab.exe +0 -0
- data/vendor/local/bin/onig-config +1 -1
- data/vendor/local/bin/zlib1.dll +0 -0
- data/vendor/local/etc/groonga/groonga.conf +1 -1
- data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
- data/vendor/local/include/groonga/groonga.h +1 -0
- data/vendor/local/include/groonga/groonga/expr.h +2 -0
- data/vendor/local/include/groonga/groonga/groonga.h +32 -5
- data/vendor/local/include/groonga/groonga/ii.h +7 -0
- data/vendor/local/include/groonga/groonga/obj.h +37 -0
- data/vendor/local/include/groonga/groonga/scorer.h +95 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
- data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
- data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
- data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +6 -3
- data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +6 -5
- data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +421 -17
- data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
- data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
- data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +12 -4
- data/vendor/local/lib/groonga/scripts/ruby/database.rb +11 -3
- data/vendor/local/lib/groonga/scripts/ruby/expression.rb +23 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +158 -0
- data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +39 -0
- data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +4 -0
- data/vendor/local/lib/groonga/scripts/ruby/initialize/pre.rb +2 -0
- data/vendor/local/lib/groonga/scripts/ruby/logger.rb +11 -7
- data/vendor/local/lib/groonga/scripts/ruby/object.rb +11 -0
- data/vendor/local/lib/groonga/scripts/ruby/operator.rb +22 -0
- data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +7 -2
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +7 -11
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +137 -34
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_search_index.rb +9 -0
- data/vendor/local/lib/libgroonga.a +0 -0
- data/vendor/local/lib/libgroonga.dll.a +0 -0
- data/vendor/local/lib/libgroonga.la +2 -2
- data/vendor/local/lib/liblz4.a +0 -0
- data/vendor/local/lib/liblz4.dll +0 -0
- data/vendor/local/lib/liblz4.dll.1 +0 -0
- data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
- data/vendor/local/lib/libmecab.a +0 -0
- data/vendor/local/lib/libmecab.dll.a +0 -0
- data/vendor/local/lib/libmecab.la +2 -2
- data/vendor/local/lib/libmsgpack.a +0 -0
- data/vendor/local/lib/libmsgpack.dll.a +0 -0
- data/vendor/local/lib/libmsgpack.la +2 -2
- data/vendor/local/lib/libmsgpackc.a +0 -0
- data/vendor/local/lib/libmsgpackc.dll.a +0 -0
- data/vendor/local/lib/libmsgpackc.la +2 -2
- data/vendor/local/lib/libonig.a +0 -0
- data/vendor/local/lib/libonig.dll.a +0 -0
- data/vendor/local/lib/libonig.la +2 -2
- data/vendor/local/lib/libz.a +0 -0
- data/vendor/local/lib/libz.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
- data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
- data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
- data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
- data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
- data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
- data/vendor/local/sbin/groonga-httpd-restart +1 -1
- data/vendor/local/sbin/groonga-httpd.exe +0 -0
- data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/en/html/_images/used-when-indexing.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_images/used-when-searching.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +32 -17
- data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
- data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +194 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +2 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +3 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +42 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +54 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +5 -5
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +173 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +112 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +7 -6
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +64 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +63 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +11 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +3 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +17 -17
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +12 -12
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +7 -7
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +6 -6
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +47 -26
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +5 -5
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +6 -6
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +54 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +4 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +2 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +403 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +217 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +13 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +8 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +530 -16
- data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +2 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +15 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +66 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -81
- data/vendor/local/share/doc/groonga/en/html/_static/basic.css +6 -68
- data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -26
- data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +9404 -4
- data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
- data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +1415 -31
- data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
- data/vendor/local/share/doc/groonga/en/html/characteristic.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/client.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/community.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +51 -38
- data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/contribution/report.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/development.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/genindex.html +26 -14
- data/vendor/local/share/doc/groonga/en/html/index.html +150 -130
- data/vendor/local/share/doc/groonga/en/html/install.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/install/centos.html +28 -30
- data/vendor/local/share/doc/groonga/en/html/install/debian.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/install/fedora.html +28 -30
- data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/install/others.html +87 -89
- data/vendor/local/share/doc/groonga/en/html/install/solaris.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +25 -27
- data/vendor/local/share/doc/groonga/en/html/install/windows.html +30 -32
- data/vendor/local/share/doc/groonga/en/html/limitations.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/news.html +256 -27
- data/vendor/local/share/doc/groonga/en/html/news/0.x.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +19 -21
- data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +27 -29
- data/vendor/local/share/doc/groonga/en/html/news/2.x.html +98 -100
- data/vendor/local/share/doc/groonga/en/html/news/3.x.html +68 -70
- data/vendor/local/share/doc/groonga/en/html/news/4.x.html +102 -104
- data/vendor/local/share/doc/groonga/en/html/news/senna.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/en/html/reference.html +139 -118
- data/vendor/local/share/doc/groonga/en/html/reference/api.html +51 -52
- data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +49 -51
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +60 -62
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +80 -82
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +130 -80
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +48 -50
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +79 -81
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +41 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +41 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +89 -91
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +75 -77
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +64 -66
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +202 -0
- data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +58 -60
- data/vendor/local/share/doc/groonga/en/html/reference/cast.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/column.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +58 -60
- data/vendor/local/share/doc/groonga/en/html/reference/command.html +56 -54
- data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +53 -55
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +94 -96
- data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +43 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +23 -25
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +71 -73
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +43 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +49 -47
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +23 -25
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +24 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +26 -28
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +314 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +252 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +87 -89
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +46 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +195 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +193 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +61 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +54 -56
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +54 -56
- data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +590 -592
- data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +37 -39
- data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +92 -94
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +152 -154
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +49 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +68 -70
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +103 -105
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +45 -47
- data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +52 -54
- data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -21
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +21 -23
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +26 -28
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +73 -75
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +34 -36
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +124 -90
- data/vendor/local/share/doc/groonga/en/html/reference/function.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +66 -68
- data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +113 -115
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +55 -57
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +45 -47
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +81 -83
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +65 -67
- data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +54 -56
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +135 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +81 -83
- data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +43 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +67 -69
- data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +66 -70
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +39 -41
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +349 -286
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +483 -417
- data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/log.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +44 -46
- data/vendor/local/share/doc/groonga/en/html/reference/operations.html +30 -31
- data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/reference/output.html +32 -34
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +60 -62
- data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +931 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +442 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +153 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +287 -0
- data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +114 -0
- data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +45 -47
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +51 -53
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +40 -42
- data/vendor/local/share/doc/groonga/en/html/reference/tables.html +52 -54
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +36 -38
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +1394 -34
- data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +57 -59
- data/vendor/local/share/doc/groonga/en/html/reference/types.html +38 -40
- data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
- data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/server.html +23 -24
- data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +28 -30
- data/vendor/local/share/doc/groonga/en/html/server/http.html +42 -44
- data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +68 -70
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +30 -32
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +29 -31
- data/vendor/local/share/doc/groonga/en/html/server/memcached.html +137 -0
- data/vendor/local/share/doc/groonga/en/html/server/package.html +36 -38
- data/vendor/local/share/doc/groonga/en/html/spec.html +22 -24
- data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +208 -129
- data/vendor/local/share/doc/groonga/en/html/spec/search.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial.html +21 -25
- data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +31 -33
- data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +20 -22
- data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +17 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +21 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +50 -52
- data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +27 -125
- data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +18 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +20 -22
- data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +33 -35
- data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_images/used-when-indexing.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_images/used-when-searching.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +32 -17
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
- data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +194 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +2 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +3 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +42 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +54 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +5 -5
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +173 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +112 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +7 -6
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +64 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +63 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +11 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +3 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +17 -17
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +12 -12
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +7 -7
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +6 -6
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +47 -26
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +5 -5
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +6 -6
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +54 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +44 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +41 -11
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +4 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +2 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +403 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +217 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +22 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +110 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +13 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +8 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +530 -16
- data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +2 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +15 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +66 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -81
- data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +6 -68
- data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -26
- data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +9404 -4
- data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +1415 -31
- data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
- data/vendor/local/share/doc/groonga/ja/html/characteristic.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/client.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/community.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +51 -38
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/development.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +20 -22
- data/vendor/local/share/doc/groonga/ja/html/genindex.html +26 -14
- data/vendor/local/share/doc/groonga/ja/html/index.html +150 -130
- data/vendor/local/share/doc/groonga/ja/html/install.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/install/centos.html +31 -33
- data/vendor/local/share/doc/groonga/ja/html/install/debian.html +25 -27
- data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +29 -31
- data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/install/others.html +78 -80
- data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +21 -23
- data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +26 -28
- data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -31
- data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/news.html +210 -27
- data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +19 -21
- data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +27 -29
- data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +91 -93
- data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +59 -61
- data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +89 -91
- data/vendor/local/share/doc/groonga/ja/html/news/senna.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/ja/html/reference.html +139 -118
- data/vendor/local/share/doc/groonga/ja/html/reference/api.html +51 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +49 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +55 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +80 -82
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +126 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +48 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +74 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +41 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +41 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +89 -91
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +75 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +64 -66
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +197 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +58 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/column.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +48 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/command.html +56 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +43 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +93 -95
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +35 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +43 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +23 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +62 -64
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +33 -35
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +23 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +24 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +26 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +314 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +250 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +80 -81
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +46 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +188 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +190 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +57 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +71 -73
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +394 -396
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +78 -80
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +123 -125
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +61 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +89 -91
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +46 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +51 -53
- data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +35 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +21 -23
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +26 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +61 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +34 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +126 -90
- data/vendor/local/share/doc/groonga/ja/html/reference/function.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +63 -65
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +44 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +94 -96
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +55 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +45 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +66 -68
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +55 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +135 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +70 -72
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +43 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +53 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +56 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +36 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +229 -171
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +381 -322
- data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/log.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +28 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +32 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/output.html +28 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +39 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +878 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +442 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +154 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +287 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +115 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +45 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +48 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +40 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +37 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +1300 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +57 -59
- data/vendor/local/share/doc/groonga/ja/html/reference/types.html +38 -40
- data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
- data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/server.html +23 -24
- data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +28 -30
- data/vendor/local/share/doc/groonga/ja/html/server/http.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +62 -64
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +30 -32
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +29 -31
- data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +138 -0
- data/vendor/local/share/doc/groonga/ja/html/server/package.html +35 -37
- data/vendor/local/share/doc/groonga/ja/html/spec.html +22 -24
- data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +207 -128
- data/vendor/local/share/doc/groonga/ja/html/spec/search.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial.html +21 -25
- data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +30 -32
- data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +17 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +23 -25
- data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +47 -49
- data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +27 -125
- data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +20 -22
- data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +31 -33
- data/vendor/local/share/license/mruby/README.md +2 -2
- data/vendor/local/share/man/ja/man1/groonga.1 +6205 -2251
- data/vendor/local/share/man/man1/groonga.1 +7210 -3029
- metadata +75 -11
- data/doc/text/news.textile +0 -1217
- data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +0 -10308
- data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +0 -999
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +0 -10308
- data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +0 -999
@@ -7,7 +7,7 @@
|
|
7
7
|
<head>
|
8
8
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
9
9
|
|
10
|
-
<title>7.8.
|
10
|
+
<title>7.8. トークナイザー — Groonga v5.0.1-42-g4d10df1ドキュメント</title>
|
11
11
|
|
12
12
|
<link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
|
13
13
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
@@ -15,7 +15,7 @@
|
|
15
15
|
<script type="text/javascript">
|
16
16
|
var DOCUMENTATION_OPTIONS = {
|
17
17
|
URL_ROOT: '../',
|
18
|
-
VERSION: '5.0.
|
18
|
+
VERSION: '5.0.1-42-g4d10df1',
|
19
19
|
COLLAPSE_INDEX: false,
|
20
20
|
FILE_SUFFIX: '.html',
|
21
21
|
HAS_SOURCE: true
|
@@ -26,12 +26,12 @@
|
|
26
26
|
<script type="text/javascript" src="../_static/doctools.js"></script>
|
27
27
|
<script type="text/javascript" src="../_static/translations.js"></script>
|
28
28
|
<link rel="shortcut icon" href="../_static/favicon.ico"/>
|
29
|
-
<link rel="top" title="Groonga v5.0.
|
29
|
+
<link rel="top" title="Groonga v5.0.1-42-g4d10df1ドキュメント" href="../index.html" />
|
30
30
|
<link rel="up" title="7. リファレンスマニュアル" href="../reference.html" />
|
31
31
|
<link rel="next" title="7.9. トークンフィルター" href="token_filters.html" />
|
32
32
|
<link rel="prev" title="7.7. ノーマライザー" href="normalizers.html" />
|
33
33
|
</head>
|
34
|
-
<body
|
34
|
+
<body>
|
35
35
|
<div class="header">
|
36
36
|
<h1 class="title">
|
37
37
|
<a id="top-link" href="../index.html">
|
@@ -49,7 +49,7 @@
|
|
49
49
|
</div>
|
50
50
|
|
51
51
|
|
52
|
-
<div class="related"
|
52
|
+
<div class="related">
|
53
53
|
<h3>ナビゲーション</h3>
|
54
54
|
<ul>
|
55
55
|
<li class="right" style="margin-right: 10px">
|
@@ -61,7 +61,7 @@
|
|
61
61
|
<li class="right" >
|
62
62
|
<a href="normalizers.html" title="7.7. ノーマライザー"
|
63
63
|
accesskey="P">前へ</a> |</li>
|
64
|
-
<li><a href="../index.html">Groonga v5.0.
|
64
|
+
<li><a href="../index.html">Groonga v5.0.1-42-g4d10df1ドキュメント</a> »</li>
|
65
65
|
<li><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> »</li>
|
66
66
|
</ul>
|
67
67
|
</div>
|
@@ -69,48 +69,1314 @@
|
|
69
69
|
<div class="document">
|
70
70
|
<div class="documentwrapper">
|
71
71
|
<div class="bodywrapper">
|
72
|
-
<div class="body"
|
72
|
+
<div class="body">
|
73
73
|
|
74
74
|
<div class="section" id="tokenizers">
|
75
|
-
<h1>7.8.
|
76
|
-
<
|
75
|
+
<h1>7.8. トークナイザー<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
|
76
|
+
<div class="section" id="summary">
|
77
|
+
<h2>7.8.1. 概要<a class="headerlink" href="#summary" title="このヘッドラインへのパーマリンク">¶</a></h2>
|
78
|
+
<p>Groongaにはテキストをトークナイズするトークナイザーモジュールがあります。次のケースのときにトークナイザーを使います。</p>
|
79
|
+
<blockquote>
|
80
|
+
<div><ul>
|
81
|
+
<li><p class="first">テキストのインデックスを構築するとき</p>
|
82
|
+
<div class="figure align-center">
|
83
|
+
<a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
|
84
|
+
<p class="caption">テキストのインデックスを構築するときにトークナイザーを使います。</p>
|
85
|
+
</div>
|
86
|
+
</li>
|
87
|
+
<li><p class="first">クエリーで検索するとき</p>
|
88
|
+
<div class="figure align-center">
|
89
|
+
<a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
|
90
|
+
<p class="caption">クエリーで検索するときにトークナイザーを使います。</p>
|
91
|
+
</div>
|
92
|
+
</li>
|
93
|
+
</ul>
|
94
|
+
</div></blockquote>
|
95
|
+
<p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
|
96
|
+
<p>一般的に <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> を使うことをオススメします。</p>
|
97
|
+
<p><a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> コマンドを使って <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> トークナイザーを試す例を以下に示します。</p>
|
98
|
+
<p>実行例:</p>
|
99
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
|
100
|
+
# [
|
101
|
+
# [
|
102
|
+
# 0,
|
103
|
+
# 1337566253.89858,
|
104
|
+
# 0.000355720520019531
|
105
|
+
# ],
|
106
|
+
# [
|
107
|
+
# {
|
108
|
+
# "position": 0,
|
109
|
+
# "value": "He"
|
110
|
+
# },
|
111
|
+
# {
|
112
|
+
# "position": 1,
|
113
|
+
# "value": "el"
|
114
|
+
# },
|
115
|
+
# {
|
116
|
+
# "position": 2,
|
117
|
+
# "value": "ll"
|
118
|
+
# },
|
119
|
+
# {
|
120
|
+
# "position": 3,
|
121
|
+
# "value": "lo"
|
122
|
+
# },
|
123
|
+
# {
|
124
|
+
# "position": 4,
|
125
|
+
# "value": "o "
|
126
|
+
# },
|
127
|
+
# {
|
128
|
+
# "position": 5,
|
129
|
+
# "value": " W"
|
130
|
+
# },
|
131
|
+
# {
|
132
|
+
# "position": 6,
|
133
|
+
# "value": "Wo"
|
134
|
+
# },
|
135
|
+
# {
|
136
|
+
# "position": 7,
|
137
|
+
# "value": "or"
|
138
|
+
# },
|
139
|
+
# {
|
140
|
+
# "position": 8,
|
141
|
+
# "value": "rl"
|
142
|
+
# },
|
143
|
+
# {
|
144
|
+
# "position": 9,
|
145
|
+
# "value": "ld"
|
146
|
+
# },
|
147
|
+
# {
|
148
|
+
# "position": 10,
|
149
|
+
# "value": "d"
|
150
|
+
# }
|
151
|
+
# ]
|
152
|
+
# ]
|
153
|
+
</pre></div>
|
154
|
+
</div>
|
155
|
+
</div>
|
156
|
+
<div class="section" id="what-is-tokenize">
|
157
|
+
<h2>7.8.2. 「トークナイズ」とはなにか<a class="headerlink" href="#what-is-tokenize" title="このヘッドラインへのパーマリンク">¶</a></h2>
|
158
|
+
<p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
|
159
|
+
<p>例えば、バイグラムというトークナイズ方法では <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は次のトークンにトークナイズされます。</p>
|
160
|
+
<blockquote>
|
161
|
+
<div><ul class="simple">
|
162
|
+
<li><tt class="docutils literal"><span class="pre">He</span></tt></li>
|
163
|
+
<li><tt class="docutils literal"><span class="pre">el</span></tt></li>
|
164
|
+
<li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
|
165
|
+
<li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
|
166
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">o_</span></tt> ( <tt class="docutils literal"><span class="pre">_</span></tt> は空白文字という意味)</p>
|
167
|
+
</li>
|
168
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">_W</span></tt> ( <tt class="docutils literal"><span class="pre">_</span></tt> は空白文字という意味)</p>
|
169
|
+
</li>
|
170
|
+
<li><tt class="docutils literal"><span class="pre">Wo</span></tt></li>
|
171
|
+
<li><tt class="docutils literal"><span class="pre">or</span></tt></li>
|
172
|
+
<li><tt class="docutils literal"><span class="pre">rl</span></tt></li>
|
173
|
+
<li><tt class="docutils literal"><span class="pre">ld</span></tt></li>
|
174
|
+
</ul>
|
175
|
+
</div></blockquote>
|
176
|
+
<p>上記の例では、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> から10個のトークンを抽出しました。</p>
|
177
|
+
<p>例えば、空白区切りのトークナイズ方法では <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は次のトークンにトークナイズされます。</p>
|
178
|
+
<blockquote>
|
179
|
+
<div><ul class="simple">
|
180
|
+
<li><tt class="docutils literal"><span class="pre">Hello</span></tt></li>
|
181
|
+
<li><tt class="docutils literal"><span class="pre">World</span></tt></li>
|
182
|
+
</ul>
|
183
|
+
</div></blockquote>
|
184
|
+
<p>上記の例では、<tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> から2つのトークンを抽出しました。</p>
|
185
|
+
<p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <tt class="docutils literal"><span class="pre">ll</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <tt class="docutils literal"><span class="pre">ll</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <tt class="docutils literal"><span class="pre">ll</span></tt> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <tt class="docutils literal"><span class="pre">Hello</span></tt> というトークンと <tt class="docutils literal"><span class="pre">World</span></tt> というトークンしか抽出していません。</p>
|
186
|
+
<p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
|
187
|
+
<p>例えば、バイグラムというトークナイズ方法では <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> と <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> を検索できます。しかし、「論理和」を検索したい人にとっては <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
|
188
|
+
<p>空白区切りのトークナイズ方法を使った場合は <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></tt> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <tt class="docutils literal"><span class="pre">World</span></tt> は <tt class="docutils literal"><span class="pre">World</span></tt> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> も <tt class="docutils literal"><span class="pre">or</span></tt> を含んでいるのに見つかっていないので再現率が下がっています。</p>
|
189
|
+
</div>
|
190
|
+
<div class="section" id="built-in-tokenizsers">
|
191
|
+
<h2>7.8.3. 組み込みトークナイザー<a class="headerlink" href="#built-in-tokenizsers" title="このヘッドラインへのパーマリンク">¶</a></h2>
|
77
192
|
<p>以下は組み込みのトークナイザーのリストです。</p>
|
78
|
-
<
|
79
|
-
<
|
80
|
-
<li>
|
81
|
-
<li>
|
82
|
-
<li>
|
83
|
-
<li>
|
84
|
-
<li>
|
85
|
-
<li>
|
86
|
-
<li>
|
87
|
-
<li>
|
88
|
-
<li>
|
89
|
-
<li>TokenTrigram</li>
|
90
|
-
<li>
|
193
|
+
<blockquote>
|
194
|
+
<div><ul class="simple">
|
195
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigram</span></tt></li>
|
196
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></li>
|
197
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></li>
|
198
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></li>
|
199
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></li>
|
200
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></li>
|
201
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></li>
|
202
|
+
<li><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></li>
|
203
|
+
<li><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></li>
|
204
|
+
<li><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></li>
|
205
|
+
<li><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></li>
|
206
|
+
<li><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></li>
|
207
|
+
<li><tt class="docutils literal"><span class="pre">TokenMecab</span></tt></li>
|
208
|
+
<li><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></li>
|
91
209
|
</ul>
|
210
|
+
</div></blockquote>
|
211
|
+
<div class="section" id="tokenbigram">
|
212
|
+
<span id="token-bigram"></span><h3>7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
213
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
|
214
|
+
<p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <tt class="docutils literal"><span class="pre">Hello</span></tt> は次のトークンにトークナイズします。</p>
|
215
|
+
<blockquote>
|
216
|
+
<div><ul class="simple">
|
217
|
+
<li><tt class="docutils literal"><span class="pre">He</span></tt></li>
|
218
|
+
<li><tt class="docutils literal"><span class="pre">el</span></tt></li>
|
219
|
+
<li><tt class="docutils literal"><span class="pre">ll</span></tt></li>
|
220
|
+
<li><tt class="docutils literal"><span class="pre">lo</span></tt></li>
|
221
|
+
</ul>
|
222
|
+
</div></blockquote>
|
223
|
+
<p>バイグラムというトークナイズ方法は再現率に優れています。なぜなら、2文字以上の文字のクエリーに対してはすべてのテキストを見つけることができるからです。</p>
|
224
|
+
<p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <tt class="docutils literal"><span class="pre">l</span></tt> というクエリーから <tt class="docutils literal"><span class="pre">ll</span></tt> というトークンと <tt class="docutils literal"><span class="pre">lo</span></tt> というトークンを見つけることができます。</p>
|
225
|
+
<p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <tt class="docutils literal"><span class="pre">or</span></tt> で <tt class="docutils literal"><span class="pre">world</span></tt> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はこの問題を解決しています。</p>
|
226
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> の挙動は <a class="reference internal" href="normalizers.html"><em>ノーマライザー</em></a> を使うかどうかで変わります。</p>
|
227
|
+
<p>ノーマライザーを使っていない場合は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
|
228
|
+
<p>実行例:</p>
|
229
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
|
230
|
+
# [
|
231
|
+
# [
|
232
|
+
# 0,
|
233
|
+
# 1337566253.89858,
|
234
|
+
# 0.000355720520019531
|
235
|
+
# ],
|
236
|
+
# [
|
237
|
+
# {
|
238
|
+
# "position": 0,
|
239
|
+
# "value": "He"
|
240
|
+
# },
|
241
|
+
# {
|
242
|
+
# "position": 1,
|
243
|
+
# "value": "el"
|
244
|
+
# },
|
245
|
+
# {
|
246
|
+
# "position": 2,
|
247
|
+
# "value": "ll"
|
248
|
+
# },
|
249
|
+
# {
|
250
|
+
# "position": 3,
|
251
|
+
# "value": "lo"
|
252
|
+
# },
|
253
|
+
# {
|
254
|
+
# "position": 4,
|
255
|
+
# "value": "o "
|
256
|
+
# },
|
257
|
+
# {
|
258
|
+
# "position": 5,
|
259
|
+
# "value": " W"
|
260
|
+
# },
|
261
|
+
# {
|
262
|
+
# "position": 6,
|
263
|
+
# "value": "Wo"
|
264
|
+
# },
|
265
|
+
# {
|
266
|
+
# "position": 7,
|
267
|
+
# "value": "or"
|
268
|
+
# },
|
269
|
+
# {
|
270
|
+
# "position": 8,
|
271
|
+
# "value": "rl"
|
272
|
+
# },
|
273
|
+
# {
|
274
|
+
# "position": 9,
|
275
|
+
# "value": "ld"
|
276
|
+
# },
|
277
|
+
# {
|
278
|
+
# "position": 10,
|
279
|
+
# "value": "d"
|
280
|
+
# }
|
281
|
+
# ]
|
282
|
+
# ]
|
283
|
+
</pre></div>
|
284
|
+
</div>
|
285
|
+
<p>ノーマライザーを使っている場合は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
|
286
|
+
<p>もしかしたら、複数の方法が混ざったこの挙動はわかりにくいかもしれません。しかし、英語のテキスト(ASCII文字列のみ)や日本語テキスト(ASCII文字列と非ASCII文字列が混ざっている)のような多くのユースケースでは合理的な方法です。</p>
|
287
|
+
<p>ASCII文字しか使わない多くの言語は単語の区切りに空白文字を使っています。このようなケースに空白区切りのトークナイズ方法は適切です。</p>
|
288
|
+
<p>非ASCII文字を使う言語では単語の区切りに空白文字を使いません。このケースにはバイグラムなトークナイズ方法は適切です。</p>
|
289
|
+
<p>複数の言語が混ざっている場合は、複数の方法を組み合わせたトークナイズ方法が適切です。</p>
|
290
|
+
<p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><em>TokenBigramSplitSymbolAlpha</em></a> のような <tt class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></tt> というトークナイザーを参照してください。</p>
|
291
|
+
<p>例を使いながら <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> の挙動を確認しましょう。</p>
|
292
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
|
293
|
+
<p>実行例:</p>
|
294
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World" NormalizerAuto
|
295
|
+
# [
|
296
|
+
# [
|
297
|
+
# 0,
|
298
|
+
# 1337566253.89858,
|
299
|
+
# 0.000355720520019531
|
300
|
+
# ],
|
301
|
+
# [
|
302
|
+
# {
|
303
|
+
# "position": 0,
|
304
|
+
# "value": "hello"
|
305
|
+
# },
|
306
|
+
# {
|
307
|
+
# "position": 1,
|
308
|
+
# "value": "world"
|
309
|
+
# }
|
310
|
+
# ]
|
311
|
+
# ]
|
312
|
+
</pre></div>
|
313
|
+
</div>
|
314
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigram</span></tt> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
|
315
|
+
<blockquote>
|
316
|
+
<div><ul class="simple">
|
317
|
+
<li><p class="first">アルファベット</p>
|
318
|
+
</li>
|
319
|
+
<li><p class="first">数字</p>
|
320
|
+
</li>
|
321
|
+
<li><p class="first">記号(たとえば <tt class="docutils literal"><span class="pre">(</span></tt> 、 <tt class="docutils literal"><span class="pre">)</span></tt> 、 <tt class="docutils literal"><span class="pre">!</span></tt> など)</p>
|
322
|
+
</li>
|
323
|
+
<li><p class="first">ひらがな</p>
|
324
|
+
</li>
|
325
|
+
<li><p class="first">カタカナ</p>
|
326
|
+
</li>
|
327
|
+
<li><p class="first">漢字</p>
|
328
|
+
</li>
|
329
|
+
<li><p class="first">その他</p>
|
330
|
+
</li>
|
331
|
+
</ul>
|
332
|
+
</div></blockquote>
|
333
|
+
<p>次の例は2つのトークン区切りを示しています。</p>
|
334
|
+
<blockquote>
|
335
|
+
<div><ul class="simple">
|
336
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">100</span></tt> (数字)と <tt class="docutils literal"><span class="pre">cents</span></tt> (アルファベット)の間のところ</p>
|
337
|
+
</li>
|
338
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">cents</span></tt> (アルファベット)と <tt class="docutils literal"><span class="pre">!!!</span></tt> (記号)の間のところ</p>
|
339
|
+
</li>
|
340
|
+
</ul>
|
341
|
+
</div></blockquote>
|
342
|
+
<p>実行例:</p>
|
343
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "100cents!!!" NormalizerAuto
|
344
|
+
# [
|
345
|
+
# [
|
346
|
+
# 0,
|
347
|
+
# 1337566253.89858,
|
348
|
+
# 0.000355720520019531
|
349
|
+
# ],
|
350
|
+
# [
|
351
|
+
# {
|
352
|
+
# "position": 0,
|
353
|
+
# "value": "100"
|
354
|
+
# },
|
355
|
+
# {
|
356
|
+
# "position": 1,
|
357
|
+
# "value": "cents"
|
358
|
+
# },
|
359
|
+
# {
|
360
|
+
# "position": 2,
|
361
|
+
# "value": "!!!"
|
362
|
+
# }
|
363
|
+
# ]
|
364
|
+
# ]
|
365
|
+
</pre></div>
|
366
|
+
</div>
|
367
|
+
<p>以下は <tt class="docutils literal"><span class="pre">TokenBigram</span></tt> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
|
368
|
+
<p>実行例:</p>
|
369
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日本語の勉強" NormalizerAuto
|
370
|
+
# [
|
371
|
+
# [
|
372
|
+
# 0,
|
373
|
+
# 1337566253.89858,
|
374
|
+
# 0.000355720520019531
|
375
|
+
# ],
|
376
|
+
# [
|
377
|
+
# {
|
378
|
+
# "position": 0,
|
379
|
+
# "value": "日本"
|
380
|
+
# },
|
381
|
+
# {
|
382
|
+
# "position": 1,
|
383
|
+
# "value": "本語"
|
384
|
+
# },
|
385
|
+
# {
|
386
|
+
# "position": 2,
|
387
|
+
# "value": "語の"
|
388
|
+
# },
|
389
|
+
# {
|
390
|
+
# "position": 3,
|
391
|
+
# "value": "の勉"
|
392
|
+
# },
|
393
|
+
# {
|
394
|
+
# "position": 4,
|
395
|
+
# "value": "勉強"
|
396
|
+
# },
|
397
|
+
# {
|
398
|
+
# "position": 5,
|
399
|
+
# "value": "強"
|
400
|
+
# }
|
401
|
+
# ]
|
402
|
+
# ]
|
403
|
+
</pre></div>
|
404
|
+
</div>
|
405
|
+
</div>
|
406
|
+
<div class="section" id="tokenbigramsplitsymbol">
|
407
|
+
<span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
408
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt> は記号のトークナイズ方法にバイグラムを使います。</p>
|
409
|
+
<p>実行例:</p>
|
410
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
|
411
|
+
# [
|
412
|
+
# [
|
413
|
+
# 0,
|
414
|
+
# 1337566253.89858,
|
415
|
+
# 0.000355720520019531
|
416
|
+
# ],
|
417
|
+
# [
|
418
|
+
# {
|
419
|
+
# "position": 0,
|
420
|
+
# "value": "100"
|
421
|
+
# },
|
422
|
+
# {
|
423
|
+
# "position": 1,
|
424
|
+
# "value": "cents"
|
425
|
+
# },
|
426
|
+
# {
|
427
|
+
# "position": 2,
|
428
|
+
# "value": "!!"
|
429
|
+
# },
|
430
|
+
# {
|
431
|
+
# "position": 3,
|
432
|
+
# "value": "!!"
|
433
|
+
# },
|
434
|
+
# {
|
435
|
+
# "position": 4,
|
436
|
+
# "value": "!"
|
437
|
+
# }
|
438
|
+
# ]
|
439
|
+
# ]
|
440
|
+
</pre></div>
|
441
|
+
</div>
|
442
|
+
</div>
|
443
|
+
<div class="section" id="tokenbigramsplitsymbolalpha">
|
444
|
+
<span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
445
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号とアルファベットの扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
|
446
|
+
<p>実行例:</p>
|
447
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
|
448
|
+
# [
|
449
|
+
# [
|
450
|
+
# 0,
|
451
|
+
# 1337566253.89858,
|
452
|
+
# 0.000355720520019531
|
453
|
+
# ],
|
454
|
+
# [
|
455
|
+
# {
|
456
|
+
# "position": 0,
|
457
|
+
# "value": "100"
|
458
|
+
# },
|
459
|
+
# {
|
460
|
+
# "position": 1,
|
461
|
+
# "value": "ce"
|
462
|
+
# },
|
463
|
+
# {
|
464
|
+
# "position": 2,
|
465
|
+
# "value": "en"
|
466
|
+
# },
|
467
|
+
# {
|
468
|
+
# "position": 3,
|
469
|
+
# "value": "nt"
|
470
|
+
# },
|
471
|
+
# {
|
472
|
+
# "position": 4,
|
473
|
+
# "value": "ts"
|
474
|
+
# },
|
475
|
+
# {
|
476
|
+
# "position": 5,
|
477
|
+
# "value": "s!"
|
478
|
+
# },
|
479
|
+
# {
|
480
|
+
# "position": 6,
|
481
|
+
# "value": "!!"
|
482
|
+
# },
|
483
|
+
# {
|
484
|
+
# "position": 7,
|
485
|
+
# "value": "!!"
|
486
|
+
# },
|
487
|
+
# {
|
488
|
+
# "position": 8,
|
489
|
+
# "value": "!"
|
490
|
+
# }
|
491
|
+
# ]
|
492
|
+
# ]
|
493
|
+
</pre></div>
|
494
|
+
</div>
|
495
|
+
</div>
|
496
|
+
<div class="section" id="tokenbigramsplitsymbolalphadigit">
|
497
|
+
<span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
498
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
|
499
|
+
<p>実行例:</p>
|
500
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
|
501
|
+
# [
|
502
|
+
# [
|
503
|
+
# 0,
|
504
|
+
# 1337566253.89858,
|
505
|
+
# 0.000355720520019531
|
506
|
+
# ],
|
507
|
+
# [
|
508
|
+
# {
|
509
|
+
# "position": 0,
|
510
|
+
# "value": "10"
|
511
|
+
# },
|
512
|
+
# {
|
513
|
+
# "position": 1,
|
514
|
+
# "value": "00"
|
515
|
+
# },
|
516
|
+
# {
|
517
|
+
# "position": 2,
|
518
|
+
# "value": "0c"
|
519
|
+
# },
|
520
|
+
# {
|
521
|
+
# "position": 3,
|
522
|
+
# "value": "ce"
|
523
|
+
# },
|
524
|
+
# {
|
525
|
+
# "position": 4,
|
526
|
+
# "value": "en"
|
527
|
+
# },
|
528
|
+
# {
|
529
|
+
# "position": 5,
|
530
|
+
# "value": "nt"
|
531
|
+
# },
|
532
|
+
# {
|
533
|
+
# "position": 6,
|
534
|
+
# "value": "ts"
|
535
|
+
# },
|
536
|
+
# {
|
537
|
+
# "position": 7,
|
538
|
+
# "value": "s!"
|
539
|
+
# },
|
540
|
+
# {
|
541
|
+
# "position": 8,
|
542
|
+
# "value": "!!"
|
543
|
+
# },
|
544
|
+
# {
|
545
|
+
# "position": 9,
|
546
|
+
# "value": "!!"
|
547
|
+
# },
|
548
|
+
# {
|
549
|
+
# "position": 10,
|
550
|
+
# "value": "!"
|
551
|
+
# }
|
552
|
+
# ]
|
553
|
+
# ]
|
554
|
+
</pre></div>
|
555
|
+
</div>
|
556
|
+
</div>
|
557
|
+
<div class="section" id="tokenbigramignoreblank">
|
558
|
+
<span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
559
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは空白文字の扱いです。 <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
|
560
|
+
<p><tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
|
561
|
+
<p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
|
562
|
+
<p>実行例:</p>
|
563
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
564
|
+
# [
|
565
|
+
# [
|
566
|
+
# 0,
|
567
|
+
# 1337566253.89858,
|
568
|
+
# 0.000355720520019531
|
569
|
+
# ],
|
570
|
+
# [
|
571
|
+
# {
|
572
|
+
# "position": 0,
|
573
|
+
# "value": "日"
|
574
|
+
# },
|
575
|
+
# {
|
576
|
+
# "position": 1,
|
577
|
+
# "value": "本"
|
578
|
+
# },
|
579
|
+
# {
|
580
|
+
# "position": 2,
|
581
|
+
# "value": "語"
|
582
|
+
# },
|
583
|
+
# {
|
584
|
+
# "position": 3,
|
585
|
+
# "value": "!"
|
586
|
+
# },
|
587
|
+
# {
|
588
|
+
# "position": 4,
|
589
|
+
# "value": "!"
|
590
|
+
# },
|
591
|
+
# {
|
592
|
+
# "position": 5,
|
593
|
+
# "value": "!"
|
594
|
+
# }
|
595
|
+
# ]
|
596
|
+
# ]
|
597
|
+
</pre></div>
|
598
|
+
</div>
|
599
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt> での実行結果です。</p>
|
600
|
+
<p>実行例:</p>
|
601
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
|
602
|
+
# [
|
603
|
+
# [
|
604
|
+
# 0,
|
605
|
+
# 1337566253.89858,
|
606
|
+
# 0.000355720520019531
|
607
|
+
# ],
|
608
|
+
# [
|
609
|
+
# {
|
610
|
+
# "position": 0,
|
611
|
+
# "value": "日本"
|
612
|
+
# },
|
613
|
+
# {
|
614
|
+
# "position": 1,
|
615
|
+
# "value": "本語"
|
616
|
+
# },
|
617
|
+
# {
|
618
|
+
# "position": 2,
|
619
|
+
# "value": "語"
|
620
|
+
# },
|
621
|
+
# {
|
622
|
+
# "position": 3,
|
623
|
+
# "value": "!!!"
|
624
|
+
# }
|
625
|
+
# ]
|
626
|
+
# ]
|
627
|
+
</pre></div>
|
628
|
+
</div>
|
629
|
+
</div>
|
630
|
+
<div class="section" id="tokenbigramignoreblanksplitsymbol">
|
631
|
+
<span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
632
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
|
633
|
+
<blockquote>
|
634
|
+
<div><ul class="simple">
|
635
|
+
<li><p class="first">空白文字の扱い</p>
|
636
|
+
</li>
|
637
|
+
<li><p class="first">記号の扱い</p>
|
638
|
+
</li>
|
639
|
+
</ul>
|
640
|
+
</div></blockquote>
|
641
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
|
642
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> は記号をバイグラムでトークナイズします。</p>
|
643
|
+
<p><tt class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
|
644
|
+
<p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
|
645
|
+
<p>実行例:</p>
|
646
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
647
|
+
# [
|
648
|
+
# [
|
649
|
+
# 0,
|
650
|
+
# 1337566253.89858,
|
651
|
+
# 0.000355720520019531
|
652
|
+
# ],
|
653
|
+
# [
|
654
|
+
# {
|
655
|
+
# "position": 0,
|
656
|
+
# "value": "日"
|
657
|
+
# },
|
658
|
+
# {
|
659
|
+
# "position": 1,
|
660
|
+
# "value": "本"
|
661
|
+
# },
|
662
|
+
# {
|
663
|
+
# "position": 2,
|
664
|
+
# "value": "語"
|
665
|
+
# },
|
666
|
+
# {
|
667
|
+
# "position": 3,
|
668
|
+
# "value": "!"
|
669
|
+
# },
|
670
|
+
# {
|
671
|
+
# "position": 4,
|
672
|
+
# "value": "!"
|
673
|
+
# },
|
674
|
+
# {
|
675
|
+
# "position": 5,
|
676
|
+
# "value": "!"
|
677
|
+
# }
|
678
|
+
# ]
|
679
|
+
# ]
|
680
|
+
</pre></div>
|
681
|
+
</div>
|
682
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt> の実行結果です。</p>
|
683
|
+
<p>実行例:</p>
|
684
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
|
685
|
+
# [
|
686
|
+
# [
|
687
|
+
# 0,
|
688
|
+
# 1337566253.89858,
|
689
|
+
# 0.000355720520019531
|
690
|
+
# ],
|
691
|
+
# [
|
692
|
+
# {
|
693
|
+
# "position": 0,
|
694
|
+
# "value": "日本"
|
695
|
+
# },
|
696
|
+
# {
|
697
|
+
# "position": 1,
|
698
|
+
# "value": "本語"
|
699
|
+
# },
|
700
|
+
# {
|
701
|
+
# "position": 2,
|
702
|
+
# "value": "語!"
|
703
|
+
# },
|
704
|
+
# {
|
705
|
+
# "position": 3,
|
706
|
+
# "value": "!!"
|
707
|
+
# },
|
708
|
+
# {
|
709
|
+
# "position": 4,
|
710
|
+
# "value": "!!"
|
711
|
+
# },
|
712
|
+
# {
|
713
|
+
# "position": 5,
|
714
|
+
# "value": "!"
|
715
|
+
# }
|
716
|
+
# ]
|
717
|
+
# ]
|
718
|
+
</pre></div>
|
719
|
+
</div>
|
720
|
+
</div>
|
721
|
+
<div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
|
722
|
+
<span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
723
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
|
724
|
+
<blockquote>
|
725
|
+
<div><ul class="simple">
|
726
|
+
<li><p class="first">空白文字の扱い</p>
|
727
|
+
</li>
|
728
|
+
<li><p class="first">記号とアルファベットの扱い</p>
|
729
|
+
</li>
|
730
|
+
</ul>
|
731
|
+
</div></blockquote>
|
732
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
|
733
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> は記号とアルファベットをバイグラムでトークナイズします。</p>
|
734
|
+
<p><tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></tt> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
|
735
|
+
<p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
|
736
|
+
<p>実行例:</p>
|
737
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
|
738
|
+
# [
|
739
|
+
# [
|
740
|
+
# 0,
|
741
|
+
# 1337566253.89858,
|
742
|
+
# 0.000355720520019531
|
743
|
+
# ],
|
744
|
+
# [
|
745
|
+
# {
|
746
|
+
# "position": 0,
|
747
|
+
# "value": "hello"
|
748
|
+
# },
|
749
|
+
# {
|
750
|
+
# "position": 1,
|
751
|
+
# "value": "日"
|
752
|
+
# },
|
753
|
+
# {
|
754
|
+
# "position": 2,
|
755
|
+
# "value": "本"
|
756
|
+
# },
|
757
|
+
# {
|
758
|
+
# "position": 3,
|
759
|
+
# "value": "語"
|
760
|
+
# },
|
761
|
+
# {
|
762
|
+
# "position": 4,
|
763
|
+
# "value": "!"
|
764
|
+
# },
|
765
|
+
# {
|
766
|
+
# "position": 5,
|
767
|
+
# "value": "!"
|
768
|
+
# },
|
769
|
+
# {
|
770
|
+
# "position": 6,
|
771
|
+
# "value": "!"
|
772
|
+
# }
|
773
|
+
# ]
|
774
|
+
# ]
|
775
|
+
</pre></div>
|
776
|
+
</div>
|
777
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt> の実行結果です。</p>
|
778
|
+
<p>実行例:</p>
|
779
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
|
780
|
+
# [
|
781
|
+
# [
|
782
|
+
# 0,
|
783
|
+
# 1337566253.89858,
|
784
|
+
# 0.000355720520019531
|
785
|
+
# ],
|
786
|
+
# [
|
787
|
+
# {
|
788
|
+
# "position": 0,
|
789
|
+
# "value": "he"
|
790
|
+
# },
|
791
|
+
# {
|
792
|
+
# "position": 1,
|
793
|
+
# "value": "el"
|
794
|
+
# },
|
795
|
+
# {
|
796
|
+
# "position": 2,
|
797
|
+
# "value": "ll"
|
798
|
+
# },
|
799
|
+
# {
|
800
|
+
# "position": 3,
|
801
|
+
# "value": "lo"
|
802
|
+
# },
|
803
|
+
# {
|
804
|
+
# "position": 4,
|
805
|
+
# "value": "o日"
|
806
|
+
# },
|
807
|
+
# {
|
808
|
+
# "position": 5,
|
809
|
+
# "value": "日本"
|
810
|
+
# },
|
811
|
+
# {
|
812
|
+
# "position": 6,
|
813
|
+
# "value": "本語"
|
814
|
+
# },
|
815
|
+
# {
|
816
|
+
# "position": 7,
|
817
|
+
# "value": "語!"
|
818
|
+
# },
|
819
|
+
# {
|
820
|
+
# "position": 8,
|
821
|
+
# "value": "!!"
|
822
|
+
# },
|
823
|
+
# {
|
824
|
+
# "position": 9,
|
825
|
+
# "value": "!!"
|
826
|
+
# },
|
827
|
+
# {
|
828
|
+
# "position": 10,
|
829
|
+
# "value": "!"
|
830
|
+
# }
|
831
|
+
# ]
|
832
|
+
# ]
|
833
|
+
</pre></div>
|
834
|
+
</div>
|
835
|
+
</div>
|
836
|
+
<div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
|
837
|
+
<span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
838
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> と似ています。違いは次の通りです。</p>
|
839
|
+
<blockquote>
|
840
|
+
<div><ul class="simple">
|
841
|
+
<li><p class="first">空白文字の扱い</p>
|
842
|
+
</li>
|
843
|
+
<li><p class="first">記号とアルファベットと数字の扱い</p>
|
844
|
+
</li>
|
845
|
+
</ul>
|
846
|
+
</div></blockquote>
|
847
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
|
848
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
|
849
|
+
<p><tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></tt> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
|
850
|
+
<p><a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> での実行結果です。</p>
|
851
|
+
<p>実行例:</p>
|
852
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
853
|
+
# [
|
854
|
+
# [
|
855
|
+
# 0,
|
856
|
+
# 1337566253.89858,
|
857
|
+
# 0.000355720520019531
|
858
|
+
# ],
|
859
|
+
# [
|
860
|
+
# {
|
861
|
+
# "position": 0,
|
862
|
+
# "value": "hello"
|
863
|
+
# },
|
864
|
+
# {
|
865
|
+
# "position": 1,
|
866
|
+
# "value": "日"
|
867
|
+
# },
|
868
|
+
# {
|
869
|
+
# "position": 2,
|
870
|
+
# "value": "本"
|
871
|
+
# },
|
872
|
+
# {
|
873
|
+
# "position": 3,
|
874
|
+
# "value": "語"
|
875
|
+
# },
|
876
|
+
# {
|
877
|
+
# "position": 4,
|
878
|
+
# "value": "!"
|
879
|
+
# },
|
880
|
+
# {
|
881
|
+
# "position": 5,
|
882
|
+
# "value": "!"
|
883
|
+
# },
|
884
|
+
# {
|
885
|
+
# "position": 6,
|
886
|
+
# "value": "!"
|
887
|
+
# },
|
888
|
+
# {
|
889
|
+
# "position": 7,
|
890
|
+
# "value": "777"
|
891
|
+
# }
|
892
|
+
# ]
|
893
|
+
# ]
|
894
|
+
</pre></div>
|
895
|
+
</div>
|
896
|
+
<p><tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt> の実行結果です。</p>
|
897
|
+
<p>実行例:</p>
|
898
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
899
|
+
# [
|
900
|
+
# [
|
901
|
+
# 0,
|
902
|
+
# 1337566253.89858,
|
903
|
+
# 0.000355720520019531
|
904
|
+
# ],
|
905
|
+
# [
|
906
|
+
# {
|
907
|
+
# "position": 0,
|
908
|
+
# "value": "he"
|
909
|
+
# },
|
910
|
+
# {
|
911
|
+
# "position": 1,
|
912
|
+
# "value": "el"
|
913
|
+
# },
|
914
|
+
# {
|
915
|
+
# "position": 2,
|
916
|
+
# "value": "ll"
|
917
|
+
# },
|
918
|
+
# {
|
919
|
+
# "position": 3,
|
920
|
+
# "value": "lo"
|
921
|
+
# },
|
922
|
+
# {
|
923
|
+
# "position": 4,
|
924
|
+
# "value": "o日"
|
925
|
+
# },
|
926
|
+
# {
|
927
|
+
# "position": 5,
|
928
|
+
# "value": "日本"
|
929
|
+
# },
|
930
|
+
# {
|
931
|
+
# "position": 6,
|
932
|
+
# "value": "本語"
|
933
|
+
# },
|
934
|
+
# {
|
935
|
+
# "position": 7,
|
936
|
+
# "value": "語!"
|
937
|
+
# },
|
938
|
+
# {
|
939
|
+
# "position": 8,
|
940
|
+
# "value": "!!"
|
941
|
+
# },
|
942
|
+
# {
|
943
|
+
# "position": 9,
|
944
|
+
# "value": "!!"
|
945
|
+
# },
|
946
|
+
# {
|
947
|
+
# "position": 10,
|
948
|
+
# "value": "!7"
|
949
|
+
# },
|
950
|
+
# {
|
951
|
+
# "position": 11,
|
952
|
+
# "value": "77"
|
953
|
+
# },
|
954
|
+
# {
|
955
|
+
# "position": 12,
|
956
|
+
# "value": "77"
|
957
|
+
# },
|
958
|
+
# {
|
959
|
+
# "position": 13,
|
960
|
+
# "value": "7"
|
961
|
+
# }
|
962
|
+
# ]
|
963
|
+
# ]
|
964
|
+
</pre></div>
|
965
|
+
</div>
|
966
|
+
</div>
|
967
|
+
<div class="section" id="tokenunigram">
|
968
|
+
<span id="token-unigram"></span><h3>7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
969
|
+
<p><tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は各トークンが2文字ですが、 <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> は各トークンが1文字です。</p>
|
970
|
+
<p>実行例:</p>
|
971
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram "100cents!!!" NormalizerAuto
|
972
|
+
# [
|
973
|
+
# [
|
974
|
+
# 0,
|
975
|
+
# 1337566253.89858,
|
976
|
+
# 0.000355720520019531
|
977
|
+
# ],
|
978
|
+
# [
|
979
|
+
# {
|
980
|
+
# "position": 0,
|
981
|
+
# "value": "100"
|
982
|
+
# },
|
983
|
+
# {
|
984
|
+
# "position": 1,
|
985
|
+
# "value": "cents"
|
986
|
+
# },
|
987
|
+
# {
|
988
|
+
# "position": 2,
|
989
|
+
# "value": "!!!"
|
990
|
+
# }
|
991
|
+
# ]
|
992
|
+
# ]
|
993
|
+
</pre></div>
|
994
|
+
</div>
|
995
|
+
</div>
|
996
|
+
<div class="section" id="tokentrigram">
|
997
|
+
<span id="token-trigram"></span><h3>7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
998
|
+
<p><tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> は <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は各トークンが2文字ですが、 <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> は各トークンが3文字です。</p>
|
999
|
+
<p>実行例:</p>
|
1000
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
|
1001
|
+
# [
|
1002
|
+
# [
|
1003
|
+
# 0,
|
1004
|
+
# 1337566253.89858,
|
1005
|
+
# 0.000355720520019531
|
1006
|
+
# ],
|
1007
|
+
# [
|
1008
|
+
# {
|
1009
|
+
# "position": 0,
|
1010
|
+
# "value": "10000"
|
1011
|
+
# },
|
1012
|
+
# {
|
1013
|
+
# "position": 1,
|
1014
|
+
# "value": "cents"
|
1015
|
+
# },
|
1016
|
+
# {
|
1017
|
+
# "position": 2,
|
1018
|
+
# "value": "!!!!!"
|
1019
|
+
# }
|
1020
|
+
# ]
|
1021
|
+
# ]
|
1022
|
+
</pre></div>
|
1023
|
+
</div>
|
1024
|
+
</div>
|
1025
|
+
<div class="section" id="tokendelimit">
|
1026
|
+
<span id="token-delimit"></span><h3>7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1027
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> は1つ以上の空白文字( <tt class="docutils literal"><span class="pre">U+0020</span></tt> )で分割してトークンを抽出します。たとえば、 <tt class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></tt> は <tt class="docutils literal"><span class="pre">Hello</span></tt> と <tt class="docutils literal"><span class="pre">World</span></tt> にトークナイズされます。</p>
|
1028
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> はタグテキストに適切です。 <tt class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></tt> というテキストから <tt class="docutils literal"><span class="pre">groonga</span></tt> 、 <tt class="docutils literal"><span class="pre">full-text-search</span></tt> 、 <tt class="docutils literal"><span class="pre">http</span></tt> を抽出します。</p>
|
1029
|
+
<p>以下は <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt> の例です。</p>
|
1030
|
+
<p>実行例:</p>
|
1031
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
|
1032
|
+
# [
|
1033
|
+
# [
|
1034
|
+
# 0,
|
1035
|
+
# 1337566253.89858,
|
1036
|
+
# 0.000355720520019531
|
1037
|
+
# ],
|
1038
|
+
# [
|
1039
|
+
# {
|
1040
|
+
# "position": 0,
|
1041
|
+
# "value": "groonga"
|
1042
|
+
# },
|
1043
|
+
# {
|
1044
|
+
# "position": 1,
|
1045
|
+
# "value": "full-text-search"
|
1046
|
+
# },
|
1047
|
+
# {
|
1048
|
+
# "position": 2,
|
1049
|
+
# "value": "http"
|
1050
|
+
# }
|
1051
|
+
# ]
|
1052
|
+
# ]
|
1053
|
+
</pre></div>
|
1054
|
+
</div>
|
1055
|
+
</div>
|
1056
|
+
<div class="section" id="tokendelimitnull">
|
1057
|
+
<span id="token-delimit-null"></span><h3>7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1058
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> は <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><em>TokenDelimit</em></a> は空白文字( <tt class="docutils literal"><span class="pre">U+0020</span></tt> )を使いますが、 <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> はNUL文字( <tt class="docutils literal"><span class="pre">U+0000</span></tt> )を使います。</p>
|
1059
|
+
<p><tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> もタグテキストに適切です。</p>
|
1060
|
+
<p>以下は <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt> の例です。</p>
|
1061
|
+
<p>実行例:</p>
|
1062
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
|
1063
|
+
# [
|
1064
|
+
# [
|
1065
|
+
# 0,
|
1066
|
+
# 1337566253.89858,
|
1067
|
+
# 0.000355720520019531
|
1068
|
+
# ],
|
1069
|
+
# [
|
1070
|
+
# {
|
1071
|
+
# "position": 0,
|
1072
|
+
# "value": "groongau0000full-text-searchu0000http"
|
1073
|
+
# }
|
1074
|
+
# ]
|
1075
|
+
# ]
|
1076
|
+
</pre></div>
|
1077
|
+
</div>
|
1078
|
+
</div>
|
1079
|
+
<div class="section" id="tokenmecab">
|
1080
|
+
<span id="token-mecab"></span><h3>7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1081
|
+
<p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> は <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
|
1082
|
+
<p>MeCabは日本語に依存していません。その言語用の辞書を用意すれば日本語以外でもMeCabを使えます。日本語用の辞書には <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST Japanese Dictionary</a> を使えます。</p>
|
1083
|
+
<p><tt class="docutils literal"><span class="pre">TokenMecab</span></tt> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> では <tt class="docutils literal"><span class="pre">京都</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">東京都</span></tt> も <tt class="docutils literal"><span class="pre">京都</span></tt> も見つかりますが、この場合は <tt class="docutils literal"><span class="pre">東京都</span></tt> は期待した結果ではありません。 <tt class="docutils literal"><span class="pre">TokenMecab</span></tt> を使うと <tt class="docutils literal"><span class="pre">京都</span></tt> というクエリーで <tt class="docutils literal"><span class="pre">京都</span></tt> だけを見つけられます。</p>
|
1084
|
+
<p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
|
1085
|
+
<p>以下は <tt class="docutils literal"><span class="pre">TokenMecab</span></tt> の例です。 <tt class="docutils literal"><span class="pre">東京都</span></tt> は <tt class="docutils literal"><span class="pre">東京</span></tt> と <tt class="docutils literal"><span class="pre">都</span></tt> にトークナイズされています。 <tt class="docutils literal"><span class="pre">京都</span></tt> というトークンはありません。</p>
|
1086
|
+
<p>実行例:</p>
|
1087
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab "東京都"
|
1088
|
+
# [
|
1089
|
+
# [
|
1090
|
+
# 0,
|
1091
|
+
# 1337566253.89858,
|
1092
|
+
# 0.000355720520019531
|
1093
|
+
# ],
|
1094
|
+
# [
|
1095
|
+
# {
|
1096
|
+
# "position": 0,
|
1097
|
+
# "value": "東京"
|
1098
|
+
# },
|
1099
|
+
# {
|
1100
|
+
# "position": 1,
|
1101
|
+
# "value": "都"
|
1102
|
+
# }
|
1103
|
+
# ]
|
1104
|
+
# ]
|
1105
|
+
</pre></div>
|
1106
|
+
</div>
|
1107
|
+
</div>
|
1108
|
+
<div class="section" id="tokenregexp">
|
1109
|
+
<span id="token-regexp"></span><h3>7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1110
|
+
<div class="versionadded">
|
1111
|
+
<p><span class="versionmodified">バージョン 5.0.1 で追加.</span></p>
|
1112
|
+
</div>
|
1113
|
+
<div class="admonition caution">
|
1114
|
+
<p class="first admonition-title">ご用心</p>
|
1115
|
+
<p class="last">このトークナイザーは実験的です。仕様が変わる可能性があります。</p>
|
1116
|
+
</div>
|
1117
|
+
<div class="admonition caution">
|
1118
|
+
<p class="first admonition-title">ご用心</p>
|
1119
|
+
<p class="last">このトークナイザーはUTF-8でしか使えません。EUC-JPやShift_JISなどと一緒には使えません。</p>
|
1120
|
+
</div>
|
1121
|
+
<p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
|
1122
|
+
<p>一般的に、正規表現検索は逐次検索で実行します。しかし、次のケースはインデックスを使って検索できます。</p>
|
1123
|
+
<blockquote>
|
1124
|
+
<div><ul class="simple">
|
1125
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">hello</span></tt> のようにリテラルしかないケース</p>
|
1126
|
+
</li>
|
1127
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">\A/home/alice</span></tt> のようにテキストの最初でのマッチとリテラルのみのケース</p>
|
1128
|
+
</li>
|
1129
|
+
<li><p class="first"><tt class="docutils literal"><span class="pre">\.txt\z</span></tt> のようにテキストの最後でのマッチとリテラルのみのケース</p>
|
1130
|
+
</li>
|
1131
|
+
</ul>
|
1132
|
+
</div></blockquote>
|
1133
|
+
<p>多くのケースでは、逐次検索よりもインデックスを使った検索の方が高速です。</p>
|
1134
|
+
<p><tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> はベースとしてバイグラムを使います。 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <tt class="docutils literal"><span class="pre">U+FFEF</span></tt> )を入れ、テキストの最後にテキストの最後であるというマーク( <tt class="docutils literal"><span class="pre">U+FFF0</span></tt> )を入れます。</p>
|
1135
|
+
<p>実行例:</p>
|
1136
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
|
1137
|
+
# [
|
1138
|
+
# [
|
1139
|
+
# 0,
|
1140
|
+
# 1337566253.89858,
|
1141
|
+
# 0.000355720520019531
|
1142
|
+
# ],
|
1143
|
+
# [
|
1144
|
+
# {
|
1145
|
+
# "position": 0,
|
1146
|
+
# "value": ""
|
1147
|
+
# },
|
1148
|
+
# {
|
1149
|
+
# "position": 1,
|
1150
|
+
# "value": "/h"
|
1151
|
+
# },
|
1152
|
+
# {
|
1153
|
+
# "position": 2,
|
1154
|
+
# "value": "ho"
|
1155
|
+
# },
|
1156
|
+
# {
|
1157
|
+
# "position": 3,
|
1158
|
+
# "value": "om"
|
1159
|
+
# },
|
1160
|
+
# {
|
1161
|
+
# "position": 4,
|
1162
|
+
# "value": "me"
|
1163
|
+
# },
|
1164
|
+
# {
|
1165
|
+
# "position": 5,
|
1166
|
+
# "value": "e/"
|
1167
|
+
# },
|
1168
|
+
# {
|
1169
|
+
# "position": 6,
|
1170
|
+
# "value": "/a"
|
1171
|
+
# },
|
1172
|
+
# {
|
1173
|
+
# "position": 7,
|
1174
|
+
# "value": "al"
|
1175
|
+
# },
|
1176
|
+
# {
|
1177
|
+
# "position": 8,
|
1178
|
+
# "value": "li"
|
1179
|
+
# },
|
1180
|
+
# {
|
1181
|
+
# "position": 9,
|
1182
|
+
# "value": "ic"
|
1183
|
+
# },
|
1184
|
+
# {
|
1185
|
+
# "position": 10,
|
1186
|
+
# "value": "ce"
|
1187
|
+
# },
|
1188
|
+
# {
|
1189
|
+
# "position": 11,
|
1190
|
+
# "value": "e/"
|
1191
|
+
# },
|
1192
|
+
# {
|
1193
|
+
# "position": 12,
|
1194
|
+
# "value": "/t"
|
1195
|
+
# },
|
1196
|
+
# {
|
1197
|
+
# "position": 13,
|
1198
|
+
# "value": "te"
|
1199
|
+
# },
|
1200
|
+
# {
|
1201
|
+
# "position": 14,
|
1202
|
+
# "value": "es"
|
1203
|
+
# },
|
1204
|
+
# {
|
1205
|
+
# "position": 15,
|
1206
|
+
# "value": "st"
|
1207
|
+
# },
|
1208
|
+
# {
|
1209
|
+
# "position": 16,
|
1210
|
+
# "value": "t."
|
1211
|
+
# },
|
1212
|
+
# {
|
1213
|
+
# "position": 17,
|
1214
|
+
# "value": ".t"
|
1215
|
+
# },
|
1216
|
+
# {
|
1217
|
+
# "position": 18,
|
1218
|
+
# "value": "tx"
|
1219
|
+
# },
|
1220
|
+
# {
|
1221
|
+
# "position": 19,
|
1222
|
+
# "value": "xt"
|
1223
|
+
# },
|
1224
|
+
# {
|
1225
|
+
# "position": 20,
|
1226
|
+
# "value": "t"
|
1227
|
+
# },
|
1228
|
+
# {
|
1229
|
+
# "position": 21,
|
1230
|
+
# "value": ""
|
1231
|
+
# }
|
1232
|
+
# ]
|
1233
|
+
# ]
|
1234
|
+
</pre></div>
|
1235
|
+
</div>
|
1236
|
+
<p><tt class="docutils literal"><span class="pre">\A</span></tt> で検索したとき、テキストの先頭であるというマークを使います。クエリーをトークナイズするために <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> を使うときは、 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は最初のトークンとしてテキストの先頭であるというマーク( <tt class="docutils literal"><span class="pre">U+FFEF</span></tt> )を追加します。テキストの先頭であるというマークは先頭にしか存在しないはずなので、テキストの先頭であるという検索結果を得ることができます。</p>
|
1237
|
+
<p>実行例:</p>
|
1238
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\A/home/alice/" NormalizerAuto --mode GET
|
1239
|
+
# [
|
1240
|
+
# [
|
1241
|
+
# 0,
|
1242
|
+
# 1337566253.89858,
|
1243
|
+
# 0.000355720520019531
|
1244
|
+
# ],
|
1245
|
+
# [
|
1246
|
+
# {
|
1247
|
+
# "position": 0,
|
1248
|
+
# "value": ""
|
1249
|
+
# },
|
1250
|
+
# {
|
1251
|
+
# "position": 1,
|
1252
|
+
# "value": "/h"
|
1253
|
+
# },
|
1254
|
+
# {
|
1255
|
+
# "position": 2,
|
1256
|
+
# "value": "ho"
|
1257
|
+
# },
|
1258
|
+
# {
|
1259
|
+
# "position": 3,
|
1260
|
+
# "value": "om"
|
1261
|
+
# },
|
1262
|
+
# {
|
1263
|
+
# "position": 4,
|
1264
|
+
# "value": "me"
|
1265
|
+
# },
|
1266
|
+
# {
|
1267
|
+
# "position": 5,
|
1268
|
+
# "value": "e/"
|
1269
|
+
# },
|
1270
|
+
# {
|
1271
|
+
# "position": 6,
|
1272
|
+
# "value": "/a"
|
1273
|
+
# },
|
1274
|
+
# {
|
1275
|
+
# "position": 7,
|
1276
|
+
# "value": "al"
|
1277
|
+
# },
|
1278
|
+
# {
|
1279
|
+
# "position": 8,
|
1280
|
+
# "value": "li"
|
1281
|
+
# },
|
1282
|
+
# {
|
1283
|
+
# "position": 9,
|
1284
|
+
# "value": "ic"
|
1285
|
+
# },
|
1286
|
+
# {
|
1287
|
+
# "position": 10,
|
1288
|
+
# "value": "ce"
|
1289
|
+
# },
|
1290
|
+
# {
|
1291
|
+
# "position": 11,
|
1292
|
+
# "value": "e/"
|
1293
|
+
# }
|
1294
|
+
# ]
|
1295
|
+
# ]
|
1296
|
+
</pre></div>
|
1297
|
+
</div>
|
1298
|
+
<p><tt class="docutils literal"><span class="pre">\z</span></tt> で検索したとき、テキストの最後であるというマークを使います。クエリーをトークナイズするために <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> を使うときは、 <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> は最後のトークンとしてテキストの最後であるというマーク( <tt class="docutils literal"><span class="pre">U+FFF0</span></tt> )を追加します。テキストの最後であるというマークは最後にしか存在しないはずなので、テキストの最後であるという検索結果を得ることができます。</p>
|
1299
|
+
<p>実行例:</p>
|
1300
|
+
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\.txt\\z" NormalizerAuto --mode GET
|
1301
|
+
# [
|
1302
|
+
# [
|
1303
|
+
# 0,
|
1304
|
+
# 1337566253.89858,
|
1305
|
+
# 0.000355720520019531
|
1306
|
+
# ],
|
1307
|
+
# [
|
1308
|
+
# {
|
1309
|
+
# "position": 0,
|
1310
|
+
# "value": "\\."
|
1311
|
+
# },
|
1312
|
+
# {
|
1313
|
+
# "position": 1,
|
1314
|
+
# "value": ".t"
|
1315
|
+
# },
|
1316
|
+
# {
|
1317
|
+
# "position": 2,
|
1318
|
+
# "value": "tx"
|
1319
|
+
# },
|
1320
|
+
# {
|
1321
|
+
# "position": 3,
|
1322
|
+
# "value": "xt"
|
1323
|
+
# },
|
1324
|
+
# {
|
1325
|
+
# "position": 5,
|
1326
|
+
# "value": ""
|
1327
|
+
# }
|
1328
|
+
# ]
|
1329
|
+
# ]
|
1330
|
+
</pre></div>
|
1331
|
+
</div>
|
1332
|
+
</div>
|
1333
|
+
</div>
|
92
1334
|
</div>
|
93
1335
|
|
94
1336
|
|
95
1337
|
</div>
|
96
1338
|
</div>
|
97
1339
|
</div>
|
98
|
-
<div class="sphinxsidebar"
|
1340
|
+
<div class="sphinxsidebar">
|
99
1341
|
<div class="sphinxsidebarwrapper">
|
1342
|
+
<h3><a href="../index.html">目次</a></h3>
|
1343
|
+
<ul>
|
1344
|
+
<li><a class="reference internal" href="#">7.8. トークナイザー</a><ul>
|
1345
|
+
<li><a class="reference internal" href="#summary">7.8.1. 概要</a></li>
|
1346
|
+
<li><a class="reference internal" href="#what-is-tokenize">7.8.2. 「トークナイズ」とはなにか</a></li>
|
1347
|
+
<li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. 組み込みトークナイザー</a><ul>
|
1348
|
+
<li><a class="reference internal" href="#tokenbigram">7.8.3.1. <tt class="docutils literal"><span class="pre">TokenBigram</span></tt></a></li>
|
1349
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></tt></a></li>
|
1350
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></tt></a></li>
|
1351
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <tt class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></tt></a></li>
|
1352
|
+
<li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></tt></a></li>
|
1353
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></tt></a></li>
|
1354
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></tt></a></li>
|
1355
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <tt class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></tt></a></li>
|
1356
|
+
<li><a class="reference internal" href="#tokenunigram">7.8.3.9. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt></a></li>
|
1357
|
+
<li><a class="reference internal" href="#tokentrigram">7.8.3.10. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt></a></li>
|
1358
|
+
<li><a class="reference internal" href="#tokendelimit">7.8.3.11. <tt class="docutils literal"><span class="pre">TokenDelimit</span></tt></a></li>
|
1359
|
+
<li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <tt class="docutils literal"><span class="pre">TokenDelimitNull</span></tt></a></li>
|
1360
|
+
<li><a class="reference internal" href="#tokenmecab">7.8.3.13. <tt class="docutils literal"><span class="pre">TokenMecab</span></tt></a></li>
|
1361
|
+
<li><a class="reference internal" href="#tokenregexp">7.8.3.14. <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt></a></li>
|
1362
|
+
</ul>
|
1363
|
+
</li>
|
1364
|
+
</ul>
|
1365
|
+
</li>
|
1366
|
+
</ul>
|
1367
|
+
|
100
1368
|
<h4>前のトピックへ</h4>
|
101
1369
|
<p class="topless"><a href="normalizers.html"
|
102
1370
|
title="前の章へ">7.7. ノーマライザー</a></p>
|
103
1371
|
<h4>次のトピックへ</h4>
|
104
1372
|
<p class="topless"><a href="token_filters.html"
|
105
1373
|
title="次の章へ">7.9. トークンフィルター</a></p>
|
106
|
-
<
|
107
|
-
|
108
|
-
<
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
</div>
|
113
|
-
<div id="searchbox" style="display: none" role="search">
|
1374
|
+
<h3>このページ</h3>
|
1375
|
+
<ul class="this-page-menu">
|
1376
|
+
<li><a href="../_sources/reference/tokenizers.txt"
|
1377
|
+
rel="nofollow">ソースコードを表示</a></li>
|
1378
|
+
</ul>
|
1379
|
+
<div id="searchbox" style="display: none">
|
114
1380
|
<h3>クイック検索</h3>
|
115
1381
|
<form class="search" action="../search.html" method="get">
|
116
1382
|
<input type="text" name="q" />
|
@@ -127,7 +1393,7 @@
|
|
127
1393
|
</div>
|
128
1394
|
<div class="clearer"></div>
|
129
1395
|
</div>
|
130
|
-
<div class="related"
|
1396
|
+
<div class="related">
|
131
1397
|
<h3>ナビゲーション</h3>
|
132
1398
|
<ul>
|
133
1399
|
<li class="right" style="margin-right: 10px">
|
@@ -139,11 +1405,11 @@
|
|
139
1405
|
<li class="right" >
|
140
1406
|
<a href="normalizers.html" title="7.7. ノーマライザー"
|
141
1407
|
>前へ</a> |</li>
|
142
|
-
<li><a href="../index.html">Groonga v5.0.
|
1408
|
+
<li><a href="../index.html">Groonga v5.0.1-42-g4d10df1ドキュメント</a> »</li>
|
143
1409
|
<li><a href="../reference.html" >7. リファレンスマニュアル</a> »</li>
|
144
1410
|
</ul>
|
145
1411
|
</div>
|
146
|
-
<div class="footer"
|
1412
|
+
<div class="footer">
|
147
1413
|
© Copyright 2009-2015, Brazil, Inc.
|
148
1414
|
</div>
|
149
1415
|
</body>
|