rroonga 5.0.4-x64-mingw32 → 5.0.5-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +2 -2
- data/example/measure-data-column-disk-usage.rb +124 -0
- data/example/measure-index-column-disk-usage.rb +81 -0
- data/example/measure-table-disk-usage.rb +100 -0
- data/ext/groonga/rb-grn-database.c +31 -0
- data/ext/groonga/rb-grn-double-array-trie.c +1 -8
- data/ext/groonga/rb-grn-logger.c +45 -0
- data/ext/groonga/rb-grn-object.c +29 -1
- data/ext/groonga/rb-grn-patricia-trie.c +1 -8
- data/ext/groonga/rb-grn-table-cursor.c +8 -3
- data/ext/groonga/rb-grn-table.c +10 -5
- data/ext/groonga/rb-grn-thread.c +160 -0
- data/ext/groonga/rb-grn-windows-event-logger.c +79 -0
- data/ext/groonga/rb-grn.h +3 -1
- data/ext/groonga/rb-groonga.c +3 -1
- data/lib/1.9/groonga.so +0 -0
- data/lib/2.0/groonga.so +0 -0
- data/lib/2.1/groonga.so +0 -0
- data/lib/2.2/groonga.so +0 -0
- data/lib/groonga/dumper.rb +6 -1
- data/rroonga-build.rb +4 -4
- data/test/groonga-test-utils.rb +5 -1
- data/test/test-database.rb +11 -0
- data/test/test-logger.rb +6 -0
- data/test/test-operator.rb +6 -6
- data/test/test-procedure.rb +15 -0
- data/test/test-table-dumper.rb +170 -1
- data/test/test-thread.rb +42 -0
- data/test/test-windows-event-logger.rb +28 -0
- data/vendor/local/bin/grndb.exe +0 -0
- data/vendor/local/bin/groonga-benchmark.exe +0 -0
- data/vendor/local/bin/groonga.exe +0 -0
- data/vendor/local/bin/libgcc_s_seh-1.dll +0 -0
- data/vendor/local/bin/libgroonga-0.dll +0 -0
- data/vendor/local/bin/libmecab-1.dll +0 -0
- data/vendor/local/bin/libmsgpack-4.dll +0 -0
- data/vendor/local/bin/libmsgpackc-2.dll +0 -0
- data/vendor/local/bin/libonig-5.dll +0 -0
- data/vendor/local/bin/libstdc++-6.dll +0 -0
- data/vendor/local/bin/libwinpthread-1.dll +0 -0
- data/vendor/local/bin/lz4.exe +0 -0
- data/vendor/local/bin/lz4c.exe +0 -0
- data/vendor/local/bin/lz4cat +0 -0
- data/vendor/local/bin/mecab-config +2 -2
- data/vendor/local/bin/mecab.exe +0 -0
- data/vendor/local/bin/onig-config +1 -1
- data/vendor/local/bin/zlib1.dll +0 -0
- data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +2 -2
- data/vendor/local/etc/groonga/windows_event_log/provider.man +38 -0
- data/vendor/local/include/groonga/groonga.h +2 -0
- data/vendor/local/include/groonga/groonga/command.h +2 -0
- data/vendor/local/include/groonga/groonga/groonga.h +5 -0
- data/vendor/local/include/groonga/groonga/obj.h +1 -0
- data/vendor/local/include/groonga/groonga/portability.h +16 -0
- data/vendor/local/include/groonga/groonga/thread.h +42 -0
- data/vendor/local/include/groonga/groonga/windows_event_logger.h +33 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.la +2 -2
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +2 -2
- data/vendor/local/lib/groonga/plugins/ruby/eval.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.dll +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/eval.la +2 -2
- data/vendor/local/lib/groonga/plugins/ruby/load.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.dll +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/ruby/load.la +2 -2
- data/vendor/local/lib/groonga/plugins/sharding.rb +5 -0
- data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +43 -6
- data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +32 -25
- data/vendor/local/lib/groonga/plugins/sharding/logical_parameters.rb +44 -0
- data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +217 -49
- data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +507 -45
- data/vendor/local/lib/groonga/plugins/sharding/logical_shard_list.rb +28 -0
- data/vendor/local/lib/groonga/plugins/sharding/logical_table_remove.rb +11 -6
- data/vendor/local/lib/groonga/plugins/sharding/parameters.rb +10 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.la +2 -2
- data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/table/table.la +2 -2
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +2 -2
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +2 -2
- data/vendor/local/lib/groonga/scripts/ruby/command.rb +31 -1
- data/vendor/local/lib/groonga/scripts/ruby/context.rb +18 -2
- data/vendor/local/lib/groonga/scripts/ruby/database.rb +12 -4
- data/vendor/local/lib/groonga/scripts/ruby/expression_size_estimator.rb +31 -28
- data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +1 -0
- data/vendor/local/lib/groonga/scripts/ruby/logger/level.rb +4 -2
- data/vendor/local/lib/groonga/scripts/ruby/query_logger.rb +9 -0
- data/vendor/local/lib/groonga/scripts/ruby/query_logger/flag.rb +39 -0
- data/vendor/local/lib/groonga/scripts/ruby/record.rb +12 -0
- data/vendor/local/lib/groonga/scripts/ruby/table.rb +35 -1
- data/vendor/local/lib/libgroonga.a +0 -0
- data/vendor/local/lib/libgroonga.dll.a +0 -0
- data/vendor/local/lib/libgroonga.la +2 -2
- data/vendor/local/lib/liblz4.dll +0 -0
- data/vendor/local/lib/liblz4.dll.1 +0 -0
- data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
- data/vendor/local/lib/libmecab.a +0 -0
- data/vendor/local/lib/libmecab.dll.a +0 -0
- data/vendor/local/lib/libmecab.la +2 -2
- data/vendor/local/lib/libmsgpack.a +0 -0
- data/vendor/local/lib/libmsgpack.dll.a +0 -0
- data/vendor/local/lib/libmsgpack.la +2 -2
- data/vendor/local/lib/libmsgpackc.a +0 -0
- data/vendor/local/lib/libmsgpackc.dll.a +0 -0
- data/vendor/local/lib/libmsgpackc.la +2 -2
- data/vendor/local/lib/libonig.a +0 -0
- data/vendor/local/lib/libonig.dll.a +0 -0
- data/vendor/local/lib/libonig.la +2 -2
- data/vendor/local/lib/libz.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/groonga.pc +3 -3
- data/vendor/local/lib/pkgconfig/liblz4.pc +5 -5
- data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
- data/vendor/local/lib/pkgconfig/oniguruma.pc +6 -6
- data/vendor/local/lib/pkgconfig/zlib.pc +3 -3
- data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
- data/vendor/local/sbin/groonga-httpd-restart +1 -1
- data/vendor/local/sbin/groonga-httpd.exe +0 -0
- data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development.txt +3 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build.txt +19 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_autotools.txt +101 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_cmake.txt +94 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/windows_cmake.txt +93 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +16 -7
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/repository.txt +7 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/test.txt +4 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
- data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +319 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +1 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_db.txt +23 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_thread.txt +122 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_copy.txt +381 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +3 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/database_unmap.txt +85 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/io_flush.txt +218 -9
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +1 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_level.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +3 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_parameters.txt +138 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +97 -10
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_select.txt +745 -23
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_shard_list.txt +107 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +3 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +2 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalizer_list.txt +1 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_exist.txt +90 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +1 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +240 -56
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +33 -7
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_rename.txt +90 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +2 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/thread_limit.txt +110 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +2 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenizer_list.txt +1 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +1 -3
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-httpd.txt +3 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +0 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +0 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/snippet_html.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +34 -14
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +2 -2
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders/tsv.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +3 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scoring_note.txt +2 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/sharding.txt +108 -0
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +0 -21
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tuning.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/spec/search.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +4 -4
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/introduction.txt +24 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/match_columns.txt +19 -19
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/micro_blog.txt +9 -9
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/query_expansion.txt +1 -1
- data/vendor/local/share/doc/groonga/en/html/_static/basic.css +68 -6
- data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +27 -2
- data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/jquery-1.11.1.js +10308 -0
- data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -9404
- data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +2 -2
- data/vendor/local/share/doc/groonga/en/html/_static/underscore-1.3.1.js +999 -0
- data/vendor/local/share/doc/groonga/en/html/_static/underscore.js +31 -1415
- data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +15 -15
- data/vendor/local/share/doc/groonga/en/html/characteristic.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/client.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/community.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/contribution.html +78 -70
- data/vendor/local/share/doc/groonga/en/html/contribution/development.html +30 -27
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +146 -0
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +237 -0
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +227 -0
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +231 -0
- data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +37 -35
- data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +54 -52
- data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +80 -78
- data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +135 -122
- data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +38 -34
- data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +58 -54
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +27 -25
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +30 -28
- data/vendor/local/share/doc/groonga/en/html/contribution/report.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/development.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +30 -28
- data/vendor/local/share/doc/groonga/en/html/genindex.html +48 -20
- data/vendor/local/share/doc/groonga/en/html/index.html +123 -105
- data/vendor/local/share/doc/groonga/en/html/install.html +33 -31
- data/vendor/local/share/doc/groonga/en/html/install/centos.html +32 -30
- data/vendor/local/share/doc/groonga/en/html/install/debian.html +31 -29
- data/vendor/local/share/doc/groonga/en/html/install/fedora.html +29 -27
- data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +26 -24
- data/vendor/local/share/doc/groonga/en/html/install/others.html +92 -90
- data/vendor/local/share/doc/groonga/en/html/install/solaris.html +26 -24
- data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +29 -28
- data/vendor/local/share/doc/groonga/en/html/install/windows.html +34 -32
- data/vendor/local/share/doc/groonga/en/html/limitations.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/news.html +509 -142
- data/vendor/local/share/doc/groonga/en/html/news/0.x.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +20 -18
- data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +34 -32
- data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +29 -27
- data/vendor/local/share/doc/groonga/en/html/news/2.x.html +110 -108
- data/vendor/local/share/doc/groonga/en/html/news/3.x.html +73 -71
- data/vendor/local/share/doc/groonga/en/html/news/4.x.html +111 -109
- data/vendor/local/share/doc/groonga/en/html/news/senna.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/en/html/reference.html +111 -94
- data/vendor/local/share/doc/groonga/en/html/reference/api.html +55 -52
- data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +51 -49
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +63 -61
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +84 -82
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +46 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +41 -39
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +89 -87
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +88 -50
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +83 -81
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +46 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +46 -44
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +45 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +45 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +44 -42
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +93 -91
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +43 -41
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +79 -77
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +69 -67
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +296 -0
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +45 -43
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +38 -36
- data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +51 -49
- data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +60 -58
- data/vendor/local/share/doc/groonga/en/html/reference/cast.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/reference/column.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +28 -26
- data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +72 -70
- data/vendor/local/share/doc/groonga/en/html/reference/command.html +70 -61
- data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +59 -57
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +28 -26
- data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +100 -98
- data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -42
- data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +49 -47
- data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +33 -31
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +796 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +62 -60
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +96 -94
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +46 -44
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +67 -64
- data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +236 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +62 -60
- data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +46 -44
- data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +59 -57
- data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +63 -61
- data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +281 -54
- data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +66 -64
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +53 -52
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +78 -75
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +283 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +160 -85
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +2071 -83
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +287 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +71 -68
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +86 -84
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +52 -50
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +227 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +57 -55
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +53 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +43 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +43 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +56 -54
- data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +75 -74
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +59 -57
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +59 -57
- data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +898 -647
- data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +43 -41
- data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +50 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +87 -85
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +175 -152
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +55 -53
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +46 -44
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +327 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +77 -75
- data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +241 -0
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +108 -106
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +56 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +56 -55
- data/vendor/local/share/doc/groonga/en/html/reference/executables.html +24 -22
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +41 -39
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +28 -26
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +33 -31
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +83 -81
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +29 -27
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +48 -46
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +40 -38
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +84 -82
- data/vendor/local/share/doc/groonga/en/html/reference/function.html +22 -20
- data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +47 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +25 -23
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +85 -83
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +36 -34
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +26 -24
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +66 -64
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +54 -52
- data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +35 -33
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +34 -32
- data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +55 -53
- data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +24 -22
- data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +49 -47
- data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +45 -43
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +33 -31
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +29 -27
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +244 -242
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +327 -325
- data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +35 -29
- data/vendor/local/share/doc/groonga/en/html/reference/log.html +43 -41
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +49 -47
- data/vendor/local/share/doc/groonga/en/html/reference/operations.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/output.html +36 -34
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +62 -60
- data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +107 -103
- data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +50 -40
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +27 -25
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +41 -31
- data/vendor/local/share/doc/groonga/en/html/reference/scoring_note.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +241 -0
- data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +30 -28
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/reference/tables.html +56 -54
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +41 -39
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +341 -289
- data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +68 -66
- data/vendor/local/share/doc/groonga/en/html/reference/types.html +43 -41
- data/vendor/local/share/doc/groonga/en/html/search.html +11 -11
- data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/server.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +22 -20
- data/vendor/local/share/doc/groonga/en/html/server/http.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +37 -35
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/server/memcached.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/server/package.html +39 -37
- data/vendor/local/share/doc/groonga/en/html/spec.html +23 -21
- data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +129 -127
- data/vendor/local/share/doc/groonga/en/html/spec/search.html +22 -20
- data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +19 -17
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +25 -23
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial.html +20 -18
- data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +29 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +35 -33
- data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +25 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +46 -39
- data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +21 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +65 -63
- data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +55 -53
- data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +25 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +22 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +25 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +37 -35
- data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development.txt +3 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build.txt +19 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_autotools.txt +101 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_cmake.txt +94 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/windows_cmake.txt +93 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +16 -7
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/repository.txt +7 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/test.txt +4 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
- data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +319 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +1 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_db.txt +23 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_thread.txt +122 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_copy.txt +381 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +3 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/database_unmap.txt +85 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/io_flush.txt +218 -9
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +1 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_level.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +3 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_parameters.txt +138 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +97 -10
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_select.txt +745 -23
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_shard_list.txt +107 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +3 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +2 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalizer_list.txt +1 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_exist.txt +90 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +1 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +240 -56
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +33 -7
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_rename.txt +90 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +2 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/thread_limit.txt +110 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +2 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenizer_list.txt +1 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +1 -3
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-httpd.txt +3 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +0 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +0 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/snippet_html.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +34 -14
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders/tsv.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +3 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scoring_note.txt +2 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/sharding.txt +108 -0
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +0 -21
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tuning.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/spec/search.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/introduction.txt +24 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/match_columns.txt +19 -19
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/micro_blog.txt +9 -9
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/query_expansion.txt +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +68 -6
- data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +27 -2
- data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery-1.11.1.js +10308 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -9404
- data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +2 -2
- data/vendor/local/share/doc/groonga/ja/html/_static/underscore-1.3.1.js +999 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/underscore.js +31 -1415
- data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +15 -15
- data/vendor/local/share/doc/groonga/ja/html/characteristic.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/client.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/community.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/contribution.html +77 -69
- data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +30 -27
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +144 -0
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +226 -0
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +215 -0
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +229 -0
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +36 -34
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +53 -51
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +79 -77
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +134 -121
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +29 -27
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +57 -53
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +30 -28
- data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/development.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +25 -23
- data/vendor/local/share/doc/groonga/ja/html/genindex.html +48 -20
- data/vendor/local/share/doc/groonga/ja/html/index.html +122 -104
- data/vendor/local/share/doc/groonga/ja/html/install.html +33 -31
- data/vendor/local/share/doc/groonga/ja/html/install/centos.html +35 -33
- data/vendor/local/share/doc/groonga/ja/html/install/debian.html +33 -31
- data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +30 -28
- data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +26 -24
- data/vendor/local/share/doc/groonga/ja/html/install/others.html +83 -81
- data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +25 -23
- data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +30 -29
- data/vendor/local/share/doc/groonga/ja/html/install/windows.html +33 -31
- data/vendor/local/share/doc/groonga/ja/html/limitations.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/news.html +460 -126
- data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +20 -18
- data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +34 -32
- data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +29 -27
- data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +102 -100
- data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +66 -64
- data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +91 -89
- data/vendor/local/share/doc/groonga/ja/html/news/senna.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/ja/html/reference.html +111 -94
- data/vendor/local/share/doc/groonga/ja/html/reference/api.html +55 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +51 -49
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +58 -56
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +84 -82
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +41 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +85 -83
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +88 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +78 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +45 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +45 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +44 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +93 -91
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +43 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +79 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +69 -67
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +286 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +45 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +38 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +60 -58
- data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/reference/column.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +61 -59
- data/vendor/local/share/doc/groonga/ja/html/reference/command.html +70 -61
- data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +49 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +28 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +99 -97
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +41 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +49 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +33 -31
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +781 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +62 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +88 -86
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +66 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +229 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +62 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +58 -56
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +63 -61
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +266 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +59 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +53 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +78 -75
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +276 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +158 -85
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +2008 -80
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +285 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +71 -68
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +82 -79
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +52 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +220 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +53 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +51 -49
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +43 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +43 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +52 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +68 -67
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +59 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +59 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +680 -448
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +43 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +84 -82
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +146 -126
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +55 -53
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +322 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +73 -70
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +229 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +94 -91
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +56 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +56 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +24 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +39 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +28 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +33 -31
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +73 -72
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +29 -27
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +40 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +84 -82
- data/vendor/local/share/doc/groonga/ja/html/reference/function.html +22 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +25 -23
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +69 -67
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +36 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +26 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +56 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +45 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +34 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +35 -33
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +48 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +24 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +35 -33
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +37 -35
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +33 -31
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +26 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +143 -141
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +236 -234
- data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +34 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/log.html +43 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +39 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/output.html +32 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +42 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +91 -88
- data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +50 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +27 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +41 -31
- data/vendor/local/share/doc/groonga/ja/html/reference/scoring_note.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +223 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +27 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +46 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +38 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +303 -243
- data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +62 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/types.html +42 -40
- data/vendor/local/share/doc/groonga/ja/html/search.html +11 -11
- data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/server.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/server/http.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +31 -29
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/server/package.html +38 -36
- data/vendor/local/share/doc/groonga/ja/html/spec.html +23 -21
- data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +128 -126
- data/vendor/local/share/doc/groonga/ja/html/spec/search.html +22 -20
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +19 -17
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +25 -23
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial.html +20 -18
- data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +29 -23
- data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +34 -32
- data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +46 -39
- data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +21 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +66 -64
- data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +52 -50
- data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +25 -23
- data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +22 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +25 -23
- data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +35 -33
- data/vendor/local/share/license/groonga/README.md +6 -0
- data/vendor/local/share/license/mruby/AUTHORS +1 -0
- data/vendor/local/share/license/mruby/MITL +1 -1
- data/vendor/local/share/license/mruby/README.md +6 -5
- data/vendor/local/share/license/msgpack/README +219 -0
- data/vendor/local/share/man/ja/man1/groonga.1 +23512 -15126
- data/vendor/local/share/man/man1/groonga.1 +26542 -17745
- metadata +77 -3
- data/vendor/local/share/license/msgpack/AUTHORS +0 -0
@@ -7,7 +7,7 @@
|
|
7
7
|
<head>
|
8
8
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
9
9
|
|
10
|
-
<title>7.9. Token filters — Groonga v5.0.
|
10
|
+
<title>7.9. Token filters — Groonga v5.0.6-226-gd7da7e7 documentation</title>
|
11
11
|
|
12
12
|
<link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
|
13
13
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
@@ -15,7 +15,7 @@
|
|
15
15
|
<script type="text/javascript">
|
16
16
|
var DOCUMENTATION_OPTIONS = {
|
17
17
|
URL_ROOT: '../',
|
18
|
-
VERSION: '5.0.
|
18
|
+
VERSION: '5.0.6-226-gd7da7e7',
|
19
19
|
COLLAPSE_INDEX: false,
|
20
20
|
FILE_SUFFIX: '.html',
|
21
21
|
HAS_SOURCE: true
|
@@ -25,12 +25,12 @@
|
|
25
25
|
<script type="text/javascript" src="../_static/underscore.js"></script>
|
26
26
|
<script type="text/javascript" src="../_static/doctools.js"></script>
|
27
27
|
<link rel="shortcut icon" href="../_static/favicon.ico"/>
|
28
|
-
<link rel="top" title="Groonga v5.0.
|
28
|
+
<link rel="top" title="Groonga v5.0.6-226-gd7da7e7 documentation" href="../index.html" />
|
29
29
|
<link rel="up" title="7. Reference manual" href="../reference.html" />
|
30
30
|
<link rel="next" title="7.10. Query expanders" href="query_expanders.html" />
|
31
31
|
<link rel="prev" title="7.8. Tokenizers" href="tokenizers.html" />
|
32
32
|
</head>
|
33
|
-
<body>
|
33
|
+
<body role="document">
|
34
34
|
<div class="header">
|
35
35
|
<h1 class="title">
|
36
36
|
<a id="top-link" href="../index.html">
|
@@ -48,7 +48,7 @@
|
|
48
48
|
</div>
|
49
49
|
|
50
50
|
|
51
|
-
<div class="related">
|
51
|
+
<div class="related" role="navigation" aria-label="related navigation">
|
52
52
|
<h3>Navigation</h3>
|
53
53
|
<ul>
|
54
54
|
<li class="right" style="margin-right: 10px">
|
@@ -60,15 +60,15 @@
|
|
60
60
|
<li class="right" >
|
61
61
|
<a href="tokenizers.html" title="7.8. Tokenizers"
|
62
62
|
accesskey="P">previous</a> |</li>
|
63
|
-
<li><a href="../index.html">Groonga v5.0.
|
64
|
-
<li><a href="../reference.html" accesskey="U">7. Reference manual</a> »</li>
|
63
|
+
<li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> »</li>
|
64
|
+
<li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. Reference manual</a> »</li>
|
65
65
|
</ul>
|
66
66
|
</div>
|
67
67
|
|
68
68
|
<div class="document">
|
69
69
|
<div class="documentwrapper">
|
70
70
|
<div class="bodywrapper">
|
71
|
-
<div class="body">
|
71
|
+
<div class="body" role="main">
|
72
72
|
|
73
73
|
<div class="section" id="token-filters">
|
74
74
|
<h1>7.9. Token filters<a class="headerlink" href="#token-filters" title="Permalink to this headline">¶</a></h1>
|
@@ -78,9 +78,9 @@
|
|
78
78
|
<p>Token filter module can be added as a plugin.</p>
|
79
79
|
<p>You can customize tokenized token by registering your token filters plugins to Groonga.</p>
|
80
80
|
<p>A table can have zero or more token filters. You can attach token
|
81
|
-
filters to a table by <a class="reference internal" href="commands/table_create.html#table-create-token-filters"><
|
81
|
+
filters to a table by <a class="reference internal" href="commands/table_create.html#table-create-token-filters"><span>token_filters</span></a> option in
|
82
82
|
<a class="reference internal" href="commands/table_create.html"><em>table_create</em></a>.</p>
|
83
|
-
<p>Here is an example <
|
83
|
+
<p>Here is an example <code class="docutils literal"><span class="pre">table_create</span></code> that uses <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code>
|
84
84
|
token filter module:</p>
|
85
85
|
<p>Execution example:</p>
|
86
86
|
<div class="highlight-none"><div class="highlight"><pre>register token_filters/stop_word
|
@@ -97,17 +97,17 @@ table_create Terms TABLE_PAT_KEY ShortText \
|
|
97
97
|
<h2>7.9.2. Available token filters<a class="headerlink" href="#available-token-filters" title="Permalink to this headline">¶</a></h2>
|
98
98
|
<p>Here is the list of available token filters:</p>
|
99
99
|
<ul class="simple">
|
100
|
-
<li><
|
101
|
-
<li><
|
100
|
+
<li><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code></li>
|
101
|
+
<li><code class="docutils literal"><span class="pre">TokenFilterStem</span></code></li>
|
102
102
|
</ul>
|
103
103
|
<div class="section" id="tokenfilterstopword">
|
104
|
-
<span id="token-filter-stop-word"></span><h3>7.9.2.1. <
|
105
|
-
<p><
|
104
|
+
<span id="token-filter-stop-word"></span><h3>7.9.2.1. <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code><a class="headerlink" href="#tokenfilterstopword" title="Permalink to this headline">¶</a></h3>
|
105
|
+
<p><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> removes stop words from tokenized token
|
106
106
|
in searching the documents.</p>
|
107
|
-
<p><
|
107
|
+
<p><code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> can specify stop word after adding the
|
108
108
|
documents because it removes token in searching the documents.</p>
|
109
|
-
<p>The stop word is specified <
|
110
|
-
<p>Here is an example that uses <
|
109
|
+
<p>The stop word is specified by the <code class="docutils literal"><span class="pre">is_stop_word</span></code> column on the lexicon table.</p>
|
110
|
+
<p>Here is an example that uses <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code> token filter:</p>
|
111
111
|
<p>Execution example:</p>
|
112
112
|
<div class="highlight-none"><div class="highlight"><pre>register token_filters/stop_word
|
113
113
|
# [[0, 1337566253.89858, 0.000355720520019531], true]
|
@@ -171,14 +171,14 @@ select Memos --match_columns content --query "Hello and"
|
|
171
171
|
# ]
|
172
172
|
</pre></div>
|
173
173
|
</div>
|
174
|
-
<p><
|
175
|
-
<p><
|
176
|
-
<
|
174
|
+
<p><code class="docutils literal"><span class="pre">and</span></code> token is marked as stop word in <code class="docutils literal"><span class="pre">Terms</span></code> table.</p>
|
175
|
+
<p><code class="docutils literal"><span class="pre">"Hello"</span></code> that doesn't have <code class="docutils literal"><span class="pre">and</span></code> in content is matched. Because
|
176
|
+
<code class="docutils literal"><span class="pre">and</span></code> is a stop word and <code class="docutils literal"><span class="pre">and</span></code> is removed from query.</p>
|
177
177
|
</div>
|
178
178
|
<div class="section" id="tokenfilterstem">
|
179
|
-
<span id="token-filter-stem"></span><h3>7.9.2.2. <
|
180
|
-
<p><
|
181
|
-
<p>Here is an example that uses <
|
179
|
+
<span id="token-filter-stem"></span><h3>7.9.2.2. <code class="docutils literal"><span class="pre">TokenFilterStem</span></code><a class="headerlink" href="#tokenfilterstem" title="Permalink to this headline">¶</a></h3>
|
180
|
+
<p><code class="docutils literal"><span class="pre">TokenFilterStem</span></code> stems tokenized token.</p>
|
181
|
+
<p>Here is an example that uses <code class="docutils literal"><span class="pre">TokenFilterStem</span></code> token filter:</p>
|
182
182
|
<p>Execution example:</p>
|
183
183
|
<div class="highlight-none"><div class="highlight"><pre>register token_filters/stem
|
184
184
|
# [[0, 1337566253.89858, 0.000355720520019531], true]
|
@@ -239,9 +239,9 @@ select Memos --match_columns content --query "develops"
|
|
239
239
|
# ]
|
240
240
|
</pre></div>
|
241
241
|
</div>
|
242
|
-
<p>All of <
|
243
|
-
tokens are stemmed as <
|
244
|
-
<
|
242
|
+
<p>All of <code class="docutils literal"><span class="pre">develop</span></code>, <code class="docutils literal"><span class="pre">developing</span></code>, <code class="docutils literal"><span class="pre">developed</span></code> and <code class="docutils literal"><span class="pre">develops</span></code>
|
243
|
+
tokens are stemmed as <code class="docutils literal"><span class="pre">develop</span></code>. So we can find <code class="docutils literal"><span class="pre">develop</span></code>,
|
244
|
+
<code class="docutils literal"><span class="pre">developing</span></code> and <code class="docutils literal"><span class="pre">developed</span></code> by <code class="docutils literal"><span class="pre">develops</span></code> query.</p>
|
245
245
|
</div>
|
246
246
|
</div>
|
247
247
|
<div class="section" id="see-also">
|
@@ -256,15 +256,15 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
|
|
256
256
|
</div>
|
257
257
|
</div>
|
258
258
|
</div>
|
259
|
-
<div class="sphinxsidebar">
|
259
|
+
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
260
260
|
<div class="sphinxsidebarwrapper">
|
261
261
|
<h3><a href="../index.html">Table Of Contents</a></h3>
|
262
262
|
<ul>
|
263
263
|
<li><a class="reference internal" href="#">7.9. Token filters</a><ul>
|
264
264
|
<li><a class="reference internal" href="#summary">7.9.1. Summary</a></li>
|
265
265
|
<li><a class="reference internal" href="#available-token-filters">7.9.2. Available token filters</a><ul>
|
266
|
-
<li><a class="reference internal" href="#tokenfilterstopword">7.9.2.1. <
|
267
|
-
<li><a class="reference internal" href="#tokenfilterstem">7.9.2.2. <
|
266
|
+
<li><a class="reference internal" href="#tokenfilterstopword">7.9.2.1. <code class="docutils literal"><span class="pre">TokenFilterStopWord</span></code></a></li>
|
267
|
+
<li><a class="reference internal" href="#tokenfilterstem">7.9.2.2. <code class="docutils literal"><span class="pre">TokenFilterStem</span></code></a></li>
|
268
268
|
</ul>
|
269
269
|
</li>
|
270
270
|
<li><a class="reference internal" href="#see-also">7.9.3. See also</a></li>
|
@@ -278,12 +278,14 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
|
|
278
278
|
<h4>Next topic</h4>
|
279
279
|
<p class="topless"><a href="query_expanders.html"
|
280
280
|
title="next chapter">7.10. Query expanders</a></p>
|
281
|
-
<
|
282
|
-
|
283
|
-
<
|
284
|
-
|
285
|
-
|
286
|
-
|
281
|
+
<div role="note" aria-label="source link">
|
282
|
+
<h3>This Page</h3>
|
283
|
+
<ul class="this-page-menu">
|
284
|
+
<li><a href="../_sources/reference/token_filters.txt"
|
285
|
+
rel="nofollow">Show Source</a></li>
|
286
|
+
</ul>
|
287
|
+
</div>
|
288
|
+
<div id="searchbox" style="display: none" role="search">
|
287
289
|
<h3>Quick search</h3>
|
288
290
|
<form class="search" action="../search.html" method="get">
|
289
291
|
<input type="text" name="q" />
|
@@ -300,7 +302,7 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
|
|
300
302
|
</div>
|
301
303
|
<div class="clearer"></div>
|
302
304
|
</div>
|
303
|
-
<div class="related">
|
305
|
+
<div class="related" role="navigation" aria-label="related navigation">
|
304
306
|
<h3>Navigation</h3>
|
305
307
|
<ul>
|
306
308
|
<li class="right" style="margin-right: 10px">
|
@@ -312,11 +314,11 @@ tokens are stemmed as <tt class="docutils literal"><span class="pre">develop</sp
|
|
312
314
|
<li class="right" >
|
313
315
|
<a href="tokenizers.html" title="7.8. Tokenizers"
|
314
316
|
>previous</a> |</li>
|
315
|
-
<li><a href="../index.html">Groonga v5.0.
|
316
|
-
<li><a href="../reference.html" >7. Reference manual</a> »</li>
|
317
|
+
<li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> »</li>
|
318
|
+
<li class="nav-item nav-item-1"><a href="../reference.html" >7. Reference manual</a> »</li>
|
317
319
|
</ul>
|
318
320
|
</div>
|
319
|
-
<div class="footer">
|
321
|
+
<div class="footer" role="contentinfo">
|
320
322
|
© Copyright 2009-2015, Brazil, Inc.
|
321
323
|
</div>
|
322
324
|
</body>
|
@@ -7,7 +7,7 @@
|
|
7
7
|
<head>
|
8
8
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
9
9
|
|
10
|
-
<title>7.8. Tokenizers — Groonga v5.0.
|
10
|
+
<title>7.8. Tokenizers — Groonga v5.0.6-226-gd7da7e7 documentation</title>
|
11
11
|
|
12
12
|
<link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
|
13
13
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
@@ -15,7 +15,7 @@
|
|
15
15
|
<script type="text/javascript">
|
16
16
|
var DOCUMENTATION_OPTIONS = {
|
17
17
|
URL_ROOT: '../',
|
18
|
-
VERSION: '5.0.
|
18
|
+
VERSION: '5.0.6-226-gd7da7e7',
|
19
19
|
COLLAPSE_INDEX: false,
|
20
20
|
FILE_SUFFIX: '.html',
|
21
21
|
HAS_SOURCE: true
|
@@ -25,12 +25,12 @@
|
|
25
25
|
<script type="text/javascript" src="../_static/underscore.js"></script>
|
26
26
|
<script type="text/javascript" src="../_static/doctools.js"></script>
|
27
27
|
<link rel="shortcut icon" href="../_static/favicon.ico"/>
|
28
|
-
<link rel="top" title="Groonga v5.0.
|
28
|
+
<link rel="top" title="Groonga v5.0.6-226-gd7da7e7 documentation" href="../index.html" />
|
29
29
|
<link rel="up" title="7. Reference manual" href="../reference.html" />
|
30
30
|
<link rel="next" title="7.9. Token filters" href="token_filters.html" />
|
31
31
|
<link rel="prev" title="7.7. Normalizers" href="normalizers.html" />
|
32
32
|
</head>
|
33
|
-
<body>
|
33
|
+
<body role="document">
|
34
34
|
<div class="header">
|
35
35
|
<h1 class="title">
|
36
36
|
<a id="top-link" href="../index.html">
|
@@ -48,7 +48,7 @@
|
|
48
48
|
</div>
|
49
49
|
|
50
50
|
|
51
|
-
<div class="related">
|
51
|
+
<div class="related" role="navigation" aria-label="related navigation">
|
52
52
|
<h3>Navigation</h3>
|
53
53
|
<ul>
|
54
54
|
<li class="right" style="margin-right: 10px">
|
@@ -60,15 +60,15 @@
|
|
60
60
|
<li class="right" >
|
61
61
|
<a href="normalizers.html" title="7.7. Normalizers"
|
62
62
|
accesskey="P">previous</a> |</li>
|
63
|
-
<li><a href="../index.html">Groonga v5.0.
|
64
|
-
<li><a href="../reference.html" accesskey="U">7. Reference manual</a> »</li>
|
63
|
+
<li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> »</li>
|
64
|
+
<li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. Reference manual</a> »</li>
|
65
65
|
</ul>
|
66
66
|
</div>
|
67
67
|
|
68
68
|
<div class="document">
|
69
69
|
<div class="documentwrapper">
|
70
70
|
<div class="bodywrapper">
|
71
|
-
<div class="body">
|
71
|
+
<div class="body" role="main">
|
72
72
|
|
73
73
|
<div class="section" id="tokenizers">
|
74
74
|
<h1>7.8. Tokenizers<a class="headerlink" href="#tokenizers" title="Permalink to this headline">¶</a></h1>
|
@@ -79,15 +79,15 @@ the following cases:</p>
|
|
79
79
|
<blockquote>
|
80
80
|
<div><ul>
|
81
81
|
<li><p class="first">Indexing text</p>
|
82
|
-
<div class="figure align-center">
|
82
|
+
<div class="figure align-center" id="id1">
|
83
83
|
<a class="reference internal image-reference" href="../_images/used-when-indexing.png"><img alt="../_images/used-when-indexing.png" src="../_images/used-when-indexing.png" style="width: 80%;" /></a>
|
84
|
-
<p class="caption">Tokenizer is used when indexing text.</p>
|
84
|
+
<p class="caption"><span class="caption-text">Tokenizer is used when indexing text.</span></p>
|
85
85
|
</div>
|
86
86
|
</li>
|
87
87
|
<li><p class="first">Searching by query</p>
|
88
|
-
<div class="figure align-center">
|
88
|
+
<div class="figure align-center" id="id2">
|
89
89
|
<a class="reference internal image-reference" href="../_images/used-when-searching.png"><img alt="../_images/used-when-searching.png" src="../_images/used-when-searching.png" style="width: 80%;" /></a>
|
90
|
-
<p class="caption">Tokenizer is used when searching by query.</p>
|
90
|
+
<p class="caption"><span class="caption-text">Tokenizer is used when searching by query.</span></p>
|
91
91
|
</div>
|
92
92
|
</li>
|
93
93
|
</ul>
|
@@ -95,12 +95,12 @@ the following cases:</p>
|
|
95
95
|
<p>Tokenizer is an important module for full-text search. You can change
|
96
96
|
trade-off between <a class="reference external" href="http://en.wikipedia.org/wiki/Precision_and_recall">precision and recall</a> by changing
|
97
97
|
tokenizer.</p>
|
98
|
-
<p>Normally, <a class="reference internal" href="#token-bigram"><
|
98
|
+
<p>Normally, <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> is a suitable tokenizer. If you don't
|
99
99
|
know much about tokenizer, it's recommended that you choose
|
100
|
-
<a class="reference internal" href="#token-bigram"><
|
100
|
+
<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>.</p>
|
101
101
|
<p>You can try a tokenizer by <a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a> and
|
102
102
|
<a class="reference internal" href="commands/table_tokenize.html"><em>table_tokenize</em></a>. Here is an example to
|
103
|
-
try <a class="reference internal" href="#token-bigram"><
|
103
|
+
try <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> tokenizer by
|
104
104
|
<a class="reference internal" href="commands/tokenize.html"><em>tokenize</em></a>:</p>
|
105
105
|
<p>Execution example:</p>
|
106
106
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
|
@@ -113,46 +113,57 @@ try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>
|
|
113
113
|
# [
|
114
114
|
# {
|
115
115
|
# "position": 0,
|
116
|
+
# "force_prefix": false,
|
116
117
|
# "value": "He"
|
117
118
|
# },
|
118
119
|
# {
|
119
120
|
# "position": 1,
|
121
|
+
# "force_prefix": false,
|
120
122
|
# "value": "el"
|
121
123
|
# },
|
122
124
|
# {
|
123
125
|
# "position": 2,
|
126
|
+
# "force_prefix": false,
|
124
127
|
# "value": "ll"
|
125
128
|
# },
|
126
129
|
# {
|
127
130
|
# "position": 3,
|
131
|
+
# "force_prefix": false,
|
128
132
|
# "value": "lo"
|
129
133
|
# },
|
130
134
|
# {
|
131
135
|
# "position": 4,
|
136
|
+
# "force_prefix": false,
|
132
137
|
# "value": "o "
|
133
138
|
# },
|
134
139
|
# {
|
135
140
|
# "position": 5,
|
141
|
+
# "force_prefix": false,
|
136
142
|
# "value": " W"
|
137
143
|
# },
|
138
144
|
# {
|
139
145
|
# "position": 6,
|
146
|
+
# "force_prefix": false,
|
140
147
|
# "value": "Wo"
|
141
148
|
# },
|
142
149
|
# {
|
143
150
|
# "position": 7,
|
151
|
+
# "force_prefix": false,
|
144
152
|
# "value": "or"
|
145
153
|
# },
|
146
154
|
# {
|
147
155
|
# "position": 8,
|
156
|
+
# "force_prefix": false,
|
148
157
|
# "value": "rl"
|
149
158
|
# },
|
150
159
|
# {
|
151
160
|
# "position": 9,
|
161
|
+
# "force_prefix": false,
|
152
162
|
# "value": "ld"
|
153
163
|
# },
|
154
164
|
# {
|
155
165
|
# "position": 10,
|
166
|
+
# "force_prefix": false,
|
156
167
|
# "value": "d"
|
157
168
|
# }
|
158
169
|
# ]
|
@@ -164,86 +175,86 @@ try <a class="reference internal" href="#token-bigram"><em>TokenBigram</em></a>
|
|
164
175
|
<h2>7.8.2. What is "tokenize"?<a class="headerlink" href="#what-is-tokenize" title="Permalink to this headline">¶</a></h2>
|
165
176
|
<p>"tokenize" is the process that extracts zero or more tokens from a
|
166
177
|
text. There are some "tokenize" methods.</p>
|
167
|
-
<p>For example, <
|
178
|
+
<p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
|
168
179
|
bigram tokenize method:</p>
|
169
180
|
<blockquote>
|
170
181
|
<div><ul class="simple">
|
171
|
-
<li><
|
172
|
-
<li><
|
173
|
-
<li><
|
174
|
-
<li><
|
175
|
-
<li><
|
176
|
-
<li><
|
177
|
-
<li><
|
178
|
-
<li><
|
179
|
-
<li><
|
180
|
-
<li><
|
182
|
+
<li><code class="docutils literal"><span class="pre">He</span></code></li>
|
183
|
+
<li><code class="docutils literal"><span class="pre">el</span></code></li>
|
184
|
+
<li><code class="docutils literal"><span class="pre">ll</span></code></li>
|
185
|
+
<li><code class="docutils literal"><span class="pre">lo</span></code></li>
|
186
|
+
<li><code class="docutils literal"><span class="pre">o_</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
|
187
|
+
<li><code class="docutils literal"><span class="pre">_W</span></code> (<code class="docutils literal"><span class="pre">_</span></code> means a white-space)</li>
|
188
|
+
<li><code class="docutils literal"><span class="pre">Wo</span></code></li>
|
189
|
+
<li><code class="docutils literal"><span class="pre">or</span></code></li>
|
190
|
+
<li><code class="docutils literal"><span class="pre">rl</span></code></li>
|
191
|
+
<li><code class="docutils literal"><span class="pre">ld</span></code></li>
|
181
192
|
</ul>
|
182
193
|
</div></blockquote>
|
183
|
-
<p>In the above example, 10 tokens are extracted from one text <
|
184
|
-
<span class="pre">World</span></
|
185
|
-
<p>For example, <
|
194
|
+
<p>In the above example, 10 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
|
195
|
+
<span class="pre">World</span></code>.</p>
|
196
|
+
<p>For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to the following tokens by
|
186
197
|
white-space-separate tokenize method:</p>
|
187
198
|
<blockquote>
|
188
199
|
<div><ul class="simple">
|
189
|
-
<li><
|
190
|
-
<li><
|
200
|
+
<li><code class="docutils literal"><span class="pre">Hello</span></code></li>
|
201
|
+
<li><code class="docutils literal"><span class="pre">World</span></code></li>
|
191
202
|
</ul>
|
192
203
|
</div></blockquote>
|
193
|
-
<p>In the above example, 2 tokens are extracted from one text <
|
194
|
-
<span class="pre">World</span></
|
204
|
+
<p>In the above example, 2 tokens are extracted from one text <code class="docutils literal"><span class="pre">Hello</span>
|
205
|
+
<span class="pre">World</span></code>.</p>
|
195
206
|
<p>Token is used as search key. You can find indexed documents only by
|
196
207
|
tokens that are extracted by used tokenize method. For example, you
|
197
|
-
can find <
|
198
|
-
can't find <
|
208
|
+
can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with bigram tokenize method but you
|
209
|
+
can't find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> by <code class="docutils literal"><span class="pre">ll</span></code> with white-space-separate tokenize
|
199
210
|
method. Because white-space-separate tokenize method doesn't extract
|
200
|
-
<
|
211
|
+
<code class="docutils literal"><span class="pre">ll</span></code> token. It just extracts <code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code> tokens.</p>
|
201
212
|
<p>In general, tokenize method that generates small tokens increases
|
202
213
|
recall but decreases precision. Tokenize method that generates large
|
203
214
|
tokens increases precision but decreases recall.</p>
|
204
|
-
<p>For example, we can find <
|
205
|
-
bigram tokenize method. <
|
215
|
+
<p>For example, we can find <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> and <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with
|
216
|
+
bigram tokenize method. <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is a noise for people who
|
206
217
|
want to search "logical and". It means that precision is
|
207
218
|
decreased. But recall is increased.</p>
|
208
|
-
<p>We can find only <
|
209
|
-
tokenize method. Because <
|
219
|
+
<p>We can find only <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> by <code class="docutils literal"><span class="pre">or</span></code> with white-space-separate
|
220
|
+
tokenize method. Because <code class="docutils literal"><span class="pre">World</span></code> is tokenized to one token <code class="docutils literal"><span class="pre">World</span></code>
|
210
221
|
with white-space-separate tokenize method. It means that precision is
|
211
222
|
increased for people who want to search "logical and". But recall is
|
212
|
-
decreased because <
|
223
|
+
decreased because <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> that contains <code class="docutils literal"><span class="pre">or</span></code> isn't found.</p>
|
213
224
|
</div>
|
214
225
|
<div class="section" id="built-in-tokenizsers">
|
215
226
|
<h2>7.8.3. Built-in tokenizers<a class="headerlink" href="#built-in-tokenizsers" title="Permalink to this headline">¶</a></h2>
|
216
227
|
<p>Here is a list of built-in tokenizers:</p>
|
217
228
|
<blockquote>
|
218
229
|
<div><ul class="simple">
|
219
|
-
<li><
|
220
|
-
<li><
|
221
|
-
<li><
|
222
|
-
<li><
|
223
|
-
<li><
|
224
|
-
<li><
|
225
|
-
<li><
|
226
|
-
<li><
|
227
|
-
<li><
|
228
|
-
<li><
|
229
|
-
<li><
|
230
|
-
<li><
|
231
|
-
<li><
|
232
|
-
<li><
|
230
|
+
<li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
|
231
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
|
232
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
|
233
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
|
234
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
|
235
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
|
236
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlpha</span></code></li>
|
237
|
+
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitAlphaDigit</span></code></li>
|
238
|
+
<li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
|
239
|
+
<li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
|
240
|
+
<li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
|
241
|
+
<li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
|
242
|
+
<li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
|
243
|
+
<li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
|
233
244
|
</ul>
|
234
245
|
</div></blockquote>
|
235
246
|
<div class="section" id="tokenbigram">
|
236
|
-
<span id="token-bigram"></span><h3>7.8.3.1. <
|
237
|
-
<p><
|
247
|
+
<span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="Permalink to this headline">¶</a></h3>
|
248
|
+
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> is a bigram based tokenizer. It's recommended to use
|
238
249
|
this tokenizer for most cases.</p>
|
239
250
|
<p>Bigram tokenize method tokenizes a text to two adjacent characters
|
240
|
-
tokens. For example, <
|
251
|
+
tokens. For example, <code class="docutils literal"><span class="pre">Hello</span></code> is tokenized to the following tokens:</p>
|
241
252
|
<blockquote>
|
242
253
|
<div><ul class="simple">
|
243
|
-
<li><
|
244
|
-
<li><
|
245
|
-
<li><
|
246
|
-
<li><
|
254
|
+
<li><code class="docutils literal"><span class="pre">He</span></code></li>
|
255
|
+
<li><code class="docutils literal"><span class="pre">el</span></code></li>
|
256
|
+
<li><code class="docutils literal"><span class="pre">ll</span></code></li>
|
257
|
+
<li><code class="docutils literal"><span class="pre">lo</span></code></li>
|
247
258
|
</ul>
|
248
259
|
</div></blockquote>
|
249
260
|
<p>Bigram tokenize method is good for recall because you can find all
|
@@ -252,15 +263,15 @@ texts by query consists of two or more characters.</p>
|
|
252
263
|
character because one character token doesn't exist. But you can find
|
253
264
|
all texts by query consists of one character in Groonga. Because
|
254
265
|
Groonga finds tokens that start with the query by predictive search. For
|
255
|
-
example, Groonga can find <
|
266
|
+
example, Groonga can find <code class="docutils literal"><span class="pre">ll</span></code> and <code class="docutils literal"><span class="pre">lo</span></code> tokens by <code class="docutils literal"><span class="pre">l</span></code> query.</p>
|
256
267
|
<p>Bigram tokenize method isn't good for precision because you can find
|
257
|
-
texts that includes query in word. For example, you can find <
|
258
|
-
by <
|
259
|
-
non-ASCII languages. <
|
268
|
+
texts that include the query within a word. For example, you can find <code class="docutils literal"><span class="pre">world</span></code>
|
269
|
+
by <code class="docutils literal"><span class="pre">or</span></code>. This is more sensitive for ASCII only languages rather than
|
270
|
+
non-ASCII languages. <code class="docutils literal"><span class="pre">TokenBigram</span></code> has solution for this problem
|
260
271
|
described below.</p>
|
261
|
-
<p><
|
272
|
+
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior is different when it's worked with any
|
262
273
|
<a class="reference internal" href="normalizers.html"><em>Normalizers</em></a>.</p>
|
263
|
-
<p>If no normalizer is used, <
|
274
|
+
<p>If no normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses pure bigram (all tokens
|
264
275
|
except the last token have two characters) tokenize method:</p>
|
265
276
|
<p>Execution example:</p>
|
266
277
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World"
|
@@ -273,54 +284,65 @@ except the last token have two characters) tokenize method:</p>
|
|
273
284
|
# [
|
274
285
|
# {
|
275
286
|
# "position": 0,
|
287
|
+
# "force_prefix": false,
|
276
288
|
# "value": "He"
|
277
289
|
# },
|
278
290
|
# {
|
279
291
|
# "position": 1,
|
292
|
+
# "force_prefix": false,
|
280
293
|
# "value": "el"
|
281
294
|
# },
|
282
295
|
# {
|
283
296
|
# "position": 2,
|
297
|
+
# "force_prefix": false,
|
284
298
|
# "value": "ll"
|
285
299
|
# },
|
286
300
|
# {
|
287
301
|
# "position": 3,
|
302
|
+
# "force_prefix": false,
|
288
303
|
# "value": "lo"
|
289
304
|
# },
|
290
305
|
# {
|
291
306
|
# "position": 4,
|
307
|
+
# "force_prefix": false,
|
292
308
|
# "value": "o "
|
293
309
|
# },
|
294
310
|
# {
|
295
311
|
# "position": 5,
|
312
|
+
# "force_prefix": false,
|
296
313
|
# "value": " W"
|
297
314
|
# },
|
298
315
|
# {
|
299
316
|
# "position": 6,
|
317
|
+
# "force_prefix": false,
|
300
318
|
# "value": "Wo"
|
301
319
|
# },
|
302
320
|
# {
|
303
321
|
# "position": 7,
|
322
|
+
# "force_prefix": false,
|
304
323
|
# "value": "or"
|
305
324
|
# },
|
306
325
|
# {
|
307
326
|
# "position": 8,
|
327
|
+
# "force_prefix": false,
|
308
328
|
# "value": "rl"
|
309
329
|
# },
|
310
330
|
# {
|
311
331
|
# "position": 9,
|
332
|
+
# "force_prefix": false,
|
312
333
|
# "value": "ld"
|
313
334
|
# },
|
314
335
|
# {
|
315
336
|
# "position": 10,
|
337
|
+
# "force_prefix": false,
|
316
338
|
# "value": "d"
|
317
339
|
# }
|
318
340
|
# ]
|
319
341
|
# ]
|
320
342
|
</pre></div>
|
321
343
|
</div>
|
322
|
-
<p>If normalizer is used, <
|
323
|
-
tokenize method for ASCII characters. <
|
344
|
+
<p>If normalizer is used, <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses white-space-separate like
|
345
|
+
tokenize method for ASCII characters. <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram
|
324
346
|
tokenize method for non-ASCII characters.</p>
|
325
347
|
<p>You may be confused with this combined behavior. But it's reasonable
|
326
348
|
for most use cases such as English text (only ASCII characters) and
|
@@ -332,10 +354,10 @@ the case.</p>
|
|
332
354
|
word separator. Bigram tokenize method is suitable for the case.</p>
|
333
355
|
<p>Mixed tokenize method is suitable for mixed language case.</p>
|
334
356
|
<p>If you want to use bigram tokenize method for ASCII character, see
|
335
|
-
<
|
336
|
-
<a class="reference internal" href="#token-bigram-split-symbol-alpha"><
|
337
|
-
<p>Let's confirm <
|
338
|
-
<p><
|
357
|
+
<code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> type tokenizers such as
|
358
|
+
<a class="reference internal" href="#token-bigram-split-symbol-alpha"><span>TokenBigramSplitSymbolAlpha</span></a>.</p>
|
359
|
+
<p>Let's confirm <code class="docutils literal"><span class="pre">TokenBigram</span></code> behavior by example.</p>
|
360
|
+
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses one or more white-spaces as token delimiter for
|
339
361
|
ASCII characters:</p>
|
340
362
|
<p>Execution example:</p>
|
341
363
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello World" NormalizerAuto
|
@@ -348,23 +370,25 @@ ASCII characters:</p>
|
|
348
370
|
# [
|
349
371
|
# {
|
350
372
|
# "position": 0,
|
373
|
+
# "force_prefix": false,
|
351
374
|
# "value": "hello"
|
352
375
|
# },
|
353
376
|
# {
|
354
377
|
# "position": 1,
|
378
|
+
# "force_prefix": false,
|
355
379
|
# "value": "world"
|
356
380
|
# }
|
357
381
|
# ]
|
358
382
|
# ]
|
359
383
|
</pre></div>
|
360
384
|
</div>
|
361
|
-
<p><
|
385
|
+
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> uses character type change as token delimiter for
|
362
386
|
ASCII characters. Character type is one of them:</p>
|
363
387
|
<blockquote>
|
364
388
|
<div><ul class="simple">
|
365
389
|
<li>Alphabet</li>
|
366
390
|
<li>Digit</li>
|
367
|
-
<li>Symbol (such as <
|
391
|
+
<li>Symbol (such as <code class="docutils literal"><span class="pre">(</span></code>, <code class="docutils literal"><span class="pre">)</span></code> and <code class="docutils literal"><span class="pre">!</span></code>)</li>
|
368
392
|
<li>Hiragana</li>
|
369
393
|
<li>Katakana</li>
|
370
394
|
<li>Kanji</li>
|
@@ -374,8 +398,8 @@ ASCII characters. Character type is one of them:</p>
|
|
374
398
|
<p>The following example shows two token delimiters:</p>
|
375
399
|
<blockquote>
|
376
400
|
<div><ul class="simple">
|
377
|
-
<li>at between <
|
378
|
-
<li>at between <
|
401
|
+
<li>at between <code class="docutils literal"><span class="pre">100</span></code> (digits) and <code class="docutils literal"><span class="pre">cents</span></code> (alphabets)</li>
|
402
|
+
<li>at between <code class="docutils literal"><span class="pre">cents</span></code> (alphabets) and <code class="docutils literal"><span class="pre">!!!</span></code> (symbols)</li>
|
379
403
|
</ul>
|
380
404
|
</div></blockquote>
|
381
405
|
<p>Execution example:</p>
|
@@ -389,21 +413,24 @@ ASCII characters. Character type is one of them:</p>
|
|
389
413
|
# [
|
390
414
|
# {
|
391
415
|
# "position": 0,
|
416
|
+
# "force_prefix": false,
|
392
417
|
# "value": "100"
|
393
418
|
# },
|
394
419
|
# {
|
395
420
|
# "position": 1,
|
421
|
+
# "force_prefix": false,
|
396
422
|
# "value": "cents"
|
397
423
|
# },
|
398
424
|
# {
|
399
425
|
# "position": 2,
|
426
|
+
# "force_prefix": false,
|
400
427
|
# "value": "!!!"
|
401
428
|
# }
|
402
429
|
# ]
|
403
430
|
# ]
|
404
431
|
</pre></div>
|
405
432
|
</div>
|
406
|
-
<p>Here is an example that <
|
433
|
+
<p>Here is an example that <code class="docutils literal"><span class="pre">TokenBigram</span></code> uses bigram tokenize method
|
407
434
|
for non-ASCII characters.</p>
|
408
435
|
<p>Execution example:</p>
|
409
436
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日本語の勉強" NormalizerAuto
|
@@ -416,26 +443,32 @@ for non-ASCII characters.</p>
|
|
416
443
|
# [
|
417
444
|
# {
|
418
445
|
# "position": 0,
|
446
|
+
# "force_prefix": false,
|
419
447
|
# "value": "日本"
|
420
448
|
# },
|
421
449
|
# {
|
422
450
|
# "position": 1,
|
451
|
+
# "force_prefix": false,
|
423
452
|
# "value": "本語"
|
424
453
|
# },
|
425
454
|
# {
|
426
455
|
# "position": 2,
|
456
|
+
# "force_prefix": false,
|
427
457
|
# "value": "語の"
|
428
458
|
# },
|
429
459
|
# {
|
430
460
|
# "position": 3,
|
461
|
+
# "force_prefix": false,
|
431
462
|
# "value": "の勉"
|
432
463
|
# },
|
433
464
|
# {
|
434
465
|
# "position": 4,
|
466
|
+
# "force_prefix": false,
|
435
467
|
# "value": "勉強"
|
436
468
|
# },
|
437
469
|
# {
|
438
470
|
# "position": 5,
|
471
|
+
# "force_prefix": false,
|
439
472
|
# "value": "強"
|
440
473
|
# }
|
441
474
|
# ]
|
@@ -444,9 +477,9 @@ for non-ASCII characters.</p>
|
|
444
477
|
</div>
|
445
478
|
</div>
|
446
479
|
<div class="section" id="tokenbigramsplitsymbol">
|
447
|
-
<span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <
|
448
|
-
<p><
|
449
|
-
difference between them is symbol handling. <
|
480
|
+
<span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="Permalink to this headline">¶</a></h3>
|
481
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The
|
482
|
+
difference between them is symbol handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code>
|
450
483
|
tokenizes symbols by bigram tokenize method:</p>
|
451
484
|
<p>Execution example:</p>
|
452
485
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
|
@@ -459,22 +492,27 @@ tokenizes symbols by bigram tokenize method:</p>
|
|
459
492
|
# [
|
460
493
|
# {
|
461
494
|
# "position": 0,
|
495
|
+
# "force_prefix": false,
|
462
496
|
# "value": "100"
|
463
497
|
# },
|
464
498
|
# {
|
465
499
|
# "position": 1,
|
500
|
+
# "force_prefix": false,
|
466
501
|
# "value": "cents"
|
467
502
|
# },
|
468
503
|
# {
|
469
504
|
# "position": 2,
|
505
|
+
# "force_prefix": false,
|
470
506
|
# "value": "!!"
|
471
507
|
# },
|
472
508
|
# {
|
473
509
|
# "position": 3,
|
510
|
+
# "force_prefix": false,
|
474
511
|
# "value": "!!"
|
475
512
|
# },
|
476
513
|
# {
|
477
514
|
# "position": 4,
|
515
|
+
# "force_prefix": false,
|
478
516
|
# "value": "!"
|
479
517
|
# }
|
480
518
|
# ]
|
@@ -483,10 +521,10 @@ tokenizes symbols by bigram tokenize method:</p>
|
|
483
521
|
</div>
|
484
522
|
</div>
|
485
523
|
<div class="section" id="tokenbigramsplitsymbolalpha">
|
486
|
-
<span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <
|
487
|
-
<p><
|
524
|
+
<span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
|
525
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The
|
488
526
|
difference between them is symbol and alphabet
|
489
|
-
handling. <
|
527
|
+
handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> tokenizes symbols and
|
490
528
|
alphabets by bigram tokenize method:</p>
|
491
529
|
<p>Execution example:</p>
|
492
530
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
|
@@ -499,38 +537,47 @@ alphabets by bigram tokenize method:</p>
|
|
499
537
|
# [
|
500
538
|
# {
|
501
539
|
# "position": 0,
|
540
|
+
# "force_prefix": false,
|
502
541
|
# "value": "100"
|
503
542
|
# },
|
504
543
|
# {
|
505
544
|
# "position": 1,
|
545
|
+
# "force_prefix": false,
|
506
546
|
# "value": "ce"
|
507
547
|
# },
|
508
548
|
# {
|
509
549
|
# "position": 2,
|
550
|
+
# "force_prefix": false,
|
510
551
|
# "value": "en"
|
511
552
|
# },
|
512
553
|
# {
|
513
554
|
# "position": 3,
|
555
|
+
# "force_prefix": false,
|
514
556
|
# "value": "nt"
|
515
557
|
# },
|
516
558
|
# {
|
517
559
|
# "position": 4,
|
560
|
+
# "force_prefix": false,
|
518
561
|
# "value": "ts"
|
519
562
|
# },
|
520
563
|
# {
|
521
564
|
# "position": 5,
|
565
|
+
# "force_prefix": false,
|
522
566
|
# "value": "s!"
|
523
567
|
# },
|
524
568
|
# {
|
525
569
|
# "position": 6,
|
570
|
+
# "force_prefix": false,
|
526
571
|
# "value": "!!"
|
527
572
|
# },
|
528
573
|
# {
|
529
574
|
# "position": 7,
|
575
|
+
# "force_prefix": false,
|
530
576
|
# "value": "!!"
|
531
577
|
# },
|
532
578
|
# {
|
533
579
|
# "position": 8,
|
580
|
+
# "force_prefix": false,
|
534
581
|
# "value": "!"
|
535
582
|
# }
|
536
583
|
# ]
|
@@ -539,10 +586,10 @@ alphabets by bigram tokenize method:</p>
|
|
539
586
|
</div>
|
540
587
|
</div>
|
541
588
|
<div class="section" id="tokenbigramsplitsymbolalphadigit">
|
542
|
-
<span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <
|
543
|
-
<p><
|
544
|
-
<a class="reference internal" href="#token-bigram"><
|
545
|
-
and digit handling. <
|
589
|
+
<span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
|
590
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> is similar to
|
591
|
+
<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The difference between them is symbol, alphabet
|
592
|
+
and digit handling. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> tokenizes
|
546
593
|
symbols, alphabets and digits by bigram tokenize method. It means that
|
547
594
|
all characters are tokenized by bigram tokenize method:</p>
|
548
595
|
<p>Execution example:</p>
|
@@ -556,46 +603,57 @@ all characters are tokenized by bigram tokenize method:</p>
|
|
556
603
|
# [
|
557
604
|
# {
|
558
605
|
# "position": 0,
|
606
|
+
# "force_prefix": false,
|
559
607
|
# "value": "10"
|
560
608
|
# },
|
561
609
|
# {
|
562
610
|
# "position": 1,
|
611
|
+
# "force_prefix": false,
|
563
612
|
# "value": "00"
|
564
613
|
# },
|
565
614
|
# {
|
566
615
|
# "position": 2,
|
616
|
+
# "force_prefix": false,
|
567
617
|
# "value": "0c"
|
568
618
|
# },
|
569
619
|
# {
|
570
620
|
# "position": 3,
|
621
|
+
# "force_prefix": false,
|
571
622
|
# "value": "ce"
|
572
623
|
# },
|
573
624
|
# {
|
574
625
|
# "position": 4,
|
626
|
+
# "force_prefix": false,
|
575
627
|
# "value": "en"
|
576
628
|
# },
|
577
629
|
# {
|
578
630
|
# "position": 5,
|
631
|
+
# "force_prefix": false,
|
579
632
|
# "value": "nt"
|
580
633
|
# },
|
581
634
|
# {
|
582
635
|
# "position": 6,
|
636
|
+
# "force_prefix": false,
|
583
637
|
# "value": "ts"
|
584
638
|
# },
|
585
639
|
# {
|
586
640
|
# "position": 7,
|
641
|
+
# "force_prefix": false,
|
587
642
|
# "value": "s!"
|
588
643
|
# },
|
589
644
|
# {
|
590
645
|
# "position": 8,
|
646
|
+
# "force_prefix": false,
|
591
647
|
# "value": "!!"
|
592
648
|
# },
|
593
649
|
# {
|
594
650
|
# "position": 9,
|
651
|
+
# "force_prefix": false,
|
595
652
|
# "value": "!!"
|
596
653
|
# },
|
597
654
|
# {
|
598
655
|
# "position": 10,
|
656
|
+
# "force_prefix": false,
|
599
657
|
# "value": "!"
|
600
658
|
# }
|
601
659
|
# ]
|
@@ -604,13 +662,13 @@ all characters are tokenized by bigram tokenize method:</p>
|
|
604
662
|
</div>
|
605
663
|
</div>
|
606
664
|
<div class="section" id="tokenbigramignoreblank">
|
607
|
-
<span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <
|
608
|
-
<p><
|
609
|
-
difference between them is blank handling. <
|
665
|
+
<span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="Permalink to this headline">¶</a></h3>
|
666
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The
|
667
|
+
difference between them is blank handling. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>
|
610
668
|
ignores white-spaces in continuous symbols and non-ASCII characters.</p>
|
611
|
-
<p>You can find difference of them by <
|
669
|
+
<p>You can see the difference between them with the <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
|
612
670
|
has symbols and non-ASCII characters.</p>
|
613
|
-
<p>Here is a result by <a class="reference internal" href="#token-bigram"><
|
671
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
|
614
672
|
<p>Execution example:</p>
|
615
673
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
616
674
|
# [
|
@@ -622,33 +680,39 @@ has symbols and non-ASCII characters.</p>
|
|
622
680
|
# [
|
623
681
|
# {
|
624
682
|
# "position": 0,
|
683
|
+
# "force_prefix": false,
|
625
684
|
# "value": "日"
|
626
685
|
# },
|
627
686
|
# {
|
628
687
|
# "position": 1,
|
688
|
+
# "force_prefix": false,
|
629
689
|
# "value": "本"
|
630
690
|
# },
|
631
691
|
# {
|
632
692
|
# "position": 2,
|
693
|
+
# "force_prefix": false,
|
633
694
|
# "value": "語"
|
634
695
|
# },
|
635
696
|
# {
|
636
697
|
# "position": 3,
|
698
|
+
# "force_prefix": false,
|
637
699
|
# "value": "!"
|
638
700
|
# },
|
639
701
|
# {
|
640
702
|
# "position": 4,
|
703
|
+
# "force_prefix": false,
|
641
704
|
# "value": "!"
|
642
705
|
# },
|
643
706
|
# {
|
644
707
|
# "position": 5,
|
708
|
+
# "force_prefix": false,
|
645
709
|
# "value": "!"
|
646
710
|
# }
|
647
711
|
# ]
|
648
712
|
# ]
|
649
713
|
</pre></div>
|
650
714
|
</div>
|
651
|
-
<p>Here is a result by <
|
715
|
+
<p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code>:</p>
|
652
716
|
<p>Execution example:</p>
|
653
717
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
|
654
718
|
# [
|
@@ -660,18 +724,22 @@ has symbols and non-ASCII characters.</p>
|
|
660
724
|
# [
|
661
725
|
# {
|
662
726
|
# "position": 0,
|
727
|
+
# "force_prefix": false,
|
663
728
|
# "value": "日本"
|
664
729
|
# },
|
665
730
|
# {
|
666
731
|
# "position": 1,
|
732
|
+
# "force_prefix": false,
|
667
733
|
# "value": "本語"
|
668
734
|
# },
|
669
735
|
# {
|
670
736
|
# "position": 2,
|
737
|
+
# "force_prefix": false,
|
671
738
|
# "value": "語"
|
672
739
|
# },
|
673
740
|
# {
|
674
741
|
# "position": 3,
|
742
|
+
# "force_prefix": false,
|
675
743
|
# "value": "!!!"
|
676
744
|
# }
|
677
745
|
# ]
|
@@ -680,22 +748,22 @@ has symbols and non-ASCII characters.</p>
|
|
680
748
|
</div>
|
681
749
|
</div>
|
682
750
|
<div class="section" id="tokenbigramignoreblanksplitsymbol">
|
683
|
-
<span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <
|
684
|
-
<p><
|
685
|
-
<a class="reference internal" href="#token-bigram"><
|
751
|
+
<span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="Permalink to this headline">¶</a></h3>
|
752
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> is similar to
|
753
|
+
<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The differences between them are the following:</p>
|
686
754
|
<blockquote>
|
687
755
|
<div><ul class="simple">
|
688
756
|
<li>Blank handling</li>
|
689
757
|
<li>Symbol handling</li>
|
690
758
|
</ul>
|
691
759
|
</div></blockquote>
|
692
|
-
<p><
|
760
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> ignores white-spaces in
|
693
761
|
continuous symbols and non-ASCII characters.</p>
|
694
|
-
<p><
|
762
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> tokenizes symbols by bigram
|
695
763
|
tokenize method.</p>
|
696
|
-
<p>You can find difference of them by <
|
764
|
+
<p>You can see the difference between them with the <code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
|
697
765
|
has symbols and non-ASCII characters.</p>
|
698
|
-
<p>Here is a result by <a class="reference internal" href="#token-bigram"><
|
766
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
|
699
767
|
<p>Execution example:</p>
|
700
768
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
701
769
|
# [
|
@@ -707,33 +775,39 @@ has symbols and non-ASCII characters.</p>
|
|
707
775
|
# [
|
708
776
|
# {
|
709
777
|
# "position": 0,
|
778
|
+
# "force_prefix": false,
|
710
779
|
# "value": "日"
|
711
780
|
# },
|
712
781
|
# {
|
713
782
|
# "position": 1,
|
783
|
+
# "force_prefix": false,
|
714
784
|
# "value": "本"
|
715
785
|
# },
|
716
786
|
# {
|
717
787
|
# "position": 2,
|
788
|
+
# "force_prefix": false,
|
718
789
|
# "value": "語"
|
719
790
|
# },
|
720
791
|
# {
|
721
792
|
# "position": 3,
|
793
|
+
# "force_prefix": false,
|
722
794
|
# "value": "!"
|
723
795
|
# },
|
724
796
|
# {
|
725
797
|
# "position": 4,
|
798
|
+
# "force_prefix": false,
|
726
799
|
# "value": "!"
|
727
800
|
# },
|
728
801
|
# {
|
729
802
|
# "position": 5,
|
803
|
+
# "force_prefix": false,
|
730
804
|
# "value": "!"
|
731
805
|
# }
|
732
806
|
# ]
|
733
807
|
# ]
|
734
808
|
</pre></div>
|
735
809
|
</div>
|
736
|
-
<p>Here is a result by <
|
810
|
+
<p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code>:</p>
|
737
811
|
<p>Execution example:</p>
|
738
812
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
|
739
813
|
# [
|
@@ -745,26 +819,32 @@ has symbols and non-ASCII characters.</p>
|
|
745
819
|
# [
|
746
820
|
# {
|
747
821
|
# "position": 0,
|
822
|
+
# "force_prefix": false,
|
748
823
|
# "value": "日本"
|
749
824
|
# },
|
750
825
|
# {
|
751
826
|
# "position": 1,
|
827
|
+
# "force_prefix": false,
|
752
828
|
# "value": "本語"
|
753
829
|
# },
|
754
830
|
# {
|
755
831
|
# "position": 2,
|
832
|
+
# "force_prefix": false,
|
756
833
|
# "value": "語!"
|
757
834
|
# },
|
758
835
|
# {
|
759
836
|
# "position": 3,
|
837
|
+
# "force_prefix": false,
|
760
838
|
# "value": "!!"
|
761
839
|
# },
|
762
840
|
# {
|
763
841
|
# "position": 4,
|
842
|
+
# "force_prefix": false,
|
764
843
|
# "value": "!!"
|
765
844
|
# },
|
766
845
|
# {
|
767
846
|
# "position": 5,
|
847
|
+
# "force_prefix": false,
|
768
848
|
# "value": "!"
|
769
849
|
# }
|
770
850
|
# ]
|
@@ -773,22 +853,22 @@ has symbols and non-ASCII characters.</p>
|
|
773
853
|
</div>
|
774
854
|
</div>
|
775
855
|
<div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
|
776
|
-
<span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <
|
777
|
-
<p><
|
778
|
-
<a class="reference internal" href="#token-bigram"><
|
856
|
+
<span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="Permalink to this headline">¶</a></h3>
|
857
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> is similar to
|
858
|
+
<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The differences between them are the following:</p>
|
779
859
|
<blockquote>
|
780
860
|
<div><ul class="simple">
|
781
861
|
<li>Blank handling</li>
|
782
862
|
<li>Symbol and alphabet handling</li>
|
783
863
|
</ul>
|
784
864
|
</div></blockquote>
|
785
|
-
<p><
|
865
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> ignores white-spaces in
|
786
866
|
continuous symbols and non-ASCII characters.</p>
|
787
|
-
<p><
|
867
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> tokenizes symbols and
|
788
868
|
alphabets by bigram tokenize method.</p>
|
789
|
-
<p>You can find difference of them by <
|
869
|
+
<p>You can see the difference between them with the <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> text because it
|
790
870
|
has symbols and non-ASCII characters with white spaces and alphabets.</p>
|
791
|
-
<p>Here is a result by <a class="reference internal" href="#token-bigram"><
|
871
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
|
792
872
|
<p>Execution example:</p>
|
793
873
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
|
794
874
|
# [
|
@@ -800,37 +880,44 @@ has symbols and non-ASCII characters with white spaces and alphabets.</p>
|
|
800
880
|
# [
|
801
881
|
# {
|
802
882
|
# "position": 0,
|
883
|
+
# "force_prefix": false,
|
803
884
|
# "value": "hello"
|
804
885
|
# },
|
805
886
|
# {
|
806
887
|
# "position": 1,
|
888
|
+
# "force_prefix": false,
|
807
889
|
# "value": "日"
|
808
890
|
# },
|
809
891
|
# {
|
810
892
|
# "position": 2,
|
893
|
+
# "force_prefix": false,
|
811
894
|
# "value": "本"
|
812
895
|
# },
|
813
896
|
# {
|
814
897
|
# "position": 3,
|
898
|
+
# "force_prefix": false,
|
815
899
|
# "value": "語"
|
816
900
|
# },
|
817
901
|
# {
|
818
902
|
# "position": 4,
|
903
|
+
# "force_prefix": false,
|
819
904
|
# "value": "!"
|
820
905
|
# },
|
821
906
|
# {
|
822
907
|
# "position": 5,
|
908
|
+
# "force_prefix": false,
|
823
909
|
# "value": "!"
|
824
910
|
# },
|
825
911
|
# {
|
826
912
|
# "position": 6,
|
913
|
+
# "force_prefix": false,
|
827
914
|
# "value": "!"
|
828
915
|
# }
|
829
916
|
# ]
|
830
917
|
# ]
|
831
918
|
</pre></div>
|
832
919
|
</div>
|
833
|
-
<p>Here is a result by <
|
920
|
+
<p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code>:</p>
|
834
921
|
<p>Execution example:</p>
|
835
922
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
|
836
923
|
# [
|
@@ -842,46 +929,57 @@ has symbols and non-ASCII characters with white spaces and alphabets.</p>
|
|
842
929
|
# [
|
843
930
|
# {
|
844
931
|
# "position": 0,
|
932
|
+
# "force_prefix": false,
|
845
933
|
# "value": "he"
|
846
934
|
# },
|
847
935
|
# {
|
848
936
|
# "position": 1,
|
937
|
+
# "force_prefix": false,
|
849
938
|
# "value": "el"
|
850
939
|
# },
|
851
940
|
# {
|
852
941
|
# "position": 2,
|
942
|
+
# "force_prefix": false,
|
853
943
|
# "value": "ll"
|
854
944
|
# },
|
855
945
|
# {
|
856
946
|
# "position": 3,
|
947
|
+
# "force_prefix": false,
|
857
948
|
# "value": "lo"
|
858
949
|
# },
|
859
950
|
# {
|
860
951
|
# "position": 4,
|
952
|
+
# "force_prefix": false,
|
861
953
|
# "value": "o日"
|
862
954
|
# },
|
863
955
|
# {
|
864
956
|
# "position": 5,
|
957
|
+
# "force_prefix": false,
|
865
958
|
# "value": "日本"
|
866
959
|
# },
|
867
960
|
# {
|
868
961
|
# "position": 6,
|
962
|
+
# "force_prefix": false,
|
869
963
|
# "value": "本語"
|
870
964
|
# },
|
871
965
|
# {
|
872
966
|
# "position": 7,
|
967
|
+
# "force_prefix": false,
|
873
968
|
# "value": "語!"
|
874
969
|
# },
|
875
970
|
# {
|
876
971
|
# "position": 8,
|
972
|
+
# "force_prefix": false,
|
877
973
|
# "value": "!!"
|
878
974
|
# },
|
879
975
|
# {
|
880
976
|
# "position": 9,
|
977
|
+
# "force_prefix": false,
|
881
978
|
# "value": "!!"
|
882
979
|
# },
|
883
980
|
# {
|
884
981
|
# "position": 10,
|
982
|
+
# "force_prefix": false,
|
885
983
|
# "value": "!"
|
886
984
|
# }
|
887
985
|
# ]
|
@@ -890,24 +988,24 @@ has symbols and non-ASCII characters with white spaces and alphabets.</p>
|
|
890
988
|
</div>
|
891
989
|
</div>
|
892
990
|
<div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
|
893
|
-
<span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <
|
894
|
-
<p><
|
895
|
-
<a class="reference internal" href="#token-bigram"><
|
991
|
+
<span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="Permalink to this headline">¶</a></h3>
|
992
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> is similar to
|
993
|
+
<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The differences between them are the following:</p>
|
896
994
|
<blockquote>
|
897
995
|
<div><ul class="simple">
|
898
996
|
<li>Blank handling</li>
|
899
997
|
<li>Symbol, alphabet and digit handling</li>
|
900
998
|
</ul>
|
901
999
|
</div></blockquote>
|
902
|
-
<p><
|
1000
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> ignores white-spaces
|
903
1001
|
in continuous symbols and non-ASCII characters.</p>
|
904
|
-
<p><
|
1002
|
+
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> tokenizes symbols,
|
905
1003
|
alphabets and digits by bigram tokenize method. It means that all
|
906
1004
|
characters are tokenized by bigram tokenize method.</p>
|
907
|
-
<p>You can find difference of them by <
|
1005
|
+
<p>You can see the difference between them with the <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> text
|
908
1006
|
because it has symbols and non-ASCII characters with white spaces,
|
909
1007
|
alphabets and digits.</p>
|
910
|
-
<p>Here is a result by <a class="reference internal" href="#token-bigram"><
|
1008
|
+
<p>Here is a result by <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> :</p>
|
911
1009
|
<p>Execution example:</p>
|
912
1010
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
913
1011
|
# [
|
@@ -919,41 +1017,49 @@ alphabets and digits.</p>
|
|
919
1017
|
# [
|
920
1018
|
# {
|
921
1019
|
# "position": 0,
|
1020
|
+
# "force_prefix": false,
|
922
1021
|
# "value": "hello"
|
923
1022
|
# },
|
924
1023
|
# {
|
925
1024
|
# "position": 1,
|
1025
|
+
# "force_prefix": false,
|
926
1026
|
# "value": "日"
|
927
1027
|
# },
|
928
1028
|
# {
|
929
1029
|
# "position": 2,
|
1030
|
+
# "force_prefix": false,
|
930
1031
|
# "value": "本"
|
931
1032
|
# },
|
932
1033
|
# {
|
933
1034
|
# "position": 3,
|
1035
|
+
# "force_prefix": false,
|
934
1036
|
# "value": "語"
|
935
1037
|
# },
|
936
1038
|
# {
|
937
1039
|
# "position": 4,
|
1040
|
+
# "force_prefix": false,
|
938
1041
|
# "value": "!"
|
939
1042
|
# },
|
940
1043
|
# {
|
941
1044
|
# "position": 5,
|
1045
|
+
# "force_prefix": false,
|
942
1046
|
# "value": "!"
|
943
1047
|
# },
|
944
1048
|
# {
|
945
1049
|
# "position": 6,
|
1050
|
+
# "force_prefix": false,
|
946
1051
|
# "value": "!"
|
947
1052
|
# },
|
948
1053
|
# {
|
949
1054
|
# "position": 7,
|
1055
|
+
# "force_prefix": false,
|
950
1056
|
# "value": "777"
|
951
1057
|
# }
|
952
1058
|
# ]
|
953
1059
|
# ]
|
954
1060
|
</pre></div>
|
955
1061
|
</div>
|
956
|
-
<p>Here is a result by <
|
1062
|
+
<p>Here is a result by <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code>:</p>
|
957
1063
|
<p>Execution example:</p>
|
958
1064
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
959
1065
|
# [
|
@@ -965,58 +1071,72 @@ alphabets and digits.</p>
|
|
965
1071
|
# [
|
966
1072
|
# {
|
967
1073
|
# "position": 0,
|
1074
|
+
# "force_prefix": false,
|
968
1075
|
# "value": "he"
|
969
1076
|
# },
|
970
1077
|
# {
|
971
1078
|
# "position": 1,
|
1079
|
+
# "force_prefix": false,
|
972
1080
|
# "value": "el"
|
973
1081
|
# },
|
974
1082
|
# {
|
975
1083
|
# "position": 2,
|
1084
|
+
# "force_prefix": false,
|
976
1085
|
# "value": "ll"
|
977
1086
|
# },
|
978
1087
|
# {
|
979
1088
|
# "position": 3,
|
1089
|
+
# "force_prefix": false,
|
980
1090
|
# "value": "lo"
|
981
1091
|
# },
|
982
1092
|
# {
|
983
1093
|
# "position": 4,
|
1094
|
+
# "force_prefix": false,
|
984
1095
|
# "value": "o日"
|
985
1096
|
# },
|
986
1097
|
# {
|
987
1098
|
# "position": 5,
|
1099
|
+
# "force_prefix": false,
|
988
1100
|
# "value": "日本"
|
989
1101
|
# },
|
990
1102
|
# {
|
991
1103
|
# "position": 6,
|
1104
|
+
# "force_prefix": false,
|
992
1105
|
# "value": "本語"
|
993
1106
|
# },
|
994
1107
|
# {
|
995
1108
|
# "position": 7,
|
1109
|
+
# "force_prefix": false,
|
996
1110
|
# "value": "語!"
|
997
1111
|
# },
|
998
1112
|
# {
|
999
1113
|
# "position": 8,
|
1114
|
+
# "force_prefix": false,
|
1000
1115
|
# "value": "!!"
|
1001
1116
|
# },
|
1002
1117
|
# {
|
1003
1118
|
# "position": 9,
|
1119
|
+
# "force_prefix": false,
|
1004
1120
|
# "value": "!!"
|
1005
1121
|
# },
|
1006
1122
|
# {
|
1007
1123
|
# "position": 10,
|
1124
|
+
# "force_prefix": false,
|
1008
1125
|
# "value": "!7"
|
1009
1126
|
# },
|
1010
1127
|
# {
|
1011
1128
|
# "position": 11,
|
1129
|
+
# "force_prefix": false,
|
1012
1130
|
# "value": "77"
|
1013
1131
|
# },
|
1014
1132
|
# {
|
1015
1133
|
# "position": 12,
|
1134
|
+
# "force_prefix": false,
|
1016
1135
|
# "value": "77"
|
1017
1136
|
# },
|
1018
1137
|
# {
|
1019
1138
|
# "position": 13,
|
1139
|
+
# "force_prefix": false,
|
1020
1140
|
# "value": "7"
|
1021
1141
|
# }
|
1022
1142
|
# ]
|
@@ -1025,10 +1145,10 @@ alphabets and digits.</p>
|
|
1025
1145
|
</div>
|
1026
1146
|
</div>
|
1027
1147
|
<div class="section" id="tokenunigram">
|
1028
|
-
<span id="token-unigram"></span><h3>7.8.3.9. <
|
1029
|
-
<p><
|
1030
|
-
between them is token unit. <a class="reference internal" href="#token-bigram"><
|
1031
|
-
token. <
|
1148
|
+
<span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="Permalink to this headline">¶</a></h3>
|
1149
|
+
<p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The difference
|
1150
|
+
between them is token unit. <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> uses 2 characters per
|
1151
|
+
token. <code class="docutils literal"><span class="pre">TokenUnigram</span></code> uses 1 character per token.</p>
|
1032
1152
|
<p>Execution example:</p>
|
1033
1153
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenUnigram "100cents!!!" NormalizerAuto
|
1034
1154
|
# [
|
@@ -1040,14 +1160,17 @@ token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> u
|
|
1040
1160
|
# [
|
1041
1161
|
# {
|
1042
1162
|
# "position": 0,
|
1163
|
+
# "force_prefix": false,
|
1043
1164
|
# "value": "100"
|
1044
1165
|
# },
|
1045
1166
|
# {
|
1046
1167
|
# "position": 1,
|
1168
|
+
# "force_prefix": false,
|
1047
1169
|
# "value": "cents"
|
1048
1170
|
# },
|
1049
1171
|
# {
|
1050
1172
|
# "position": 2,
|
1173
|
+
# "force_prefix": false,
|
1051
1174
|
# "value": "!!!"
|
1052
1175
|
# }
|
1053
1176
|
# ]
|
@@ -1056,10 +1179,10 @@ token. <tt class="docutils literal"><span class="pre">TokenUnigram</span></tt> u
|
|
1056
1179
|
</div>
|
1057
1180
|
</div>
|
1058
1181
|
<div class="section" id="tokentrigram">
|
1059
|
-
<span id="token-trigram"></span><h3>7.8.3.10. <
|
1060
|
-
<p><
|
1061
|
-
between them is token unit. <a class="reference internal" href="#token-bigram"><
|
1062
|
-
token. <
|
1182
|
+
<span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="Permalink to this headline">¶</a></h3>
|
1183
|
+
<p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> is similar to <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a>. The difference
|
1184
|
+
between them is token unit. <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> uses 2 characters per
|
1185
|
+
token. <code class="docutils literal"><span class="pre">TokenTrigram</span></code> uses 3 characters per token.</p>
|
1063
1186
|
<p>Execution example:</p>
|
1064
1187
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
|
1065
1188
|
# [
|
@@ -1071,14 +1194,17 @@ token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> u
|
|
1071
1194
|
# [
|
1072
1195
|
# {
|
1073
1196
|
# "position": 0,
|
1197
|
+
# "force_prefix": false,
|
1074
1198
|
# "value": "10000"
|
1075
1199
|
# },
|
1076
1200
|
# {
|
1077
1201
|
# "position": 1,
|
1202
|
+
# "force_prefix": false,
|
1078
1203
|
# "value": "cents"
|
1079
1204
|
# },
|
1080
1205
|
# {
|
1081
1206
|
# "position": 2,
|
1207
|
+
# "force_prefix": false,
|
1082
1208
|
# "value": "!!!!!"
|
1083
1209
|
# }
|
1084
1210
|
# ]
|
@@ -1087,14 +1213,14 @@ token. <tt class="docutils literal"><span class="pre">TokenTrigram</span></tt> u
|
|
1087
1213
|
</div>
|
1088
1214
|
</div>
|
1089
1215
|
<div class="section" id="tokendelimit">
|
1090
|
-
<span id="token-delimit"></span><h3>7.8.3.11. <
|
1091
|
-
<p><
|
1092
|
-
characters (<
|
1093
|
-
<
|
1094
|
-
<p><
|
1095
|
-
and <
|
1096
|
-
<span class="pre">full-text-search</span> <span class="pre">http</span></
|
1097
|
-
<p>Here is an example of <
|
1216
|
+
<span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="Permalink to this headline">¶</a></h3>
|
1217
|
+
<p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> extracts tokens by splitting on one or more space
|
1218
|
+
characters (<code class="docutils literal"><span class="pre">U+0020</span></code>). For example, <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> is tokenized to
|
1219
|
+
<code class="docutils literal"><span class="pre">Hello</span></code> and <code class="docutils literal"><span class="pre">World</span></code>.</p>
|
1220
|
+
<p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> is suitable for tag text. You can extract <code class="docutils literal"><span class="pre">groonga</span></code>
|
1221
|
+
and <code class="docutils literal"><span class="pre">full-text-search</span></code> and <code class="docutils literal"><span class="pre">http</span></code> as tags from <code class="docutils literal"><span class="pre">groonga</span>
|
1222
|
+
<span class="pre">full-text-search</span> <span class="pre">http</span></code>.</p>
|
1223
|
+
<p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimit</span></code>:</p>
|
1098
1224
|
<p>Execution example:</p>
|
1099
1225
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
|
1100
1226
|
# [
|
@@ -1106,14 +1232,17 @@ and <tt class="docutils literal"><span class="pre">full-text-search</span></tt>
|
|
1106
1232
|
# [
|
1107
1233
|
# {
|
1108
1234
|
# "position": 0,
|
1235
|
+
# "force_prefix": false,
|
1109
1236
|
# "value": "groonga"
|
1110
1237
|
# },
|
1111
1238
|
# {
|
1112
1239
|
# "position": 1,
|
1240
|
+
# "force_prefix": false,
|
1113
1241
|
# "value": "full-text-search"
|
1114
1242
|
# },
|
1115
1243
|
# {
|
1116
1244
|
# "position": 2,
|
1245
|
+
# "force_prefix": false,
|
1117
1246
|
# "value": "http"
|
1118
1247
|
# }
|
1119
1248
|
# ]
|
@@ -1122,13 +1251,13 @@ and <tt class="docutils literal"><span class="pre">full-text-search</span></tt>
|
|
1122
1251
|
</div>
|
1123
1252
|
</div>
|
1124
1253
|
<div class="section" id="tokendelimitnull">
|
1125
|
-
<span id="token-delimit-null"></span><h3>7.8.3.12. <
|
1126
|
-
<p><
|
1127
|
-
difference between them is separator character. <a class="reference internal" href="#token-delimit"><
|
1128
|
-
uses space character (<
|
1129
|
-
character (<
|
1130
|
-
<p><
|
1131
|
-
<p>Here is an example of <
|
1254
|
+
<span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="Permalink to this headline">¶</a></h3>
|
1255
|
+
<p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is similar to <a class="reference internal" href="#token-delimit"><span>TokenDelimit</span></a>. The
|
1256
|
+
difference between them is separator character. <a class="reference internal" href="#token-delimit"><span>TokenDelimit</span></a>
|
1257
|
+
uses space character (<code class="docutils literal"><span class="pre">U+0020</span></code>) but <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> uses NUL
|
1258
|
+
character (<code class="docutils literal"><span class="pre">U+0000</span></code>).</p>
|
1259
|
+
<p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> is also suitable for tag text.</p>
|
1260
|
+
<p>Here is an example of <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code>:</p>
|
1132
1261
|
<p>Execution example:</p>
|
1133
1262
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
|
1134
1263
|
# [
|
@@ -1140,6 +1269,7 @@ character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</
|
|
1140
1269
|
# [
|
1141
1270
|
# {
|
1142
1271
|
# "position": 0,
|
1272
|
+
# "force_prefix": false,
|
1143
1273
|
# "value": "groongau0000full-text-searchu0000http"
|
1144
1274
|
# }
|
1145
1275
|
# ]
|
@@ -1148,23 +1278,23 @@ character (<tt class="docutils literal"><span class="pre">U+0000</span></tt>).</
|
|
1148
1278
|
</div>
|
1149
1279
|
</div>
|
1150
1280
|
<div class="section" id="tokenmecab">
|
1151
|
-
<span id="token-mecab"></span><h3>7.8.3.13. <
|
1152
|
-
<p><
|
1281
|
+
<span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="Permalink to this headline">¶</a></h3>
|
1282
|
+
<p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is a tokenizer based on <a class="reference external" href="http://mecab.sourceforge.net/">MeCab</a> part-of-speech and
|
1153
1283
|
morphological analyzer.</p>
|
1154
1284
|
<p>MeCab doesn't depend on Japanese. You can use MeCab for other
|
1155
1285
|
languages by creating dictionary for the languages. You can use <a class="reference external" href="http://osdn.jp/projects/naist-jdic/">NAIST
|
1156
1286
|
Japanese Dictionary</a>
|
1157
1287
|
for Japanese.</p>
|
1158
|
-
<p><
|
1159
|
-
<
|
1160
|
-
<a class="reference internal" href="#token-bigram"><
|
1161
|
-
<
|
1288
|
+
<p><code class="docutils literal"><span class="pre">TokenMecab</span></code> is good for precision rather than recall. You can find
|
1289
|
+
<code class="docutils literal"><span class="pre">東京都</span></code> and <code class="docutils literal"><span class="pre">京都</span></code> texts by <code class="docutils literal"><span class="pre">京都</span></code> query with
|
1290
|
+
<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> but <code class="docutils literal"><span class="pre">東京都</span></code> isn't expected. You can find only
|
1291
|
+
<code class="docutils literal"><span class="pre">京都</span></code> text by <code class="docutils literal"><span class="pre">京都</span></code> query with <code class="docutils literal"><span class="pre">TokenMecab</span></code>.</p>
|
1162
1292
|
<p>If you want to support neologisms, you need to keep updating your
|
1163
|
-
MeCab dictionary. It needs maintain cost. (<a class="reference internal" href="#token-bigram"><
|
1164
|
-
require dictionary maintenance because <a class="reference internal" href="#token-bigram"><
|
1293
|
+
MeCab dictionary. It incurs a maintenance cost. (<a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> doesn't
|
1294
|
+
require dictionary maintenance because <a class="reference internal" href="#token-bigram"><span>TokenBigram</span></a> doesn't use
|
1165
1295
|
dictionary.) <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> may help you.</p>
|
1166
|
-
<p>Here is an example of <
|
1167
|
-
and <
|
1296
|
+
<p>Here is an example of <code class="docutils literal"><span class="pre">TokenMecab</span></code>. <code class="docutils literal"><span class="pre">東京都</span></code> is tokenized to <code class="docutils literal"><span class="pre">東京</span></code>
|
1297
|
+
and <code class="docutils literal"><span class="pre">都</span></code>. They don't include <code class="docutils literal"><span class="pre">京都</span></code>:</p>
|
1168
1298
|
<p>Execution example:</p>
|
1169
1299
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenMecab "東京都"
|
1170
1300
|
# [
|
@@ -1176,10 +1306,12 @@ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't i
|
|
1176
1306
|
# [
|
1177
1307
|
# {
|
1178
1308
|
# "position": 0,
|
1309
|
+
# "force_prefix": false,
|
1179
1310
|
# "value": "東京"
|
1180
1311
|
# },
|
1181
1312
|
# {
|
1182
1313
|
# "position": 1,
|
1314
|
+
# "force_prefix": false,
|
1183
1315
|
# "value": "都"
|
1184
1316
|
# }
|
1185
1317
|
# ]
|
@@ -1188,7 +1320,7 @@ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't i
|
|
1188
1320
|
</div>
|
1189
1321
|
</div>
|
1190
1322
|
<div class="section" id="tokenregexp">
|
1191
|
-
<span id="token-regexp"></span><h3>7.8.3.14. <
|
1323
|
+
<span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="Permalink to this headline">¶</a></h3>
|
1192
1324
|
<div class="versionadded">
|
1193
1325
|
<p><span class="versionmodified">New in version 5.0.1.</span></p>
|
1194
1326
|
</div>
|
@@ -1201,21 +1333,21 @@ and <tt class="docutils literal"><span class="pre">都</span></tt>. They don't i
|
|
1201
1333
|
<p class="last">This tokenizer can be used only with UTF-8. You can't use this
|
1202
1334
|
tokenizer with EUC-JP, Shift_JIS and so on.</p>
|
1203
1335
|
</div>
|
1204
|
-
<p><
|
1336
|
+
<p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is a tokenizer for supporting regular expression
|
1205
1337
|
search by index.</p>
|
1206
1338
|
<p>In general, regular expression search is evaluated as sequential
|
1207
1339
|
search. But the following cases can be evaluated as index search:</p>
|
1208
1340
|
<blockquote>
|
1209
1341
|
<div><ul class="simple">
|
1210
|
-
<li>Literal only case such as <
|
1211
|
-
<li>The beginning of text and literal case such as <
|
1212
|
-
<li>The end of text and literal case such as <
|
1342
|
+
<li>Literal only case such as <code class="docutils literal"><span class="pre">hello</span></code></li>
|
1343
|
+
<li>The beginning of text and literal case such as <code class="docutils literal"><span class="pre">\A/home/alice</span></code></li>
|
1344
|
+
<li>The end of text and literal case such as <code class="docutils literal"><span class="pre">\.txt\z</span></code></li>
|
1213
1345
|
</ul>
|
1214
1346
|
</div></blockquote>
|
1215
1347
|
<p>In most cases, index search is faster than sequential search.</p>
|
1216
|
-
<p><
|
1217
|
-
adds the beginning of text mark (<
|
1218
|
-
and the end of text mark (<
|
1348
|
+
<p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> is based on bigram tokenize method. <code class="docutils literal"><span class="pre">TokenRegexp</span></code>
|
1349
|
+
adds the beginning of text mark (<code class="docutils literal"><span class="pre">U+FFEF</span></code>) at the beginning of text
|
1350
|
+
and the end of text mark (<code class="docutils literal"><span class="pre">U+FFF0</span></code>) to the end of text when you
|
1219
1351
|
index text:</p>
|
1220
1352
|
<p>Execution example:</p>
|
1221
1353
|
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
|
@@ -1228,194 +1360,112 @@ index text:</p>
|
|
1228
1360
|
# [
|
1229
1361
|
# {
|
1230
1362
|
# "position": 0,
|
1363
|
+
# "force_prefix": false,
|
1231
1364
|
# "value": ""
|
1232
1365
|
# },
|
1233
1366
|
# {
|
1234
1367
|
# "position": 1,
|
1368
|
+
# "force_prefix": false,
|
1235
1369
|
# "value": "/h"
|
1236
1370
|
# },
|
1237
1371
|
# {
|
1238
1372
|
# "position": 2,
|
1373
|
+
# "force_prefix": false,
|
1239
1374
|
# "value": "ho"
|
1240
1375
|
# },
|
1241
1376
|
# {
|
1242
1377
|
# "position": 3,
|
1378
|
+
# "force_prefix": false,
|
1243
1379
|
# "value": "om"
|
1244
1380
|
# },
|
1245
1381
|
# {
|
1246
1382
|
# "position": 4,
|
1383
|
+
# "force_prefix": false,
|
1247
1384
|
# "value": "me"
|
1248
1385
|
# },
|
1249
1386
|
# {
|
1250
1387
|
# "position": 5,
|
1388
|
+
# "force_prefix": false,
|
1251
1389
|
# "value": "e/"
|
1252
1390
|
# },
|
1253
1391
|
# {
|
1254
1392
|
# "position": 6,
|
1393
|
+
# "force_prefix": false,
|
1255
1394
|
# "value": "/a"
|
1256
1395
|
# },
|
1257
1396
|
# {
|
1258
1397
|
# "position": 7,
|
1398
|
+
# "force_prefix": false,
|
1259
1399
|
# "value": "al"
|
1260
1400
|
# },
|
1261
1401
|
# {
|
1262
1402
|
# "position": 8,
|
1403
|
+
# "force_prefix": false,
|
1263
1404
|
# "value": "li"
|
1264
1405
|
# },
|
1265
1406
|
# {
|
1266
1407
|
# "position": 9,
|
1408
|
+
# "force_prefix": false,
|
1267
1409
|
# "value": "ic"
|
1268
1410
|
# },
|
1269
1411
|
# {
|
1270
1412
|
# "position": 10,
|
1413
|
+
# "force_prefix": false,
|
1271
1414
|
# "value": "ce"
|
1272
1415
|
# },
|
1273
1416
|
# {
|
1274
1417
|
# "position": 11,
|
1418
|
+
# "force_prefix": false,
|
1275
1419
|
# "value": "e/"
|
1276
1420
|
# },
|
1277
1421
|
# {
|
1278
1422
|
# "position": 12,
|
1423
|
+
# "force_prefix": false,
|
1279
1424
|
# "value": "/t"
|
1280
1425
|
# },
|
1281
1426
|
# {
|
1282
1427
|
# "position": 13,
|
1428
|
+
# "force_prefix": false,
|
1283
1429
|
# "value": "te"
|
1284
1430
|
# },
|
1285
1431
|
# {
|
1286
1432
|
# "position": 14,
|
1433
|
+
# "force_prefix": false,
|
1287
1434
|
# "value": "es"
|
1288
1435
|
# },
|
1289
1436
|
# {
|
1290
1437
|
# "position": 15,
|
1438
|
+
# "force_prefix": false,
|
1291
1439
|
# "value": "st"
|
1292
1440
|
# },
|
1293
1441
|
# {
|
1294
1442
|
# "position": 16,
|
1443
|
+
# "force_prefix": false,
|
1295
1444
|
# "value": "t."
|
1296
1445
|
# },
|
1297
1446
|
# {
|
1298
1447
|
# "position": 17,
|
1448
|
+
# "force_prefix": false,
|
1299
1449
|
# "value": ".t"
|
1300
1450
|
# },
|
1301
1451
|
# {
|
1302
1452
|
# "position": 18,
|
1453
|
+
# "force_prefix": false,
|
1303
1454
|
# "value": "tx"
|
1304
1455
|
# },
|
1305
1456
|
# {
|
1306
1457
|
# "position": 19,
|
1458
|
+
# "force_prefix": false,
|
1307
1459
|
# "value": "xt"
|
1308
1460
|
# },
|
1309
1461
|
# {
|
1310
1462
|
# "position": 20,
|
1463
|
+
# "force_prefix": false,
|
1311
1464
|
# "value": "t"
|
1312
1465
|
# },
|
1313
1466
|
# {
|
1314
1467
|
# "position": 21,
|
1315
|
-
# "
|
1316
|
-
# }
|
1317
|
-
# ]
|
1318
|
-
# ]
|
1319
|
-
</pre></div>
|
1320
|
-
</div>
|
1321
|
-
<p>The beginning of text mark is used for the beginning of text search by
|
1322
|
-
<tt class="docutils literal"><span class="pre">\A</span></tt>. If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query,
|
1323
|
-
<tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds the beginning of text mark (<tt class="docutils literal"><span class="pre">U+FFEF</span></tt>) as the
|
1324
|
-
first token. The beginning of text mark must be appeared at the first,
|
1325
|
-
you can get results of the beginning of text search.</p>
|
1326
|
-
<p>Execution example:</p>
|
1327
|
-
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\A/home/alice/" NormalizerAuto --mode GET
|
1328
|
-
# [
|
1329
|
-
# [
|
1330
|
-
# 0,
|
1331
|
-
# 1337566253.89858,
|
1332
|
-
# 0.000355720520019531
|
1333
|
-
# ],
|
1334
|
-
# [
|
1335
|
-
# {
|
1336
|
-
# "position": 0,
|
1337
|
-
# "value": ""
|
1338
|
-
# },
|
1339
|
-
# {
|
1340
|
-
# "position": 1,
|
1341
|
-
# "value": "/h"
|
1342
|
-
# },
|
1343
|
-
# {
|
1344
|
-
# "position": 2,
|
1345
|
-
# "value": "ho"
|
1346
|
-
# },
|
1347
|
-
# {
|
1348
|
-
# "position": 3,
|
1349
|
-
# "value": "om"
|
1350
|
-
# },
|
1351
|
-
# {
|
1352
|
-
# "position": 4,
|
1353
|
-
# "value": "me"
|
1354
|
-
# },
|
1355
|
-
# {
|
1356
|
-
# "position": 5,
|
1357
|
-
# "value": "e/"
|
1358
|
-
# },
|
1359
|
-
# {
|
1360
|
-
# "position": 6,
|
1361
|
-
# "value": "/a"
|
1362
|
-
# },
|
1363
|
-
# {
|
1364
|
-
# "position": 7,
|
1365
|
-
# "value": "al"
|
1366
|
-
# },
|
1367
|
-
# {
|
1368
|
-
# "position": 8,
|
1369
|
-
# "value": "li"
|
1370
|
-
# },
|
1371
|
-
# {
|
1372
|
-
# "position": 9,
|
1373
|
-
# "value": "ic"
|
1374
|
-
# },
|
1375
|
-
# {
|
1376
|
-
# "position": 10,
|
1377
|
-
# "value": "ce"
|
1378
|
-
# },
|
1379
|
-
# {
|
1380
|
-
# "position": 11,
|
1381
|
-
# "value": "e/"
|
1382
|
-
# }
|
1383
|
-
# ]
|
1384
|
-
# ]
|
1385
|
-
</pre></div>
|
1386
|
-
</div>
|
1387
|
-
<p>The end of text mark is used for the end of text search by <tt class="docutils literal"><span class="pre">\z</span></tt>.
|
1388
|
-
If you use <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> for tokenizing query, <tt class="docutils literal"><span class="pre">TokenRegexp</span></tt> adds
|
1389
|
-
the end of text mark (<tt class="docutils literal"><span class="pre">U+FFF0</span></tt>) as the last token. The end of text
|
1390
|
-
mark must be appeared at the end, you can get results of the end of
|
1391
|
-
text search.</p>
|
1392
|
-
<p>Execution example:</p>
|
1393
|
-
<div class="highlight-none"><div class="highlight"><pre>tokenize TokenRegexp "\\.txt\\z" NormalizerAuto --mode GET
|
1394
|
-
# [
|
1395
|
-
# [
|
1396
|
-
# 0,
|
1397
|
-
# 1337566253.89858,
|
1398
|
-
# 0.000355720520019531
|
1399
|
-
# ],
|
1400
|
-
# [
|
1401
|
-
# {
|
1402
|
-
# "position": 0,
|
1403
|
-
# "value": "\\."
|
1404
|
-
# },
|
1405
|
-
# {
|
1406
|
-
# "position": 1,
|
1407
|
-
# "value": ".t"
|
1408
|
-
# },
|
1409
|
-
# {
|
1410
|
-
# "position": 2,
|
1411
|
-
# "value": "tx"
|
1412
|
-
# },
|
1413
|
-
# {
|
1414
|
-
# "position": 3,
|
1415
|
-
# "value": "xt"
|
1416
|
-
# },
|
1417
|
-
# {
|
1418
|
-
# "position": 5,
|
1468
|
+
# "force_prefix": false,
|
1419
1469
|
# "value": ""
|
1420
1470
|
# }
|
1421
1471
|
# ]
|
@@ -1430,7 +1480,7 @@ text search.</p>
|
|
1430
1480
|
</div>
|
1431
1481
|
</div>
|
1432
1482
|
</div>
|
1433
|
-
<div class="sphinxsidebar">
|
1483
|
+
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
1434
1484
|
<div class="sphinxsidebarwrapper">
|
1435
1485
|
<h3><a href="../index.html">Table Of Contents</a></h3>
|
1436
1486
|
<ul>
|
@@ -1438,20 +1488,20 @@ text search.</p>
|
|
1438
1488
|
<li><a class="reference internal" href="#summary">7.8.1. Summary</a></li>
|
1439
1489
|
<li><a class="reference internal" href="#what-is-tokenize">7.8.2. What is "tokenize"?</a></li>
|
1440
1490
|
<li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. Built-in tokenizers</a><ul>
|
1441
|
-
<li><a class="reference internal" href="#tokenbigram">7.8.3.1. <
|
1442
|
-
<li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <
|
1443
|
-
<li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <
|
1444
|
-
<li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <
|
1445
|
-
<li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <
|
1446
|
-
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <
|
1447
|
-
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <
|
1448
|
-
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <
|
1449
|
-
<li><a class="reference internal" href="#tokenunigram">7.8.3.9. <
|
1450
|
-
<li><a class="reference internal" href="#tokentrigram">7.8.3.10. <
|
1451
|
-
<li><a class="reference internal" href="#tokendelimit">7.8.3.11. <
|
1452
|
-
<li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <
|
1453
|
-
<li><a class="reference internal" href="#tokenmecab">7.8.3.13. <
|
1454
|
-
<li><a class="reference internal" href="#tokenregexp">7.8.3.14. <
|
1491
|
+
<li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
|
1492
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
|
1493
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
|
1494
|
+
<li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
|
1495
|
+
<li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
|
1496
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
|
1497
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
|
1498
|
+
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
|
1499
|
+
<li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
|
1500
|
+
<li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
|
1501
|
+
<li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
|
1502
|
+
<li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
|
1503
|
+
<li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
|
1504
|
+
<li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
|
1455
1505
|
</ul>
|
1456
1506
|
</li>
|
1457
1507
|
</ul>
|
@@ -1464,12 +1514,14 @@ text search.</p>
|
|
1464
1514
|
<h4>Next topic</h4>
|
1465
1515
|
<p class="topless"><a href="token_filters.html"
|
1466
1516
|
title="next chapter">7.9. Token filters</a></p>
|
1467
|
-
<
|
1468
|
-
|
1469
|
-
<
|
1470
|
-
|
1471
|
-
|
1472
|
-
|
1517
|
+
<div role="note" aria-label="source link">
|
1518
|
+
<h3>This Page</h3>
|
1519
|
+
<ul class="this-page-menu">
|
1520
|
+
<li><a href="../_sources/reference/tokenizers.txt"
|
1521
|
+
rel="nofollow">Show Source</a></li>
|
1522
|
+
</ul>
|
1523
|
+
</div>
|
1524
|
+
<div id="searchbox" style="display: none" role="search">
|
1473
1525
|
<h3>Quick search</h3>
|
1474
1526
|
<form class="search" action="../search.html" method="get">
|
1475
1527
|
<input type="text" name="q" />
|
@@ -1486,7 +1538,7 @@ text search.</p>
|
|
1486
1538
|
</div>
|
1487
1539
|
<div class="clearer"></div>
|
1488
1540
|
</div>
|
1489
|
-
<div class="related">
|
1541
|
+
<div class="related" role="navigation" aria-label="related navigation">
|
1490
1542
|
<h3>Navigation</h3>
|
1491
1543
|
<ul>
|
1492
1544
|
<li class="right" style="margin-right: 10px">
|
@@ -1498,11 +1550,11 @@ text search.</p>
|
|
1498
1550
|
<li class="right" >
|
1499
1551
|
<a href="normalizers.html" title="7.7. Normalizers"
|
1500
1552
|
>previous</a> |</li>
|
1501
|
-
<li><a href="../index.html">Groonga v5.0.
|
1502
|
-
<li><a href="../reference.html" >7. Reference manual</a> »</li>
|
1553
|
+
<li class="nav-item nav-item-0"><a href="../index.html">Groonga v5.0.6-226-gd7da7e7 documentation</a> »</li>
|
1554
|
+
<li class="nav-item nav-item-1"><a href="../reference.html" >7. Reference manual</a> »</li>
|
1503
1555
|
</ul>
|
1504
1556
|
</div>
|
1505
|
-
<div class="footer">
|
1557
|
+
<div class="footer" role="contentinfo">
|
1506
1558
|
© Copyright 2009-2015, Brazil, Inc.
|
1507
1559
|
</div>
|
1508
1560
|
</body>
|