rroonga 7.1.1-x64-mingw32 → 9.0.2-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
 - data/Rakefile +3 -3
 - data/doc/text/news.md +22 -0
 - data/ext/groonga/extconf.rb +29 -26
 - data/ext/groonga/rb-grn.h +3 -3
 - data/lib/2.2/groonga.so +0 -0
 - data/lib/2.3/groonga.so +0 -0
 - data/lib/2.4/groonga.so +0 -0
 - data/lib/2.5/groonga.so +0 -0
 - data/lib/groonga/expression-builder.rb +1 -1
 - data/lib/groonga/schema.rb +13 -0
 - data/rroonga-build.rb +4 -11
 - data/test/test-expression-builder.rb +8 -0
 - data/vendor/local/bin/cv2pdb.exe +0 -0
 - data/vendor/local/bin/generate-pdb.bat +38 -36
 - data/vendor/local/bin/grndb.exe +0 -0
 - data/vendor/local/bin/groonga-benchmark.exe +0 -0
 - data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
 - data/vendor/local/bin/groonga.exe +0 -0
 - data/vendor/local/bin/libgroonga-0.dll +0 -0
 - data/vendor/local/bin/libmecab-2.dll +0 -0
 - data/vendor/local/bin/libmsgpackc.dll +0 -0
 - data/vendor/local/bin/libonigmo-6.dll +0 -0
 - data/vendor/local/bin/libpcre-1.dll +0 -0
 - data/vendor/local/bin/libpcrecpp-0.dll +0 -0
 - data/vendor/local/bin/libpcreposix-0.dll +0 -0
 - data/vendor/local/bin/lz4.exe +0 -0
 - data/vendor/local/bin/lz4c.exe +0 -0
 - data/vendor/local/bin/{lz4cat → lz4cat.exe} +0 -0
 - data/vendor/local/bin/mecab.exe +0 -0
 - data/vendor/local/bin/pcre-config +1 -1
 - data/vendor/local/bin/pcregrep.exe +0 -0
 - data/vendor/local/bin/pcretest.exe +0 -0
 - data/vendor/local/bin/unlz4.exe +0 -0
 - data/vendor/local/bin/zlib1.dll +0 -0
 - data/vendor/local/include/groonga/groonga.h +16 -1
 - data/vendor/local/include/groonga/groonga/accessor.h +5 -1
 - data/vendor/local/include/groonga/groonga/column.h +4 -0
 - data/vendor/local/include/groonga/groonga/db.h +3 -1
 - data/vendor/local/include/groonga/groonga/expr.h +5 -0
 - data/vendor/local/include/groonga/groonga/groonga.h +124 -171
 - data/vendor/local/include/groonga/groonga/highlighter.h +57 -0
 - data/vendor/local/include/groonga/groonga/ii.h +2 -0
 - data/vendor/local/include/groonga/groonga/index_column.h +31 -0
 - data/vendor/local/include/groonga/groonga/memory.h +29 -0
 - data/vendor/local/include/groonga/groonga/msgpack.h +50 -0
 - data/vendor/local/include/groonga/groonga/obj.h +22 -1
 - data/vendor/local/include/groonga/groonga/option.h +61 -0
 - data/vendor/local/include/groonga/groonga/output.h +57 -2
 - data/vendor/local/include/groonga/groonga/output_columns.h +38 -0
 - data/vendor/local/include/groonga/groonga/plugin.h +5 -0
 - data/vendor/local/include/groonga/groonga/raw_string.h +60 -0
 - data/vendor/local/include/groonga/groonga/string.h +113 -0
 - data/vendor/local/include/groonga/groonga/table.h +89 -1
 - data/vendor/local/include/groonga/groonga/thread.h +15 -0
 - data/vendor/local/include/groonga/groonga/time.h +1 -0
 - data/vendor/local/include/groonga/groonga/token.h +60 -10
 - data/vendor/local/include/groonga/groonga/token_cursor.h +59 -0
 - data/vendor/local/include/groonga/groonga/token_filter.h +24 -0
 - data/vendor/local/include/groonga/groonga/token_metadata.h +49 -0
 - data/vendor/local/include/groonga/groonga/tokenizer.h +99 -25
 - data/vendor/local/include/groonga/groonga/tokenizer_query_deprecated.h +50 -0
 - data/vendor/local/include/groonga/groonga/vector.h +80 -0
 - data/vendor/local/include/groonga/groonga/version.h +32 -0
 - data/vendor/local/include/groonga/groonga/window_function.h +18 -8
 - data/vendor/local/include/groonga/groonga/window_function_executor.h +68 -0
 - data/vendor/local/include/lz4.h +504 -212
 - data/vendor/local/include/lz4frame.h +433 -153
 - data/vendor/local/include/lz4frame_static.h +47 -0
 - data/vendor/local/include/lz4hc.h +281 -108
 - data/vendor/local/include/msgpack.hpp +4 -0
 - data/vendor/local/include/msgpack/adaptor/adaptor_base.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/adaptor_base_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/array_ref_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/boost/msgpack_variant_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/boost/string_view.hpp +15 -0
 - data/vendor/local/include/msgpack/adaptor/check_container_size_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/cpp17/optional.hpp +16 -0
 - data/vendor/local/include/msgpack/adaptor/cpp17/string_view.hpp +16 -0
 - data/vendor/local/include/msgpack/adaptor/define_decl.hpp +2 -0
 - data/vendor/local/include/msgpack/adaptor/ext_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/fixint_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/int_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/map_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/msgpack_tuple_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/nil_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/raw_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/size_equal_only_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/adaptor/tr1/unordered_map.hpp +2 -2
 - data/vendor/local/include/msgpack/adaptor/tr1/unordered_set.hpp +2 -2
 - data/vendor/local/include/msgpack/adaptor/v4raw_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/cpp_config_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/create_object_visitor.hpp +17 -0
 - data/vendor/local/include/msgpack/create_object_visitor_decl.hpp +16 -0
 - data/vendor/local/include/msgpack/fbuffer.h +1 -1
 - data/vendor/local/include/msgpack/fbuffer_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/gcc_atomic.hpp +0 -2
 - data/vendor/local/include/msgpack/iterator_decl.hpp +2 -1
 - data/vendor/local/include/msgpack/meta_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/null_visitor.hpp +17 -0
 - data/vendor/local/include/msgpack/null_visitor_decl.hpp +16 -0
 - data/vendor/local/include/msgpack/object.h +5 -0
 - data/vendor/local/include/msgpack/object_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/object_fwd.hpp +1 -0
 - data/vendor/local/include/msgpack/object_fwd_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/pack.h +1 -0
 - data/vendor/local/include/msgpack/pack_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/parse.hpp +18 -0
 - data/vendor/local/include/msgpack/parse_decl.hpp +16 -0
 - data/vendor/local/include/msgpack/parse_return.hpp +17 -0
 - data/vendor/local/include/msgpack/sbuffer_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/sysdep.h +34 -26
 - data/vendor/local/include/msgpack/type.hpp +9 -0
 - data/vendor/local/include/msgpack/unpack.h +12 -1
 - data/vendor/local/include/msgpack/unpack.hpp +1 -0
 - data/vendor/local/include/msgpack/unpack_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/unpack_exception.hpp +15 -0
 - data/vendor/local/include/msgpack/unpack_template.h +22 -30
 - data/vendor/local/include/msgpack/v1/adaptor/array_ref.hpp +6 -6
 - data/vendor/local/include/msgpack/v1/adaptor/boost/fusion.hpp +49 -6
 - data/vendor/local/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp +6 -4
 - data/vendor/local/include/msgpack/v1/adaptor/boost/string_view.hpp +87 -0
 - data/vendor/local/include/msgpack/v1/adaptor/carray.hpp +11 -11
 - data/vendor/local/include/msgpack/v1/adaptor/char_ptr.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/array.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_char.hpp +8 -1
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_unsigned_char.hpp +8 -1
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/forward_list.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/tuple.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_map.hpp +4 -4
 - data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_set.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/cpp17/optional.hpp +90 -0
 - data/vendor/local/include/msgpack/v1/adaptor/cpp17/string_view.hpp +86 -0
 - data/vendor/local/include/msgpack/v1/adaptor/deque.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_array.hpp +1088 -32
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_map.hpp +32 -16
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_msgpack_tuple.hpp +32 -32
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_convert_helper.hpp +45 -0
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_array.hpp +4 -3
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_map.hpp +4 -2
 - data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_msgpack_tuple.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/ext.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/fixint.hpp +40 -24
 - data/vendor/local/include/msgpack/v1/adaptor/float.hpp +4 -4
 - data/vendor/local/include/msgpack/v1/adaptor/int.hpp +55 -33
 - data/vendor/local/include/msgpack/v1/adaptor/list.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/map.hpp +10 -10
 - data/vendor/local/include/msgpack/v1/adaptor/pair.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/set.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/string.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_map.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_set.hpp +2 -2
 - data/vendor/local/include/msgpack/v1/adaptor/vector.hpp +5 -5
 - data/vendor/local/include/msgpack/v1/adaptor/vector_bool.hpp +1 -1
 - data/vendor/local/include/msgpack/v1/adaptor/vector_char.hpp +9 -9
 - data/vendor/local/include/msgpack/v1/adaptor/vector_unsigned_char.hpp +9 -9
 - data/vendor/local/include/msgpack/v1/cpp_config.hpp +6 -0
 - data/vendor/local/include/msgpack/v1/cpp_config_decl.hpp +6 -0
 - data/vendor/local/include/msgpack/v1/detail/cpp03_zone.hpp +41 -34
 - data/vendor/local/include/msgpack/v1/detail/cpp03_zone_decl.hpp +8 -0
 - data/vendor/local/include/msgpack/v1/detail/cpp11_zone.hpp +25 -19
 - data/vendor/local/include/msgpack/v1/detail/cpp11_zone_decl.hpp +8 -0
 - data/vendor/local/include/msgpack/v1/meta.hpp +6 -0
 - data/vendor/local/include/msgpack/v1/meta_decl.hpp +5 -0
 - data/vendor/local/include/msgpack/v1/object.hpp +768 -393
 - data/vendor/local/include/msgpack/v1/object_decl.hpp +11 -1
 - data/vendor/local/include/msgpack/v1/object_fwd.hpp +4 -1
 - data/vendor/local/include/msgpack/v1/object_fwd_decl.hpp +3 -1
 - data/vendor/local/include/msgpack/v1/parse_return.hpp +36 -0
 - data/vendor/local/include/msgpack/v1/unpack.hpp +39 -120
 - data/vendor/local/include/msgpack/v1/unpack_decl.hpp +2 -9
 - data/vendor/local/include/msgpack/v1/unpack_exception.hpp +122 -0
 - data/vendor/local/include/msgpack/v1/vrefbuffer.hpp +2 -2
 - data/vendor/local/include/msgpack/v2/create_object_visitor.hpp +250 -0
 - data/vendor/local/include/msgpack/v2/create_object_visitor_decl.hpp +33 -0
 - data/vendor/local/include/msgpack/v2/meta_decl.hpp +4 -0
 - data/vendor/local/include/msgpack/v2/null_visitor.hpp +96 -0
 - data/vendor/local/include/msgpack/v2/null_visitor_decl.hpp +29 -0
 - data/vendor/local/include/msgpack/v2/object_decl.hpp +4 -0
 - data/vendor/local/include/msgpack/v2/object_fwd.hpp +1 -1
 - data/vendor/local/include/msgpack/v2/object_fwd_decl.hpp +2 -0
 - data/vendor/local/include/msgpack/v2/pack_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/v2/parse.hpp +1072 -0
 - data/vendor/local/include/msgpack/v2/parse_decl.hpp +79 -0
 - data/vendor/local/include/msgpack/v2/parse_return.hpp +37 -0
 - data/vendor/local/include/msgpack/v2/unpack.hpp +21 -1298
 - data/vendor/local/include/msgpack/v2/unpack_decl.hpp +9 -45
 - data/vendor/local/include/msgpack/v2/x3_parse.hpp +875 -0
 - data/vendor/local/include/msgpack/v2/x3_parse_decl.hpp +36 -0
 - data/vendor/local/include/msgpack/v2/x3_unpack.hpp +120 -0
 - data/vendor/local/include/msgpack/v2/x3_unpack_decl.hpp +71 -0
 - data/vendor/local/include/msgpack/v3/adaptor/adaptor_base.hpp +58 -0
 - data/vendor/local/include/msgpack/v3/adaptor/adaptor_base_decl.hpp +52 -0
 - data/vendor/local/include/msgpack/v3/adaptor/array_ref_decl.hpp +36 -0
 - data/vendor/local/include/msgpack/v3/adaptor/boost/msgpack_variant_decl.hpp +42 -0
 - data/vendor/local/include/msgpack/v3/adaptor/check_container_size_decl.hpp +39 -0
 - data/vendor/local/include/msgpack/v3/adaptor/define_decl.hpp +23 -0
 - data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_array_decl.hpp +31 -0
 - data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_map_decl.hpp +31 -0
 - data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_msgpack_tuple_decl.hpp +43 -0
 - data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_array_decl.hpp +32 -0
 - data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_map_decl.hpp +31 -0
 - data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_msgpack_tuple_decl.hpp +59 -0
 - data/vendor/local/include/msgpack/v3/adaptor/ext_decl.hpp +34 -0
 - data/vendor/local/include/msgpack/v3/adaptor/fixint_decl.hpp +43 -0
 - data/vendor/local/include/msgpack/v3/adaptor/int_decl.hpp +54 -0
 - data/vendor/local/include/msgpack/v3/adaptor/map_decl.hpp +33 -0
 - data/vendor/local/include/msgpack/v3/adaptor/msgpack_tuple_decl.hpp +21 -0
 - data/vendor/local/include/msgpack/v3/adaptor/nil_decl.hpp +42 -0
 - data/vendor/local/include/msgpack/v3/adaptor/raw_decl.hpp +33 -0
 - data/vendor/local/include/msgpack/v3/adaptor/size_equal_only_decl.hpp +35 -0
 - data/vendor/local/include/msgpack/v3/adaptor/v4raw_decl.hpp +34 -0
 - data/vendor/local/include/msgpack/v3/cpp_config_decl.hpp +84 -0
 - data/vendor/local/include/msgpack/v3/create_object_visitor_decl.hpp +33 -0
 - data/vendor/local/include/msgpack/v3/detail/cpp03_zone_decl.hpp +31 -0
 - data/vendor/local/include/msgpack/v3/detail/cpp11_zone_decl.hpp +31 -0
 - data/vendor/local/include/msgpack/v3/fbuffer_decl.hpp +32 -0
 - data/vendor/local/include/msgpack/v3/iterator_decl.hpp +33 -0
 - data/vendor/local/include/msgpack/v3/meta_decl.hpp +50 -0
 - data/vendor/local/include/msgpack/v3/null_visitor_decl.hpp +29 -0
 - data/vendor/local/include/msgpack/v3/object_decl.hpp +53 -0
 - data/vendor/local/include/msgpack/v3/object_fwd.hpp +70 -0
 - data/vendor/local/include/msgpack/v3/object_fwd_decl.hpp +75 -0
 - data/vendor/local/include/msgpack/v3/pack_decl.hpp +55 -0
 - data/vendor/local/include/msgpack/v3/parse.hpp +677 -0
 - data/vendor/local/include/msgpack/v3/parse_decl.hpp +49 -0
 - data/vendor/local/include/msgpack/v3/parse_return.hpp +35 -0
 - data/vendor/local/include/msgpack/v3/sbuffer_decl.hpp +33 -0
 - data/vendor/local/include/msgpack/v3/unpack.hpp +192 -0
 - data/vendor/local/include/msgpack/v3/unpack_decl.hpp +304 -0
 - data/vendor/local/include/msgpack/v3/vrefbuffer_decl.hpp +29 -0
 - data/vendor/local/include/msgpack/v3/x3_parse_decl.hpp +34 -0
 - data/vendor/local/include/msgpack/v3/x3_unpack.hpp +97 -0
 - data/vendor/local/include/msgpack/v3/x3_unpack_decl.hpp +65 -0
 - data/vendor/local/include/msgpack/v3/zbuffer_decl.hpp +29 -0
 - data/vendor/local/include/msgpack/v3/zone_decl.hpp +21 -0
 - data/vendor/local/include/msgpack/version_master.h +2 -2
 - data/vendor/local/include/msgpack/versioning.hpp +5 -3
 - data/vendor/local/include/msgpack/vrefbuffer.h +1 -2
 - data/vendor/local/include/msgpack/vrefbuffer_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/x3_parse.hpp +15 -0
 - data/vendor/local/include/msgpack/x3_parse_decl.hpp +16 -0
 - data/vendor/local/include/msgpack/x3_unpack.hpp +16 -0
 - data/vendor/local/include/msgpack/x3_unpack_decl.hpp +16 -0
 - data/vendor/local/include/msgpack/zbuffer_decl.hpp +1 -0
 - data/vendor/local/include/msgpack/zone_decl.hpp +1 -0
 - data/vendor/local/include/pcre.h +6 -6
 - data/vendor/local/lib/cmake/msgpack/msgpack-config-version.cmake +46 -0
 - data/vendor/local/lib/cmake/msgpack/msgpack-config.cmake +47 -0
 - data/vendor/local/lib/cmake/msgpack/msgpack-targets-noconfig.cmake +29 -0
 - data/vendor/local/lib/cmake/msgpack/msgpack-targets.cmake +101 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +2 -2
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
 - data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +150 -19
 - data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +123 -65
 - data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +528 -113
 - data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +142 -40
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
 - data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +1 -1
 - data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +64 -35
 - data/vendor/local/lib/groonga/scripts/ruby/expression.rb +3 -1
 - data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters.rb +15 -21
 - data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters/optimizer.rb +274 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree.rb +8 -2
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign.rb +22 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign_binary_operation.rb +24 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/binary_operation.rb +206 -8
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/constant.rb +16 -1
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +30 -1
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/logical_operation.rb +6 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/member.rb +18 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/null.rb +17 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/reference.rb +18 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/table.rb +14 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/unary_operation.rb +26 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/variable.rb +4 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +78 -8
 - data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +10 -0
 - data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +2 -0
 - data/vendor/local/lib/groonga/scripts/ruby/locale_output.rb +28 -0
 - data/vendor/local/lib/groonga/scripts/ruby/logger.rb +36 -4
 - data/vendor/local/lib/groonga/scripts/ruby/record.rb +1 -1
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +0 -3
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +46 -5
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_data_size_estimator.rb +5 -136
 - data/vendor/local/lib/groonga/scripts/ruby/table.rb +2 -2
 - data/vendor/local/lib/libgroonga.a +0 -0
 - data/vendor/local/lib/libgroonga.dll.a +0 -0
 - data/vendor/local/lib/libgroonga.la +1 -1
 - data/vendor/local/lib/liblz4.a +0 -0
 - data/vendor/local/lib/liblz4.dll +0 -0
 - data/vendor/local/lib/liblz4.dll.1 +0 -0
 - data/vendor/local/lib/{liblz4.dll.1.5.0 → liblz4.dll.1.8.2} +0 -0
 - data/vendor/local/lib/libmecab.dll.a +0 -0
 - data/vendor/local/lib/libmsgpackc.a +0 -0
 - data/vendor/local/lib/libmsgpackc.dll.a +0 -0
 - data/vendor/local/lib/libonigmo.a +0 -0
 - data/vendor/local/lib/libonigmo.dll.a +0 -0
 - data/vendor/local/lib/libpcre.a +0 -0
 - data/vendor/local/lib/libpcre.dll.a +0 -0
 - data/vendor/local/lib/libpcre.la +2 -2
 - data/vendor/local/lib/libpcrecpp.dll.a +0 -0
 - data/vendor/local/lib/libpcrecpp.la +1 -1
 - data/vendor/local/lib/libpcreposix.a +0 -0
 - data/vendor/local/lib/libpcreposix.dll.a +0 -0
 - data/vendor/local/lib/libpcreposix.la +2 -2
 - data/vendor/local/lib/libz.dll.a +0 -0
 - data/vendor/local/lib/pkgconfig/groonga-normalizer-mysql.pc +1 -1
 - data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
 - data/vendor/local/lib/pkgconfig/liblz4.pc +3 -3
 - data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
 - data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
 - data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
 - data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
 - data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
 - data/vendor/local/share/doc/groonga-normalizer-mysql/README.md +14 -22
 - data/vendor/local/share/doc/groonga-normalizer-mysql/news.md +22 -2
 - data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_static/basic.css +113 -4
 - data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +46 -19
 - data/vendor/local/share/doc/groonga/en/html/_static/documentation_options.js +10 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
 - data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_static/language_data.js +297 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/pygments.css +4 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +69 -322
 - data/vendor/local/share/doc/groonga/en/html/characteristic.html +16 -24
 - data/vendor/local/share/doc/groonga/en/html/client.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/community.html +30 -38
 - data/vendor/local/share/doc/groonga/en/html/contribution.html +23 -31
 - data/vendor/local/share/doc/groonga/en/html/contribution/development.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +58 -66
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +51 -56
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +52 -56
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +27 -35
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +19 -27
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +26 -34
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +167 -167
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +16 -24
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +28 -36
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +59 -67
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +31 -39
 - data/vendor/local/share/doc/groonga/en/html/contribution/report.html +18 -26
 - data/vendor/local/share/doc/groonga/en/html/development.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +38 -43
 - data/vendor/local/share/doc/groonga/en/html/genindex.html +50 -28
 - data/vendor/local/share/doc/groonga/en/html/index.html +248 -234
 - data/vendor/local/share/doc/groonga/en/html/install.html +43 -47
 - data/vendor/local/share/doc/groonga/en/html/install/centos.html +43 -51
 - data/vendor/local/share/doc/groonga/en/html/install/debian.html +52 -131
 - data/vendor/local/share/doc/groonga/en/html/install/docker.html +155 -0
 - data/vendor/local/share/doc/groonga/en/html/install/fedora.html +41 -49
 - data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +29 -37
 - data/vendor/local/share/doc/groonga/en/html/install/others.html +142 -150
 - data/vendor/local/share/doc/groonga/en/html/install/solaris.html +30 -38
 - data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +43 -51
 - data/vendor/local/share/doc/groonga/en/html/install/windows.html +33 -41
 - data/vendor/local/share/doc/groonga/en/html/limitations.html +36 -42
 - data/vendor/local/share/doc/groonga/en/html/news.html +1586 -598
 - data/vendor/local/share/doc/groonga/en/html/news/0.x.html +83 -83
 - data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +147 -155
 - data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +26 -34
 - data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +225 -233
 - data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +48 -56
 - data/vendor/local/share/doc/groonga/en/html/news/2.x.html +378 -386
 - data/vendor/local/share/doc/groonga/en/html/news/3.x.html +320 -328
 - data/vendor/local/share/doc/groonga/en/html/news/4.x.html +442 -448
 - data/vendor/local/share/doc/groonga/en/html/news/5.x.html +742 -860
 - data/vendor/local/share/doc/groonga/en/html/news/6.x.html +544 -621
 - data/vendor/local/share/doc/groonga/en/html/news/senna.html +32 -40
 - data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/en/html/reference.html +208 -198
 - data/vendor/local/share/doc/groonga/en/html/reference/alias.html +85 -93
 - data/vendor/local/share/doc/groonga/en/html/reference/api.html +50 -57
 - data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +62 -77
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +117 -149
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +140 -176
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +43 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +48 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +194 -254
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +106 -138
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +62 -82
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +117 -137
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +74 -98
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +79 -103
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +40 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +57 -73
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +75 -99
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_inspect.html +495 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +52 -68
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +291 -357
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +69 -89
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +47 -59
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +226 -306
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +120 -160
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +80 -103
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +46 -58
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +40 -52
 - data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +52 -66
 - data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +98 -122
 - data/vendor/local/share/doc/groonga/en/html/reference/cast.html +40 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/column.html +16 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +16 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +30 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +16 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +92 -100
 - data/vendor/local/share/doc/groonga/en/html/reference/command.html +76 -84
 - data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +26 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +64 -72
 - data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +21 -29
 - data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +25 -33
 - data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +32 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +105 -113
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +85 -73
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +31 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +131 -139
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +370 -326
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +115 -117
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +38 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +47 -53
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +40 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +42 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +41 -49
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +37 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +71 -63
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +31 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +49 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +64 -71
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +335 -138
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +233 -87
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +45 -53
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +42 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +43 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +58 -64
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +33 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +31 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +295 -218
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +56 -64
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +532 -214
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +797 -388
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +35 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +188 -196
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +83 -90
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +41 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +41 -49
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +401 -403
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +253 -261
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +60 -68
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +36 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +35 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +21 -29
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +22 -30
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +21 -29
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +39 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +47 -53
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +38 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +38 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +330 -338
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +1545 -1194
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +57 -65
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +83 -91
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +119 -133
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +30 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +165 -174
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +50 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +104 -112
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +42 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +49 -57
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +46 -54
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +110 -117
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +41 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +40 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +37 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -27
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +134 -114
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +25 -31
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +66 -66
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +174 -182
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +25 -33
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +27 -35
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +191 -199
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +32 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +189 -163
 - data/vendor/local/share/doc/groonga/en/html/reference/function.html +59 -64
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +71 -79
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/cast_loose.html +210 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +49 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +38 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +133 -142
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +67 -73
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +56 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +80 -88
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +70 -78
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +56 -64
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +87 -94
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +54 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +55 -63
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +36 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +74 -82
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +152 -160
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +45 -52
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +76 -84
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +37 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +39 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +76 -84
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +37 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day_of_week.html +278 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +37 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +36 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +36 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +36 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +36 -44
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +37 -45
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_find.html +368 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +40 -48
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +54 -62
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +40 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +44 -52
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +307 -316
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +486 -492
 - data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +44 -52
 - data/vendor/local/share/doc/groonga/en/html/reference/log.html +128 -147
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +43 -92
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_auto.html +179 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc100.html +897 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc51.html +162 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/operations.html +26 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +48 -56
 - data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +47 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/output.html +47 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +20 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +93 -101
 - data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +228 -225
 - data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +59 -67
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +50 -58
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +57 -65
 - data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +76 -86
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +43 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +159 -167
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +93 -101
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +85 -93
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +88 -96
 - data/vendor/local/share/doc/groonga/en/html/reference/tables.html +142 -150
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filter/summary.html +147 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +31 -223
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_nfkc100.html +626 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stem.html +291 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stop_word.html +287 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizer/summary.html +259 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +42 -1455
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram.html +368 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank.html +221 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +240 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +270 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +292 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +200 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +212 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit.html +357 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit_null.html +162 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_mecab.html +783 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_regexp.html +289 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_trigram.html +194 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_unigram.html +194 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +71 -79
 - data/vendor/local/share/doc/groonga/en/html/reference/types.html +64 -72
 - data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +29 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +38 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +38 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +38 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +38 -46
 - data/vendor/local/share/doc/groonga/en/html/search.html +13 -24
 - data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/en/html/server.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +27 -35
 - data/vendor/local/share/doc/groonga/en/html/server/http.html +18 -26
 - data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +94 -102
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/server/memcached.html +18 -26
 - data/vendor/local/share/doc/groonga/en/html/server/package.html +101 -109
 - data/vendor/local/share/doc/groonga/en/html/spec.html +19 -27
 - data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +207 -215
 - data/vendor/local/share/doc/groonga/en/html/spec/search.html +39 -39
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +15 -23
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +46 -50
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +27 -35
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +26 -31
 - data/vendor/local/share/doc/groonga/en/html/tutorial.html +17 -25
 - data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +46 -54
 - data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +63 -71
 - data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +30 -38
 - data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +88 -97
 - data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +19 -27
 - data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +61 -69
 - data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +108 -116
 - data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +24 -32
 - data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +19 -27
 - data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +32 -40
 - data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +52 -60
 - data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +113 -4
 - data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +46 -19
 - data/vendor/local/share/doc/groonga/ja/html/_static/documentation_options.js +10 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
 - data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_static/language_data.js +124 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/pygments.css +4 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +70 -150
 - data/vendor/local/share/doc/groonga/ja/html/characteristic.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/client.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/community.html +29 -37
 - data/vendor/local/share/doc/groonga/ja/html/contribution.html +23 -31
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +50 -58
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +43 -48
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +47 -51
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +26 -34
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +18 -26
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +23 -31
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +162 -162
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +16 -24
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +26 -34
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +50 -58
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +28 -36
 - data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -25
 - data/vendor/local/share/doc/groonga/ja/html/development.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +32 -37
 - data/vendor/local/share/doc/groonga/ja/html/genindex.html +50 -28
 - data/vendor/local/share/doc/groonga/ja/html/index.html +247 -233
 - data/vendor/local/share/doc/groonga/ja/html/install.html +41 -45
 - data/vendor/local/share/doc/groonga/ja/html/install/centos.html +44 -52
 - data/vendor/local/share/doc/groonga/ja/html/install/debian.html +52 -121
 - data/vendor/local/share/doc/groonga/ja/html/install/docker.html +155 -0
 - data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +40 -48
 - data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +28 -36
 - data/vendor/local/share/doc/groonga/ja/html/install/others.html +116 -124
 - data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +28 -36
 - data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +43 -51
 - data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -37
 - data/vendor/local/share/doc/groonga/ja/html/limitations.html +30 -36
 - data/vendor/local/share/doc/groonga/ja/html/news.html +1234 -384
 - data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +82 -82
 - data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +146 -154
 - data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +25 -33
 - data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +191 -199
 - data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +41 -49
 - data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +283 -291
 - data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +229 -237
 - data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +274 -280
 - data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +475 -593
 - data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +313 -390
 - data/vendor/local/share/doc/groonga/ja/html/news/senna.html +31 -39
 - data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference.html +208 -198
 - data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +70 -78
 - data/vendor/local/share/doc/groonga/ja/html/reference/api.html +50 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +57 -72
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +107 -139
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +137 -173
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +40 -52
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +46 -54
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +184 -244
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +99 -131
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +57 -77
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +100 -120
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +71 -95
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +75 -99
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +37 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +54 -70
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +71 -95
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_inspect.html +487 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +49 -65
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +286 -352
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +64 -84
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +44 -56
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +219 -299
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +116 -156
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +70 -93
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +42 -54
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +36 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +94 -118
 - data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +39 -25
 - data/vendor/local/share/doc/groonga/ja/html/reference/column.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +76 -84
 - data/vendor/local/share/doc/groonga/ja/html/reference/command.html +76 -84
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +25 -33
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +51 -59
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +20 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +21 -29
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +27 -35
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +101 -109
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +39 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +84 -72
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +30 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +104 -112
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +271 -237
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +100 -102
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +37 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +41 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +39 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +39 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +34 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +70 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +30 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +42 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +59 -68
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +300 -126
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +212 -80
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +42 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +40 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +41 -49
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +57 -63
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +32 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +30 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +246 -178
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +51 -59
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +479 -175
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +718 -326
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +34 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +145 -153
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +78 -85
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +40 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +360 -362
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +221 -229
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +47 -55
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +32 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +33 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +20 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +21 -29
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +20 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +35 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +42 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +57 -57
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +36 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +36 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +317 -325
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +1246 -917
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +50 -58
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +77 -85
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +109 -123
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +29 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +131 -140
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -49
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +87 -95
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +44 -52
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +93 -100
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +40 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +39 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -27
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +125 -107
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +23 -29
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +62 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +132 -140
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +23 -31
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +25 -33
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +166 -174
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +31 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +189 -165
 - data/vendor/local/share/doc/groonga/ja/html/reference/function.html +59 -64
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +69 -77
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/cast_loose.html +208 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +48 -54
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +37 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +115 -124
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +66 -72
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +55 -61
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +69 -77
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +60 -68
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +54 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +85 -93
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +54 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +54 -62
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +39 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +35 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +67 -75
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +130 -138
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +44 -51
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +61 -69
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +63 -71
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day_of_week.html +276 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +35 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +35 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +35 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +35 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +36 -44
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_find.html +353 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +39 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +52 -61
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +200 -208
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +375 -382
 - data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +39 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/log.html +125 -144
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +36 -70
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_auto.html +168 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc100.html +887 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc51.html +160 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +26 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +41 -49
 - data/vendor/local/share/doc/groonga/ja/html/reference/output.html +42 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +20 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +68 -76
 - data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +178 -184
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +38 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +39 -47
 - data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +63 -73
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +43 -51
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +130 -138
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +72 -80
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +68 -76
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +76 -86
 - data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +129 -137
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filter/summary.html +145 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +31 -215
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_nfkc100.html +617 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stem.html +289 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stop_word.html +284 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizer/summary.html +233 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -1349
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram.html +344 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank.html +219 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +237 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +267 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +287 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +199 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +209 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit.html +344 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit_null.html +160 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_mecab.html +764 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_regexp.html +284 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_trigram.html +191 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_unigram.html +191 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +65 -73
 - data/vendor/local/share/doc/groonga/ja/html/reference/types.html +48 -56
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +29 -37
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +37 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +37 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +37 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +37 -45
 - data/vendor/local/share/doc/groonga/ja/html/search.html +13 -24
 - data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/server.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +22 -30
 - data/vendor/local/share/doc/groonga/ja/html/server/http.html +17 -25
 - data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +82 -90
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +16 -24
 - data/vendor/local/share/doc/groonga/ja/html/server/package.html +99 -107
 - data/vendor/local/share/doc/groonga/ja/html/spec.html +19 -27
 - data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +201 -209
 - data/vendor/local/share/doc/groonga/ja/html/spec/search.html +36 -36
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +44 -48
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +21 -29
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +24 -29
 - data/vendor/local/share/doc/groonga/ja/html/tutorial.html +16 -24
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +32 -40
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +62 -70
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -30
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +77 -86
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +15 -23
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +56 -64
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +84 -92
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +20 -28
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -26
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +21 -29
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +43 -51
 - data/vendor/local/share/doc/pcre/AUTHORS +3 -3
 - data/vendor/local/share/doc/pcre/ChangeLog +53 -0
 - data/vendor/local/share/doc/pcre/LICENCE +3 -3
 - data/vendor/local/share/doc/pcre/NEWS +6 -0
 - data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +8 -7
 - data/vendor/local/share/groonga/mruby/LEGAL +35 -35
 - data/vendor/local/share/license/cv2pdb/{README → README.MD} +28 -10
 - data/vendor/local/share/license/groonga-normalizer-mysql/README.md +14 -22
 - data/vendor/local/share/license/lz4/LICENSE +2 -2
 - data/vendor/local/share/license/mruby/AUTHORS +3 -0
 - data/vendor/local/share/license/mruby/MITL +1 -1
 - data/vendor/local/share/license/mruby/README.md +1 -1
 - data/vendor/local/share/license/msgpack/README.md +5 -34
 - data/vendor/local/share/license/pcre/LICENCE +3 -3
 - data/vendor/local/share/man/man1/lz4.1 +221 -86
 - data/vendor/local/share/man/man1/lz4c.1 +222 -32
 - data/vendor/local/share/man/man1/lz4cat.1 +221 -30
 - data/vendor/local/share/man/man1/unlz4.1 +223 -0
 - metadata +231 -87
 - data/lib/2.1/groonga.so +0 -0
 - data/vendor/local/lib/groonga/plugins/expression_rewriters/optimizer.rb +0 -147
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/options.rb +0 -14
 - data/vendor/local/share/doc/groonga/en/html/_static/ajax-loader.gif +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +0 -808
 - data/vendor/local/share/doc/groonga/ja/html/_static/ajax-loader.gif +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +0 -808
 
| 
         @@ -0,0 +1,233 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            <!DOCTYPE html>
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            <html xmlns="http://www.w3.org/1999/xhtml" lang="ja">
         
     | 
| 
      
 6 
     | 
    
         
            +
              <head>
         
     | 
| 
      
 7 
     | 
    
         
            +
                <meta charset="utf-8" />
         
     | 
| 
      
 8 
     | 
    
         
            +
                <title>7.8.1. 概要 — Groonga v9.0.2ドキュメント</title>
         
     | 
| 
      
 9 
     | 
    
         
            +
                <link rel="stylesheet" href="../../_static/groonga.css" type="text/css" />
         
     | 
| 
      
 10 
     | 
    
         
            +
                <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
         
     | 
| 
      
 11 
     | 
    
         
            +
                
         
     | 
| 
      
 12 
     | 
    
         
            +
                <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
         
     | 
| 
      
 13 
     | 
    
         
            +
                <script type="text/javascript" src="../../_static/jquery.js"></script>
         
     | 
| 
      
 14 
     | 
    
         
            +
                <script type="text/javascript" src="../../_static/underscore.js"></script>
         
     | 
| 
      
 15 
     | 
    
         
            +
                <script type="text/javascript" src="../../_static/doctools.js"></script>
         
     | 
| 
      
 16 
     | 
    
         
            +
                <script type="text/javascript" src="../../_static/language_data.js"></script>
         
     | 
| 
      
 17 
     | 
    
         
            +
                <script type="text/javascript" src="../../_static/translations.js"></script>
         
     | 
| 
      
 18 
     | 
    
         
            +
                
         
     | 
| 
      
 19 
     | 
    
         
            +
                <link rel="shortcut icon" href="../../_static/favicon.ico"/>
         
     | 
| 
      
 20 
     | 
    
         
            +
                <link rel="index" title="索引" href="../../genindex.html" />
         
     | 
| 
      
 21 
     | 
    
         
            +
                <link rel="search" title="検索" href="../../search.html" />
         
     | 
| 
      
 22 
     | 
    
         
            +
                <link rel="next" title="7.8.2. TokenBigram" href="../tokenizers/token_bigram.html" />
         
     | 
| 
      
 23 
     | 
    
         
            +
                <link rel="prev" title="7.8. トークナイザー" href="../tokenizers.html" /> 
         
     | 
| 
      
 24 
     | 
    
         
            +
              </head><body>
         
     | 
| 
      
 25 
     | 
    
         
            +
            <div class="header">
         
     | 
| 
      
 26 
     | 
    
         
            +
              <h1 class="title">
         
     | 
| 
      
 27 
     | 
    
         
            +
                <a id="top-link" href="../../index.html">
         
     | 
| 
      
 28 
     | 
    
         
            +
                  <span class="project">groonga</span>
         
     | 
| 
      
 29 
     | 
    
         
            +
                  <span class="separator">-</span>
         
     | 
| 
      
 30 
     | 
    
         
            +
                  <span class="description">オープンソースのカラムストア機能付き全文検索エンジン</span>
         
     | 
| 
      
 31 
     | 
    
         
            +
                </a>
         
     | 
| 
      
 32 
     | 
    
         
            +
              </h1>
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
              <div class="other-language-links">
         
     | 
| 
      
 35 
     | 
    
         
            +
                <ul>
         
     | 
| 
      
 36 
     | 
    
         
            +
                  <li><a href="../../../../en/html/reference/tokenizer/summary.html">English</a></li>
         
     | 
| 
      
 37 
     | 
    
         
            +
                </ul>
         
     | 
| 
      
 38 
     | 
    
         
            +
              </div>
         
     | 
| 
      
 39 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 40 
     | 
    
         
            +
              
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                <div class="related" role="navigation" aria-label="related navigation">
         
     | 
| 
      
 43 
     | 
    
         
            +
                  <h3>ナビゲーション</h3>
         
     | 
| 
      
 44 
     | 
    
         
            +
                  <ul>
         
     | 
| 
      
 45 
     | 
    
         
            +
                    <li class="right" style="margin-right: 10px">
         
     | 
| 
      
 46 
     | 
    
         
            +
                      <a href="../../genindex.html" title="総合索引"
         
     | 
| 
      
 47 
     | 
    
         
            +
                         accesskey="I">索引</a></li>
         
     | 
| 
      
 48 
     | 
    
         
            +
                    <li class="right" >
         
     | 
| 
      
 49 
     | 
    
         
            +
                      <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
         
     | 
| 
      
 50 
     | 
    
         
            +
                         accesskey="N">次へ</a> |</li>
         
     | 
| 
      
 51 
     | 
    
         
            +
                    <li class="right" >
         
     | 
| 
      
 52 
     | 
    
         
            +
                      <a href="../tokenizers.html" title="7.8. トークナイザー"
         
     | 
| 
      
 53 
     | 
    
         
            +
                         accesskey="P">前へ</a> |</li>
         
     | 
| 
      
 54 
     | 
    
         
            +
                    <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2ドキュメント</a> »</li>
         
     | 
| 
      
 55 
     | 
    
         
            +
                      <li class="nav-item nav-item-1"><a href="../../reference.html" >7. リファレンスマニュアル</a> »</li>
         
     | 
| 
      
 56 
     | 
    
         
            +
                      <li class="nav-item nav-item-2"><a href="../tokenizers.html" accesskey="U">7.8. トークナイザー</a> »</li> 
         
     | 
| 
      
 57 
     | 
    
         
            +
                  </ul>
         
     | 
| 
      
 58 
     | 
    
         
            +
                </div>  
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
                <div class="document">
         
     | 
| 
      
 61 
     | 
    
         
            +
                  <div class="documentwrapper">
         
     | 
| 
      
 62 
     | 
    
         
            +
                    <div class="bodywrapper">
         
     | 
| 
      
 63 
     | 
    
         
            +
                      <div class="body" role="main">
         
     | 
| 
      
 64 
     | 
    
         
            +
                        
         
     | 
| 
      
 65 
     | 
    
         
            +
              <div class="section" id="summary">
         
     | 
| 
      
 66 
     | 
    
         
            +
            <h1>7.8.1. 概要<a class="headerlink" href="#summary" title="このヘッドラインへのパーマリンク">¶</a></h1>
         
     | 
| 
      
 67 
     | 
    
         
            +
            <p>Groongaにはテキストをトークナイズするトークナイザーモジュールがあります。次のケースのときにトークナイザーを使います。</p>
         
     | 
| 
      
 68 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 69 
     | 
    
         
            +
            <div><ul>
         
     | 
| 
      
 70 
     | 
    
         
            +
            <li><p>テキストのインデックスを構築するとき</p>
         
     | 
| 
      
 71 
     | 
    
         
            +
            <div class="figure align-center" id="id1">
         
     | 
| 
      
 72 
     | 
    
         
            +
            <a class="reference internal image-reference" href="../../_images/used-when-indexing.png"><img alt="../../_images/used-when-indexing.png" src="../../_images/used-when-indexing.png" style="width: 80%;" /></a>
         
     | 
| 
      
 73 
     | 
    
         
            +
            <p class="caption"><span class="caption-text">テキストのインデックスを構築するときにトークナイザーを使います。</span><a class="headerlink" href="#id1" title="この画像へのパーマリンク">¶</a></p>
         
     | 
| 
      
 74 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 75 
     | 
    
         
            +
            </li>
         
     | 
| 
      
 76 
     | 
    
         
            +
            <li><p>クエリーで検索するとき</p>
         
     | 
| 
      
 77 
     | 
    
         
            +
            <div class="figure align-center" id="id2">
         
     | 
| 
      
 78 
     | 
    
         
            +
            <a class="reference internal image-reference" href="../../_images/used-when-searching.png"><img alt="../../_images/used-when-searching.png" src="../../_images/used-when-searching.png" style="width: 80%;" /></a>
         
     | 
| 
      
 79 
     | 
    
         
            +
            <p class="caption"><span class="caption-text">クエリーで検索するときにトークナイザーを使います。</span><a class="headerlink" href="#id2" title="この画像へのパーマリンク">¶</a></p>
         
     | 
| 
      
 80 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 81 
     | 
    
         
            +
            </li>
         
     | 
| 
      
 82 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 83 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 84 
     | 
    
         
            +
            <p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
         
     | 
| 
      
 85 
     | 
    
         
            +
            <p>一般的に <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> を使うことをオススメします。</p>
         
     | 
| 
      
 86 
     | 
    
         
            +
            <p><a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドと <a class="reference internal" href="../commands/table_tokenize.html"><span class="doc">table_tokenize</span></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドを使って <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
         
     | 
| 
      
 87 
     | 
    
         
            +
            <p>実行例:</p>
         
     | 
| 
      
 88 
     | 
    
         
            +
            <div class="highlight-none notranslate"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World"
         
     | 
| 
      
 89 
     | 
    
         
            +
            # [
         
     | 
| 
      
 90 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 91 
     | 
    
         
            +
            #     0,
         
     | 
| 
      
 92 
     | 
    
         
            +
            #     1337566253.89858,
         
     | 
| 
      
 93 
     | 
    
         
            +
            #     0.000355720520019531
         
     | 
| 
      
 94 
     | 
    
         
            +
            #   ],
         
     | 
| 
      
 95 
     | 
    
         
            +
            #   [
         
     | 
| 
      
 96 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 97 
     | 
    
         
            +
            #       "position": 0,
         
     | 
| 
      
 98 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 99 
     | 
    
         
            +
            #       "value": "He"
         
     | 
| 
      
 100 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 101 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 102 
     | 
    
         
            +
            #       "position": 1,
         
     | 
| 
      
 103 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 104 
     | 
    
         
            +
            #       "value": "el"
         
     | 
| 
      
 105 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 106 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 107 
     | 
    
         
            +
            #       "position": 2,
         
     | 
| 
      
 108 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 109 
     | 
    
         
            +
            #       "value": "ll"
         
     | 
| 
      
 110 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 111 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 112 
     | 
    
         
            +
            #       "position": 3,
         
     | 
| 
      
 113 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 114 
     | 
    
         
            +
            #       "value": "lo"
         
     | 
| 
      
 115 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 116 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 117 
     | 
    
         
            +
            #       "position": 4,
         
     | 
| 
      
 118 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 119 
     | 
    
         
            +
            #       "value": "o "
         
     | 
| 
      
 120 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 121 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 122 
     | 
    
         
            +
            #       "position": 5,
         
     | 
| 
      
 123 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 124 
     | 
    
         
            +
            #       "value": " W"
         
     | 
| 
      
 125 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 126 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 127 
     | 
    
         
            +
            #       "position": 6,
         
     | 
| 
      
 128 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 129 
     | 
    
         
            +
            #       "value": "Wo"
         
     | 
| 
      
 130 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 131 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 132 
     | 
    
         
            +
            #       "position": 7,
         
     | 
| 
      
 133 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 134 
     | 
    
         
            +
            #       "value": "or"
         
     | 
| 
      
 135 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 136 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 137 
     | 
    
         
            +
            #       "position": 8,
         
     | 
| 
      
 138 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 139 
     | 
    
         
            +
            #       "value": "rl"
         
     | 
| 
      
 140 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 141 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 142 
     | 
    
         
            +
            #       "position": 9,
         
     | 
| 
      
 143 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 144 
     | 
    
         
            +
            #       "value": "ld"
         
     | 
| 
      
 145 
     | 
    
         
            +
            #     },
         
     | 
| 
      
 146 
     | 
    
         
            +
            #     {
         
     | 
| 
      
 147 
     | 
    
         
            +
            #       "position": 10,
         
     | 
| 
      
 148 
     | 
    
         
            +
            #       "force_prefix": false,
         
     | 
| 
      
 149 
     | 
    
         
            +
            #       "value": "d"
         
     | 
| 
      
 150 
     | 
    
         
            +
            #     }
         
     | 
| 
      
 151 
     | 
    
         
            +
            #   ]
         
     | 
| 
      
 152 
     | 
    
         
            +
            # ]
         
     | 
| 
      
 153 
     | 
    
         
            +
            </pre></div>
         
     | 
| 
      
 154 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 155 
     | 
    
         
            +
            <p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
         
     | 
| 
      
 156 
     | 
    
         
            +
            <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
         
     | 
| 
      
 157 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 158 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 159 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">He</span></code></p></li>
         
     | 
| 
      
 160 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">el</span></code></p></li>
         
     | 
| 
      
 161 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">ll</span></code></p></li>
         
     | 
| 
      
 162 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">lo</span></code></p></li>
         
     | 
| 
      
 163 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">o_</span></code> ( <code class="docutils literal notranslate"><span class="pre">_</span></code> は空白文字という意味)</p></li>
         
     | 
| 
      
 164 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">_W</span></code> ( <code class="docutils literal notranslate"><span class="pre">_</span></code> は空白文字という意味)</p></li>
         
     | 
| 
      
 165 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">Wo</span></code></p></li>
         
     | 
| 
      
 166 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">or</span></code></p></li>
         
     | 
| 
      
 167 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">rl</span></code></p></li>
         
     | 
| 
      
 168 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">ld</span></code></p></li>
         
     | 
| 
      
 169 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 170 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 171 
     | 
    
         
            +
            <p>上記の例では、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
         
     | 
| 
      
 172 
     | 
    
         
            +
            <p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
         
     | 
| 
      
 173 
     | 
    
         
            +
            <blockquote>
         
     | 
| 
      
 174 
     | 
    
         
            +
            <div><ul class="simple">
         
     | 
| 
      
 175 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">Hello</span></code></p></li>
         
     | 
| 
      
 176 
     | 
    
         
            +
            <li><p><code class="docutils literal notranslate"><span class="pre">World</span></code></p></li>
         
     | 
| 
      
 177 
     | 
    
         
            +
            </ul>
         
     | 
| 
      
 178 
     | 
    
         
            +
            </div></blockquote>
         
     | 
| 
      
 179 
     | 
    
         
            +
            <p>上記の例では、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
         
     | 
| 
      
 180 
     | 
    
         
            +
            <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal notranslate"><span class="pre">ll</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal notranslate"><span class="pre">ll</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal notranslate"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal notranslate"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal notranslate"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
         
     | 
| 
      
 181 
     | 
    
         
            +
            <p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
         
     | 
| 
      
 182 
     | 
    
         
            +
            <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">or</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
         
     | 
| 
      
 183 
     | 
    
         
            +
            <p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal notranslate"><span class="pre">or</span></code> で <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">World</span></code> は <code class="docutils literal notranslate"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal notranslate"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
         
     | 
| 
      
 184 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
             
     | 
| 
      
 187 
     | 
    
         
            +
                      </div>
         
     | 
| 
      
 188 
     | 
    
         
            +
                    </div>
         
     | 
| 
      
 189 
     | 
    
         
            +
                  </div>
         
     | 
| 
      
 190 
     | 
    
         
            +
                  <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
         
     | 
| 
      
 191 
     | 
    
         
            +
                    <div class="sphinxsidebarwrapper">
         
     | 
| 
      
 192 
     | 
    
         
            +
              <h4>前のトピックへ</h4>
         
     | 
| 
      
 193 
     | 
    
         
            +
              <p class="topless"><a href="../tokenizers.html"
         
     | 
| 
      
 194 
     | 
    
         
            +
                                    title="前の章へ">7.8. トークナイザー</a></p>
         
     | 
| 
      
 195 
     | 
    
         
            +
              <h4>次のトピックへ</h4>
         
     | 
| 
      
 196 
     | 
    
         
            +
              <p class="topless"><a href="../tokenizers/token_bigram.html"
         
     | 
| 
      
 197 
     | 
    
         
            +
                                    title="次の章へ">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></p>
         
     | 
| 
      
 198 
     | 
    
         
            +
            <div id="searchbox" style="display: none" role="search">
         
     | 
| 
      
 199 
     | 
    
         
            +
              <h3>クイック検索</h3>
         
     | 
| 
      
 200 
     | 
    
         
            +
                <div class="searchformwrapper">
         
     | 
| 
      
 201 
     | 
    
         
            +
                <form class="search" action="../../search.html" method="get">
         
     | 
| 
      
 202 
     | 
    
         
            +
                  <input type="text" name="q" />
         
     | 
| 
      
 203 
     | 
    
         
            +
                  <input type="submit" value="検索" />
         
     | 
| 
      
 204 
     | 
    
         
            +
                </form>
         
     | 
| 
      
 205 
     | 
    
         
            +
                </div>
         
     | 
| 
      
 206 
     | 
    
         
            +
            </div>
         
     | 
| 
      
 207 
     | 
    
         
            +
            <script type="text/javascript">$('#searchbox').show(0);</script>
         
     | 
| 
      
 208 
     | 
    
         
            +
                    </div>
         
     | 
| 
      
 209 
     | 
    
         
            +
                  </div>
         
     | 
| 
      
 210 
     | 
    
         
            +
                  <div class="clearer"></div>
         
     | 
| 
      
 211 
     | 
    
         
            +
                </div>
         
     | 
| 
      
 212 
     | 
    
         
            +
                <div class="related" role="navigation" aria-label="related navigation">
         
     | 
| 
      
 213 
     | 
    
         
            +
                  <h3>ナビゲーション</h3>
         
     | 
| 
      
 214 
     | 
    
         
            +
                  <ul>
         
     | 
| 
      
 215 
     | 
    
         
            +
                    <li class="right" style="margin-right: 10px">
         
     | 
| 
      
 216 
     | 
    
         
            +
                      <a href="../../genindex.html" title="総合索引"
         
     | 
| 
      
 217 
     | 
    
         
            +
                         >索引</a></li>
         
     | 
| 
      
 218 
     | 
    
         
            +
                    <li class="right" >
         
     | 
| 
      
 219 
     | 
    
         
            +
                      <a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
         
     | 
| 
      
 220 
     | 
    
         
            +
                         >次へ</a> |</li>
         
     | 
| 
      
 221 
     | 
    
         
            +
                    <li class="right" >
         
     | 
| 
      
 222 
     | 
    
         
            +
                      <a href="../tokenizers.html" title="7.8. トークナイザー"
         
     | 
| 
      
 223 
     | 
    
         
            +
                         >前へ</a> |</li>
         
     | 
| 
      
 224 
     | 
    
         
            +
                    <li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2ドキュメント</a> »</li>
         
     | 
| 
      
 225 
     | 
    
         
            +
                      <li class="nav-item nav-item-1"><a href="../../reference.html" >7. リファレンスマニュアル</a> »</li>
         
     | 
| 
      
 226 
     | 
    
         
            +
                      <li class="nav-item nav-item-2"><a href="../tokenizers.html" >7.8. トークナイザー</a> »</li> 
         
     | 
| 
      
 227 
     | 
    
         
            +
                  </ul>
         
     | 
| 
      
 228 
     | 
    
         
            +
                </div>
         
     | 
| 
      
 229 
     | 
    
         
            +
                <div class="footer" role="contentinfo">
         
     | 
| 
      
 230 
     | 
    
         
            +
                    © Copyright 2009-2019, Brazil, Inc.
         
     | 
| 
      
 231 
     | 
    
         
            +
                </div>
         
     | 
| 
      
 232 
     | 
    
         
            +
              </body>
         
     | 
| 
      
 233 
     | 
    
         
            +
            </html>
         
     | 
| 
         @@ -1,35 +1,27 @@ 
     | 
|
| 
       1 
1 
     | 
    
         | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
            <!DOCTYPE html 
     | 
| 
       4 
     | 
    
         
            -
              "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
         
     | 
| 
      
 3 
     | 
    
         
            +
            <!DOCTYPE html>
         
     | 
| 
       5 
4 
     | 
    
         | 
| 
       6 
5 
     | 
    
         
             
            <html xmlns="http://www.w3.org/1999/xhtml" lang="ja">
         
     | 
| 
       7 
6 
     | 
    
         
             
              <head>
         
     | 
| 
       8 
     | 
    
         
            -
                <meta  
     | 
| 
       9 
     | 
    
         
            -
                <title>7.8. トークナイザー — Groonga  
     | 
| 
      
 7 
     | 
    
         
            +
                <meta charset="utf-8" />
         
     | 
| 
      
 8 
     | 
    
         
            +
                <title>7.8. トークナイザー — Groonga v9.0.2ドキュメント</title>
         
     | 
| 
       10 
9 
     | 
    
         
             
                <link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
         
     | 
| 
       11 
10 
     | 
    
         
             
                <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
         
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                    URL_ROOT:    '../',
         
     | 
| 
       15 
     | 
    
         
            -
                    VERSION:     '7.1.0-73-g6d02cfa',
         
     | 
| 
       16 
     | 
    
         
            -
                    COLLAPSE_INDEX: false,
         
     | 
| 
       17 
     | 
    
         
            -
                    FILE_SUFFIX: '.html',
         
     | 
| 
       18 
     | 
    
         
            -
                    HAS_SOURCE:  false,
         
     | 
| 
       19 
     | 
    
         
            -
                    SOURCELINK_SUFFIX: '.txt'
         
     | 
| 
       20 
     | 
    
         
            -
                  };
         
     | 
| 
       21 
     | 
    
         
            -
                </script>
         
     | 
| 
      
 11 
     | 
    
         
            +
                
         
     | 
| 
      
 12 
     | 
    
         
            +
                <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
         
     | 
| 
       22 
13 
     | 
    
         
             
                <script type="text/javascript" src="../_static/jquery.js"></script>
         
     | 
| 
       23 
14 
     | 
    
         
             
                <script type="text/javascript" src="../_static/underscore.js"></script>
         
     | 
| 
       24 
15 
     | 
    
         
             
                <script type="text/javascript" src="../_static/doctools.js"></script>
         
     | 
| 
      
 16 
     | 
    
         
            +
                <script type="text/javascript" src="../_static/language_data.js"></script>
         
     | 
| 
       25 
17 
     | 
    
         
             
                <script type="text/javascript" src="../_static/translations.js"></script>
         
     | 
| 
      
 18 
     | 
    
         
            +
                
         
     | 
| 
       26 
19 
     | 
    
         
             
                <link rel="shortcut icon" href="../_static/favicon.ico"/>
         
     | 
| 
       27 
20 
     | 
    
         
             
                <link rel="index" title="索引" href="../genindex.html" />
         
     | 
| 
       28 
21 
     | 
    
         
             
                <link rel="search" title="検索" href="../search.html" />
         
     | 
| 
       29 
     | 
    
         
            -
                <link rel="next" title="7. 
     | 
| 
       30 
     | 
    
         
            -
                <link rel="prev" title="7.7.  
     | 
| 
       31 
     | 
    
         
            -
              </head>
         
     | 
| 
       32 
     | 
    
         
            -
              <body>
         
     | 
| 
      
 22 
     | 
    
         
            +
                <link rel="next" title="7.8.1. 概要" href="tokenizer/summary.html" />
         
     | 
| 
      
 23 
     | 
    
         
            +
                <link rel="prev" title="7.7.2.3. NormalizerNFKC51" href="normalizers/normalizer_nfkc51.html" /> 
         
     | 
| 
      
 24 
     | 
    
         
            +
              </head><body>
         
     | 
| 
       33 
25 
     | 
    
         
             
            <div class="header">
         
     | 
| 
       34 
26 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       35 
27 
     | 
    
         
             
                <a id="top-link" href="../index.html">
         
     | 
| 
         @@ -54,12 +46,12 @@ 
     | 
|
| 
       54 
46 
     | 
    
         
             
                      <a href="../genindex.html" title="総合索引"
         
     | 
| 
       55 
47 
     | 
    
         
             
                         accesskey="I">索引</a></li>
         
     | 
| 
       56 
48 
     | 
    
         
             
                    <li class="right" >
         
     | 
| 
       57 
     | 
    
         
            -
                      <a href=" 
     | 
| 
      
 49 
     | 
    
         
            +
                      <a href="tokenizer/summary.html" title="7.8.1. 概要"
         
     | 
| 
       58 
50 
     | 
    
         
             
                         accesskey="N">次へ</a> |</li>
         
     | 
| 
       59 
51 
     | 
    
         
             
                    <li class="right" >
         
     | 
| 
       60 
     | 
    
         
            -
                      <a href="normalizers.html" title="7.7.  
     | 
| 
      
 52 
     | 
    
         
            +
                      <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
         
     | 
| 
       61 
53 
     | 
    
         
             
                         accesskey="P">前へ</a> |</li>
         
     | 
| 
       62 
     | 
    
         
            -
                    <li class="nav-item nav-item-0"><a href="../index.html">Groonga  
     | 
| 
      
 54 
     | 
    
         
            +
                    <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2ドキュメント</a> »</li>
         
     | 
| 
       63 
55 
     | 
    
         
             
                      <li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> »</li> 
         
     | 
| 
       64 
56 
     | 
    
         
             
                  </ul>
         
     | 
| 
       65 
57 
     | 
    
         
             
                </div>  
         
     | 
| 
         @@ -71,1297 +63,24 @@ 
     | 
|
| 
       71 
63 
     | 
    
         | 
| 
       72 
64 
     | 
    
         
             
              <div class="section" id="tokenizers">
         
     | 
| 
       73 
65 
     | 
    
         
             
            <h1>7.8. トークナイザー<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
         
     | 
| 
       74 
     | 
    
         
            -
            <div class=" 
     | 
| 
       75 
     | 
    
         
            -
            < 
     | 
| 
       76 
     | 
    
         
            -
            < 
     | 
| 
       77 
     | 
    
         
            -
            < 
     | 
| 
       78 
     | 
    
         
            -
            < 
     | 
| 
       79 
     | 
    
         
            -
            <li>< 
     | 
| 
       80 
     | 
    
         
            -
            < 
     | 
| 
       81 
     | 
    
         
            -
            <a class="reference internal 
     | 
| 
       82 
     | 
    
         
            -
            < 
     | 
| 
       83 
     | 
    
         
            -
            </ 
     | 
| 
       84 
     | 
    
         
            -
            </li>
         
     | 
| 
       85 
     | 
    
         
            -
            <li>< 
     | 
| 
       86 
     | 
    
         
            -
            < 
     | 
| 
       87 
     | 
    
         
            -
            <a class="reference internal 
     | 
| 
       88 
     | 
    
         
            -
            < 
     | 
| 
       89 
     | 
    
         
            -
            </ 
     | 
| 
       90 
     | 
    
         
            -
            </li>
         
     | 
| 
       91 
     | 
    
         
            -
            </ul>
         
     | 
| 
       92 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       93 
     | 
    
         
            -
            <p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
         
     | 
| 
       94 
     | 
    
         
            -
            <p>一般的に <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> を使うことをオススメします。</p>
         
     | 
| 
       95 
     | 
    
         
            -
            <p><a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><span class="doc">table_tokenize</span></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドを使って <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
         
     | 
| 
       96 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       97 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World"
         
     | 
| 
       98 
     | 
    
         
            -
            # [
         
     | 
| 
       99 
     | 
    
         
            -
            #   [
         
     | 
| 
       100 
     | 
    
         
            -
            #     0,
         
     | 
| 
       101 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       102 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       103 
     | 
    
         
            -
            #   ],
         
     | 
| 
       104 
     | 
    
         
            -
            #   [
         
     | 
| 
       105 
     | 
    
         
            -
            #     {
         
     | 
| 
       106 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       107 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       108 
     | 
    
         
            -
            #       "value": "He"
         
     | 
| 
       109 
     | 
    
         
            -
            #     },
         
     | 
| 
       110 
     | 
    
         
            -
            #     {
         
     | 
| 
       111 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       112 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       113 
     | 
    
         
            -
            #       "value": "el"
         
     | 
| 
       114 
     | 
    
         
            -
            #     },
         
     | 
| 
       115 
     | 
    
         
            -
            #     {
         
     | 
| 
       116 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       117 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       118 
     | 
    
         
            -
            #       "value": "ll"
         
     | 
| 
       119 
     | 
    
         
            -
            #     },
         
     | 
| 
       120 
     | 
    
         
            -
            #     {
         
     | 
| 
       121 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       122 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       123 
     | 
    
         
            -
            #       "value": "lo"
         
     | 
| 
       124 
     | 
    
         
            -
            #     },
         
     | 
| 
       125 
     | 
    
         
            -
            #     {
         
     | 
| 
       126 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       127 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       128 
     | 
    
         
            -
            #       "value": "o "
         
     | 
| 
       129 
     | 
    
         
            -
            #     },
         
     | 
| 
       130 
     | 
    
         
            -
            #     {
         
     | 
| 
       131 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       132 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       133 
     | 
    
         
            -
            #       "value": " W"
         
     | 
| 
       134 
     | 
    
         
            -
            #     },
         
     | 
| 
       135 
     | 
    
         
            -
            #     {
         
     | 
| 
       136 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       137 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       138 
     | 
    
         
            -
            #       "value": "Wo"
         
     | 
| 
       139 
     | 
    
         
            -
            #     },
         
     | 
| 
       140 
     | 
    
         
            -
            #     {
         
     | 
| 
       141 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       142 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       143 
     | 
    
         
            -
            #       "value": "or"
         
     | 
| 
       144 
     | 
    
         
            -
            #     },
         
     | 
| 
       145 
     | 
    
         
            -
            #     {
         
     | 
| 
       146 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       147 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       148 
     | 
    
         
            -
            #       "value": "rl"
         
     | 
| 
       149 
     | 
    
         
            -
            #     },
         
     | 
| 
       150 
     | 
    
         
            -
            #     {
         
     | 
| 
       151 
     | 
    
         
            -
            #       "position": 9,
         
     | 
| 
       152 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       153 
     | 
    
         
            -
            #       "value": "ld"
         
     | 
| 
       154 
     | 
    
         
            -
            #     },
         
     | 
| 
       155 
     | 
    
         
            -
            #     {
         
     | 
| 
       156 
     | 
    
         
            -
            #       "position": 10,
         
     | 
| 
       157 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       158 
     | 
    
         
            -
            #       "value": "d"
         
     | 
| 
       159 
     | 
    
         
            -
            #     }
         
     | 
| 
       160 
     | 
    
         
            -
            #   ]
         
     | 
| 
       161 
     | 
    
         
            -
            # ]
         
     | 
| 
       162 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       163 
     | 
    
         
            -
            </div>
         
     | 
| 
       164 
     | 
    
         
            -
            </div>
         
     | 
| 
       165 
     | 
    
         
            -
            <div class="section" id="what-is-tokenize">
         
     | 
| 
       166 
     | 
    
         
            -
            <h2>7.8.2. 「トークナイズ」とはなにか<a class="headerlink" href="#what-is-tokenize" title="このヘッドラインへのパーマリンク">¶</a></h2>
         
     | 
| 
       167 
     | 
    
         
            -
            <p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
         
     | 
| 
       168 
     | 
    
         
            -
            <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
         
     | 
| 
       169 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       170 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       171 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">He</span></code></li>
         
     | 
| 
       172 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">el</span></code></li>
         
     | 
| 
       173 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">ll</span></code></li>
         
     | 
| 
       174 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">lo</span></code></li>
         
     | 
| 
       175 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">o_</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</li>
         
     | 
| 
       176 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">_W</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</li>
         
     | 
| 
       177 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">Wo</span></code></li>
         
     | 
| 
       178 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">or</span></code></li>
         
     | 
| 
       179 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">rl</span></code></li>
         
     | 
| 
       180 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">ld</span></code></li>
         
     | 
| 
       181 
     | 
    
         
            -
            </ul>
         
     | 
| 
       182 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       183 
     | 
    
         
            -
            <p>上記の例では、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
         
     | 
| 
       184 
     | 
    
         
            -
            <p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
         
     | 
| 
       185 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       186 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       187 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">Hello</span></code></li>
         
     | 
| 
       188 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">World</span></code></li>
         
     | 
| 
       189 
     | 
    
         
            -
            </ul>
         
     | 
| 
       190 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       191 
     | 
    
         
            -
            <p>上記の例では、<code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
         
     | 
| 
       192 
     | 
    
         
            -
            <p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
         
     | 
| 
       193 
     | 
    
         
            -
            <p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
         
     | 
| 
       194 
     | 
    
         
            -
            <p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
         
     | 
| 
       195 
     | 
    
         
            -
            <p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
         
     | 
| 
       196 
     | 
    
         
            -
            </div>
         
     | 
| 
       197 
     | 
    
         
            -
            <div class="section" id="built-in-tokenizsers">
         
     | 
| 
       198 
     | 
    
         
            -
            <h2>7.8.3. 組み込みトークナイザー<a class="headerlink" href="#built-in-tokenizsers" title="このヘッドラインへのパーマリンク">¶</a></h2>
         
     | 
| 
       199 
     | 
    
         
            -
            <p>以下は組み込みのトークナイザーのリストです。</p>
         
     | 
| 
       200 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       201 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       202 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
         
     | 
| 
       203 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
         
     | 
| 
       204 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
         
     | 
| 
       205 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
         
     | 
| 
       206 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
         
     | 
| 
       207 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
         
     | 
| 
       208 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></li>
         
     | 
| 
       209 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></li>
         
     | 
| 
       210 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
         
     | 
| 
       211 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
         
     | 
| 
       212 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
         
     | 
| 
       213 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
         
     | 
| 
       214 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
         
     | 
| 
       215 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
         
     | 
| 
       216 
     | 
    
         
            -
            </ul>
         
     | 
| 
       217 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       218 
     | 
    
         
            -
            <div class="section" id="tokenbigram">
         
     | 
| 
       219 
     | 
    
         
            -
            <span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       220 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
         
     | 
| 
       221 
     | 
    
         
            -
            <p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <code class="docutils literal"><span class="pre">Hello</span></code> は次のトークンにトークナイズします。</p>
         
     | 
| 
       222 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       223 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       224 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">He</span></code></li>
         
     | 
| 
       225 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">el</span></code></li>
         
     | 
| 
       226 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">ll</span></code></li>
         
     | 
| 
       227 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">lo</span></code></li>
         
     | 
| 
       228 
     | 
    
         
            -
            </ul>
         
     | 
| 
       229 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       230 
     | 
    
         
            -
            <p>バイグラムというトークナイズ方法は再現率に優れています。なぜなら、2文字以上の文字のクエリーに対してはすべてのテキストを見つけることができるからです。</p>
         
     | 
| 
       231 
     | 
    
         
            -
            <p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <code class="docutils literal"><span class="pre">l</span></code> というクエリーから <code class="docutils literal"><span class="pre">ll</span></code> というトークンと <code class="docutils literal"><span class="pre">lo</span></code> というトークンを見つけることができます。</p>
         
     | 
| 
       232 
     | 
    
         
            -
            <p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">world</span></code> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <code class="docutils literal"><span class="pre">TokenBigram</span></code> はこの問題を解決しています。</p>
         
     | 
| 
       233 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動は <a class="reference internal" href="normalizers.html"><span class="doc">ノーマライザー</span></a> を使うかどうかで変わります。</p>
         
     | 
| 
       234 
     | 
    
         
            -
            <p>ノーマライザーを使っていない場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
         
     | 
| 
       235 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       236 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World"
         
     | 
| 
       237 
     | 
    
         
            -
            # [
         
     | 
| 
       238 
     | 
    
         
            -
            #   [
         
     | 
| 
       239 
     | 
    
         
            -
            #     0,
         
     | 
| 
       240 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       241 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       242 
     | 
    
         
            -
            #   ],
         
     | 
| 
       243 
     | 
    
         
            -
            #   [
         
     | 
| 
       244 
     | 
    
         
            -
            #     {
         
     | 
| 
       245 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       246 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       247 
     | 
    
         
            -
            #       "value": "He"
         
     | 
| 
       248 
     | 
    
         
            -
            #     },
         
     | 
| 
       249 
     | 
    
         
            -
            #     {
         
     | 
| 
       250 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       251 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       252 
     | 
    
         
            -
            #       "value": "el"
         
     | 
| 
       253 
     | 
    
         
            -
            #     },
         
     | 
| 
       254 
     | 
    
         
            -
            #     {
         
     | 
| 
       255 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       256 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       257 
     | 
    
         
            -
            #       "value": "ll"
         
     | 
| 
       258 
     | 
    
         
            -
            #     },
         
     | 
| 
       259 
     | 
    
         
            -
            #     {
         
     | 
| 
       260 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       261 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       262 
     | 
    
         
            -
            #       "value": "lo"
         
     | 
| 
       263 
     | 
    
         
            -
            #     },
         
     | 
| 
       264 
     | 
    
         
            -
            #     {
         
     | 
| 
       265 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       266 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       267 
     | 
    
         
            -
            #       "value": "o "
         
     | 
| 
       268 
     | 
    
         
            -
            #     },
         
     | 
| 
       269 
     | 
    
         
            -
            #     {
         
     | 
| 
       270 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       271 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       272 
     | 
    
         
            -
            #       "value": " W"
         
     | 
| 
       273 
     | 
    
         
            -
            #     },
         
     | 
| 
       274 
     | 
    
         
            -
            #     {
         
     | 
| 
       275 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       276 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       277 
     | 
    
         
            -
            #       "value": "Wo"
         
     | 
| 
       278 
     | 
    
         
            -
            #     },
         
     | 
| 
       279 
     | 
    
         
            -
            #     {
         
     | 
| 
       280 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       281 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       282 
     | 
    
         
            -
            #       "value": "or"
         
     | 
| 
       283 
     | 
    
         
            -
            #     },
         
     | 
| 
       284 
     | 
    
         
            -
            #     {
         
     | 
| 
       285 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       286 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       287 
     | 
    
         
            -
            #       "value": "rl"
         
     | 
| 
       288 
     | 
    
         
            -
            #     },
         
     | 
| 
       289 
     | 
    
         
            -
            #     {
         
     | 
| 
       290 
     | 
    
         
            -
            #       "position": 9,
         
     | 
| 
       291 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       292 
     | 
    
         
            -
            #       "value": "ld"
         
     | 
| 
       293 
     | 
    
         
            -
            #     },
         
     | 
| 
       294 
     | 
    
         
            -
            #     {
         
     | 
| 
       295 
     | 
    
         
            -
            #       "position": 10,
         
     | 
| 
       296 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       297 
     | 
    
         
            -
            #       "value": "d"
         
     | 
| 
       298 
     | 
    
         
            -
            #     }
         
     | 
| 
       299 
     | 
    
         
            -
            #   ]
         
     | 
| 
       300 
     | 
    
         
            -
            # ]
         
     | 
| 
       301 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       302 
     | 
    
         
            -
            </div>
         
     | 
| 
       303 
     | 
    
         
            -
            <p>ノーマライザーを使っている場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
         
     | 
| 
       304 
     | 
    
         
            -
            <p>もしかしたら、複数の方法が混ざったこの挙動はわかりにくいかもしれません。しかし、英語のテキスト(ASCII文字列のみ)や日本語テキスト(ASCII文字列と非ASCII文字列が混ざっている)のような多くのユースケースでは合理的な方法です。</p>
         
     | 
| 
       305 
     | 
    
         
            -
            <p>ASCII文字しか使わない多くの言語は単語の区切りに空白文字を使っています。このようなケースに空白区切りのトークナイズ方法は適切です。</p>
         
     | 
| 
       306 
     | 
    
         
            -
            <p>非ASCII文字を使う言語では単語の区切りに空白文字を使いません。このケースにはバイグラムなトークナイズ方法は適切です。</p>
         
     | 
| 
       307 
     | 
    
         
            -
            <p>複数の言語が混ざっている場合は、複数の方法を組み合わせたトークナイズ方法が適切です。</p>
         
     | 
| 
       308 
     | 
    
         
            -
            <p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span class="std std-ref">TokenBigramSplitSymbolAlpha</span></a> のような <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> というトークナイザーを参照してください。</p>
         
     | 
| 
       309 
     | 
    
         
            -
            <p>例を使いながら <code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動を確認しましょう。</p>
         
     | 
| 
       310 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
         
     | 
| 
       311 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       312 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World" NormalizerAuto
         
     | 
| 
       313 
     | 
    
         
            -
            # [
         
     | 
| 
       314 
     | 
    
         
            -
            #   [
         
     | 
| 
       315 
     | 
    
         
            -
            #     0,
         
     | 
| 
       316 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       317 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       318 
     | 
    
         
            -
            #   ],
         
     | 
| 
       319 
     | 
    
         
            -
            #   [
         
     | 
| 
       320 
     | 
    
         
            -
            #     {
         
     | 
| 
       321 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       322 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       323 
     | 
    
         
            -
            #       "value": "hello"
         
     | 
| 
       324 
     | 
    
         
            -
            #     },
         
     | 
| 
       325 
     | 
    
         
            -
            #     {
         
     | 
| 
       326 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       327 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       328 
     | 
    
         
            -
            #       "value": "world"
         
     | 
| 
       329 
     | 
    
         
            -
            #     }
         
     | 
| 
       330 
     | 
    
         
            -
            #   ]
         
     | 
| 
       331 
     | 
    
         
            -
            # ]
         
     | 
| 
       332 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       333 
     | 
    
         
            -
            </div>
         
     | 
| 
       334 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
         
     | 
| 
       335 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       336 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       337 
     | 
    
         
            -
            <li>アルファベット</li>
         
     | 
| 
       338 
     | 
    
         
            -
            <li>数字</li>
         
     | 
| 
       339 
     | 
    
         
            -
            <li>記号(たとえば <code class="docutils literal"><span class="pre">(</span></code> 、 <code class="docutils literal"><span class="pre">)</span></code> 、 <code class="docutils literal"><span class="pre">!</span></code> など)</li>
         
     | 
| 
       340 
     | 
    
         
            -
            <li>ひらがな</li>
         
     | 
| 
       341 
     | 
    
         
            -
            <li>カタカナ</li>
         
     | 
| 
       342 
     | 
    
         
            -
            <li>漢字</li>
         
     | 
| 
       343 
     | 
    
         
            -
            <li>その他</li>
         
     | 
| 
       344 
     | 
    
         
            -
            </ul>
         
     | 
| 
       345 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       346 
     | 
    
         
            -
            <p>次の例は2つのトークン区切りを示しています。</p>
         
     | 
| 
       347 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       348 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       349 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">100</span></code> (数字)と <code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)の間のところ</li>
         
     | 
| 
       350 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)と <code class="docutils literal"><span class="pre">!!!</span></code> (記号)の間のところ</li>
         
     | 
| 
       351 
     | 
    
         
            -
            </ul>
         
     | 
| 
       352 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       353 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       354 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "100cents!!!" NormalizerAuto
         
     | 
| 
       355 
     | 
    
         
            -
            # [
         
     | 
| 
       356 
     | 
    
         
            -
            #   [
         
     | 
| 
       357 
     | 
    
         
            -
            #     0,
         
     | 
| 
       358 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       359 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       360 
     | 
    
         
            -
            #   ],
         
     | 
| 
       361 
     | 
    
         
            -
            #   [
         
     | 
| 
       362 
     | 
    
         
            -
            #     {
         
     | 
| 
       363 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       364 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       365 
     | 
    
         
            -
            #       "value": "100"
         
     | 
| 
       366 
     | 
    
         
            -
            #     },
         
     | 
| 
       367 
     | 
    
         
            -
            #     {
         
     | 
| 
       368 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       369 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       370 
     | 
    
         
            -
            #       "value": "cents"
         
     | 
| 
       371 
     | 
    
         
            -
            #     },
         
     | 
| 
       372 
     | 
    
         
            -
            #     {
         
     | 
| 
       373 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       374 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       375 
     | 
    
         
            -
            #       "value": "!!!"
         
     | 
| 
       376 
     | 
    
         
            -
            #     }
         
     | 
| 
       377 
     | 
    
         
            -
            #   ]
         
     | 
| 
       378 
     | 
    
         
            -
            # ]
         
     | 
| 
       379 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       380 
     | 
    
         
            -
            </div>
         
     | 
| 
       381 
     | 
    
         
            -
            <p>以下は <code class="docutils literal"><span class="pre">TokenBigram</span></code> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
         
     | 
| 
       382 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       383 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "日本語の勉強" NormalizerAuto
         
     | 
| 
       384 
     | 
    
         
            -
            # [
         
     | 
| 
       385 
     | 
    
         
            -
            #   [
         
     | 
| 
       386 
     | 
    
         
            -
            #     0,
         
     | 
| 
       387 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       388 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       389 
     | 
    
         
            -
            #   ],
         
     | 
| 
       390 
     | 
    
         
            -
            #   [
         
     | 
| 
       391 
     | 
    
         
            -
            #     {
         
     | 
| 
       392 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       393 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       394 
     | 
    
         
            -
            #       "value": "日本"
         
     | 
| 
       395 
     | 
    
         
            -
            #     },
         
     | 
| 
       396 
     | 
    
         
            -
            #     {
         
     | 
| 
       397 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       398 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       399 
     | 
    
         
            -
            #       "value": "本語"
         
     | 
| 
       400 
     | 
    
         
            -
            #     },
         
     | 
| 
       401 
     | 
    
         
            -
            #     {
         
     | 
| 
       402 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       403 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       404 
     | 
    
         
            -
            #       "value": "語の"
         
     | 
| 
       405 
     | 
    
         
            -
            #     },
         
     | 
| 
       406 
     | 
    
         
            -
            #     {
         
     | 
| 
       407 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       408 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       409 
     | 
    
         
            -
            #       "value": "の勉"
         
     | 
| 
       410 
     | 
    
         
            -
            #     },
         
     | 
| 
       411 
     | 
    
         
            -
            #     {
         
     | 
| 
       412 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       413 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       414 
     | 
    
         
            -
            #       "value": "勉強"
         
     | 
| 
       415 
     | 
    
         
            -
            #     },
         
     | 
| 
       416 
     | 
    
         
            -
            #     {
         
     | 
| 
       417 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       418 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       419 
     | 
    
         
            -
            #       "value": "強"
         
     | 
| 
       420 
     | 
    
         
            -
            #     }
         
     | 
| 
       421 
     | 
    
         
            -
            #   ]
         
     | 
| 
       422 
     | 
    
         
            -
            # ]
         
     | 
| 
       423 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       424 
     | 
    
         
            -
            </div>
         
     | 
| 
       425 
     | 
    
         
            -
            </div>
         
     | 
| 
       426 
     | 
    
         
            -
            <div class="section" id="tokenbigramsplitsymbol">
         
     | 
| 
       427 
     | 
    
         
            -
            <span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       428 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は記号のトークナイズ方法にバイグラムを使います。</p>
         
     | 
| 
       429 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       430 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
         
     | 
| 
       431 
     | 
    
         
            -
            # [
         
     | 
| 
       432 
     | 
    
         
            -
            #   [
         
     | 
| 
       433 
     | 
    
         
            -
            #     0,
         
     | 
| 
       434 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       435 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       436 
     | 
    
         
            -
            #   ],
         
     | 
| 
       437 
     | 
    
         
            -
            #   [
         
     | 
| 
       438 
     | 
    
         
            -
            #     {
         
     | 
| 
       439 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       440 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       441 
     | 
    
         
            -
            #       "value": "100"
         
     | 
| 
       442 
     | 
    
         
            -
            #     },
         
     | 
| 
       443 
     | 
    
         
            -
            #     {
         
     | 
| 
       444 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       445 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       446 
     | 
    
         
            -
            #       "value": "cents"
         
     | 
| 
       447 
     | 
    
         
            -
            #     },
         
     | 
| 
       448 
     | 
    
         
            -
            #     {
         
     | 
| 
       449 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       450 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       451 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       452 
     | 
    
         
            -
            #     },
         
     | 
| 
       453 
     | 
    
         
            -
            #     {
         
     | 
| 
       454 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       455 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       456 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       457 
     | 
    
         
            -
            #     },
         
     | 
| 
       458 
     | 
    
         
            -
            #     {
         
     | 
| 
       459 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       460 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       461 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       462 
     | 
    
         
            -
            #     }
         
     | 
| 
       463 
     | 
    
         
            -
            #   ]
         
     | 
| 
       464 
     | 
    
         
            -
            # ]
         
     | 
| 
       465 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       466 
     | 
    
         
            -
            </div>
         
     | 
| 
       467 
     | 
    
         
            -
            </div>
         
     | 
| 
       468 
     | 
    
         
            -
            <div class="section" id="tokenbigramsplitsymbolalpha">
         
     | 
| 
       469 
     | 
    
         
            -
            <span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       470 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号とアルファベットの扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
         
     | 
| 
       471 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       472 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
         
     | 
| 
       473 
     | 
    
         
            -
            # [
         
     | 
| 
       474 
     | 
    
         
            -
            #   [
         
     | 
| 
       475 
     | 
    
         
            -
            #     0,
         
     | 
| 
       476 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       477 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       478 
     | 
    
         
            -
            #   ],
         
     | 
| 
       479 
     | 
    
         
            -
            #   [
         
     | 
| 
       480 
     | 
    
         
            -
            #     {
         
     | 
| 
       481 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       482 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       483 
     | 
    
         
            -
            #       "value": "100"
         
     | 
| 
       484 
     | 
    
         
            -
            #     },
         
     | 
| 
       485 
     | 
    
         
            -
            #     {
         
     | 
| 
       486 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       487 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       488 
     | 
    
         
            -
            #       "value": "ce"
         
     | 
| 
       489 
     | 
    
         
            -
            #     },
         
     | 
| 
       490 
     | 
    
         
            -
            #     {
         
     | 
| 
       491 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       492 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       493 
     | 
    
         
            -
            #       "value": "en"
         
     | 
| 
       494 
     | 
    
         
            -
            #     },
         
     | 
| 
       495 
     | 
    
         
            -
            #     {
         
     | 
| 
       496 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       497 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       498 
     | 
    
         
            -
            #       "value": "nt"
         
     | 
| 
       499 
     | 
    
         
            -
            #     },
         
     | 
| 
       500 
     | 
    
         
            -
            #     {
         
     | 
| 
       501 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       502 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       503 
     | 
    
         
            -
            #       "value": "ts"
         
     | 
| 
       504 
     | 
    
         
            -
            #     },
         
     | 
| 
       505 
     | 
    
         
            -
            #     {
         
     | 
| 
       506 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       507 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       508 
     | 
    
         
            -
            #       "value": "s!"
         
     | 
| 
       509 
     | 
    
         
            -
            #     },
         
     | 
| 
       510 
     | 
    
         
            -
            #     {
         
     | 
| 
       511 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       512 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       513 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       514 
     | 
    
         
            -
            #     },
         
     | 
| 
       515 
     | 
    
         
            -
            #     {
         
     | 
| 
       516 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       517 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       518 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       519 
     | 
    
         
            -
            #     },
         
     | 
| 
       520 
     | 
    
         
            -
            #     {
         
     | 
| 
       521 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       522 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       523 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       524 
     | 
    
         
            -
            #     }
         
     | 
| 
       525 
     | 
    
         
            -
            #   ]
         
     | 
| 
       526 
     | 
    
         
            -
            # ]
         
     | 
| 
       527 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       528 
     | 
    
         
            -
            </div>
         
     | 
| 
       529 
     | 
    
         
            -
            </div>
         
     | 
| 
       530 
     | 
    
         
            -
            <div class="section" id="tokenbigramsplitsymbolalphadigit">
         
     | 
| 
       531 
     | 
    
         
            -
            <span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       532 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
         
     | 
| 
       533 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       534 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
         
     | 
| 
       535 
     | 
    
         
            -
            # [
         
     | 
| 
       536 
     | 
    
         
            -
            #   [
         
     | 
| 
       537 
     | 
    
         
            -
            #     0,
         
     | 
| 
       538 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       539 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       540 
     | 
    
         
            -
            #   ],
         
     | 
| 
       541 
     | 
    
         
            -
            #   [
         
     | 
| 
       542 
     | 
    
         
            -
            #     {
         
     | 
| 
       543 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       544 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       545 
     | 
    
         
            -
            #       "value": "10"
         
     | 
| 
       546 
     | 
    
         
            -
            #     },
         
     | 
| 
       547 
     | 
    
         
            -
            #     {
         
     | 
| 
       548 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       549 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       550 
     | 
    
         
            -
            #       "value": "00"
         
     | 
| 
       551 
     | 
    
         
            -
            #     },
         
     | 
| 
       552 
     | 
    
         
            -
            #     {
         
     | 
| 
       553 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       554 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       555 
     | 
    
         
            -
            #       "value": "0c"
         
     | 
| 
       556 
     | 
    
         
            -
            #     },
         
     | 
| 
       557 
     | 
    
         
            -
            #     {
         
     | 
| 
       558 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       559 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       560 
     | 
    
         
            -
            #       "value": "ce"
         
     | 
| 
       561 
     | 
    
         
            -
            #     },
         
     | 
| 
       562 
     | 
    
         
            -
            #     {
         
     | 
| 
       563 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       564 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       565 
     | 
    
         
            -
            #       "value": "en"
         
     | 
| 
       566 
     | 
    
         
            -
            #     },
         
     | 
| 
       567 
     | 
    
         
            -
            #     {
         
     | 
| 
       568 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       569 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       570 
     | 
    
         
            -
            #       "value": "nt"
         
     | 
| 
       571 
     | 
    
         
            -
            #     },
         
     | 
| 
       572 
     | 
    
         
            -
            #     {
         
     | 
| 
       573 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       574 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       575 
     | 
    
         
            -
            #       "value": "ts"
         
     | 
| 
       576 
     | 
    
         
            -
            #     },
         
     | 
| 
       577 
     | 
    
         
            -
            #     {
         
     | 
| 
       578 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       579 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       580 
     | 
    
         
            -
            #       "value": "s!"
         
     | 
| 
       581 
     | 
    
         
            -
            #     },
         
     | 
| 
       582 
     | 
    
         
            -
            #     {
         
     | 
| 
       583 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       584 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       585 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       586 
     | 
    
         
            -
            #     },
         
     | 
| 
       587 
     | 
    
         
            -
            #     {
         
     | 
| 
       588 
     | 
    
         
            -
            #       "position": 9,
         
     | 
| 
       589 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       590 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       591 
     | 
    
         
            -
            #     },
         
     | 
| 
       592 
     | 
    
         
            -
            #     {
         
     | 
| 
       593 
     | 
    
         
            -
            #       "position": 10,
         
     | 
| 
       594 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       595 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       596 
     | 
    
         
            -
            #     }
         
     | 
| 
       597 
     | 
    
         
            -
            #   ]
         
     | 
| 
       598 
     | 
    
         
            -
            # ]
         
     | 
| 
       599 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       600 
     | 
    
         
            -
            </div>
         
     | 
| 
       601 
     | 
    
         
            -
            </div>
         
     | 
| 
       602 
     | 
    
         
            -
            <div class="section" id="tokenbigramignoreblank">
         
     | 
| 
       603 
     | 
    
         
            -
            <span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       604 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは空白文字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
         
     | 
| 
       605 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
         
     | 
| 
       606 
     | 
    
         
            -
            <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
         
     | 
| 
       607 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       608 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
       609 
     | 
    
         
            -
            # [
         
     | 
| 
       610 
     | 
    
         
            -
            #   [
         
     | 
| 
       611 
     | 
    
         
            -
            #     0,
         
     | 
| 
       612 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       613 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       614 
     | 
    
         
            -
            #   ],
         
     | 
| 
       615 
     | 
    
         
            -
            #   [
         
     | 
| 
       616 
     | 
    
         
            -
            #     {
         
     | 
| 
       617 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       618 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       619 
     | 
    
         
            -
            #       "value": "日"
         
     | 
| 
       620 
     | 
    
         
            -
            #     },
         
     | 
| 
       621 
     | 
    
         
            -
            #     {
         
     | 
| 
       622 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       623 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       624 
     | 
    
         
            -
            #       "value": "本"
         
     | 
| 
       625 
     | 
    
         
            -
            #     },
         
     | 
| 
       626 
     | 
    
         
            -
            #     {
         
     | 
| 
       627 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       628 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       629 
     | 
    
         
            -
            #       "value": "語"
         
     | 
| 
       630 
     | 
    
         
            -
            #     },
         
     | 
| 
       631 
     | 
    
         
            -
            #     {
         
     | 
| 
       632 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       633 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       634 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       635 
     | 
    
         
            -
            #     },
         
     | 
| 
       636 
     | 
    
         
            -
            #     {
         
     | 
| 
       637 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       638 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       639 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       640 
     | 
    
         
            -
            #     },
         
     | 
| 
       641 
     | 
    
         
            -
            #     {
         
     | 
| 
       642 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       643 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       644 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       645 
     | 
    
         
            -
            #     }
         
     | 
| 
       646 
     | 
    
         
            -
            #   ]
         
     | 
| 
       647 
     | 
    
         
            -
            # ]
         
     | 
| 
       648 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       649 
     | 
    
         
            -
            </div>
         
     | 
| 
       650 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> での実行結果です。</p>
         
     | 
| 
       651 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       652 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
       653 
     | 
    
         
            -
            # [
         
     | 
| 
       654 
     | 
    
         
            -
            #   [
         
     | 
| 
       655 
     | 
    
         
            -
            #     0,
         
     | 
| 
       656 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       657 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       658 
     | 
    
         
            -
            #   ],
         
     | 
| 
       659 
     | 
    
         
            -
            #   [
         
     | 
| 
       660 
     | 
    
         
            -
            #     {
         
     | 
| 
       661 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       662 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       663 
     | 
    
         
            -
            #       "value": "日本"
         
     | 
| 
       664 
     | 
    
         
            -
            #     },
         
     | 
| 
       665 
     | 
    
         
            -
            #     {
         
     | 
| 
       666 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       667 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       668 
     | 
    
         
            -
            #       "value": "本語"
         
     | 
| 
       669 
     | 
    
         
            -
            #     },
         
     | 
| 
       670 
     | 
    
         
            -
            #     {
         
     | 
| 
       671 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       672 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       673 
     | 
    
         
            -
            #       "value": "語"
         
     | 
| 
       674 
     | 
    
         
            -
            #     },
         
     | 
| 
       675 
     | 
    
         
            -
            #     {
         
     | 
| 
       676 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       677 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       678 
     | 
    
         
            -
            #       "value": "!!!"
         
     | 
| 
       679 
     | 
    
         
            -
            #     }
         
     | 
| 
       680 
     | 
    
         
            -
            #   ]
         
     | 
| 
       681 
     | 
    
         
            -
            # ]
         
     | 
| 
       682 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       683 
     | 
    
         
            -
            </div>
         
     | 
| 
       684 
     | 
    
         
            -
            </div>
         
     | 
| 
       685 
     | 
    
         
            -
            <div class="section" id="tokenbigramignoreblanksplitsymbol">
         
     | 
| 
       686 
     | 
    
         
            -
            <span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       687 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
         
     | 
| 
       688 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       689 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       690 
     | 
    
         
            -
            <li>空白文字の扱い</li>
         
     | 
| 
       691 
     | 
    
         
            -
            <li>記号の扱い</li>
         
     | 
| 
       692 
     | 
    
         
            -
            </ul>
         
     | 
| 
       693 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       694 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
         
     | 
| 
       695 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は記号をバイグラムでトークナイズします。</p>
         
     | 
| 
       696 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
         
     | 
| 
       697 
     | 
    
         
            -
            <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
         
     | 
| 
       698 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       699 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
       700 
     | 
    
         
            -
            # [
         
     | 
| 
       701 
     | 
    
         
            -
            #   [
         
     | 
| 
       702 
     | 
    
         
            -
            #     0,
         
     | 
| 
       703 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       704 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       705 
     | 
    
         
            -
            #   ],
         
     | 
| 
       706 
     | 
    
         
            -
            #   [
         
     | 
| 
       707 
     | 
    
         
            -
            #     {
         
     | 
| 
       708 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       709 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       710 
     | 
    
         
            -
            #       "value": "日"
         
     | 
| 
       711 
     | 
    
         
            -
            #     },
         
     | 
| 
       712 
     | 
    
         
            -
            #     {
         
     | 
| 
       713 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       714 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       715 
     | 
    
         
            -
            #       "value": "本"
         
     | 
| 
       716 
     | 
    
         
            -
            #     },
         
     | 
| 
       717 
     | 
    
         
            -
            #     {
         
     | 
| 
       718 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       719 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       720 
     | 
    
         
            -
            #       "value": "語"
         
     | 
| 
       721 
     | 
    
         
            -
            #     },
         
     | 
| 
       722 
     | 
    
         
            -
            #     {
         
     | 
| 
       723 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       724 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       725 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       726 
     | 
    
         
            -
            #     },
         
     | 
| 
       727 
     | 
    
         
            -
            #     {
         
     | 
| 
       728 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       729 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       730 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       731 
     | 
    
         
            -
            #     },
         
     | 
| 
       732 
     | 
    
         
            -
            #     {
         
     | 
| 
       733 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       734 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       735 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       736 
     | 
    
         
            -
            #     }
         
     | 
| 
       737 
     | 
    
         
            -
            #   ]
         
     | 
| 
       738 
     | 
    
         
            -
            # ]
         
     | 
| 
       739 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       740 
     | 
    
         
            -
            </div>
         
     | 
| 
       741 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> の実行結果です。</p>
         
     | 
| 
       742 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       743 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
       744 
     | 
    
         
            -
            # [
         
     | 
| 
       745 
     | 
    
         
            -
            #   [
         
     | 
| 
       746 
     | 
    
         
            -
            #     0,
         
     | 
| 
       747 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       748 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       749 
     | 
    
         
            -
            #   ],
         
     | 
| 
       750 
     | 
    
         
            -
            #   [
         
     | 
| 
       751 
     | 
    
         
            -
            #     {
         
     | 
| 
       752 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       753 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       754 
     | 
    
         
            -
            #       "value": "日本"
         
     | 
| 
       755 
     | 
    
         
            -
            #     },
         
     | 
| 
       756 
     | 
    
         
            -
            #     {
         
     | 
| 
       757 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       758 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       759 
     | 
    
         
            -
            #       "value": "本語"
         
     | 
| 
       760 
     | 
    
         
            -
            #     },
         
     | 
| 
       761 
     | 
    
         
            -
            #     {
         
     | 
| 
       762 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       763 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       764 
     | 
    
         
            -
            #       "value": "語!"
         
     | 
| 
       765 
     | 
    
         
            -
            #     },
         
     | 
| 
       766 
     | 
    
         
            -
            #     {
         
     | 
| 
       767 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       768 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       769 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       770 
     | 
    
         
            -
            #     },
         
     | 
| 
       771 
     | 
    
         
            -
            #     {
         
     | 
| 
       772 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       773 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       774 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       775 
     | 
    
         
            -
            #     },
         
     | 
| 
       776 
     | 
    
         
            -
            #     {
         
     | 
| 
       777 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       778 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       779 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       780 
     | 
    
         
            -
            #     }
         
     | 
| 
       781 
     | 
    
         
            -
            #   ]
         
     | 
| 
       782 
     | 
    
         
            -
            # ]
         
     | 
| 
       783 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       784 
     | 
    
         
            -
            </div>
         
     | 
| 
       785 
     | 
    
         
            -
            </div>
         
     | 
| 
       786 
     | 
    
         
            -
            <div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
         
     | 
| 
       787 
     | 
    
         
            -
            <span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       788 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
         
     | 
| 
       789 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       790 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       791 
     | 
    
         
            -
            <li>空白文字の扱い</li>
         
     | 
| 
       792 
     | 
    
         
            -
            <li>記号とアルファベットの扱い</li>
         
     | 
| 
       793 
     | 
    
         
            -
            </ul>
         
     | 
| 
       794 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       795 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
         
     | 
| 
       796 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は記号とアルファベットをバイグラムでトークナイズします。</p>
         
     | 
| 
       797 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
         
     | 
| 
       798 
     | 
    
         
            -
            <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
         
     | 
| 
       799 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       800 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
       801 
     | 
    
         
            -
            # [
         
     | 
| 
       802 
     | 
    
         
            -
            #   [
         
     | 
| 
       803 
     | 
    
         
            -
            #     0,
         
     | 
| 
       804 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       805 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       806 
     | 
    
         
            -
            #   ],
         
     | 
| 
       807 
     | 
    
         
            -
            #   [
         
     | 
| 
       808 
     | 
    
         
            -
            #     {
         
     | 
| 
       809 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       810 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       811 
     | 
    
         
            -
            #       "value": "hello"
         
     | 
| 
       812 
     | 
    
         
            -
            #     },
         
     | 
| 
       813 
     | 
    
         
            -
            #     {
         
     | 
| 
       814 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       815 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       816 
     | 
    
         
            -
            #       "value": "日"
         
     | 
| 
       817 
     | 
    
         
            -
            #     },
         
     | 
| 
       818 
     | 
    
         
            -
            #     {
         
     | 
| 
       819 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       820 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       821 
     | 
    
         
            -
            #       "value": "本"
         
     | 
| 
       822 
     | 
    
         
            -
            #     },
         
     | 
| 
       823 
     | 
    
         
            -
            #     {
         
     | 
| 
       824 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       825 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       826 
     | 
    
         
            -
            #       "value": "語"
         
     | 
| 
       827 
     | 
    
         
            -
            #     },
         
     | 
| 
       828 
     | 
    
         
            -
            #     {
         
     | 
| 
       829 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       830 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       831 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       832 
     | 
    
         
            -
            #     },
         
     | 
| 
       833 
     | 
    
         
            -
            #     {
         
     | 
| 
       834 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       835 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       836 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       837 
     | 
    
         
            -
            #     },
         
     | 
| 
       838 
     | 
    
         
            -
            #     {
         
     | 
| 
       839 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       840 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       841 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       842 
     | 
    
         
            -
            #     }
         
     | 
| 
       843 
     | 
    
         
            -
            #   ]
         
     | 
| 
       844 
     | 
    
         
            -
            # ]
         
     | 
| 
       845 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       846 
     | 
    
         
            -
            </div>
         
     | 
| 
       847 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> の実行結果です。</p>
         
     | 
| 
       848 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       849 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
         
     | 
| 
       850 
     | 
    
         
            -
            # [
         
     | 
| 
       851 
     | 
    
         
            -
            #   [
         
     | 
| 
       852 
     | 
    
         
            -
            #     0,
         
     | 
| 
       853 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       854 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       855 
     | 
    
         
            -
            #   ],
         
     | 
| 
       856 
     | 
    
         
            -
            #   [
         
     | 
| 
       857 
     | 
    
         
            -
            #     {
         
     | 
| 
       858 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       859 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       860 
     | 
    
         
            -
            #       "value": "he"
         
     | 
| 
       861 
     | 
    
         
            -
            #     },
         
     | 
| 
       862 
     | 
    
         
            -
            #     {
         
     | 
| 
       863 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       864 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       865 
     | 
    
         
            -
            #       "value": "el"
         
     | 
| 
       866 
     | 
    
         
            -
            #     },
         
     | 
| 
       867 
     | 
    
         
            -
            #     {
         
     | 
| 
       868 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       869 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       870 
     | 
    
         
            -
            #       "value": "ll"
         
     | 
| 
       871 
     | 
    
         
            -
            #     },
         
     | 
| 
       872 
     | 
    
         
            -
            #     {
         
     | 
| 
       873 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       874 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       875 
     | 
    
         
            -
            #       "value": "lo"
         
     | 
| 
       876 
     | 
    
         
            -
            #     },
         
     | 
| 
       877 
     | 
    
         
            -
            #     {
         
     | 
| 
       878 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       879 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       880 
     | 
    
         
            -
            #       "value": "o日"
         
     | 
| 
       881 
     | 
    
         
            -
            #     },
         
     | 
| 
       882 
     | 
    
         
            -
            #     {
         
     | 
| 
       883 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       884 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       885 
     | 
    
         
            -
            #       "value": "日本"
         
     | 
| 
       886 
     | 
    
         
            -
            #     },
         
     | 
| 
       887 
     | 
    
         
            -
            #     {
         
     | 
| 
       888 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       889 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       890 
     | 
    
         
            -
            #       "value": "本語"
         
     | 
| 
       891 
     | 
    
         
            -
            #     },
         
     | 
| 
       892 
     | 
    
         
            -
            #     {
         
     | 
| 
       893 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       894 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       895 
     | 
    
         
            -
            #       "value": "語!"
         
     | 
| 
       896 
     | 
    
         
            -
            #     },
         
     | 
| 
       897 
     | 
    
         
            -
            #     {
         
     | 
| 
       898 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       899 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       900 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       901 
     | 
    
         
            -
            #     },
         
     | 
| 
       902 
     | 
    
         
            -
            #     {
         
     | 
| 
       903 
     | 
    
         
            -
            #       "position": 9,
         
     | 
| 
       904 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       905 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       906 
     | 
    
         
            -
            #     },
         
     | 
| 
       907 
     | 
    
         
            -
            #     {
         
     | 
| 
       908 
     | 
    
         
            -
            #       "position": 10,
         
     | 
| 
       909 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       910 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       911 
     | 
    
         
            -
            #     }
         
     | 
| 
       912 
     | 
    
         
            -
            #   ]
         
     | 
| 
       913 
     | 
    
         
            -
            # ]
         
     | 
| 
       914 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       915 
     | 
    
         
            -
            </div>
         
     | 
| 
       916 
     | 
    
         
            -
            </div>
         
     | 
| 
       917 
     | 
    
         
            -
            <div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
         
     | 
| 
       918 
     | 
    
         
            -
            <span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       919 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
         
     | 
| 
       920 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       921 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       922 
     | 
    
         
            -
            <li>空白文字の扱い</li>
         
     | 
| 
       923 
     | 
    
         
            -
            <li>記号とアルファベットと数字の扱い</li>
         
     | 
| 
      
 66 
     | 
    
         
            +
            <div class="toctree-wrapper compound">
         
     | 
| 
      
 67 
     | 
    
         
            +
            <ul>
         
     | 
| 
      
 68 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizer/summary.html">7.8.1. 概要</a></li>
         
     | 
| 
      
 69 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram.html">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></li>
         
     | 
| 
      
 70 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank.html">7.8.3. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
         
     | 
| 
      
 71 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol.html">7.8.4. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
         
     | 
| 
      
 72 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html">7.8.5. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
         
     | 
| 
      
 73 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html">7.8.6. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
         
     | 
| 
      
 74 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol.html">7.8.7. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
         
     | 
| 
      
 75 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha.html">7.8.8. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
         
     | 
| 
      
 76 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha_digit.html">7.8.9. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
         
     | 
| 
      
 77 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit.html">7.8.10. <code class="docutils literal notranslate"><span class="pre">TokenDelimit</span></code></a></li>
         
     | 
| 
      
 78 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit_null.html">7.8.11. <code class="docutils literal notranslate"><span class="pre">TokenDelimitNull</span></code></a></li>
         
     | 
| 
      
 79 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_mecab.html">7.8.12. <code class="docutils literal notranslate"><span class="pre">TokenMecab</span></code></a></li>
         
     | 
| 
      
 80 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_regexp.html">7.8.13. <code class="docutils literal notranslate"><span class="pre">TokenRegexp</span></code></a></li>
         
     | 
| 
      
 81 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_trigram.html">7.8.14. <code class="docutils literal notranslate"><span class="pre">TokenTrigram</span></code></a></li>
         
     | 
| 
      
 82 
     | 
    
         
            +
            <li class="toctree-l1"><a class="reference internal" href="tokenizers/token_unigram.html">7.8.15. <code class="docutils literal notranslate"><span class="pre">TokenUnigram</span></code></a></li>
         
     | 
| 
       924 
83 
     | 
    
         
             
            </ul>
         
     | 
| 
       925 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       926 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
         
     | 
| 
       927 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
         
     | 
| 
       928 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
         
     | 
| 
       929 
     | 
    
         
            -
            <p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
         
     | 
| 
       930 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       931 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
         
     | 
| 
       932 
     | 
    
         
            -
            # [
         
     | 
| 
       933 
     | 
    
         
            -
            #   [
         
     | 
| 
       934 
     | 
    
         
            -
            #     0,
         
     | 
| 
       935 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       936 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       937 
     | 
    
         
            -
            #   ],
         
     | 
| 
       938 
     | 
    
         
            -
            #   [
         
     | 
| 
       939 
     | 
    
         
            -
            #     {
         
     | 
| 
       940 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       941 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       942 
     | 
    
         
            -
            #       "value": "hello"
         
     | 
| 
       943 
     | 
    
         
            -
            #     },
         
     | 
| 
       944 
     | 
    
         
            -
            #     {
         
     | 
| 
       945 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       946 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       947 
     | 
    
         
            -
            #       "value": "日"
         
     | 
| 
       948 
     | 
    
         
            -
            #     },
         
     | 
| 
       949 
     | 
    
         
            -
            #     {
         
     | 
| 
       950 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       951 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       952 
     | 
    
         
            -
            #       "value": "本"
         
     | 
| 
       953 
     | 
    
         
            -
            #     },
         
     | 
| 
       954 
     | 
    
         
            -
            #     {
         
     | 
| 
       955 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       956 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       957 
     | 
    
         
            -
            #       "value": "語"
         
     | 
| 
       958 
     | 
    
         
            -
            #     },
         
     | 
| 
       959 
     | 
    
         
            -
            #     {
         
     | 
| 
       960 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       961 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       962 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       963 
     | 
    
         
            -
            #     },
         
     | 
| 
       964 
     | 
    
         
            -
            #     {
         
     | 
| 
       965 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       966 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       967 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       968 
     | 
    
         
            -
            #     },
         
     | 
| 
       969 
     | 
    
         
            -
            #     {
         
     | 
| 
       970 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       971 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       972 
     | 
    
         
            -
            #       "value": "!"
         
     | 
| 
       973 
     | 
    
         
            -
            #     },
         
     | 
| 
       974 
     | 
    
         
            -
            #     {
         
     | 
| 
       975 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       976 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       977 
     | 
    
         
            -
            #       "value": "777"
         
     | 
| 
       978 
     | 
    
         
            -
            #     }
         
     | 
| 
       979 
     | 
    
         
            -
            #   ]
         
     | 
| 
       980 
     | 
    
         
            -
            # ]
         
     | 
| 
       981 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       982 
     | 
    
         
            -
            </div>
         
     | 
| 
       983 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> の実行結果です。</p>
         
     | 
| 
       984 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       985 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
         
     | 
| 
       986 
     | 
    
         
            -
            # [
         
     | 
| 
       987 
     | 
    
         
            -
            #   [
         
     | 
| 
       988 
     | 
    
         
            -
            #     0,
         
     | 
| 
       989 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       990 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       991 
     | 
    
         
            -
            #   ],
         
     | 
| 
       992 
     | 
    
         
            -
            #   [
         
     | 
| 
       993 
     | 
    
         
            -
            #     {
         
     | 
| 
       994 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       995 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       996 
     | 
    
         
            -
            #       "value": "he"
         
     | 
| 
       997 
     | 
    
         
            -
            #     },
         
     | 
| 
       998 
     | 
    
         
            -
            #     {
         
     | 
| 
       999 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       1000 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1001 
     | 
    
         
            -
            #       "value": "el"
         
     | 
| 
       1002 
     | 
    
         
            -
            #     },
         
     | 
| 
       1003 
     | 
    
         
            -
            #     {
         
     | 
| 
       1004 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       1005 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1006 
     | 
    
         
            -
            #       "value": "ll"
         
     | 
| 
       1007 
     | 
    
         
            -
            #     },
         
     | 
| 
       1008 
     | 
    
         
            -
            #     {
         
     | 
| 
       1009 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       1010 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1011 
     | 
    
         
            -
            #       "value": "lo"
         
     | 
| 
       1012 
     | 
    
         
            -
            #     },
         
     | 
| 
       1013 
     | 
    
         
            -
            #     {
         
     | 
| 
       1014 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       1015 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1016 
     | 
    
         
            -
            #       "value": "o日"
         
     | 
| 
       1017 
     | 
    
         
            -
            #     },
         
     | 
| 
       1018 
     | 
    
         
            -
            #     {
         
     | 
| 
       1019 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       1020 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1021 
     | 
    
         
            -
            #       "value": "日本"
         
     | 
| 
       1022 
     | 
    
         
            -
            #     },
         
     | 
| 
       1023 
     | 
    
         
            -
            #     {
         
     | 
| 
       1024 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       1025 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1026 
     | 
    
         
            -
            #       "value": "本語"
         
     | 
| 
       1027 
     | 
    
         
            -
            #     },
         
     | 
| 
       1028 
     | 
    
         
            -
            #     {
         
     | 
| 
       1029 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       1030 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1031 
     | 
    
         
            -
            #       "value": "語!"
         
     | 
| 
       1032 
     | 
    
         
            -
            #     },
         
     | 
| 
       1033 
     | 
    
         
            -
            #     {
         
     | 
| 
       1034 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       1035 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1036 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       1037 
     | 
    
         
            -
            #     },
         
     | 
| 
       1038 
     | 
    
         
            -
            #     {
         
     | 
| 
       1039 
     | 
    
         
            -
            #       "position": 9,
         
     | 
| 
       1040 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1041 
     | 
    
         
            -
            #       "value": "!!"
         
     | 
| 
       1042 
     | 
    
         
            -
            #     },
         
     | 
| 
       1043 
     | 
    
         
            -
            #     {
         
     | 
| 
       1044 
     | 
    
         
            -
            #       "position": 10,
         
     | 
| 
       1045 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1046 
     | 
    
         
            -
            #       "value": "!7"
         
     | 
| 
       1047 
     | 
    
         
            -
            #     },
         
     | 
| 
       1048 
     | 
    
         
            -
            #     {
         
     | 
| 
       1049 
     | 
    
         
            -
            #       "position": 11,
         
     | 
| 
       1050 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1051 
     | 
    
         
            -
            #       "value": "77"
         
     | 
| 
       1052 
     | 
    
         
            -
            #     },
         
     | 
| 
       1053 
     | 
    
         
            -
            #     {
         
     | 
| 
       1054 
     | 
    
         
            -
            #       "position": 12,
         
     | 
| 
       1055 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1056 
     | 
    
         
            -
            #       "value": "77"
         
     | 
| 
       1057 
     | 
    
         
            -
            #     },
         
     | 
| 
       1058 
     | 
    
         
            -
            #     {
         
     | 
| 
       1059 
     | 
    
         
            -
            #       "position": 13,
         
     | 
| 
       1060 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1061 
     | 
    
         
            -
            #       "value": "7"
         
     | 
| 
       1062 
     | 
    
         
            -
            #     }
         
     | 
| 
       1063 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1064 
     | 
    
         
            -
            # ]
         
     | 
| 
       1065 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1066 
     | 
    
         
            -
            </div>
         
     | 
| 
       1067 
     | 
    
         
            -
            </div>
         
     | 
| 
       1068 
     | 
    
         
            -
            <div class="section" id="tokenunigram">
         
     | 
| 
       1069 
     | 
    
         
            -
            <span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       1070 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenUnigram</span></code> は各トークンが1文字です。</p>
         
     | 
| 
       1071 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       1072 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenUnigram "100cents!!!" NormalizerAuto
         
     | 
| 
       1073 
     | 
    
         
            -
            # [
         
     | 
| 
       1074 
     | 
    
         
            -
            #   [
         
     | 
| 
       1075 
     | 
    
         
            -
            #     0,
         
     | 
| 
       1076 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       1077 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       1078 
     | 
    
         
            -
            #   ],
         
     | 
| 
       1079 
     | 
    
         
            -
            #   [
         
     | 
| 
       1080 
     | 
    
         
            -
            #     {
         
     | 
| 
       1081 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       1082 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1083 
     | 
    
         
            -
            #       "value": "100"
         
     | 
| 
       1084 
     | 
    
         
            -
            #     },
         
     | 
| 
       1085 
     | 
    
         
            -
            #     {
         
     | 
| 
       1086 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       1087 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1088 
     | 
    
         
            -
            #       "value": "cents"
         
     | 
| 
       1089 
     | 
    
         
            -
            #     },
         
     | 
| 
       1090 
     | 
    
         
            -
            #     {
         
     | 
| 
       1091 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       1092 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1093 
     | 
    
         
            -
            #       "value": "!!!"
         
     | 
| 
       1094 
     | 
    
         
            -
            #     }
         
     | 
| 
       1095 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1096 
     | 
    
         
            -
            # ]
         
     | 
| 
       1097 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1098 
     | 
    
         
            -
            </div>
         
     | 
| 
       1099 
     | 
    
         
            -
            </div>
         
     | 
| 
       1100 
     | 
    
         
            -
            <div class="section" id="tokentrigram">
         
     | 
| 
       1101 
     | 
    
         
            -
            <span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       1102 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenTrigram</span></code> は各トークンが3文字です。</p>
         
     | 
| 
       1103 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       1104 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
         
     | 
| 
       1105 
     | 
    
         
            -
            # [
         
     | 
| 
       1106 
     | 
    
         
            -
            #   [
         
     | 
| 
       1107 
     | 
    
         
            -
            #     0,
         
     | 
| 
       1108 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       1109 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       1110 
     | 
    
         
            -
            #   ],
         
     | 
| 
       1111 
     | 
    
         
            -
            #   [
         
     | 
| 
       1112 
     | 
    
         
            -
            #     {
         
     | 
| 
       1113 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       1114 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1115 
     | 
    
         
            -
            #       "value": "10000"
         
     | 
| 
       1116 
     | 
    
         
            -
            #     },
         
     | 
| 
       1117 
     | 
    
         
            -
            #     {
         
     | 
| 
       1118 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       1119 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1120 
     | 
    
         
            -
            #       "value": "cents"
         
     | 
| 
       1121 
     | 
    
         
            -
            #     },
         
     | 
| 
       1122 
     | 
    
         
            -
            #     {
         
     | 
| 
       1123 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       1124 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1125 
     | 
    
         
            -
            #       "value": "!!!!!"
         
     | 
| 
       1126 
     | 
    
         
            -
            #     }
         
     | 
| 
       1127 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1128 
     | 
    
         
            -
            # ]
         
     | 
| 
       1129 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1130 
     | 
    
         
            -
            </div>
         
     | 
| 
       1131 
     | 
    
         
            -
            </div>
         
     | 
| 
       1132 
     | 
    
         
            -
            <div class="section" id="tokendelimit">
         
     | 
| 
       1133 
     | 
    
         
            -
            <span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       1134 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> は1つ以上の空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )で分割してトークンを抽出します。たとえば、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">Hello</span></code> と <code class="docutils literal"><span class="pre">World</span></code> にトークナイズされます。</p>
         
     | 
| 
       1135 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> はタグテキストに適切です。 <code class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></code> というテキストから <code class="docutils literal"><span class="pre">groonga</span></code> 、 <code class="docutils literal"><span class="pre">full-text-search</span></code> 、 <code class="docutils literal"><span class="pre">http</span></code> を抽出します。</p>
         
     | 
| 
       1136 
     | 
    
         
            -
            <p>以下は <code class="docutils literal"><span class="pre">TokenDelimit</span></code> の例です。</p>
         
     | 
| 
       1137 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       1138 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
         
     | 
| 
       1139 
     | 
    
         
            -
            # [
         
     | 
| 
       1140 
     | 
    
         
            -
            #   [
         
     | 
| 
       1141 
     | 
    
         
            -
            #     0,
         
     | 
| 
       1142 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       1143 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       1144 
     | 
    
         
            -
            #   ],
         
     | 
| 
       1145 
     | 
    
         
            -
            #   [
         
     | 
| 
       1146 
     | 
    
         
            -
            #     {
         
     | 
| 
       1147 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       1148 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1149 
     | 
    
         
            -
            #       "value": "groonga"
         
     | 
| 
       1150 
     | 
    
         
            -
            #     },
         
     | 
| 
       1151 
     | 
    
         
            -
            #     {
         
     | 
| 
       1152 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       1153 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1154 
     | 
    
         
            -
            #       "value": "full-text-search"
         
     | 
| 
       1155 
     | 
    
         
            -
            #     },
         
     | 
| 
       1156 
     | 
    
         
            -
            #     {
         
     | 
| 
       1157 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       1158 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1159 
     | 
    
         
            -
            #       "value": "http"
         
     | 
| 
       1160 
     | 
    
         
            -
            #     }
         
     | 
| 
       1161 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1162 
     | 
    
         
            -
            # ]
         
     | 
| 
       1163 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1164 
     | 
    
         
            -
            </div>
         
     | 
| 
       1165 
     | 
    
         
            -
            </div>
         
     | 
| 
       1166 
     | 
    
         
            -
            <div class="section" id="tokendelimitnull">
         
     | 
| 
       1167 
     | 
    
         
            -
            <span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       1168 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> は <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a> は空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )を使いますが、 <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> はNUL文字( <code class="docutils literal"><span class="pre">U+0000</span></code> )を使います。</p>
         
     | 
| 
       1169 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> もタグテキストに適切です。</p>
         
     | 
| 
       1170 
     | 
    
         
            -
            <p>以下は <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> の例です。</p>
         
     | 
| 
       1171 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       1172 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
         
     | 
| 
       1173 
     | 
    
         
            -
            # [
         
     | 
| 
       1174 
     | 
    
         
            -
            #   [
         
     | 
| 
       1175 
     | 
    
         
            -
            #     0,
         
     | 
| 
       1176 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       1177 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       1178 
     | 
    
         
            -
            #   ],
         
     | 
| 
       1179 
     | 
    
         
            -
            #   [
         
     | 
| 
       1180 
     | 
    
         
            -
            #     {
         
     | 
| 
       1181 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       1182 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1183 
     | 
    
         
            -
            #       "value": "groongau0000full-text-searchu0000http"
         
     | 
| 
       1184 
     | 
    
         
            -
            #     }
         
     | 
| 
       1185 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1186 
     | 
    
         
            -
            # ]
         
     | 
| 
       1187 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1188 
     | 
    
         
            -
            </div>
         
     | 
| 
       1189 
     | 
    
         
            -
            </div>
         
     | 
| 
       1190 
     | 
    
         
            -
            <div class="section" id="tokenmecab">
         
     | 
| 
       1191 
     | 
    
         
            -
            <span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       1192 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は <a class="reference external" href="https://taku910.github.io/mecab/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
         
     | 
| 
       1193 
     | 
    
         
            -
            <p>MeCabは日本語に依存していません。対象の言語用の辞書を用意すれば、日本語以外でもMeCabを使えます。日本語用の辞書には <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST Japanese Dictionary</a> を使えます。</p>
         
     | 
| 
       1194 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> では <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">東京都</span></code> も <code class="docutils literal"><span class="pre">京都</span></code> も見つかりますが、この場合は <code class="docutils literal"><span class="pre">東京都</span></code> は期待した結果ではありません。 <code class="docutils literal"><span class="pre">TokenMecab</span></code> を使うと <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">京都</span></code> だけを見つけられます。</p>
         
     | 
| 
       1195 
     | 
    
         
            -
            <p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
         
     | 
| 
       1196 
     | 
    
         
            -
            <p>以下は <code class="docutils literal"><span class="pre">TokenMecab</span></code> の例です。 <code class="docutils literal"><span class="pre">東京都</span></code> は <code class="docutils literal"><span class="pre">東京</span></code> と <code class="docutils literal"><span class="pre">都</span></code> にトークナイズされています。 <code class="docutils literal"><span class="pre">京都</span></code> というトークンはありません。</p>
         
     | 
| 
       1197 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       1198 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenMecab "東京都"
         
     | 
| 
       1199 
     | 
    
         
            -
            # [
         
     | 
| 
       1200 
     | 
    
         
            -
            #   [
         
     | 
| 
       1201 
     | 
    
         
            -
            #     -22,
         
     | 
| 
       1202 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       1203 
     | 
    
         
            -
            #     0.000355720520019531,
         
     | 
| 
       1204 
     | 
    
         
            -
            #     "[tokenize] nonexistent tokenizer: <TokenMecab>",
         
     | 
| 
       1205 
     | 
    
         
            -
            #     [
         
     | 
| 
       1206 
     | 
    
         
            -
            #       [
         
     | 
| 
       1207 
     | 
    
         
            -
            #         "create_lexicon_for_tokenize",
         
     | 
| 
       1208 
     | 
    
         
            -
            #         "proc_tokenize.c",
         
     | 
| 
       1209 
     | 
    
         
            -
            #         139
         
     | 
| 
       1210 
     | 
    
         
            -
            #       ]
         
     | 
| 
       1211 
     | 
    
         
            -
            #     ]
         
     | 
| 
       1212 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1213 
     | 
    
         
            -
            # ]
         
     | 
| 
       1214 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1215 
     | 
    
         
            -
            </div>
         
     | 
| 
       1216 
     | 
    
         
            -
            </div>
         
     | 
| 
       1217 
     | 
    
         
            -
            <div class="section" id="tokenregexp">
         
     | 
| 
       1218 
     | 
    
         
            -
            <span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
         
     | 
| 
       1219 
     | 
    
         
            -
            <div class="versionadded">
         
     | 
| 
       1220 
     | 
    
         
            -
            <p><span class="versionmodified">バージョン 5.0.1 で追加.</span></p>
         
     | 
| 
       1221 
     | 
    
         
            -
            </div>
         
     | 
| 
       1222 
     | 
    
         
            -
            <div class="admonition caution">
         
     | 
| 
       1223 
     | 
    
         
            -
            <p class="first admonition-title">ご用心</p>
         
     | 
| 
       1224 
     | 
    
         
            -
            <p class="last">このトークナイザーは実験的です。仕様が変わる可能性があります。</p>
         
     | 
| 
       1225 
     | 
    
         
            -
            </div>
         
     | 
| 
       1226 
     | 
    
         
            -
            <div class="admonition caution">
         
     | 
| 
       1227 
     | 
    
         
            -
            <p class="first admonition-title">ご用心</p>
         
     | 
| 
       1228 
     | 
    
         
            -
            <p class="last">このトークナイザーはUTF-8でしか使えません。EUC-JPやShift_JISなどと一緒には使えません。</p>
         
     | 
| 
       1229 
     | 
    
         
            -
            </div>
         
     | 
| 
       1230 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
         
     | 
| 
       1231 
     | 
    
         
            -
            <p>一般的に、正規表現検索は逐次検索で実行します。しかし、次のケースはインデックスを使って検索できます。</p>
         
     | 
| 
       1232 
     | 
    
         
            -
            <blockquote>
         
     | 
| 
       1233 
     | 
    
         
            -
            <div><ul class="simple">
         
     | 
| 
       1234 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">hello</span></code> のようにリテラルしかないケース</li>
         
     | 
| 
       1235 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">\A/home/alice</span></code> のようにテキストの最初でのマッチとリテラルのみのケース</li>
         
     | 
| 
       1236 
     | 
    
         
            -
            <li><code class="docutils literal"><span class="pre">\.txt\z</span></code> のようにテキストの最後でのマッチとリテラルのみのケース</li>
         
     | 
| 
       1237 
     | 
    
         
            -
            </ul>
         
     | 
| 
       1238 
     | 
    
         
            -
            </div></blockquote>
         
     | 
| 
       1239 
     | 
    
         
            -
            <p>多くのケースでは、逐次検索よりもインデックスを使った検索の方が高速です。</p>
         
     | 
| 
       1240 
     | 
    
         
            -
            <p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はベースはバイグラムを使います。 <code class="docutils literal"><span class="pre">TokenRegexp</span></code> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <code class="docutils literal"><span class="pre">U+FFEF</span></code> )を入れ、テキストの最後にテキストの最後であるというマーク( <code class="docutils literal"><span class="pre">U+FFF0</span></code> )を入れます。</p>
         
     | 
| 
       1241 
     | 
    
         
            -
            <p>実行例:</p>
         
     | 
| 
       1242 
     | 
    
         
            -
            <div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
         
     | 
| 
       1243 
     | 
    
         
            -
            # [
         
     | 
| 
       1244 
     | 
    
         
            -
            #   [
         
     | 
| 
       1245 
     | 
    
         
            -
            #     0,
         
     | 
| 
       1246 
     | 
    
         
            -
            #     1337566253.89858,
         
     | 
| 
       1247 
     | 
    
         
            -
            #     0.000355720520019531
         
     | 
| 
       1248 
     | 
    
         
            -
            #   ],
         
     | 
| 
       1249 
     | 
    
         
            -
            #   [
         
     | 
| 
       1250 
     | 
    
         
            -
            #     {
         
     | 
| 
       1251 
     | 
    
         
            -
            #       "position": 0,
         
     | 
| 
       1252 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1253 
     | 
    
         
            -
            #       "value": ""
         
     | 
| 
       1254 
     | 
    
         
            -
            #     },
         
     | 
| 
       1255 
     | 
    
         
            -
            #     {
         
     | 
| 
       1256 
     | 
    
         
            -
            #       "position": 1,
         
     | 
| 
       1257 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1258 
     | 
    
         
            -
            #       "value": "/h"
         
     | 
| 
       1259 
     | 
    
         
            -
            #     },
         
     | 
| 
       1260 
     | 
    
         
            -
            #     {
         
     | 
| 
       1261 
     | 
    
         
            -
            #       "position": 2,
         
     | 
| 
       1262 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1263 
     | 
    
         
            -
            #       "value": "ho"
         
     | 
| 
       1264 
     | 
    
         
            -
            #     },
         
     | 
| 
       1265 
     | 
    
         
            -
            #     {
         
     | 
| 
       1266 
     | 
    
         
            -
            #       "position": 3,
         
     | 
| 
       1267 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1268 
     | 
    
         
            -
            #       "value": "om"
         
     | 
| 
       1269 
     | 
    
         
            -
            #     },
         
     | 
| 
       1270 
     | 
    
         
            -
            #     {
         
     | 
| 
       1271 
     | 
    
         
            -
            #       "position": 4,
         
     | 
| 
       1272 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1273 
     | 
    
         
            -
            #       "value": "me"
         
     | 
| 
       1274 
     | 
    
         
            -
            #     },
         
     | 
| 
       1275 
     | 
    
         
            -
            #     {
         
     | 
| 
       1276 
     | 
    
         
            -
            #       "position": 5,
         
     | 
| 
       1277 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1278 
     | 
    
         
            -
            #       "value": "e/"
         
     | 
| 
       1279 
     | 
    
         
            -
            #     },
         
     | 
| 
       1280 
     | 
    
         
            -
            #     {
         
     | 
| 
       1281 
     | 
    
         
            -
            #       "position": 6,
         
     | 
| 
       1282 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1283 
     | 
    
         
            -
            #       "value": "/a"
         
     | 
| 
       1284 
     | 
    
         
            -
            #     },
         
     | 
| 
       1285 
     | 
    
         
            -
            #     {
         
     | 
| 
       1286 
     | 
    
         
            -
            #       "position": 7,
         
     | 
| 
       1287 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1288 
     | 
    
         
            -
            #       "value": "al"
         
     | 
| 
       1289 
     | 
    
         
            -
            #     },
         
     | 
| 
       1290 
     | 
    
         
            -
            #     {
         
     | 
| 
       1291 
     | 
    
         
            -
            #       "position": 8,
         
     | 
| 
       1292 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1293 
     | 
    
         
            -
            #       "value": "li"
         
     | 
| 
       1294 
     | 
    
         
            -
            #     },
         
     | 
| 
       1295 
     | 
    
         
            -
            #     {
         
     | 
| 
       1296 
     | 
    
         
            -
            #       "position": 9,
         
     | 
| 
       1297 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1298 
     | 
    
         
            -
            #       "value": "ic"
         
     | 
| 
       1299 
     | 
    
         
            -
            #     },
         
     | 
| 
       1300 
     | 
    
         
            -
            #     {
         
     | 
| 
       1301 
     | 
    
         
            -
            #       "position": 10,
         
     | 
| 
       1302 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1303 
     | 
    
         
            -
            #       "value": "ce"
         
     | 
| 
       1304 
     | 
    
         
            -
            #     },
         
     | 
| 
       1305 
     | 
    
         
            -
            #     {
         
     | 
| 
       1306 
     | 
    
         
            -
            #       "position": 11,
         
     | 
| 
       1307 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1308 
     | 
    
         
            -
            #       "value": "e/"
         
     | 
| 
       1309 
     | 
    
         
            -
            #     },
         
     | 
| 
       1310 
     | 
    
         
            -
            #     {
         
     | 
| 
       1311 
     | 
    
         
            -
            #       "position": 12,
         
     | 
| 
       1312 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1313 
     | 
    
         
            -
            #       "value": "/t"
         
     | 
| 
       1314 
     | 
    
         
            -
            #     },
         
     | 
| 
       1315 
     | 
    
         
            -
            #     {
         
     | 
| 
       1316 
     | 
    
         
            -
            #       "position": 13,
         
     | 
| 
       1317 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1318 
     | 
    
         
            -
            #       "value": "te"
         
     | 
| 
       1319 
     | 
    
         
            -
            #     },
         
     | 
| 
       1320 
     | 
    
         
            -
            #     {
         
     | 
| 
       1321 
     | 
    
         
            -
            #       "position": 14,
         
     | 
| 
       1322 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1323 
     | 
    
         
            -
            #       "value": "es"
         
     | 
| 
       1324 
     | 
    
         
            -
            #     },
         
     | 
| 
       1325 
     | 
    
         
            -
            #     {
         
     | 
| 
       1326 
     | 
    
         
            -
            #       "position": 15,
         
     | 
| 
       1327 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1328 
     | 
    
         
            -
            #       "value": "st"
         
     | 
| 
       1329 
     | 
    
         
            -
            #     },
         
     | 
| 
       1330 
     | 
    
         
            -
            #     {
         
     | 
| 
       1331 
     | 
    
         
            -
            #       "position": 16,
         
     | 
| 
       1332 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1333 
     | 
    
         
            -
            #       "value": "t."
         
     | 
| 
       1334 
     | 
    
         
            -
            #     },
         
     | 
| 
       1335 
     | 
    
         
            -
            #     {
         
     | 
| 
       1336 
     | 
    
         
            -
            #       "position": 17,
         
     | 
| 
       1337 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1338 
     | 
    
         
            -
            #       "value": ".t"
         
     | 
| 
       1339 
     | 
    
         
            -
            #     },
         
     | 
| 
       1340 
     | 
    
         
            -
            #     {
         
     | 
| 
       1341 
     | 
    
         
            -
            #       "position": 18,
         
     | 
| 
       1342 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1343 
     | 
    
         
            -
            #       "value": "tx"
         
     | 
| 
       1344 
     | 
    
         
            -
            #     },
         
     | 
| 
       1345 
     | 
    
         
            -
            #     {
         
     | 
| 
       1346 
     | 
    
         
            -
            #       "position": 19,
         
     | 
| 
       1347 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1348 
     | 
    
         
            -
            #       "value": "xt"
         
     | 
| 
       1349 
     | 
    
         
            -
            #     },
         
     | 
| 
       1350 
     | 
    
         
            -
            #     {
         
     | 
| 
       1351 
     | 
    
         
            -
            #       "position": 20,
         
     | 
| 
       1352 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1353 
     | 
    
         
            -
            #       "value": "t"
         
     | 
| 
       1354 
     | 
    
         
            -
            #     },
         
     | 
| 
       1355 
     | 
    
         
            -
            #     {
         
     | 
| 
       1356 
     | 
    
         
            -
            #       "position": 21,
         
     | 
| 
       1357 
     | 
    
         
            -
            #       "force_prefix": false,
         
     | 
| 
       1358 
     | 
    
         
            -
            #       "value": ""
         
     | 
| 
       1359 
     | 
    
         
            -
            #     }
         
     | 
| 
       1360 
     | 
    
         
            -
            #   ]
         
     | 
| 
       1361 
     | 
    
         
            -
            # ]
         
     | 
| 
       1362 
     | 
    
         
            -
            </pre></div>
         
     | 
| 
       1363 
     | 
    
         
            -
            </div>
         
     | 
| 
       1364 
     | 
    
         
            -
            </div>
         
     | 
| 
       1365 
84 
     | 
    
         
             
            </div>
         
     | 
| 
       1366 
85 
     | 
    
         
             
            </div>
         
     | 
| 
       1367 
86 
     | 
    
         | 
| 
         @@ -1371,46 +90,20 @@ 
     | 
|
| 
       1371 
90 
     | 
    
         
             
                  </div>
         
     | 
| 
       1372 
91 
     | 
    
         
             
                  <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
         
     | 
| 
       1373 
92 
     | 
    
         
             
                    <div class="sphinxsidebarwrapper">
         
     | 
| 
       1374 
     | 
    
         
            -
              <h3><a href="../index.html">目次</a></h3>
         
     | 
| 
       1375 
     | 
    
         
            -
              <ul>
         
     | 
| 
       1376 
     | 
    
         
            -
            <li><a class="reference internal" href="#">7.8. トークナイザー</a><ul>
         
     | 
| 
       1377 
     | 
    
         
            -
            <li><a class="reference internal" href="#summary">7.8.1. 概要</a></li>
         
     | 
| 
       1378 
     | 
    
         
            -
            <li><a class="reference internal" href="#what-is-tokenize">7.8.2. 「トークナイズ」とはなにか</a></li>
         
     | 
| 
       1379 
     | 
    
         
            -
            <li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. 組み込みトークナイザー</a><ul>
         
     | 
| 
       1380 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
         
     | 
| 
       1381 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
         
     | 
| 
       1382 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
         
     | 
| 
       1383 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
         
     | 
| 
       1384 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
         
     | 
| 
       1385 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
         
     | 
| 
       1386 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
         
     | 
| 
       1387 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
         
     | 
| 
       1388 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
         
     | 
| 
       1389 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
         
     | 
| 
       1390 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
         
     | 
| 
       1391 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
         
     | 
| 
       1392 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
         
     | 
| 
       1393 
     | 
    
         
            -
            <li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
         
     | 
| 
       1394 
     | 
    
         
            -
            </ul>
         
     | 
| 
       1395 
     | 
    
         
            -
            </li>
         
     | 
| 
       1396 
     | 
    
         
            -
            </ul>
         
     | 
| 
       1397 
     | 
    
         
            -
            </li>
         
     | 
| 
       1398 
     | 
    
         
            -
            </ul>
         
     | 
| 
       1399 
     | 
    
         
            -
             
     | 
| 
       1400 
93 
     | 
    
         
             
              <h4>前のトピックへ</h4>
         
     | 
| 
       1401 
     | 
    
         
            -
              <p class="topless"><a href="normalizers.html"
         
     | 
| 
       1402 
     | 
    
         
            -
                                    title="前の章へ">7.7.  
     | 
| 
      
 94 
     | 
    
         
            +
              <p class="topless"><a href="normalizers/normalizer_nfkc51.html"
         
     | 
| 
      
 95 
     | 
    
         
            +
                                    title="前の章へ">7.7.2.3. <code class="docutils literal notranslate"><span class="pre">NormalizerNFKC51</span></code></a></p>
         
     | 
| 
       1403 
96 
     | 
    
         
             
              <h4>次のトピックへ</h4>
         
     | 
| 
       1404 
     | 
    
         
            -
              <p class="topless"><a href=" 
     | 
| 
       1405 
     | 
    
         
            -
                                    title="次の章へ">7. 
     | 
| 
      
 97 
     | 
    
         
            +
              <p class="topless"><a href="tokenizer/summary.html"
         
     | 
| 
      
 98 
     | 
    
         
            +
                                    title="次の章へ">7.8.1. 概要</a></p>
         
     | 
| 
       1406 
99 
     | 
    
         
             
            <div id="searchbox" style="display: none" role="search">
         
     | 
| 
       1407 
100 
     | 
    
         
             
              <h3>クイック検索</h3>
         
     | 
| 
      
 101 
     | 
    
         
            +
                <div class="searchformwrapper">
         
     | 
| 
       1408 
102 
     | 
    
         
             
                <form class="search" action="../search.html" method="get">
         
     | 
| 
       1409 
     | 
    
         
            -
                  < 
     | 
| 
       1410 
     | 
    
         
            -
                  < 
     | 
| 
       1411 
     | 
    
         
            -
                  <input type="hidden" name="check_keywords" value="yes" />
         
     | 
| 
       1412 
     | 
    
         
            -
                  <input type="hidden" name="area" value="default" />
         
     | 
| 
      
 103 
     | 
    
         
            +
                  <input type="text" name="q" />
         
     | 
| 
      
 104 
     | 
    
         
            +
                  <input type="submit" value="検索" />
         
     | 
| 
       1413 
105 
     | 
    
         
             
                </form>
         
     | 
| 
      
 106 
     | 
    
         
            +
                </div>
         
     | 
| 
       1414 
107 
     | 
    
         
             
            </div>
         
     | 
| 
       1415 
108 
     | 
    
         
             
            <script type="text/javascript">$('#searchbox').show(0);</script>
         
     | 
| 
       1416 
109 
     | 
    
         
             
                    </div>
         
     | 
| 
         @@ -1424,17 +117,17 @@ 
     | 
|
| 
       1424 
117 
     | 
    
         
             
                      <a href="../genindex.html" title="総合索引"
         
     | 
| 
       1425 
118 
     | 
    
         
             
                         >索引</a></li>
         
     | 
| 
       1426 
119 
     | 
    
         
             
                    <li class="right" >
         
     | 
| 
       1427 
     | 
    
         
            -
                      <a href=" 
     | 
| 
      
 120 
     | 
    
         
            +
                      <a href="tokenizer/summary.html" title="7.8.1. 概要"
         
     | 
| 
       1428 
121 
     | 
    
         
             
                         >次へ</a> |</li>
         
     | 
| 
       1429 
122 
     | 
    
         
             
                    <li class="right" >
         
     | 
| 
       1430 
     | 
    
         
            -
                      <a href="normalizers.html" title="7.7.  
     | 
| 
      
 123 
     | 
    
         
            +
                      <a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
         
     | 
| 
       1431 
124 
     | 
    
         
             
                         >前へ</a> |</li>
         
     | 
| 
       1432 
     | 
    
         
            -
                    <li class="nav-item nav-item-0"><a href="../index.html">Groonga  
     | 
| 
      
 125 
     | 
    
         
            +
                    <li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2ドキュメント</a> »</li>
         
     | 
| 
       1433 
126 
     | 
    
         
             
                      <li class="nav-item nav-item-1"><a href="../reference.html" >7. リファレンスマニュアル</a> »</li> 
         
     | 
| 
       1434 
127 
     | 
    
         
             
                  </ul>
         
     | 
| 
       1435 
128 
     | 
    
         
             
                </div>
         
     | 
| 
       1436 
129 
     | 
    
         
             
                <div class="footer" role="contentinfo">
         
     | 
| 
       1437 
     | 
    
         
            -
                    © Copyright 2009- 
     | 
| 
      
 130 
     | 
    
         
            +
                    © Copyright 2009-2019, Brazil, Inc.
         
     | 
| 
       1438 
131 
     | 
    
         
             
                </div>
         
     | 
| 
       1439 
132 
     | 
    
         
             
              </body>
         
     | 
| 
       1440 
133 
     | 
    
         
             
            </html>
         
     |