rroonga 7.1.1-x64-mingw32 → 9.0.2-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Rakefile +3 -3
- data/doc/text/news.md +22 -0
- data/ext/groonga/extconf.rb +29 -26
- data/ext/groonga/rb-grn.h +3 -3
- data/lib/2.2/groonga.so +0 -0
- data/lib/2.3/groonga.so +0 -0
- data/lib/2.4/groonga.so +0 -0
- data/lib/2.5/groonga.so +0 -0
- data/lib/groonga/expression-builder.rb +1 -1
- data/lib/groonga/schema.rb +13 -0
- data/rroonga-build.rb +4 -11
- data/test/test-expression-builder.rb +8 -0
- data/vendor/local/bin/cv2pdb.exe +0 -0
- data/vendor/local/bin/generate-pdb.bat +38 -36
- data/vendor/local/bin/grndb.exe +0 -0
- data/vendor/local/bin/groonga-benchmark.exe +0 -0
- data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
- data/vendor/local/bin/groonga.exe +0 -0
- data/vendor/local/bin/libgroonga-0.dll +0 -0
- data/vendor/local/bin/libmecab-2.dll +0 -0
- data/vendor/local/bin/libmsgpackc.dll +0 -0
- data/vendor/local/bin/libonigmo-6.dll +0 -0
- data/vendor/local/bin/libpcre-1.dll +0 -0
- data/vendor/local/bin/libpcrecpp-0.dll +0 -0
- data/vendor/local/bin/libpcreposix-0.dll +0 -0
- data/vendor/local/bin/lz4.exe +0 -0
- data/vendor/local/bin/lz4c.exe +0 -0
- data/vendor/local/bin/{lz4cat → lz4cat.exe} +0 -0
- data/vendor/local/bin/mecab.exe +0 -0
- data/vendor/local/bin/pcre-config +1 -1
- data/vendor/local/bin/pcregrep.exe +0 -0
- data/vendor/local/bin/pcretest.exe +0 -0
- data/vendor/local/bin/unlz4.exe +0 -0
- data/vendor/local/bin/zlib1.dll +0 -0
- data/vendor/local/include/groonga/groonga.h +16 -1
- data/vendor/local/include/groonga/groonga/accessor.h +5 -1
- data/vendor/local/include/groonga/groonga/column.h +4 -0
- data/vendor/local/include/groonga/groonga/db.h +3 -1
- data/vendor/local/include/groonga/groonga/expr.h +5 -0
- data/vendor/local/include/groonga/groonga/groonga.h +124 -171
- data/vendor/local/include/groonga/groonga/highlighter.h +57 -0
- data/vendor/local/include/groonga/groonga/ii.h +2 -0
- data/vendor/local/include/groonga/groonga/index_column.h +31 -0
- data/vendor/local/include/groonga/groonga/memory.h +29 -0
- data/vendor/local/include/groonga/groonga/msgpack.h +50 -0
- data/vendor/local/include/groonga/groonga/obj.h +22 -1
- data/vendor/local/include/groonga/groonga/option.h +61 -0
- data/vendor/local/include/groonga/groonga/output.h +57 -2
- data/vendor/local/include/groonga/groonga/output_columns.h +38 -0
- data/vendor/local/include/groonga/groonga/plugin.h +5 -0
- data/vendor/local/include/groonga/groonga/raw_string.h +60 -0
- data/vendor/local/include/groonga/groonga/string.h +113 -0
- data/vendor/local/include/groonga/groonga/table.h +89 -1
- data/vendor/local/include/groonga/groonga/thread.h +15 -0
- data/vendor/local/include/groonga/groonga/time.h +1 -0
- data/vendor/local/include/groonga/groonga/token.h +60 -10
- data/vendor/local/include/groonga/groonga/token_cursor.h +59 -0
- data/vendor/local/include/groonga/groonga/token_filter.h +24 -0
- data/vendor/local/include/groonga/groonga/token_metadata.h +49 -0
- data/vendor/local/include/groonga/groonga/tokenizer.h +99 -25
- data/vendor/local/include/groonga/groonga/tokenizer_query_deprecated.h +50 -0
- data/vendor/local/include/groonga/groonga/vector.h +80 -0
- data/vendor/local/include/groonga/groonga/version.h +32 -0
- data/vendor/local/include/groonga/groonga/window_function.h +18 -8
- data/vendor/local/include/groonga/groonga/window_function_executor.h +68 -0
- data/vendor/local/include/lz4.h +504 -212
- data/vendor/local/include/lz4frame.h +433 -153
- data/vendor/local/include/lz4frame_static.h +47 -0
- data/vendor/local/include/lz4hc.h +281 -108
- data/vendor/local/include/msgpack.hpp +4 -0
- data/vendor/local/include/msgpack/adaptor/adaptor_base.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/adaptor_base_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/array_ref_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/boost/msgpack_variant_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/boost/string_view.hpp +15 -0
- data/vendor/local/include/msgpack/adaptor/check_container_size_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/cpp17/optional.hpp +16 -0
- data/vendor/local/include/msgpack/adaptor/cpp17/string_view.hpp +16 -0
- data/vendor/local/include/msgpack/adaptor/define_decl.hpp +2 -0
- data/vendor/local/include/msgpack/adaptor/ext_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/fixint_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/int_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/map_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/msgpack_tuple_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/nil_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/raw_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/size_equal_only_decl.hpp +1 -0
- data/vendor/local/include/msgpack/adaptor/tr1/unordered_map.hpp +2 -2
- data/vendor/local/include/msgpack/adaptor/tr1/unordered_set.hpp +2 -2
- data/vendor/local/include/msgpack/adaptor/v4raw_decl.hpp +1 -0
- data/vendor/local/include/msgpack/cpp_config_decl.hpp +1 -0
- data/vendor/local/include/msgpack/create_object_visitor.hpp +17 -0
- data/vendor/local/include/msgpack/create_object_visitor_decl.hpp +16 -0
- data/vendor/local/include/msgpack/fbuffer.h +1 -1
- data/vendor/local/include/msgpack/fbuffer_decl.hpp +1 -0
- data/vendor/local/include/msgpack/gcc_atomic.hpp +0 -2
- data/vendor/local/include/msgpack/iterator_decl.hpp +2 -1
- data/vendor/local/include/msgpack/meta_decl.hpp +1 -0
- data/vendor/local/include/msgpack/null_visitor.hpp +17 -0
- data/vendor/local/include/msgpack/null_visitor_decl.hpp +16 -0
- data/vendor/local/include/msgpack/object.h +5 -0
- data/vendor/local/include/msgpack/object_decl.hpp +1 -0
- data/vendor/local/include/msgpack/object_fwd.hpp +1 -0
- data/vendor/local/include/msgpack/object_fwd_decl.hpp +1 -0
- data/vendor/local/include/msgpack/pack.h +1 -0
- data/vendor/local/include/msgpack/pack_decl.hpp +1 -0
- data/vendor/local/include/msgpack/parse.hpp +18 -0
- data/vendor/local/include/msgpack/parse_decl.hpp +16 -0
- data/vendor/local/include/msgpack/parse_return.hpp +17 -0
- data/vendor/local/include/msgpack/sbuffer_decl.hpp +1 -0
- data/vendor/local/include/msgpack/sysdep.h +34 -26
- data/vendor/local/include/msgpack/type.hpp +9 -0
- data/vendor/local/include/msgpack/unpack.h +12 -1
- data/vendor/local/include/msgpack/unpack.hpp +1 -0
- data/vendor/local/include/msgpack/unpack_decl.hpp +1 -0
- data/vendor/local/include/msgpack/unpack_exception.hpp +15 -0
- data/vendor/local/include/msgpack/unpack_template.h +22 -30
- data/vendor/local/include/msgpack/v1/adaptor/array_ref.hpp +6 -6
- data/vendor/local/include/msgpack/v1/adaptor/boost/fusion.hpp +49 -6
- data/vendor/local/include/msgpack/v1/adaptor/boost/msgpack_variant.hpp +6 -4
- data/vendor/local/include/msgpack/v1/adaptor/boost/string_view.hpp +87 -0
- data/vendor/local/include/msgpack/v1/adaptor/carray.hpp +11 -11
- data/vendor/local/include/msgpack/v1/adaptor/char_ptr.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/array.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_char.hpp +8 -1
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/array_unsigned_char.hpp +8 -1
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/forward_list.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/tuple.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_map.hpp +4 -4
- data/vendor/local/include/msgpack/v1/adaptor/cpp11/unordered_set.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/cpp17/optional.hpp +90 -0
- data/vendor/local/include/msgpack/v1/adaptor/cpp17/string_view.hpp +86 -0
- data/vendor/local/include/msgpack/v1/adaptor/deque.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_array.hpp +1088 -32
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_define_map.hpp +32 -16
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp03_msgpack_tuple.hpp +32 -32
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_convert_helper.hpp +45 -0
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_array.hpp +4 -3
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_define_map.hpp +4 -2
- data/vendor/local/include/msgpack/v1/adaptor/detail/cpp11_msgpack_tuple.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/ext.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/fixint.hpp +40 -24
- data/vendor/local/include/msgpack/v1/adaptor/float.hpp +4 -4
- data/vendor/local/include/msgpack/v1/adaptor/int.hpp +55 -33
- data/vendor/local/include/msgpack/v1/adaptor/list.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/map.hpp +10 -10
- data/vendor/local/include/msgpack/v1/adaptor/pair.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/set.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/string.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_map.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/tr1/unordered_set.hpp +2 -2
- data/vendor/local/include/msgpack/v1/adaptor/vector.hpp +5 -5
- data/vendor/local/include/msgpack/v1/adaptor/vector_bool.hpp +1 -1
- data/vendor/local/include/msgpack/v1/adaptor/vector_char.hpp +9 -9
- data/vendor/local/include/msgpack/v1/adaptor/vector_unsigned_char.hpp +9 -9
- data/vendor/local/include/msgpack/v1/cpp_config.hpp +6 -0
- data/vendor/local/include/msgpack/v1/cpp_config_decl.hpp +6 -0
- data/vendor/local/include/msgpack/v1/detail/cpp03_zone.hpp +41 -34
- data/vendor/local/include/msgpack/v1/detail/cpp03_zone_decl.hpp +8 -0
- data/vendor/local/include/msgpack/v1/detail/cpp11_zone.hpp +25 -19
- data/vendor/local/include/msgpack/v1/detail/cpp11_zone_decl.hpp +8 -0
- data/vendor/local/include/msgpack/v1/meta.hpp +6 -0
- data/vendor/local/include/msgpack/v1/meta_decl.hpp +5 -0
- data/vendor/local/include/msgpack/v1/object.hpp +768 -393
- data/vendor/local/include/msgpack/v1/object_decl.hpp +11 -1
- data/vendor/local/include/msgpack/v1/object_fwd.hpp +4 -1
- data/vendor/local/include/msgpack/v1/object_fwd_decl.hpp +3 -1
- data/vendor/local/include/msgpack/v1/parse_return.hpp +36 -0
- data/vendor/local/include/msgpack/v1/unpack.hpp +39 -120
- data/vendor/local/include/msgpack/v1/unpack_decl.hpp +2 -9
- data/vendor/local/include/msgpack/v1/unpack_exception.hpp +122 -0
- data/vendor/local/include/msgpack/v1/vrefbuffer.hpp +2 -2
- data/vendor/local/include/msgpack/v2/create_object_visitor.hpp +250 -0
- data/vendor/local/include/msgpack/v2/create_object_visitor_decl.hpp +33 -0
- data/vendor/local/include/msgpack/v2/meta_decl.hpp +4 -0
- data/vendor/local/include/msgpack/v2/null_visitor.hpp +96 -0
- data/vendor/local/include/msgpack/v2/null_visitor_decl.hpp +29 -0
- data/vendor/local/include/msgpack/v2/object_decl.hpp +4 -0
- data/vendor/local/include/msgpack/v2/object_fwd.hpp +1 -1
- data/vendor/local/include/msgpack/v2/object_fwd_decl.hpp +2 -0
- data/vendor/local/include/msgpack/v2/pack_decl.hpp +1 -0
- data/vendor/local/include/msgpack/v2/parse.hpp +1072 -0
- data/vendor/local/include/msgpack/v2/parse_decl.hpp +79 -0
- data/vendor/local/include/msgpack/v2/parse_return.hpp +37 -0
- data/vendor/local/include/msgpack/v2/unpack.hpp +21 -1298
- data/vendor/local/include/msgpack/v2/unpack_decl.hpp +9 -45
- data/vendor/local/include/msgpack/v2/x3_parse.hpp +875 -0
- data/vendor/local/include/msgpack/v2/x3_parse_decl.hpp +36 -0
- data/vendor/local/include/msgpack/v2/x3_unpack.hpp +120 -0
- data/vendor/local/include/msgpack/v2/x3_unpack_decl.hpp +71 -0
- data/vendor/local/include/msgpack/v3/adaptor/adaptor_base.hpp +58 -0
- data/vendor/local/include/msgpack/v3/adaptor/adaptor_base_decl.hpp +52 -0
- data/vendor/local/include/msgpack/v3/adaptor/array_ref_decl.hpp +36 -0
- data/vendor/local/include/msgpack/v3/adaptor/boost/msgpack_variant_decl.hpp +42 -0
- data/vendor/local/include/msgpack/v3/adaptor/check_container_size_decl.hpp +39 -0
- data/vendor/local/include/msgpack/v3/adaptor/define_decl.hpp +23 -0
- data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_array_decl.hpp +31 -0
- data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_define_map_decl.hpp +31 -0
- data/vendor/local/include/msgpack/v3/adaptor/detail/cpp03_msgpack_tuple_decl.hpp +43 -0
- data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_array_decl.hpp +32 -0
- data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_define_map_decl.hpp +31 -0
- data/vendor/local/include/msgpack/v3/adaptor/detail/cpp11_msgpack_tuple_decl.hpp +59 -0
- data/vendor/local/include/msgpack/v3/adaptor/ext_decl.hpp +34 -0
- data/vendor/local/include/msgpack/v3/adaptor/fixint_decl.hpp +43 -0
- data/vendor/local/include/msgpack/v3/adaptor/int_decl.hpp +54 -0
- data/vendor/local/include/msgpack/v3/adaptor/map_decl.hpp +33 -0
- data/vendor/local/include/msgpack/v3/adaptor/msgpack_tuple_decl.hpp +21 -0
- data/vendor/local/include/msgpack/v3/adaptor/nil_decl.hpp +42 -0
- data/vendor/local/include/msgpack/v3/adaptor/raw_decl.hpp +33 -0
- data/vendor/local/include/msgpack/v3/adaptor/size_equal_only_decl.hpp +35 -0
- data/vendor/local/include/msgpack/v3/adaptor/v4raw_decl.hpp +34 -0
- data/vendor/local/include/msgpack/v3/cpp_config_decl.hpp +84 -0
- data/vendor/local/include/msgpack/v3/create_object_visitor_decl.hpp +33 -0
- data/vendor/local/include/msgpack/v3/detail/cpp03_zone_decl.hpp +31 -0
- data/vendor/local/include/msgpack/v3/detail/cpp11_zone_decl.hpp +31 -0
- data/vendor/local/include/msgpack/v3/fbuffer_decl.hpp +32 -0
- data/vendor/local/include/msgpack/v3/iterator_decl.hpp +33 -0
- data/vendor/local/include/msgpack/v3/meta_decl.hpp +50 -0
- data/vendor/local/include/msgpack/v3/null_visitor_decl.hpp +29 -0
- data/vendor/local/include/msgpack/v3/object_decl.hpp +53 -0
- data/vendor/local/include/msgpack/v3/object_fwd.hpp +70 -0
- data/vendor/local/include/msgpack/v3/object_fwd_decl.hpp +75 -0
- data/vendor/local/include/msgpack/v3/pack_decl.hpp +55 -0
- data/vendor/local/include/msgpack/v3/parse.hpp +677 -0
- data/vendor/local/include/msgpack/v3/parse_decl.hpp +49 -0
- data/vendor/local/include/msgpack/v3/parse_return.hpp +35 -0
- data/vendor/local/include/msgpack/v3/sbuffer_decl.hpp +33 -0
- data/vendor/local/include/msgpack/v3/unpack.hpp +192 -0
- data/vendor/local/include/msgpack/v3/unpack_decl.hpp +304 -0
- data/vendor/local/include/msgpack/v3/vrefbuffer_decl.hpp +29 -0
- data/vendor/local/include/msgpack/v3/x3_parse_decl.hpp +34 -0
- data/vendor/local/include/msgpack/v3/x3_unpack.hpp +97 -0
- data/vendor/local/include/msgpack/v3/x3_unpack_decl.hpp +65 -0
- data/vendor/local/include/msgpack/v3/zbuffer_decl.hpp +29 -0
- data/vendor/local/include/msgpack/v3/zone_decl.hpp +21 -0
- data/vendor/local/include/msgpack/version_master.h +2 -2
- data/vendor/local/include/msgpack/versioning.hpp +5 -3
- data/vendor/local/include/msgpack/vrefbuffer.h +1 -2
- data/vendor/local/include/msgpack/vrefbuffer_decl.hpp +1 -0
- data/vendor/local/include/msgpack/x3_parse.hpp +15 -0
- data/vendor/local/include/msgpack/x3_parse_decl.hpp +16 -0
- data/vendor/local/include/msgpack/x3_unpack.hpp +16 -0
- data/vendor/local/include/msgpack/x3_unpack_decl.hpp +16 -0
- data/vendor/local/include/msgpack/zbuffer_decl.hpp +1 -0
- data/vendor/local/include/msgpack/zone_decl.hpp +1 -0
- data/vendor/local/include/pcre.h +6 -6
- data/vendor/local/lib/cmake/msgpack/msgpack-config-version.cmake +46 -0
- data/vendor/local/lib/cmake/msgpack/msgpack-config.cmake +47 -0
- data/vendor/local/lib/cmake/msgpack/msgpack-targets-noconfig.cmake +29 -0
- data/vendor/local/lib/cmake/msgpack/msgpack-targets.cmake +101 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/math.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +2 -2
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
- data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +150 -19
- data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +123 -65
- data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +528 -113
- data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +142 -40
- data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
- data/vendor/local/lib/groonga/scripts/ruby/backtrace_entry.rb +1 -1
- data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +64 -35
- data/vendor/local/lib/groonga/scripts/ruby/expression.rb +3 -1
- data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters.rb +15 -21
- data/vendor/local/lib/groonga/scripts/ruby/expression_rewriters/optimizer.rb +274 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree.rb +8 -2
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign.rb +22 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/assign_binary_operation.rb +24 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/binary_operation.rb +206 -8
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/constant.rb +16 -1
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +30 -1
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/logical_operation.rb +6 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/member.rb +18 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/null.rb +17 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/reference.rb +18 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/table.rb +14 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/unary_operation.rb +26 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/variable.rb +4 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +78 -8
- data/vendor/local/lib/groonga/scripts/ruby/index_column.rb +10 -0
- data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +2 -0
- data/vendor/local/lib/groonga/scripts/ruby/locale_output.rb +28 -0
- data/vendor/local/lib/groonga/scripts/ruby/logger.rb +36 -4
- data/vendor/local/lib/groonga/scripts/ruby/record.rb +1 -1
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +0 -3
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +46 -5
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_data_size_estimator.rb +5 -136
- data/vendor/local/lib/groonga/scripts/ruby/table.rb +2 -2
- data/vendor/local/lib/libgroonga.a +0 -0
- data/vendor/local/lib/libgroonga.dll.a +0 -0
- data/vendor/local/lib/libgroonga.la +1 -1
- data/vendor/local/lib/liblz4.a +0 -0
- data/vendor/local/lib/liblz4.dll +0 -0
- data/vendor/local/lib/liblz4.dll.1 +0 -0
- data/vendor/local/lib/{liblz4.dll.1.5.0 → liblz4.dll.1.8.2} +0 -0
- data/vendor/local/lib/libmecab.dll.a +0 -0
- data/vendor/local/lib/libmsgpackc.a +0 -0
- data/vendor/local/lib/libmsgpackc.dll.a +0 -0
- data/vendor/local/lib/libonigmo.a +0 -0
- data/vendor/local/lib/libonigmo.dll.a +0 -0
- data/vendor/local/lib/libpcre.a +0 -0
- data/vendor/local/lib/libpcre.dll.a +0 -0
- data/vendor/local/lib/libpcre.la +2 -2
- data/vendor/local/lib/libpcrecpp.dll.a +0 -0
- data/vendor/local/lib/libpcrecpp.la +1 -1
- data/vendor/local/lib/libpcreposix.a +0 -0
- data/vendor/local/lib/libpcreposix.dll.a +0 -0
- data/vendor/local/lib/libpcreposix.la +2 -2
- data/vendor/local/lib/libz.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/groonga-normalizer-mysql.pc +1 -1
- data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
- data/vendor/local/lib/pkgconfig/liblz4.pc +3 -3
- data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
- data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
- data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
- data/vendor/local/lib/pkgconfig/msgpack.pc +1 -1
- data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
- data/vendor/local/share/doc/groonga-normalizer-mysql/README.md +14 -22
- data/vendor/local/share/doc/groonga-normalizer-mysql/news.md +22 -2
- data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/en/html/_static/basic.css +113 -4
- data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +46 -19
- data/vendor/local/share/doc/groonga/en/html/_static/documentation_options.js +10 -0
- data/vendor/local/share/doc/groonga/en/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
- data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
- data/vendor/local/share/doc/groonga/en/html/_static/language_data.js +297 -0
- data/vendor/local/share/doc/groonga/en/html/_static/pygments.css +4 -0
- data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +69 -322
- data/vendor/local/share/doc/groonga/en/html/characteristic.html +16 -24
- data/vendor/local/share/doc/groonga/en/html/client.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/community.html +30 -38
- data/vendor/local/share/doc/groonga/en/html/contribution.html +23 -31
- data/vendor/local/share/doc/groonga/en/html/contribution/development.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +58 -66
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +51 -56
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +52 -56
- data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +27 -35
- data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +19 -27
- data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +26 -34
- data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +167 -167
- data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +16 -24
- data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +28 -36
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +59 -67
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +31 -39
- data/vendor/local/share/doc/groonga/en/html/contribution/report.html +18 -26
- data/vendor/local/share/doc/groonga/en/html/development.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +38 -43
- data/vendor/local/share/doc/groonga/en/html/genindex.html +50 -28
- data/vendor/local/share/doc/groonga/en/html/index.html +248 -234
- data/vendor/local/share/doc/groonga/en/html/install.html +43 -47
- data/vendor/local/share/doc/groonga/en/html/install/centos.html +43 -51
- data/vendor/local/share/doc/groonga/en/html/install/debian.html +52 -131
- data/vendor/local/share/doc/groonga/en/html/install/docker.html +155 -0
- data/vendor/local/share/doc/groonga/en/html/install/fedora.html +41 -49
- data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +29 -37
- data/vendor/local/share/doc/groonga/en/html/install/others.html +142 -150
- data/vendor/local/share/doc/groonga/en/html/install/solaris.html +30 -38
- data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +43 -51
- data/vendor/local/share/doc/groonga/en/html/install/windows.html +33 -41
- data/vendor/local/share/doc/groonga/en/html/limitations.html +36 -42
- data/vendor/local/share/doc/groonga/en/html/news.html +1586 -598
- data/vendor/local/share/doc/groonga/en/html/news/0.x.html +83 -83
- data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +147 -155
- data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +26 -34
- data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +225 -233
- data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +48 -56
- data/vendor/local/share/doc/groonga/en/html/news/2.x.html +378 -386
- data/vendor/local/share/doc/groonga/en/html/news/3.x.html +320 -328
- data/vendor/local/share/doc/groonga/en/html/news/4.x.html +442 -448
- data/vendor/local/share/doc/groonga/en/html/news/5.x.html +742 -860
- data/vendor/local/share/doc/groonga/en/html/news/6.x.html +544 -621
- data/vendor/local/share/doc/groonga/en/html/news/senna.html +32 -40
- data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/en/html/reference.html +208 -198
- data/vendor/local/share/doc/groonga/en/html/reference/alias.html +85 -93
- data/vendor/local/share/doc/groonga/en/html/reference/api.html +50 -57
- data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +62 -77
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +117 -149
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +140 -176
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +43 -55
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +48 -56
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +194 -254
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +106 -138
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +62 -82
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +117 -137
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +74 -98
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +79 -103
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +40 -48
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +57 -73
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +75 -99
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_inspect.html +495 -0
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +52 -68
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +291 -357
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +69 -89
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +47 -59
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +226 -306
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +120 -160
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +80 -103
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +46 -58
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +40 -52
- data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +52 -66
- data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +98 -122
- data/vendor/local/share/doc/groonga/en/html/reference/cast.html +40 -26
- data/vendor/local/share/doc/groonga/en/html/reference/column.html +16 -24
- data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +16 -24
- data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +30 -34
- data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +16 -24
- data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +92 -100
- data/vendor/local/share/doc/groonga/en/html/reference/command.html +76 -84
- data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +26 -34
- data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +64 -72
- data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +21 -29
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +25 -33
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +32 -40
- data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +105 -113
- data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +44 -50
- data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +85 -73
- data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +31 -37
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +131 -139
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +370 -326
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +115 -117
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +38 -44
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +47 -53
- data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +40 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +42 -50
- data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +41 -49
- data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +37 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +71 -63
- data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +31 -37
- data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +49 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +64 -71
- data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +335 -138
- data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +233 -87
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +45 -53
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +42 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +43 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +58 -64
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +33 -38
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +31 -38
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +295 -218
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +56 -64
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +532 -214
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +797 -388
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +35 -43
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +188 -196
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +83 -90
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +41 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +41 -49
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +401 -403
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +253 -261
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +60 -68
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +36 -44
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +35 -43
- data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +21 -29
- data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +22 -30
- data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +21 -29
- data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +39 -47
- data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +47 -53
- data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +72 -74
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +38 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +38 -45
- data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +330 -338
- data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +1545 -1194
- data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +57 -65
- data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +83 -91
- data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +119 -133
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +30 -38
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +165 -174
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +50 -50
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +104 -112
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +42 -50
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +49 -57
- data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +46 -54
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +110 -117
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +41 -48
- data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +40 -46
- data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +37 -45
- data/vendor/local/share/doc/groonga/en/html/reference/executables.html +19 -27
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +134 -114
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +25 -31
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +66 -66
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +174 -182
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +25 -33
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +27 -35
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +191 -199
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +32 -40
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +189 -163
- data/vendor/local/share/doc/groonga/en/html/reference/function.html +59 -64
- data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +71 -79
- data/vendor/local/share/doc/groonga/en/html/reference/functions/cast_loose.html +210 -0
- data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +49 -55
- data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +38 -46
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +133 -142
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +67 -73
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +56 -62
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +80 -88
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +70 -78
- data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +56 -64
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +87 -94
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +54 -62
- data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +55 -63
- data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +40 -48
- data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +36 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +74 -82
- data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +152 -160
- data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +45 -52
- data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +76 -84
- data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +37 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +39 -47
- data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +76 -84
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +37 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day_of_week.html +278 -0
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +37 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +36 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +36 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +36 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +36 -44
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +37 -45
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_find.html +368 -0
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +40 -48
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +54 -62
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +40 -47
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +44 -52
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +307 -316
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +486 -492
- data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +44 -52
- data/vendor/local/share/doc/groonga/en/html/reference/log.html +128 -147
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +43 -92
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_auto.html +179 -0
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc100.html +897 -0
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers/normalizer_nfkc51.html +162 -0
- data/vendor/local/share/doc/groonga/en/html/reference/operations.html +26 -34
- data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +48 -56
- data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +47 -55
- data/vendor/local/share/doc/groonga/en/html/reference/output.html +47 -55
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +20 -28
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +93 -101
- data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +228 -225
- data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +59 -67
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +50 -58
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +57 -65
- data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +76 -86
- data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +43 -51
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +159 -167
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +93 -101
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +85 -93
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +88 -96
- data/vendor/local/share/doc/groonga/en/html/reference/tables.html +142 -150
- data/vendor/local/share/doc/groonga/en/html/reference/token_filter/summary.html +147 -0
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +31 -223
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_nfkc100.html +626 -0
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stem.html +291 -0
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters/token_filter_stop_word.html +287 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizer/summary.html +259 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +42 -1455
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram.html +368 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank.html +221 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +240 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +270 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +292 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +200 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +212 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit.html +357 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_delimit_null.html +162 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_mecab.html +783 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_regexp.html +289 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_trigram.html +194 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers/token_unigram.html +194 -0
- data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +71 -79
- data/vendor/local/share/doc/groonga/en/html/reference/types.html +64 -72
- data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +29 -37
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +38 -46
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +38 -46
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +38 -46
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +38 -46
- data/vendor/local/share/doc/groonga/en/html/search.html +13 -24
- data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/server.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +27 -35
- data/vendor/local/share/doc/groonga/en/html/server/http.html +18 -26
- data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +94 -102
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/server/memcached.html +18 -26
- data/vendor/local/share/doc/groonga/en/html/server/package.html +101 -109
- data/vendor/local/share/doc/groonga/en/html/spec.html +19 -27
- data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +207 -215
- data/vendor/local/share/doc/groonga/en/html/spec/search.html +39 -39
- data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +15 -23
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +46 -50
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +27 -35
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +26 -31
- data/vendor/local/share/doc/groonga/en/html/tutorial.html +17 -25
- data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +46 -54
- data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +63 -71
- data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +30 -38
- data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +88 -97
- data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +19 -27
- data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +61 -69
- data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +108 -116
- data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +24 -32
- data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +19 -27
- data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +32 -40
- data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +52 -60
- data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +113 -4
- data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +46 -19
- data/vendor/local/share/doc/groonga/ja/html/_static/documentation_options.js +10 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-3.1.0.js → jquery-3.2.1.js} +474 -295
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_static/language_data.js +124 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/pygments.css +4 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +70 -150
- data/vendor/local/share/doc/groonga/ja/html/characteristic.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/client.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/community.html +29 -37
- data/vendor/local/share/doc/groonga/ja/html/contribution.html +23 -31
- data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +50 -58
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +43 -48
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +47 -51
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +26 -34
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +18 -26
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +23 -31
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +162 -162
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +16 -24
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +26 -34
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +50 -58
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +28 -36
- data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +17 -25
- data/vendor/local/share/doc/groonga/ja/html/development.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +32 -37
- data/vendor/local/share/doc/groonga/ja/html/genindex.html +50 -28
- data/vendor/local/share/doc/groonga/ja/html/index.html +247 -233
- data/vendor/local/share/doc/groonga/ja/html/install.html +41 -45
- data/vendor/local/share/doc/groonga/ja/html/install/centos.html +44 -52
- data/vendor/local/share/doc/groonga/ja/html/install/debian.html +52 -121
- data/vendor/local/share/doc/groonga/ja/html/install/docker.html +155 -0
- data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +40 -48
- data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +28 -36
- data/vendor/local/share/doc/groonga/ja/html/install/others.html +116 -124
- data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +28 -36
- data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +43 -51
- data/vendor/local/share/doc/groonga/ja/html/install/windows.html +29 -37
- data/vendor/local/share/doc/groonga/ja/html/limitations.html +30 -36
- data/vendor/local/share/doc/groonga/ja/html/news.html +1234 -384
- data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +82 -82
- data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +146 -154
- data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +25 -33
- data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +191 -199
- data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +41 -49
- data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +283 -291
- data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +229 -237
- data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +274 -280
- data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +475 -593
- data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +313 -390
- data/vendor/local/share/doc/groonga/ja/html/news/senna.html +31 -39
- data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/ja/html/reference.html +208 -198
- data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +70 -78
- data/vendor/local/share/doc/groonga/ja/html/reference/api.html +50 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +57 -72
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +107 -139
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +137 -173
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +40 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +46 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +184 -244
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +99 -131
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +57 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +100 -120
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +71 -95
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +75 -99
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +37 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +54 -70
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +71 -95
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_inspect.html +487 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +49 -65
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +286 -352
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +64 -84
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +44 -56
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +219 -299
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +116 -156
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +70 -93
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +42 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +36 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +48 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +94 -118
- data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +39 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/column.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +28 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +76 -84
- data/vendor/local/share/doc/groonga/ja/html/reference/command.html +76 -84
- data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +25 -33
- data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +51 -59
- data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +20 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +21 -29
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +27 -35
- data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +101 -109
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +39 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +84 -72
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +30 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +104 -112
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +271 -237
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +100 -102
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +37 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +41 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +39 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +39 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +34 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +70 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +30 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +42 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +59 -68
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +300 -126
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +212 -80
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +42 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +40 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +41 -49
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +57 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +32 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +30 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +246 -178
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +51 -59
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +479 -175
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +718 -326
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +34 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +145 -153
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +78 -85
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +40 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +360 -362
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +221 -229
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +47 -55
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +32 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +33 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +20 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +21 -29
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +20 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +35 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +42 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +57 -57
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +36 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +36 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +317 -325
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +1246 -917
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +50 -58
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +77 -85
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +109 -123
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +29 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +131 -140
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +49 -49
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +87 -95
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +44 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +93 -100
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +40 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +39 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +19 -27
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +125 -107
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +23 -29
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +62 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +132 -140
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +23 -31
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +25 -33
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +166 -174
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +31 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +189 -165
- data/vendor/local/share/doc/groonga/ja/html/reference/function.html +59 -64
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +69 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/cast_loose.html +208 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +48 -54
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +37 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +115 -124
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +66 -72
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +55 -61
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +69 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +60 -68
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +54 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +85 -93
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +54 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +54 -62
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +39 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +35 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +67 -75
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +130 -138
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +44 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +61 -69
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +63 -71
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day_of_week.html +276 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +35 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +35 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +35 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +35 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +36 -44
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_find.html +353 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +39 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +52 -61
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +200 -208
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +375 -382
- data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +39 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/log.html +125 -144
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +36 -70
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_auto.html +168 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc100.html +887 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers/normalizer_nfkc51.html +160 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +26 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +41 -49
- data/vendor/local/share/doc/groonga/ja/html/reference/output.html +42 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +20 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +68 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +178 -184
- data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +38 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +39 -47
- data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +63 -73
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +43 -51
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +130 -138
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +72 -80
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +68 -76
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +76 -86
- data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +129 -137
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filter/summary.html +145 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +31 -215
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_nfkc100.html +617 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stem.html +289 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters/token_filter_stop_word.html +284 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizer/summary.html +233 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -1349
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram.html +344 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank.html +219 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol.html +237 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html +267 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html +287 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol.html +179 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha.html +199 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_bigram_split_symbol_alpha_digit.html +209 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit.html +344 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_delimit_null.html +160 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_mecab.html +764 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_regexp.html +284 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_trigram.html +191 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers/token_unigram.html +191 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +65 -73
- data/vendor/local/share/doc/groonga/ja/html/reference/types.html +48 -56
- data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +29 -37
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +37 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +37 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +37 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +37 -45
- data/vendor/local/share/doc/groonga/ja/html/search.html +13 -24
- data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/server.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +22 -30
- data/vendor/local/share/doc/groonga/ja/html/server/http.html +17 -25
- data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +82 -90
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +16 -24
- data/vendor/local/share/doc/groonga/ja/html/server/package.html +99 -107
- data/vendor/local/share/doc/groonga/ja/html/spec.html +19 -27
- data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +201 -209
- data/vendor/local/share/doc/groonga/ja/html/spec/search.html +36 -36
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +44 -48
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +21 -29
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +24 -29
- data/vendor/local/share/doc/groonga/ja/html/tutorial.html +16 -24
- data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +32 -40
- data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +62 -70
- data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +22 -30
- data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +77 -86
- data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +15 -23
- data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +56 -64
- data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +84 -92
- data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +20 -28
- data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +18 -26
- data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +21 -29
- data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +43 -51
- data/vendor/local/share/doc/pcre/AUTHORS +3 -3
- data/vendor/local/share/doc/pcre/ChangeLog +53 -0
- data/vendor/local/share/doc/pcre/LICENCE +3 -3
- data/vendor/local/share/doc/pcre/NEWS +6 -0
- data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +8 -7
- data/vendor/local/share/groonga/mruby/LEGAL +35 -35
- data/vendor/local/share/license/cv2pdb/{README → README.MD} +28 -10
- data/vendor/local/share/license/groonga-normalizer-mysql/README.md +14 -22
- data/vendor/local/share/license/lz4/LICENSE +2 -2
- data/vendor/local/share/license/mruby/AUTHORS +3 -0
- data/vendor/local/share/license/mruby/MITL +1 -1
- data/vendor/local/share/license/mruby/README.md +1 -1
- data/vendor/local/share/license/msgpack/README.md +5 -34
- data/vendor/local/share/license/pcre/LICENCE +3 -3
- data/vendor/local/share/man/man1/lz4.1 +221 -86
- data/vendor/local/share/man/man1/lz4c.1 +222 -32
- data/vendor/local/share/man/man1/lz4cat.1 +221 -30
- data/vendor/local/share/man/man1/unlz4.1 +223 -0
- metadata +231 -87
- data/lib/2.1/groonga.so +0 -0
- data/vendor/local/lib/groonga/plugins/expression_rewriters/optimizer.rb +0 -147
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/options.rb +0 -14
- data/vendor/local/share/doc/groonga/en/html/_static/ajax-loader.gif +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +0 -808
- data/vendor/local/share/doc/groonga/ja/html/_static/ajax-loader.gif +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +0 -808
@@ -0,0 +1,233 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
<!DOCTYPE html>
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" lang="ja">
|
6
|
+
<head>
|
7
|
+
<meta charset="utf-8" />
|
8
|
+
<title>7.8.1. 概要 — Groonga v9.0.2ドキュメント</title>
|
9
|
+
<link rel="stylesheet" href="../../_static/groonga.css" type="text/css" />
|
10
|
+
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
11
|
+
|
12
|
+
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
13
|
+
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
14
|
+
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
15
|
+
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
16
|
+
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
17
|
+
<script type="text/javascript" src="../../_static/translations.js"></script>
|
18
|
+
|
19
|
+
<link rel="shortcut icon" href="../../_static/favicon.ico"/>
|
20
|
+
<link rel="index" title="索引" href="../../genindex.html" />
|
21
|
+
<link rel="search" title="検索" href="../../search.html" />
|
22
|
+
<link rel="next" title="7.8.2. TokenBigram" href="../tokenizers/token_bigram.html" />
|
23
|
+
<link rel="prev" title="7.8. トークナイザー" href="../tokenizers.html" />
|
24
|
+
</head><body>
|
25
|
+
<div class="header">
|
26
|
+
<h1 class="title">
|
27
|
+
<a id="top-link" href="../../index.html">
|
28
|
+
<span class="project">groonga</span>
|
29
|
+
<span class="separator">-</span>
|
30
|
+
<span class="description">オープンソースのカラムストア機能付き全文検索エンジン</span>
|
31
|
+
</a>
|
32
|
+
</h1>
|
33
|
+
|
34
|
+
<div class="other-language-links">
|
35
|
+
<ul>
|
36
|
+
<li><a href="../../../../en/html/reference/tokenizer/summary.html">English</a></li>
|
37
|
+
</ul>
|
38
|
+
</div>
|
39
|
+
</div>
|
40
|
+
|
41
|
+
|
42
|
+
<div class="related" role="navigation" aria-label="related navigation">
|
43
|
+
<h3>ナビゲーション</h3>
|
44
|
+
<ul>
|
45
|
+
<li class="right" style="margin-right: 10px">
|
46
|
+
<a href="../../genindex.html" title="総合索引"
|
47
|
+
accesskey="I">索引</a></li>
|
48
|
+
<li class="right" >
|
49
|
+
<a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
|
50
|
+
accesskey="N">次へ</a> |</li>
|
51
|
+
<li class="right" >
|
52
|
+
<a href="../tokenizers.html" title="7.8. トークナイザー"
|
53
|
+
accesskey="P">前へ</a> |</li>
|
54
|
+
<li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2ドキュメント</a> »</li>
|
55
|
+
<li class="nav-item nav-item-1"><a href="../../reference.html" >7. リファレンスマニュアル</a> »</li>
|
56
|
+
<li class="nav-item nav-item-2"><a href="../tokenizers.html" accesskey="U">7.8. トークナイザー</a> »</li>
|
57
|
+
</ul>
|
58
|
+
</div>
|
59
|
+
|
60
|
+
<div class="document">
|
61
|
+
<div class="documentwrapper">
|
62
|
+
<div class="bodywrapper">
|
63
|
+
<div class="body" role="main">
|
64
|
+
|
65
|
+
<div class="section" id="summary">
|
66
|
+
<h1>7.8.1. 概要<a class="headerlink" href="#summary" title="このヘッドラインへのパーマリンク">¶</a></h1>
|
67
|
+
<p>Groongaにはテキストをトークナイズするトークナイザーモジュールがあります。次のケースのときにトークナイザーを使います。</p>
|
68
|
+
<blockquote>
|
69
|
+
<div><ul>
|
70
|
+
<li><p>テキストのインデックスを構築するとき</p>
|
71
|
+
<div class="figure align-center" id="id1">
|
72
|
+
<a class="reference internal image-reference" href="../../_images/used-when-indexing.png"><img alt="../../_images/used-when-indexing.png" src="../../_images/used-when-indexing.png" style="width: 80%;" /></a>
|
73
|
+
<p class="caption"><span class="caption-text">テキストのインデックスを構築するときにトークナイザーを使います。</span><a class="headerlink" href="#id1" title="この画像へのパーマリンク">¶</a></p>
|
74
|
+
</div>
|
75
|
+
</li>
|
76
|
+
<li><p>クエリーで検索するとき</p>
|
77
|
+
<div class="figure align-center" id="id2">
|
78
|
+
<a class="reference internal image-reference" href="../../_images/used-when-searching.png"><img alt="../../_images/used-when-searching.png" src="../../_images/used-when-searching.png" style="width: 80%;" /></a>
|
79
|
+
<p class="caption"><span class="caption-text">クエリーで検索するときにトークナイザーを使います。</span><a class="headerlink" href="#id2" title="この画像へのパーマリンク">¶</a></p>
|
80
|
+
</div>
|
81
|
+
</li>
|
82
|
+
</ul>
|
83
|
+
</div></blockquote>
|
84
|
+
<p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
|
85
|
+
<p>一般的に <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> を使うことをオススメします。</p>
|
86
|
+
<p><a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドと <a class="reference internal" href="../commands/table_tokenize.html"><span class="doc">table_tokenize</span></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="../commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドを使って <a class="reference internal" href="../tokenizers/token_bigram.html#token-bigram"><span class="std std-ref">TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
|
87
|
+
<p>実行例:</p>
|
88
|
+
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World"
|
89
|
+
# [
|
90
|
+
# [
|
91
|
+
# 0,
|
92
|
+
# 1337566253.89858,
|
93
|
+
# 0.000355720520019531
|
94
|
+
# ],
|
95
|
+
# [
|
96
|
+
# {
|
97
|
+
# "position": 0,
|
98
|
+
# "force_prefix": false,
|
99
|
+
# "value": "He"
|
100
|
+
# },
|
101
|
+
# {
|
102
|
+
# "position": 1,
|
103
|
+
# "force_prefix": false,
|
104
|
+
# "value": "el"
|
105
|
+
# },
|
106
|
+
# {
|
107
|
+
# "position": 2,
|
108
|
+
# "force_prefix": false,
|
109
|
+
# "value": "ll"
|
110
|
+
# },
|
111
|
+
# {
|
112
|
+
# "position": 3,
|
113
|
+
# "force_prefix": false,
|
114
|
+
# "value": "lo"
|
115
|
+
# },
|
116
|
+
# {
|
117
|
+
# "position": 4,
|
118
|
+
# "force_prefix": false,
|
119
|
+
# "value": "o "
|
120
|
+
# },
|
121
|
+
# {
|
122
|
+
# "position": 5,
|
123
|
+
# "force_prefix": false,
|
124
|
+
# "value": " W"
|
125
|
+
# },
|
126
|
+
# {
|
127
|
+
# "position": 6,
|
128
|
+
# "force_prefix": false,
|
129
|
+
# "value": "Wo"
|
130
|
+
# },
|
131
|
+
# {
|
132
|
+
# "position": 7,
|
133
|
+
# "force_prefix": false,
|
134
|
+
# "value": "or"
|
135
|
+
# },
|
136
|
+
# {
|
137
|
+
# "position": 8,
|
138
|
+
# "force_prefix": false,
|
139
|
+
# "value": "rl"
|
140
|
+
# },
|
141
|
+
# {
|
142
|
+
# "position": 9,
|
143
|
+
# "force_prefix": false,
|
144
|
+
# "value": "ld"
|
145
|
+
# },
|
146
|
+
# {
|
147
|
+
# "position": 10,
|
148
|
+
# "force_prefix": false,
|
149
|
+
# "value": "d"
|
150
|
+
# }
|
151
|
+
# ]
|
152
|
+
# ]
|
153
|
+
</pre></div>
|
154
|
+
</div>
|
155
|
+
<p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
|
156
|
+
<p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
|
157
|
+
<blockquote>
|
158
|
+
<div><ul class="simple">
|
159
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">He</span></code></p></li>
|
160
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">el</span></code></p></li>
|
161
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">ll</span></code></p></li>
|
162
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">lo</span></code></p></li>
|
163
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">o_</span></code> ( <code class="docutils literal notranslate"><span class="pre">_</span></code> は空白文字という意味)</p></li>
|
164
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">_W</span></code> ( <code class="docutils literal notranslate"><span class="pre">_</span></code> は空白文字という意味)</p></li>
|
165
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">Wo</span></code></p></li>
|
166
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">or</span></code></p></li>
|
167
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">rl</span></code></p></li>
|
168
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">ld</span></code></p></li>
|
169
|
+
</ul>
|
170
|
+
</div></blockquote>
|
171
|
+
<p>上記の例では、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
|
172
|
+
<p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
|
173
|
+
<blockquote>
|
174
|
+
<div><ul class="simple">
|
175
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">Hello</span></code></p></li>
|
176
|
+
<li><p><code class="docutils literal notranslate"><span class="pre">World</span></code></p></li>
|
177
|
+
</ul>
|
178
|
+
</div></blockquote>
|
179
|
+
<p>上記の例では、<code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
|
180
|
+
<p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal notranslate"><span class="pre">ll</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal notranslate"><span class="pre">ll</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal notranslate"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal notranslate"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal notranslate"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
|
181
|
+
<p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
|
182
|
+
<p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">or</span></code> で <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
|
183
|
+
<p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal notranslate"><span class="pre">or</span></code> で <code class="docutils literal notranslate"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal notranslate"><span class="pre">World</span></code> は <code class="docutils literal notranslate"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <code class="docutils literal notranslate"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal notranslate"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
|
184
|
+
</div>
|
185
|
+
|
186
|
+
|
187
|
+
</div>
|
188
|
+
</div>
|
189
|
+
</div>
|
190
|
+
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
191
|
+
<div class="sphinxsidebarwrapper">
|
192
|
+
<h4>前のトピックへ</h4>
|
193
|
+
<p class="topless"><a href="../tokenizers.html"
|
194
|
+
title="前の章へ">7.8. トークナイザー</a></p>
|
195
|
+
<h4>次のトピックへ</h4>
|
196
|
+
<p class="topless"><a href="../tokenizers/token_bigram.html"
|
197
|
+
title="次の章へ">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></p>
|
198
|
+
<div id="searchbox" style="display: none" role="search">
|
199
|
+
<h3>クイック検索</h3>
|
200
|
+
<div class="searchformwrapper">
|
201
|
+
<form class="search" action="../../search.html" method="get">
|
202
|
+
<input type="text" name="q" />
|
203
|
+
<input type="submit" value="検索" />
|
204
|
+
</form>
|
205
|
+
</div>
|
206
|
+
</div>
|
207
|
+
<script type="text/javascript">$('#searchbox').show(0);</script>
|
208
|
+
</div>
|
209
|
+
</div>
|
210
|
+
<div class="clearer"></div>
|
211
|
+
</div>
|
212
|
+
<div class="related" role="navigation" aria-label="related navigation">
|
213
|
+
<h3>ナビゲーション</h3>
|
214
|
+
<ul>
|
215
|
+
<li class="right" style="margin-right: 10px">
|
216
|
+
<a href="../../genindex.html" title="総合索引"
|
217
|
+
>索引</a></li>
|
218
|
+
<li class="right" >
|
219
|
+
<a href="../tokenizers/token_bigram.html" title="7.8.2. TokenBigram"
|
220
|
+
>次へ</a> |</li>
|
221
|
+
<li class="right" >
|
222
|
+
<a href="../tokenizers.html" title="7.8. トークナイザー"
|
223
|
+
>前へ</a> |</li>
|
224
|
+
<li class="nav-item nav-item-0"><a href="../../index.html">Groonga v9.0.2ドキュメント</a> »</li>
|
225
|
+
<li class="nav-item nav-item-1"><a href="../../reference.html" >7. リファレンスマニュアル</a> »</li>
|
226
|
+
<li class="nav-item nav-item-2"><a href="../tokenizers.html" >7.8. トークナイザー</a> »</li>
|
227
|
+
</ul>
|
228
|
+
</div>
|
229
|
+
<div class="footer" role="contentinfo">
|
230
|
+
© Copyright 2009-2019, Brazil, Inc.
|
231
|
+
</div>
|
232
|
+
</body>
|
233
|
+
</html>
|
@@ -1,35 +1,27 @@
|
|
1
1
|
|
2
2
|
|
3
|
-
<!DOCTYPE html
|
4
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
3
|
+
<!DOCTYPE html>
|
5
4
|
|
6
5
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="ja">
|
7
6
|
<head>
|
8
|
-
<meta
|
9
|
-
<title>7.8. トークナイザー — Groonga
|
7
|
+
<meta charset="utf-8" />
|
8
|
+
<title>7.8. トークナイザー — Groonga v9.0.2ドキュメント</title>
|
10
9
|
<link rel="stylesheet" href="../_static/groonga.css" type="text/css" />
|
11
10
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
12
|
-
|
13
|
-
|
14
|
-
URL_ROOT: '../',
|
15
|
-
VERSION: '7.1.0-73-g6d02cfa',
|
16
|
-
COLLAPSE_INDEX: false,
|
17
|
-
FILE_SUFFIX: '.html',
|
18
|
-
HAS_SOURCE: false,
|
19
|
-
SOURCELINK_SUFFIX: '.txt'
|
20
|
-
};
|
21
|
-
</script>
|
11
|
+
|
12
|
+
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
22
13
|
<script type="text/javascript" src="../_static/jquery.js"></script>
|
23
14
|
<script type="text/javascript" src="../_static/underscore.js"></script>
|
24
15
|
<script type="text/javascript" src="../_static/doctools.js"></script>
|
16
|
+
<script type="text/javascript" src="../_static/language_data.js"></script>
|
25
17
|
<script type="text/javascript" src="../_static/translations.js"></script>
|
18
|
+
|
26
19
|
<link rel="shortcut icon" href="../_static/favicon.ico"/>
|
27
20
|
<link rel="index" title="索引" href="../genindex.html" />
|
28
21
|
<link rel="search" title="検索" href="../search.html" />
|
29
|
-
<link rel="next" title="7.
|
30
|
-
<link rel="prev" title="7.7.
|
31
|
-
</head>
|
32
|
-
<body>
|
22
|
+
<link rel="next" title="7.8.1. 概要" href="tokenizer/summary.html" />
|
23
|
+
<link rel="prev" title="7.7.2.3. NormalizerNFKC51" href="normalizers/normalizer_nfkc51.html" />
|
24
|
+
</head><body>
|
33
25
|
<div class="header">
|
34
26
|
<h1 class="title">
|
35
27
|
<a id="top-link" href="../index.html">
|
@@ -54,12 +46,12 @@
|
|
54
46
|
<a href="../genindex.html" title="総合索引"
|
55
47
|
accesskey="I">索引</a></li>
|
56
48
|
<li class="right" >
|
57
|
-
<a href="
|
49
|
+
<a href="tokenizer/summary.html" title="7.8.1. 概要"
|
58
50
|
accesskey="N">次へ</a> |</li>
|
59
51
|
<li class="right" >
|
60
|
-
<a href="normalizers.html" title="7.7.
|
52
|
+
<a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
|
61
53
|
accesskey="P">前へ</a> |</li>
|
62
|
-
<li class="nav-item nav-item-0"><a href="../index.html">Groonga
|
54
|
+
<li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2ドキュメント</a> »</li>
|
63
55
|
<li class="nav-item nav-item-1"><a href="../reference.html" accesskey="U">7. リファレンスマニュアル</a> »</li>
|
64
56
|
</ul>
|
65
57
|
</div>
|
@@ -71,1297 +63,24 @@
|
|
71
63
|
|
72
64
|
<div class="section" id="tokenizers">
|
73
65
|
<h1>7.8. トークナイザー<a class="headerlink" href="#tokenizers" title="このヘッドラインへのパーマリンク">¶</a></h1>
|
74
|
-
<div class="
|
75
|
-
<
|
76
|
-
<
|
77
|
-
<
|
78
|
-
<
|
79
|
-
<li><
|
80
|
-
<
|
81
|
-
<a class="reference internal
|
82
|
-
<
|
83
|
-
</
|
84
|
-
</li>
|
85
|
-
<li><
|
86
|
-
<
|
87
|
-
<a class="reference internal
|
88
|
-
<
|
89
|
-
</
|
90
|
-
</li>
|
91
|
-
</ul>
|
92
|
-
</div></blockquote>
|
93
|
-
<p>全文検索ではトークナイザーは重要なモジュールです。トークナイザーを変えることで <a class="reference external" href="http://ja.wikipedia.org/wiki/%E6%83%85%E5%A0%B1%E6%A4%9C%E7%B4%A2">適合率と再現率</a> のトレードオフを調整することができます。</p>
|
94
|
-
<p>一般的に <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> が適切なトークナイザーです。トークナイザーについてよく知らない場合は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> を使うことをオススメします。</p>
|
95
|
-
<p><a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドと <a class="reference internal" href="commands/table_tokenize.html"><span class="doc">table_tokenize</span></a> コマンドを使うことでトークナイザーを試すことができます。 <a class="reference internal" href="commands/tokenize.html"><span class="doc">tokenize</span></a> コマンドを使って <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> トークナイザーを試す例を以下に示します。</p>
|
96
|
-
<p>実行例:</p>
|
97
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World"
|
98
|
-
# [
|
99
|
-
# [
|
100
|
-
# 0,
|
101
|
-
# 1337566253.89858,
|
102
|
-
# 0.000355720520019531
|
103
|
-
# ],
|
104
|
-
# [
|
105
|
-
# {
|
106
|
-
# "position": 0,
|
107
|
-
# "force_prefix": false,
|
108
|
-
# "value": "He"
|
109
|
-
# },
|
110
|
-
# {
|
111
|
-
# "position": 1,
|
112
|
-
# "force_prefix": false,
|
113
|
-
# "value": "el"
|
114
|
-
# },
|
115
|
-
# {
|
116
|
-
# "position": 2,
|
117
|
-
# "force_prefix": false,
|
118
|
-
# "value": "ll"
|
119
|
-
# },
|
120
|
-
# {
|
121
|
-
# "position": 3,
|
122
|
-
# "force_prefix": false,
|
123
|
-
# "value": "lo"
|
124
|
-
# },
|
125
|
-
# {
|
126
|
-
# "position": 4,
|
127
|
-
# "force_prefix": false,
|
128
|
-
# "value": "o "
|
129
|
-
# },
|
130
|
-
# {
|
131
|
-
# "position": 5,
|
132
|
-
# "force_prefix": false,
|
133
|
-
# "value": " W"
|
134
|
-
# },
|
135
|
-
# {
|
136
|
-
# "position": 6,
|
137
|
-
# "force_prefix": false,
|
138
|
-
# "value": "Wo"
|
139
|
-
# },
|
140
|
-
# {
|
141
|
-
# "position": 7,
|
142
|
-
# "force_prefix": false,
|
143
|
-
# "value": "or"
|
144
|
-
# },
|
145
|
-
# {
|
146
|
-
# "position": 8,
|
147
|
-
# "force_prefix": false,
|
148
|
-
# "value": "rl"
|
149
|
-
# },
|
150
|
-
# {
|
151
|
-
# "position": 9,
|
152
|
-
# "force_prefix": false,
|
153
|
-
# "value": "ld"
|
154
|
-
# },
|
155
|
-
# {
|
156
|
-
# "position": 10,
|
157
|
-
# "force_prefix": false,
|
158
|
-
# "value": "d"
|
159
|
-
# }
|
160
|
-
# ]
|
161
|
-
# ]
|
162
|
-
</pre></div>
|
163
|
-
</div>
|
164
|
-
</div>
|
165
|
-
<div class="section" id="what-is-tokenize">
|
166
|
-
<h2>7.8.2. 「トークナイズ」とはなにか<a class="headerlink" href="#what-is-tokenize" title="このヘッドラインへのパーマリンク">¶</a></h2>
|
167
|
-
<p>「トークナイズ」はテキストから0個以上のトークンを抽出する処理です。「トークナイズ」する方法はいくつかあります。</p>
|
168
|
-
<p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
|
169
|
-
<blockquote>
|
170
|
-
<div><ul class="simple">
|
171
|
-
<li><code class="docutils literal"><span class="pre">He</span></code></li>
|
172
|
-
<li><code class="docutils literal"><span class="pre">el</span></code></li>
|
173
|
-
<li><code class="docutils literal"><span class="pre">ll</span></code></li>
|
174
|
-
<li><code class="docutils literal"><span class="pre">lo</span></code></li>
|
175
|
-
<li><code class="docutils literal"><span class="pre">o_</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</li>
|
176
|
-
<li><code class="docutils literal"><span class="pre">_W</span></code> ( <code class="docutils literal"><span class="pre">_</span></code> は空白文字という意味)</li>
|
177
|
-
<li><code class="docutils literal"><span class="pre">Wo</span></code></li>
|
178
|
-
<li><code class="docutils literal"><span class="pre">or</span></code></li>
|
179
|
-
<li><code class="docutils literal"><span class="pre">rl</span></code></li>
|
180
|
-
<li><code class="docutils literal"><span class="pre">ld</span></code></li>
|
181
|
-
</ul>
|
182
|
-
</div></blockquote>
|
183
|
-
<p>上記の例では、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から10個のトークンを抽出しました。</p>
|
184
|
-
<p>例えば、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は次のトークンにトークナイズされます。</p>
|
185
|
-
<blockquote>
|
186
|
-
<div><ul class="simple">
|
187
|
-
<li><code class="docutils literal"><span class="pre">Hello</span></code></li>
|
188
|
-
<li><code class="docutils literal"><span class="pre">World</span></code></li>
|
189
|
-
</ul>
|
190
|
-
</div></blockquote>
|
191
|
-
<p>上記の例では、<code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> から2つのトークンを抽出しました。</p>
|
192
|
-
<p>トークンは検索時のキーとして使われます。使用したトークナイズ方法で抽出したトークンでしかインデックス化されたドキュメントを探すことはできません。例えば、トークナイズ方法としてバイグラムを使った場合は <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることができます。しかし、空白区切りのトークナイズ方法を使ったときは <code class="docutils literal"><span class="pre">ll</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> を見つけることはできません。なぜなら、空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">ll</span></code> というトークンを抽出していないからです。空白区切りのトークナイズ方法は <code class="docutils literal"><span class="pre">Hello</span></code> というトークンと <code class="docutils literal"><span class="pre">World</span></code> というトークンしか抽出していません。</p>
|
193
|
-
<p>一般的に、小さいトークンを生成するトークナイズ方法は再現率が高い代わりに適合率が低くなりがちです。一方、大きいトークンを生成するトークナイズ方法は適合率が高い代わりに再現率が低くなりがちです。</p>
|
194
|
-
<p>例えば、バイグラムというトークナイズ方法では <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> と <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> を検索できます。しかし、「論理和」を検索したい人にとっては <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は不要な結果です。これは、適合率が下がったということです。しかし、再現率は上がっています。</p>
|
195
|
-
<p>空白区切りのトークナイズ方法を使った場合は <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">A</span> <span class="pre">or</span> <span class="pre">B</span></code> だけが見つかります。なぜなら、空白区切りのトークナイズ方法では <code class="docutils literal"><span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">World</span></code> という1つのトークンだけにトークナイズされるからです。これは、「論理和」を探したい人にとっては適合率が上がっています。しかし、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> も <code class="docutils literal"><span class="pre">or</span></code> を含んでいるのに見つかっていないので再現率が下がっています。</p>
|
196
|
-
</div>
|
197
|
-
<div class="section" id="built-in-tokenizsers">
|
198
|
-
<h2>7.8.3. 組み込みトークナイザー<a class="headerlink" href="#built-in-tokenizsers" title="このヘッドラインへのパーマリンク">¶</a></h2>
|
199
|
-
<p>以下は組み込みのトークナイザーのリストです。</p>
|
200
|
-
<blockquote>
|
201
|
-
<div><ul class="simple">
|
202
|
-
<li><code class="docutils literal"><span class="pre">TokenBigram</span></code></li>
|
203
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></li>
|
204
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></li>
|
205
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></li>
|
206
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></li>
|
207
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></li>
|
208
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></li>
|
209
|
-
<li><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></li>
|
210
|
-
<li><code class="docutils literal"><span class="pre">TokenUnigram</span></code></li>
|
211
|
-
<li><code class="docutils literal"><span class="pre">TokenTrigram</span></code></li>
|
212
|
-
<li><code class="docutils literal"><span class="pre">TokenDelimit</span></code></li>
|
213
|
-
<li><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></li>
|
214
|
-
<li><code class="docutils literal"><span class="pre">TokenMecab</span></code></li>
|
215
|
-
<li><code class="docutils literal"><span class="pre">TokenRegexp</span></code></li>
|
216
|
-
</ul>
|
217
|
-
</div></blockquote>
|
218
|
-
<div class="section" id="tokenbigram">
|
219
|
-
<span id="token-bigram"></span><h3>7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code><a class="headerlink" href="#tokenbigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
220
|
-
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はバイグラムベースのトークナイザーです。多くのケースでは、このトークナイザーを使うことをオススメします。</p>
|
221
|
-
<p>バイグラムというトークナイズ方法は、隣り合った2つの文字を1つのトークンとしてテキストをトークナイズします。例えば、 <code class="docutils literal"><span class="pre">Hello</span></code> は次のトークンにトークナイズします。</p>
|
222
|
-
<blockquote>
|
223
|
-
<div><ul class="simple">
|
224
|
-
<li><code class="docutils literal"><span class="pre">He</span></code></li>
|
225
|
-
<li><code class="docutils literal"><span class="pre">el</span></code></li>
|
226
|
-
<li><code class="docutils literal"><span class="pre">ll</span></code></li>
|
227
|
-
<li><code class="docutils literal"><span class="pre">lo</span></code></li>
|
228
|
-
</ul>
|
229
|
-
</div></blockquote>
|
230
|
-
<p>バイグラムというトークナイズ方法は再現率に優れています。なぜなら、2文字以上の文字のクエリーに対してはすべてのテキストを見つけることができるからです。</p>
|
231
|
-
<p>一般的に、1文字のクエリーではすべてのテキストを見つけることはできません。なぜなら、1つの文字のトークンが存在しないからです。しかし、Groongaでは1文字のクエリーでもすべてのテキストを見つけることができます。なぜなら、Groongaは前方一致検索によりクエリーで指定した文字で始まるトークンをすべて見つけることができるからです。例えば、Groongaは <code class="docutils literal"><span class="pre">l</span></code> というクエリーから <code class="docutils literal"><span class="pre">ll</span></code> というトークンと <code class="docutils literal"><span class="pre">lo</span></code> というトークンを見つけることができます。</p>
|
232
|
-
<p>バイグラムというトークナイズ方法は適合率はそれほど優れていません。なぜなら、単語の一部にクエリーが含まれていればすべてのテキストが見つかってしまうからです。例えば、 <code class="docutils literal"><span class="pre">or</span></code> で <code class="docutils literal"><span class="pre">world</span></code> が見つかります。これは非ASCIIを使う言語よりASCIIのみを使う言語で顕著です。以降の説明で触れる通り、 <code class="docutils literal"><span class="pre">TokenBigram</span></code> はこの問題を解決しています。</p>
|
233
|
-
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動は <a class="reference internal" href="normalizers.html"><span class="doc">ノーマライザー</span></a> を使うかどうかで変わります。</p>
|
234
|
-
<p>ノーマライザーを使っていない場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> は純粋なバイグラム(最後のトークンをのぞいてすべてのトークンを2文字にする)のトークナイズ方法を使います。</p>
|
235
|
-
<p>実行例:</p>
|
236
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World"
|
237
|
-
# [
|
238
|
-
# [
|
239
|
-
# 0,
|
240
|
-
# 1337566253.89858,
|
241
|
-
# 0.000355720520019531
|
242
|
-
# ],
|
243
|
-
# [
|
244
|
-
# {
|
245
|
-
# "position": 0,
|
246
|
-
# "force_prefix": false,
|
247
|
-
# "value": "He"
|
248
|
-
# },
|
249
|
-
# {
|
250
|
-
# "position": 1,
|
251
|
-
# "force_prefix": false,
|
252
|
-
# "value": "el"
|
253
|
-
# },
|
254
|
-
# {
|
255
|
-
# "position": 2,
|
256
|
-
# "force_prefix": false,
|
257
|
-
# "value": "ll"
|
258
|
-
# },
|
259
|
-
# {
|
260
|
-
# "position": 3,
|
261
|
-
# "force_prefix": false,
|
262
|
-
# "value": "lo"
|
263
|
-
# },
|
264
|
-
# {
|
265
|
-
# "position": 4,
|
266
|
-
# "force_prefix": false,
|
267
|
-
# "value": "o "
|
268
|
-
# },
|
269
|
-
# {
|
270
|
-
# "position": 5,
|
271
|
-
# "force_prefix": false,
|
272
|
-
# "value": " W"
|
273
|
-
# },
|
274
|
-
# {
|
275
|
-
# "position": 6,
|
276
|
-
# "force_prefix": false,
|
277
|
-
# "value": "Wo"
|
278
|
-
# },
|
279
|
-
# {
|
280
|
-
# "position": 7,
|
281
|
-
# "force_prefix": false,
|
282
|
-
# "value": "or"
|
283
|
-
# },
|
284
|
-
# {
|
285
|
-
# "position": 8,
|
286
|
-
# "force_prefix": false,
|
287
|
-
# "value": "rl"
|
288
|
-
# },
|
289
|
-
# {
|
290
|
-
# "position": 9,
|
291
|
-
# "force_prefix": false,
|
292
|
-
# "value": "ld"
|
293
|
-
# },
|
294
|
-
# {
|
295
|
-
# "position": 10,
|
296
|
-
# "force_prefix": false,
|
297
|
-
# "value": "d"
|
298
|
-
# }
|
299
|
-
# ]
|
300
|
-
# ]
|
301
|
-
</pre></div>
|
302
|
-
</div>
|
303
|
-
<p>ノーマライザーを使っている場合は <code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCIIの文字には空白区切りのようなトークナイズ方法を使います。非ASCII文字にはバイグラムのトークナイズ方法を使います。</p>
|
304
|
-
<p>もしかしたら、複数の方法が混ざったこの挙動はわかりにくいかもしれません。しかし、英語のテキスト(ASCII文字列のみ)や日本語テキスト(ASCII文字列と非ASCII文字列が混ざっている)のような多くのユースケースでは合理的な方法です。</p>
|
305
|
-
<p>ASCII文字しか使わない多くの言語は単語の区切りに空白文字を使っています。このようなケースに空白区切りのトークナイズ方法は適切です。</p>
|
306
|
-
<p>非ASCII文字を使う言語では単語の区切りに空白文字を使いません。このケースにはバイグラムなトークナイズ方法は適切です。</p>
|
307
|
-
<p>複数の言語が混ざっている場合は、複数の方法を組み合わせたトークナイズ方法が適切です。</p>
|
308
|
-
<p>ASCII文字にバイグラムなトークナイズ方法を使いたい場合は <a class="reference internal" href="#token-bigram-split-symbol-alpha"><span class="std std-ref">TokenBigramSplitSymbolAlpha</span></a> のような <code class="docutils literal"><span class="pre">TokenBigramSplitXXX</span></code> というトークナイザーを参照してください。</p>
|
309
|
-
<p>例を使いながら <code class="docutils literal"><span class="pre">TokenBigram</span></code> の挙動を確認しましょう。</p>
|
310
|
-
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には1つ以上の空白文字をトークンの区切りとして使います。</p>
|
311
|
-
<p>実行例:</p>
|
312
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello World" NormalizerAuto
|
313
|
-
# [
|
314
|
-
# [
|
315
|
-
# 0,
|
316
|
-
# 1337566253.89858,
|
317
|
-
# 0.000355720520019531
|
318
|
-
# ],
|
319
|
-
# [
|
320
|
-
# {
|
321
|
-
# "position": 0,
|
322
|
-
# "force_prefix": false,
|
323
|
-
# "value": "hello"
|
324
|
-
# },
|
325
|
-
# {
|
326
|
-
# "position": 1,
|
327
|
-
# "force_prefix": false,
|
328
|
-
# "value": "world"
|
329
|
-
# }
|
330
|
-
# ]
|
331
|
-
# ]
|
332
|
-
</pre></div>
|
333
|
-
</div>
|
334
|
-
<p><code class="docutils literal"><span class="pre">TokenBigram</span></code> はASCII文字には文字の種類が変わったところをトークンの区切りとします。文字の種類は次のどれかです。</p>
|
335
|
-
<blockquote>
|
336
|
-
<div><ul class="simple">
|
337
|
-
<li>アルファベット</li>
|
338
|
-
<li>数字</li>
|
339
|
-
<li>記号(たとえば <code class="docutils literal"><span class="pre">(</span></code> 、 <code class="docutils literal"><span class="pre">)</span></code> 、 <code class="docutils literal"><span class="pre">!</span></code> など)</li>
|
340
|
-
<li>ひらがな</li>
|
341
|
-
<li>カタカナ</li>
|
342
|
-
<li>漢字</li>
|
343
|
-
<li>その他</li>
|
344
|
-
</ul>
|
345
|
-
</div></blockquote>
|
346
|
-
<p>次の例は2つのトークン区切りを示しています。</p>
|
347
|
-
<blockquote>
|
348
|
-
<div><ul class="simple">
|
349
|
-
<li><code class="docutils literal"><span class="pre">100</span></code> (数字)と <code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)の間のところ</li>
|
350
|
-
<li><code class="docutils literal"><span class="pre">cents</span></code> (アルファベット)と <code class="docutils literal"><span class="pre">!!!</span></code> (記号)の間のところ</li>
|
351
|
-
</ul>
|
352
|
-
</div></blockquote>
|
353
|
-
<p>実行例:</p>
|
354
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "100cents!!!" NormalizerAuto
|
355
|
-
# [
|
356
|
-
# [
|
357
|
-
# 0,
|
358
|
-
# 1337566253.89858,
|
359
|
-
# 0.000355720520019531
|
360
|
-
# ],
|
361
|
-
# [
|
362
|
-
# {
|
363
|
-
# "position": 0,
|
364
|
-
# "force_prefix": false,
|
365
|
-
# "value": "100"
|
366
|
-
# },
|
367
|
-
# {
|
368
|
-
# "position": 1,
|
369
|
-
# "force_prefix": false,
|
370
|
-
# "value": "cents"
|
371
|
-
# },
|
372
|
-
# {
|
373
|
-
# "position": 2,
|
374
|
-
# "force_prefix": false,
|
375
|
-
# "value": "!!!"
|
376
|
-
# }
|
377
|
-
# ]
|
378
|
-
# ]
|
379
|
-
</pre></div>
|
380
|
-
</div>
|
381
|
-
<p>以下は <code class="docutils literal"><span class="pre">TokenBigram</span></code> が非ASCII文字にはトークナイズ方法としてバイグラムを使う例です。</p>
|
382
|
-
<p>実行例:</p>
|
383
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "日本語の勉強" NormalizerAuto
|
384
|
-
# [
|
385
|
-
# [
|
386
|
-
# 0,
|
387
|
-
# 1337566253.89858,
|
388
|
-
# 0.000355720520019531
|
389
|
-
# ],
|
390
|
-
# [
|
391
|
-
# {
|
392
|
-
# "position": 0,
|
393
|
-
# "force_prefix": false,
|
394
|
-
# "value": "日本"
|
395
|
-
# },
|
396
|
-
# {
|
397
|
-
# "position": 1,
|
398
|
-
# "force_prefix": false,
|
399
|
-
# "value": "本語"
|
400
|
-
# },
|
401
|
-
# {
|
402
|
-
# "position": 2,
|
403
|
-
# "force_prefix": false,
|
404
|
-
# "value": "語の"
|
405
|
-
# },
|
406
|
-
# {
|
407
|
-
# "position": 3,
|
408
|
-
# "force_prefix": false,
|
409
|
-
# "value": "の勉"
|
410
|
-
# },
|
411
|
-
# {
|
412
|
-
# "position": 4,
|
413
|
-
# "force_prefix": false,
|
414
|
-
# "value": "勉強"
|
415
|
-
# },
|
416
|
-
# {
|
417
|
-
# "position": 5,
|
418
|
-
# "force_prefix": false,
|
419
|
-
# "value": "強"
|
420
|
-
# }
|
421
|
-
# ]
|
422
|
-
# ]
|
423
|
-
</pre></div>
|
424
|
-
</div>
|
425
|
-
</div>
|
426
|
-
<div class="section" id="tokenbigramsplitsymbol">
|
427
|
-
<span id="token-bigram-split-symbol"></span><h3>7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code><a class="headerlink" href="#tokenbigramsplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
428
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code> は記号のトークナイズ方法にバイグラムを使います。</p>
|
429
|
-
<p>実行例:</p>
|
430
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbol "100cents!!!" NormalizerAuto
|
431
|
-
# [
|
432
|
-
# [
|
433
|
-
# 0,
|
434
|
-
# 1337566253.89858,
|
435
|
-
# 0.000355720520019531
|
436
|
-
# ],
|
437
|
-
# [
|
438
|
-
# {
|
439
|
-
# "position": 0,
|
440
|
-
# "force_prefix": false,
|
441
|
-
# "value": "100"
|
442
|
-
# },
|
443
|
-
# {
|
444
|
-
# "position": 1,
|
445
|
-
# "force_prefix": false,
|
446
|
-
# "value": "cents"
|
447
|
-
# },
|
448
|
-
# {
|
449
|
-
# "position": 2,
|
450
|
-
# "force_prefix": false,
|
451
|
-
# "value": "!!"
|
452
|
-
# },
|
453
|
-
# {
|
454
|
-
# "position": 3,
|
455
|
-
# "force_prefix": false,
|
456
|
-
# "value": "!!"
|
457
|
-
# },
|
458
|
-
# {
|
459
|
-
# "position": 4,
|
460
|
-
# "force_prefix": false,
|
461
|
-
# "value": "!"
|
462
|
-
# }
|
463
|
-
# ]
|
464
|
-
# ]
|
465
|
-
</pre></div>
|
466
|
-
</div>
|
467
|
-
</div>
|
468
|
-
<div class="section" id="tokenbigramsplitsymbolalpha">
|
469
|
-
<span id="token-bigram-split-symbol-alpha"></span><h3>7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
470
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号とアルファベットの扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code> は記号とアルファベットのトークナイズ方法にバイグラムを使います。</p>
|
471
|
-
<p>実行例:</p>
|
472
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlpha "100cents!!!" NormalizerAuto
|
473
|
-
# [
|
474
|
-
# [
|
475
|
-
# 0,
|
476
|
-
# 1337566253.89858,
|
477
|
-
# 0.000355720520019531
|
478
|
-
# ],
|
479
|
-
# [
|
480
|
-
# {
|
481
|
-
# "position": 0,
|
482
|
-
# "force_prefix": false,
|
483
|
-
# "value": "100"
|
484
|
-
# },
|
485
|
-
# {
|
486
|
-
# "position": 1,
|
487
|
-
# "force_prefix": false,
|
488
|
-
# "value": "ce"
|
489
|
-
# },
|
490
|
-
# {
|
491
|
-
# "position": 2,
|
492
|
-
# "force_prefix": false,
|
493
|
-
# "value": "en"
|
494
|
-
# },
|
495
|
-
# {
|
496
|
-
# "position": 3,
|
497
|
-
# "force_prefix": false,
|
498
|
-
# "value": "nt"
|
499
|
-
# },
|
500
|
-
# {
|
501
|
-
# "position": 4,
|
502
|
-
# "force_prefix": false,
|
503
|
-
# "value": "ts"
|
504
|
-
# },
|
505
|
-
# {
|
506
|
-
# "position": 5,
|
507
|
-
# "force_prefix": false,
|
508
|
-
# "value": "s!"
|
509
|
-
# },
|
510
|
-
# {
|
511
|
-
# "position": 6,
|
512
|
-
# "force_prefix": false,
|
513
|
-
# "value": "!!"
|
514
|
-
# },
|
515
|
-
# {
|
516
|
-
# "position": 7,
|
517
|
-
# "force_prefix": false,
|
518
|
-
# "value": "!!"
|
519
|
-
# },
|
520
|
-
# {
|
521
|
-
# "position": 8,
|
522
|
-
# "force_prefix": false,
|
523
|
-
# "value": "!"
|
524
|
-
# }
|
525
|
-
# ]
|
526
|
-
# ]
|
527
|
-
</pre></div>
|
528
|
-
</div>
|
529
|
-
</div>
|
530
|
-
<div class="section" id="tokenbigramsplitsymbolalphadigit">
|
531
|
-
<span id="token-bigram-split-symbol-alpha-digit"></span><h3>7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramsplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
532
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは記号とアルファベットと数字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code> は記号とアルファベットと数字のトークナイズ方法にバイグラムを使います。つまり、すべての文字をバイグラムでトークナイズします。</p>
|
533
|
-
<p>実行例:</p>
|
534
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramSplitSymbolAlphaDigit "100cents!!!" NormalizerAuto
|
535
|
-
# [
|
536
|
-
# [
|
537
|
-
# 0,
|
538
|
-
# 1337566253.89858,
|
539
|
-
# 0.000355720520019531
|
540
|
-
# ],
|
541
|
-
# [
|
542
|
-
# {
|
543
|
-
# "position": 0,
|
544
|
-
# "force_prefix": false,
|
545
|
-
# "value": "10"
|
546
|
-
# },
|
547
|
-
# {
|
548
|
-
# "position": 1,
|
549
|
-
# "force_prefix": false,
|
550
|
-
# "value": "00"
|
551
|
-
# },
|
552
|
-
# {
|
553
|
-
# "position": 2,
|
554
|
-
# "force_prefix": false,
|
555
|
-
# "value": "0c"
|
556
|
-
# },
|
557
|
-
# {
|
558
|
-
# "position": 3,
|
559
|
-
# "force_prefix": false,
|
560
|
-
# "value": "ce"
|
561
|
-
# },
|
562
|
-
# {
|
563
|
-
# "position": 4,
|
564
|
-
# "force_prefix": false,
|
565
|
-
# "value": "en"
|
566
|
-
# },
|
567
|
-
# {
|
568
|
-
# "position": 5,
|
569
|
-
# "force_prefix": false,
|
570
|
-
# "value": "nt"
|
571
|
-
# },
|
572
|
-
# {
|
573
|
-
# "position": 6,
|
574
|
-
# "force_prefix": false,
|
575
|
-
# "value": "ts"
|
576
|
-
# },
|
577
|
-
# {
|
578
|
-
# "position": 7,
|
579
|
-
# "force_prefix": false,
|
580
|
-
# "value": "s!"
|
581
|
-
# },
|
582
|
-
# {
|
583
|
-
# "position": 8,
|
584
|
-
# "force_prefix": false,
|
585
|
-
# "value": "!!"
|
586
|
-
# },
|
587
|
-
# {
|
588
|
-
# "position": 9,
|
589
|
-
# "force_prefix": false,
|
590
|
-
# "value": "!!"
|
591
|
-
# },
|
592
|
-
# {
|
593
|
-
# "position": 10,
|
594
|
-
# "force_prefix": false,
|
595
|
-
# "value": "!"
|
596
|
-
# }
|
597
|
-
# ]
|
598
|
-
# ]
|
599
|
-
</pre></div>
|
600
|
-
</div>
|
601
|
-
</div>
|
602
|
-
<div class="section" id="tokenbigramignoreblank">
|
603
|
-
<span id="token-bigram-ignore-blank"></span><h3>7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code><a class="headerlink" href="#tokenbigramignoreblank" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
604
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは空白文字の扱いです。 <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> は連続する記号と非ASCII文字の間にある空白文字を無視します。</p>
|
605
|
-
<p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
|
606
|
-
<p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
|
607
|
-
<p>実行例:</p>
|
608
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
609
|
-
# [
|
610
|
-
# [
|
611
|
-
# 0,
|
612
|
-
# 1337566253.89858,
|
613
|
-
# 0.000355720520019531
|
614
|
-
# ],
|
615
|
-
# [
|
616
|
-
# {
|
617
|
-
# "position": 0,
|
618
|
-
# "force_prefix": false,
|
619
|
-
# "value": "日"
|
620
|
-
# },
|
621
|
-
# {
|
622
|
-
# "position": 1,
|
623
|
-
# "force_prefix": false,
|
624
|
-
# "value": "本"
|
625
|
-
# },
|
626
|
-
# {
|
627
|
-
# "position": 2,
|
628
|
-
# "force_prefix": false,
|
629
|
-
# "value": "語"
|
630
|
-
# },
|
631
|
-
# {
|
632
|
-
# "position": 3,
|
633
|
-
# "force_prefix": false,
|
634
|
-
# "value": "!"
|
635
|
-
# },
|
636
|
-
# {
|
637
|
-
# "position": 4,
|
638
|
-
# "force_prefix": false,
|
639
|
-
# "value": "!"
|
640
|
-
# },
|
641
|
-
# {
|
642
|
-
# "position": 5,
|
643
|
-
# "force_prefix": false,
|
644
|
-
# "value": "!"
|
645
|
-
# }
|
646
|
-
# ]
|
647
|
-
# ]
|
648
|
-
</pre></div>
|
649
|
-
</div>
|
650
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code> での実行結果です。</p>
|
651
|
-
<p>実行例:</p>
|
652
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlank "日 本 語 ! ! !" NormalizerAuto
|
653
|
-
# [
|
654
|
-
# [
|
655
|
-
# 0,
|
656
|
-
# 1337566253.89858,
|
657
|
-
# 0.000355720520019531
|
658
|
-
# ],
|
659
|
-
# [
|
660
|
-
# {
|
661
|
-
# "position": 0,
|
662
|
-
# "force_prefix": false,
|
663
|
-
# "value": "日本"
|
664
|
-
# },
|
665
|
-
# {
|
666
|
-
# "position": 1,
|
667
|
-
# "force_prefix": false,
|
668
|
-
# "value": "本語"
|
669
|
-
# },
|
670
|
-
# {
|
671
|
-
# "position": 2,
|
672
|
-
# "force_prefix": false,
|
673
|
-
# "value": "語"
|
674
|
-
# },
|
675
|
-
# {
|
676
|
-
# "position": 3,
|
677
|
-
# "force_prefix": false,
|
678
|
-
# "value": "!!!"
|
679
|
-
# }
|
680
|
-
# ]
|
681
|
-
# ]
|
682
|
-
</pre></div>
|
683
|
-
</div>
|
684
|
-
</div>
|
685
|
-
<div class="section" id="tokenbigramignoreblanksplitsymbol">
|
686
|
-
<span id="token-bigram-ignore-blank-split-symbol"></span><h3>7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbol" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
687
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
|
688
|
-
<blockquote>
|
689
|
-
<div><ul class="simple">
|
690
|
-
<li>空白文字の扱い</li>
|
691
|
-
<li>記号の扱い</li>
|
692
|
-
</ul>
|
693
|
-
</div></blockquote>
|
694
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
|
695
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> は記号をバイグラムでトークナイズします。</p>
|
696
|
-
<p><code class="docutils literal"><span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜならこのテキストは記号と非ASCII文字を両方含んでいるからです。</p>
|
697
|
-
<p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
|
698
|
-
<p>実行例:</p>
|
699
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "日 本 語 ! ! !" NormalizerAuto
|
700
|
-
# [
|
701
|
-
# [
|
702
|
-
# 0,
|
703
|
-
# 1337566253.89858,
|
704
|
-
# 0.000355720520019531
|
705
|
-
# ],
|
706
|
-
# [
|
707
|
-
# {
|
708
|
-
# "position": 0,
|
709
|
-
# "force_prefix": false,
|
710
|
-
# "value": "日"
|
711
|
-
# },
|
712
|
-
# {
|
713
|
-
# "position": 1,
|
714
|
-
# "force_prefix": false,
|
715
|
-
# "value": "本"
|
716
|
-
# },
|
717
|
-
# {
|
718
|
-
# "position": 2,
|
719
|
-
# "force_prefix": false,
|
720
|
-
# "value": "語"
|
721
|
-
# },
|
722
|
-
# {
|
723
|
-
# "position": 3,
|
724
|
-
# "force_prefix": false,
|
725
|
-
# "value": "!"
|
726
|
-
# },
|
727
|
-
# {
|
728
|
-
# "position": 4,
|
729
|
-
# "force_prefix": false,
|
730
|
-
# "value": "!"
|
731
|
-
# },
|
732
|
-
# {
|
733
|
-
# "position": 5,
|
734
|
-
# "force_prefix": false,
|
735
|
-
# "value": "!"
|
736
|
-
# }
|
737
|
-
# ]
|
738
|
-
# ]
|
739
|
-
</pre></div>
|
740
|
-
</div>
|
741
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code> の実行結果です。</p>
|
742
|
-
<p>実行例:</p>
|
743
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbol "日 本 語 ! ! !" NormalizerAuto
|
744
|
-
# [
|
745
|
-
# [
|
746
|
-
# 0,
|
747
|
-
# 1337566253.89858,
|
748
|
-
# 0.000355720520019531
|
749
|
-
# ],
|
750
|
-
# [
|
751
|
-
# {
|
752
|
-
# "position": 0,
|
753
|
-
# "force_prefix": false,
|
754
|
-
# "value": "日本"
|
755
|
-
# },
|
756
|
-
# {
|
757
|
-
# "position": 1,
|
758
|
-
# "force_prefix": false,
|
759
|
-
# "value": "本語"
|
760
|
-
# },
|
761
|
-
# {
|
762
|
-
# "position": 2,
|
763
|
-
# "force_prefix": false,
|
764
|
-
# "value": "語!"
|
765
|
-
# },
|
766
|
-
# {
|
767
|
-
# "position": 3,
|
768
|
-
# "force_prefix": false,
|
769
|
-
# "value": "!!"
|
770
|
-
# },
|
771
|
-
# {
|
772
|
-
# "position": 4,
|
773
|
-
# "force_prefix": false,
|
774
|
-
# "value": "!!"
|
775
|
-
# },
|
776
|
-
# {
|
777
|
-
# "position": 5,
|
778
|
-
# "force_prefix": false,
|
779
|
-
# "value": "!"
|
780
|
-
# }
|
781
|
-
# ]
|
782
|
-
# ]
|
783
|
-
</pre></div>
|
784
|
-
</div>
|
785
|
-
</div>
|
786
|
-
<div class="section" id="tokenbigramignoreblanksplitsymbolalpha">
|
787
|
-
<span id="token-bigram-ignore-blank-split-symbol-alpha"></span><h3>7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalpha" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
788
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
|
789
|
-
<blockquote>
|
790
|
-
<div><ul class="simple">
|
791
|
-
<li>空白文字の扱い</li>
|
792
|
-
<li>記号とアルファベットの扱い</li>
|
793
|
-
</ul>
|
794
|
-
</div></blockquote>
|
795
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
|
796
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> は記号とアルファベットをバイグラムでトークナイズします。</p>
|
797
|
-
<p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span></code> というテキストを使うと違いがわかります。なぜなら空白文字入りの記号と非ASCII文字だけでなく、アルファベットも含んでいるからです。</p>
|
798
|
-
<p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
|
799
|
-
<p>実行例:</p>
|
800
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello 日 本 語 ! ! !" NormalizerAuto
|
801
|
-
# [
|
802
|
-
# [
|
803
|
-
# 0,
|
804
|
-
# 1337566253.89858,
|
805
|
-
# 0.000355720520019531
|
806
|
-
# ],
|
807
|
-
# [
|
808
|
-
# {
|
809
|
-
# "position": 0,
|
810
|
-
# "force_prefix": false,
|
811
|
-
# "value": "hello"
|
812
|
-
# },
|
813
|
-
# {
|
814
|
-
# "position": 1,
|
815
|
-
# "force_prefix": false,
|
816
|
-
# "value": "日"
|
817
|
-
# },
|
818
|
-
# {
|
819
|
-
# "position": 2,
|
820
|
-
# "force_prefix": false,
|
821
|
-
# "value": "本"
|
822
|
-
# },
|
823
|
-
# {
|
824
|
-
# "position": 3,
|
825
|
-
# "force_prefix": false,
|
826
|
-
# "value": "語"
|
827
|
-
# },
|
828
|
-
# {
|
829
|
-
# "position": 4,
|
830
|
-
# "force_prefix": false,
|
831
|
-
# "value": "!"
|
832
|
-
# },
|
833
|
-
# {
|
834
|
-
# "position": 5,
|
835
|
-
# "force_prefix": false,
|
836
|
-
# "value": "!"
|
837
|
-
# },
|
838
|
-
# {
|
839
|
-
# "position": 6,
|
840
|
-
# "force_prefix": false,
|
841
|
-
# "value": "!"
|
842
|
-
# }
|
843
|
-
# ]
|
844
|
-
# ]
|
845
|
-
</pre></div>
|
846
|
-
</div>
|
847
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code> の実行結果です。</p>
|
848
|
-
<p>実行例:</p>
|
849
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlpha "Hello 日 本 語 ! ! !" NormalizerAuto
|
850
|
-
# [
|
851
|
-
# [
|
852
|
-
# 0,
|
853
|
-
# 1337566253.89858,
|
854
|
-
# 0.000355720520019531
|
855
|
-
# ],
|
856
|
-
# [
|
857
|
-
# {
|
858
|
-
# "position": 0,
|
859
|
-
# "force_prefix": false,
|
860
|
-
# "value": "he"
|
861
|
-
# },
|
862
|
-
# {
|
863
|
-
# "position": 1,
|
864
|
-
# "force_prefix": false,
|
865
|
-
# "value": "el"
|
866
|
-
# },
|
867
|
-
# {
|
868
|
-
# "position": 2,
|
869
|
-
# "force_prefix": false,
|
870
|
-
# "value": "ll"
|
871
|
-
# },
|
872
|
-
# {
|
873
|
-
# "position": 3,
|
874
|
-
# "force_prefix": false,
|
875
|
-
# "value": "lo"
|
876
|
-
# },
|
877
|
-
# {
|
878
|
-
# "position": 4,
|
879
|
-
# "force_prefix": false,
|
880
|
-
# "value": "o日"
|
881
|
-
# },
|
882
|
-
# {
|
883
|
-
# "position": 5,
|
884
|
-
# "force_prefix": false,
|
885
|
-
# "value": "日本"
|
886
|
-
# },
|
887
|
-
# {
|
888
|
-
# "position": 6,
|
889
|
-
# "force_prefix": false,
|
890
|
-
# "value": "本語"
|
891
|
-
# },
|
892
|
-
# {
|
893
|
-
# "position": 7,
|
894
|
-
# "force_prefix": false,
|
895
|
-
# "value": "語!"
|
896
|
-
# },
|
897
|
-
# {
|
898
|
-
# "position": 8,
|
899
|
-
# "force_prefix": false,
|
900
|
-
# "value": "!!"
|
901
|
-
# },
|
902
|
-
# {
|
903
|
-
# "position": 9,
|
904
|
-
# "force_prefix": false,
|
905
|
-
# "value": "!!"
|
906
|
-
# },
|
907
|
-
# {
|
908
|
-
# "position": 10,
|
909
|
-
# "force_prefix": false,
|
910
|
-
# "value": "!"
|
911
|
-
# }
|
912
|
-
# ]
|
913
|
-
# ]
|
914
|
-
</pre></div>
|
915
|
-
</div>
|
916
|
-
</div>
|
917
|
-
<div class="section" id="tokenbigramignoreblanksplitsymbolalphadigit">
|
918
|
-
<span id="token-bigram-ignore-blank-split-symbol-alpha-digit"></span><h3>7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code><a class="headerlink" href="#tokenbigramignoreblanksplitsymbolalphadigit" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
919
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> と似ています。違いは次の通りです。</p>
|
920
|
-
<blockquote>
|
921
|
-
<div><ul class="simple">
|
922
|
-
<li>空白文字の扱い</li>
|
923
|
-
<li>記号とアルファベットと数字の扱い</li>
|
66
|
+
<div class="toctree-wrapper compound">
|
67
|
+
<ul>
|
68
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizer/summary.html">7.8.1. 概要</a></li>
|
69
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram.html">7.8.2. <code class="docutils literal notranslate"><span class="pre">TokenBigram</span></code></a></li>
|
70
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank.html">7.8.3. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
|
71
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol.html">7.8.4. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
|
72
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha.html">7.8.5. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
|
73
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_ignore_blank_split_symbol_alpha_digit.html">7.8.6. <code class="docutils literal notranslate"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
|
74
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol.html">7.8.7. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
|
75
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha.html">7.8.8. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
|
76
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_bigram_split_symbol_alpha_digit.html">7.8.9. <code class="docutils literal notranslate"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
|
77
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit.html">7.8.10. <code class="docutils literal notranslate"><span class="pre">TokenDelimit</span></code></a></li>
|
78
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_delimit_null.html">7.8.11. <code class="docutils literal notranslate"><span class="pre">TokenDelimitNull</span></code></a></li>
|
79
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_mecab.html">7.8.12. <code class="docutils literal notranslate"><span class="pre">TokenMecab</span></code></a></li>
|
80
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_regexp.html">7.8.13. <code class="docutils literal notranslate"><span class="pre">TokenRegexp</span></code></a></li>
|
81
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_trigram.html">7.8.14. <code class="docutils literal notranslate"><span class="pre">TokenTrigram</span></code></a></li>
|
82
|
+
<li class="toctree-l1"><a class="reference internal" href="tokenizers/token_unigram.html">7.8.15. <code class="docutils literal notranslate"><span class="pre">TokenUnigram</span></code></a></li>
|
924
83
|
</ul>
|
925
|
-
</div></blockquote>
|
926
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は連続した記号と非ASCII文字の間の空白文字を無視します。</p>
|
927
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> は記号、アルファベット、数字をバイグラムでトークナイズします。つまり、すべての文字をバイグラムでトークナイズします。</p>
|
928
|
-
<p><code class="docutils literal"><span class="pre">Hello</span> <span class="pre">日</span> <span class="pre">本</span> <span class="pre">語</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">!</span> <span class="pre">777</span></code> というテキストを使うと違いがわかります。なぜなら、このテキストは空白文字入りの記号と非ASCII文字だけでなく、アルファベットと数字も含んでいるからです。</p>
|
929
|
-
<p><a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> での実行結果です。</p>
|
930
|
-
<p>実行例:</p>
|
931
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigram "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
932
|
-
# [
|
933
|
-
# [
|
934
|
-
# 0,
|
935
|
-
# 1337566253.89858,
|
936
|
-
# 0.000355720520019531
|
937
|
-
# ],
|
938
|
-
# [
|
939
|
-
# {
|
940
|
-
# "position": 0,
|
941
|
-
# "force_prefix": false,
|
942
|
-
# "value": "hello"
|
943
|
-
# },
|
944
|
-
# {
|
945
|
-
# "position": 1,
|
946
|
-
# "force_prefix": false,
|
947
|
-
# "value": "日"
|
948
|
-
# },
|
949
|
-
# {
|
950
|
-
# "position": 2,
|
951
|
-
# "force_prefix": false,
|
952
|
-
# "value": "本"
|
953
|
-
# },
|
954
|
-
# {
|
955
|
-
# "position": 3,
|
956
|
-
# "force_prefix": false,
|
957
|
-
# "value": "語"
|
958
|
-
# },
|
959
|
-
# {
|
960
|
-
# "position": 4,
|
961
|
-
# "force_prefix": false,
|
962
|
-
# "value": "!"
|
963
|
-
# },
|
964
|
-
# {
|
965
|
-
# "position": 5,
|
966
|
-
# "force_prefix": false,
|
967
|
-
# "value": "!"
|
968
|
-
# },
|
969
|
-
# {
|
970
|
-
# "position": 6,
|
971
|
-
# "force_prefix": false,
|
972
|
-
# "value": "!"
|
973
|
-
# },
|
974
|
-
# {
|
975
|
-
# "position": 7,
|
976
|
-
# "force_prefix": false,
|
977
|
-
# "value": "777"
|
978
|
-
# }
|
979
|
-
# ]
|
980
|
-
# ]
|
981
|
-
</pre></div>
|
982
|
-
</div>
|
983
|
-
<p><code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code> の実行結果です。</p>
|
984
|
-
<p>実行例:</p>
|
985
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenBigramIgnoreBlankSplitSymbolAlphaDigit "Hello 日 本 語 ! ! ! 777" NormalizerAuto
|
986
|
-
# [
|
987
|
-
# [
|
988
|
-
# 0,
|
989
|
-
# 1337566253.89858,
|
990
|
-
# 0.000355720520019531
|
991
|
-
# ],
|
992
|
-
# [
|
993
|
-
# {
|
994
|
-
# "position": 0,
|
995
|
-
# "force_prefix": false,
|
996
|
-
# "value": "he"
|
997
|
-
# },
|
998
|
-
# {
|
999
|
-
# "position": 1,
|
1000
|
-
# "force_prefix": false,
|
1001
|
-
# "value": "el"
|
1002
|
-
# },
|
1003
|
-
# {
|
1004
|
-
# "position": 2,
|
1005
|
-
# "force_prefix": false,
|
1006
|
-
# "value": "ll"
|
1007
|
-
# },
|
1008
|
-
# {
|
1009
|
-
# "position": 3,
|
1010
|
-
# "force_prefix": false,
|
1011
|
-
# "value": "lo"
|
1012
|
-
# },
|
1013
|
-
# {
|
1014
|
-
# "position": 4,
|
1015
|
-
# "force_prefix": false,
|
1016
|
-
# "value": "o日"
|
1017
|
-
# },
|
1018
|
-
# {
|
1019
|
-
# "position": 5,
|
1020
|
-
# "force_prefix": false,
|
1021
|
-
# "value": "日本"
|
1022
|
-
# },
|
1023
|
-
# {
|
1024
|
-
# "position": 6,
|
1025
|
-
# "force_prefix": false,
|
1026
|
-
# "value": "本語"
|
1027
|
-
# },
|
1028
|
-
# {
|
1029
|
-
# "position": 7,
|
1030
|
-
# "force_prefix": false,
|
1031
|
-
# "value": "語!"
|
1032
|
-
# },
|
1033
|
-
# {
|
1034
|
-
# "position": 8,
|
1035
|
-
# "force_prefix": false,
|
1036
|
-
# "value": "!!"
|
1037
|
-
# },
|
1038
|
-
# {
|
1039
|
-
# "position": 9,
|
1040
|
-
# "force_prefix": false,
|
1041
|
-
# "value": "!!"
|
1042
|
-
# },
|
1043
|
-
# {
|
1044
|
-
# "position": 10,
|
1045
|
-
# "force_prefix": false,
|
1046
|
-
# "value": "!7"
|
1047
|
-
# },
|
1048
|
-
# {
|
1049
|
-
# "position": 11,
|
1050
|
-
# "force_prefix": false,
|
1051
|
-
# "value": "77"
|
1052
|
-
# },
|
1053
|
-
# {
|
1054
|
-
# "position": 12,
|
1055
|
-
# "force_prefix": false,
|
1056
|
-
# "value": "77"
|
1057
|
-
# },
|
1058
|
-
# {
|
1059
|
-
# "position": 13,
|
1060
|
-
# "force_prefix": false,
|
1061
|
-
# "value": "7"
|
1062
|
-
# }
|
1063
|
-
# ]
|
1064
|
-
# ]
|
1065
|
-
</pre></div>
|
1066
|
-
</div>
|
1067
|
-
</div>
|
1068
|
-
<div class="section" id="tokenunigram">
|
1069
|
-
<span id="token-unigram"></span><h3>7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code><a class="headerlink" href="#tokenunigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1070
|
-
<p><code class="docutils literal"><span class="pre">TokenUnigram</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenUnigram</span></code> は各トークンが1文字です。</p>
|
1071
|
-
<p>実行例:</p>
|
1072
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenUnigram "100cents!!!" NormalizerAuto
|
1073
|
-
# [
|
1074
|
-
# [
|
1075
|
-
# 0,
|
1076
|
-
# 1337566253.89858,
|
1077
|
-
# 0.000355720520019531
|
1078
|
-
# ],
|
1079
|
-
# [
|
1080
|
-
# {
|
1081
|
-
# "position": 0,
|
1082
|
-
# "force_prefix": false,
|
1083
|
-
# "value": "100"
|
1084
|
-
# },
|
1085
|
-
# {
|
1086
|
-
# "position": 1,
|
1087
|
-
# "force_prefix": false,
|
1088
|
-
# "value": "cents"
|
1089
|
-
# },
|
1090
|
-
# {
|
1091
|
-
# "position": 2,
|
1092
|
-
# "force_prefix": false,
|
1093
|
-
# "value": "!!!"
|
1094
|
-
# }
|
1095
|
-
# ]
|
1096
|
-
# ]
|
1097
|
-
</pre></div>
|
1098
|
-
</div>
|
1099
|
-
</div>
|
1100
|
-
<div class="section" id="tokentrigram">
|
1101
|
-
<span id="token-trigram"></span><h3>7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code><a class="headerlink" href="#tokentrigram" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1102
|
-
<p><code class="docutils literal"><span class="pre">TokenTrigram</span></code> は <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> に似ています。違いはトークンの単位です。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は各トークンが2文字ですが、 <code class="docutils literal"><span class="pre">TokenTrigram</span></code> は各トークンが3文字です。</p>
|
1103
|
-
<p>実行例:</p>
|
1104
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenTrigram "10000cents!!!!!" NormalizerAuto
|
1105
|
-
# [
|
1106
|
-
# [
|
1107
|
-
# 0,
|
1108
|
-
# 1337566253.89858,
|
1109
|
-
# 0.000355720520019531
|
1110
|
-
# ],
|
1111
|
-
# [
|
1112
|
-
# {
|
1113
|
-
# "position": 0,
|
1114
|
-
# "force_prefix": false,
|
1115
|
-
# "value": "10000"
|
1116
|
-
# },
|
1117
|
-
# {
|
1118
|
-
# "position": 1,
|
1119
|
-
# "force_prefix": false,
|
1120
|
-
# "value": "cents"
|
1121
|
-
# },
|
1122
|
-
# {
|
1123
|
-
# "position": 2,
|
1124
|
-
# "force_prefix": false,
|
1125
|
-
# "value": "!!!!!"
|
1126
|
-
# }
|
1127
|
-
# ]
|
1128
|
-
# ]
|
1129
|
-
</pre></div>
|
1130
|
-
</div>
|
1131
|
-
</div>
|
1132
|
-
<div class="section" id="tokendelimit">
|
1133
|
-
<span id="token-delimit"></span><h3>7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code><a class="headerlink" href="#tokendelimit" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1134
|
-
<p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> は1つ以上の空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )で分割してトークンを抽出します。たとえば、 <code class="docutils literal"><span class="pre">Hello</span> <span class="pre">World</span></code> は <code class="docutils literal"><span class="pre">Hello</span></code> と <code class="docutils literal"><span class="pre">World</span></code> にトークナイズされます。</p>
|
1135
|
-
<p><code class="docutils literal"><span class="pre">TokenDelimit</span></code> はタグテキストに適切です。 <code class="docutils literal"><span class="pre">groonga</span> <span class="pre">full-text-search</span> <span class="pre">http</span></code> というテキストから <code class="docutils literal"><span class="pre">groonga</span></code> 、 <code class="docutils literal"><span class="pre">full-text-search</span></code> 、 <code class="docutils literal"><span class="pre">http</span></code> を抽出します。</p>
|
1136
|
-
<p>以下は <code class="docutils literal"><span class="pre">TokenDelimit</span></code> の例です。</p>
|
1137
|
-
<p>実行例:</p>
|
1138
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimit "Groonga full-text-search HTTP" NormalizerAuto
|
1139
|
-
# [
|
1140
|
-
# [
|
1141
|
-
# 0,
|
1142
|
-
# 1337566253.89858,
|
1143
|
-
# 0.000355720520019531
|
1144
|
-
# ],
|
1145
|
-
# [
|
1146
|
-
# {
|
1147
|
-
# "position": 0,
|
1148
|
-
# "force_prefix": false,
|
1149
|
-
# "value": "groonga"
|
1150
|
-
# },
|
1151
|
-
# {
|
1152
|
-
# "position": 1,
|
1153
|
-
# "force_prefix": false,
|
1154
|
-
# "value": "full-text-search"
|
1155
|
-
# },
|
1156
|
-
# {
|
1157
|
-
# "position": 2,
|
1158
|
-
# "force_prefix": false,
|
1159
|
-
# "value": "http"
|
1160
|
-
# }
|
1161
|
-
# ]
|
1162
|
-
# ]
|
1163
|
-
</pre></div>
|
1164
|
-
</div>
|
1165
|
-
</div>
|
1166
|
-
<div class="section" id="tokendelimitnull">
|
1167
|
-
<span id="token-delimit-null"></span><h3>7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code><a class="headerlink" href="#tokendelimitnull" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1168
|
-
<p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> は <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a> に似ています。違いは区切り文字です。 <a class="reference internal" href="#token-delimit"><span class="std std-ref">TokenDelimit</span></a> は空白文字( <code class="docutils literal"><span class="pre">U+0020</span></code> )を使いますが、 <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> はNUL文字( <code class="docutils literal"><span class="pre">U+0000</span></code> )を使います。</p>
|
1169
|
-
<p><code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> もタグテキストに適切です。</p>
|
1170
|
-
<p>以下は <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code> の例です。</p>
|
1171
|
-
<p>実行例:</p>
|
1172
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenDelimitNull "Groonga\u0000full-text-search\u0000HTTP" NormalizerAuto
|
1173
|
-
# [
|
1174
|
-
# [
|
1175
|
-
# 0,
|
1176
|
-
# 1337566253.89858,
|
1177
|
-
# 0.000355720520019531
|
1178
|
-
# ],
|
1179
|
-
# [
|
1180
|
-
# {
|
1181
|
-
# "position": 0,
|
1182
|
-
# "force_prefix": false,
|
1183
|
-
# "value": "groongau0000full-text-searchu0000http"
|
1184
|
-
# }
|
1185
|
-
# ]
|
1186
|
-
# ]
|
1187
|
-
</pre></div>
|
1188
|
-
</div>
|
1189
|
-
</div>
|
1190
|
-
<div class="section" id="tokenmecab">
|
1191
|
-
<span id="token-mecab"></span><h3>7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code><a class="headerlink" href="#tokenmecab" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1192
|
-
<p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は <a class="reference external" href="https://taku910.github.io/mecab/">MeCab</a> 形態素解析器をベースにしたトークナイザーです。</p>
|
1193
|
-
<p>MeCabは日本語に依存していません。その言語用の辞書を用意すれば日本語以外でもMeCabを使えます。日本語用の辞書には <a class="reference external" href="http://sourceforge.jp/projects/naist-jdic/">NAIST Japanese Dictionary</a> を使えます。</p>
|
1194
|
-
<p><code class="docutils literal"><span class="pre">TokenMecab</span></code> は再現率より適合率に優れています。 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> では <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">東京都</span></code> も <code class="docutils literal"><span class="pre">京都</span></code> も見つかりますが、この場合は <code class="docutils literal"><span class="pre">東京都</span></code> は期待した結果ではありません。 <code class="docutils literal"><span class="pre">TokenMecab</span></code> を使うと <code class="docutils literal"><span class="pre">京都</span></code> というクエリーで <code class="docutils literal"><span class="pre">京都</span></code> だけを見つけられます。</p>
|
1195
|
-
<p>新語をサポートしたい場合は、MeCabの辞書を更新し続ける必要があります。これはメンテナンスコストがかかります。( <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> には辞書のメンテナンスコストはありません。なぜなら、 <a class="reference internal" href="#token-bigram"><span class="std std-ref">TokenBigram</span></a> は辞書を使っていないからです。)新語への対応に <a class="reference external" href="https://github.com/neologd/mecab-ipadic-neologd">mecab-ipadic-NEologd : Neologism dictionary for MeCab</a> が役に立つかもしれません。</p>
|
1196
|
-
<p>以下は <code class="docutils literal"><span class="pre">TokenMeCab</span></code> の例です。 <code class="docutils literal"><span class="pre">東京都</span></code> は <code class="docutils literal"><span class="pre">東京</span></code> と <code class="docutils literal"><span class="pre">都</span></code> にトークナイズされています。 <code class="docutils literal"><span class="pre">京都</span></code> というトークンはありません。</p>
|
1197
|
-
<p>実行例:</p>
|
1198
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenMecab "東京都"
|
1199
|
-
# [
|
1200
|
-
# [
|
1201
|
-
# -22,
|
1202
|
-
# 1337566253.89858,
|
1203
|
-
# 0.000355720520019531,
|
1204
|
-
# "[tokenize] nonexistent tokenizer: <TokenMecab>",
|
1205
|
-
# [
|
1206
|
-
# [
|
1207
|
-
# "create_lexicon_for_tokenize",
|
1208
|
-
# "proc_tokenize.c",
|
1209
|
-
# 139
|
1210
|
-
# ]
|
1211
|
-
# ]
|
1212
|
-
# ]
|
1213
|
-
# ]
|
1214
|
-
</pre></div>
|
1215
|
-
</div>
|
1216
|
-
</div>
|
1217
|
-
<div class="section" id="tokenregexp">
|
1218
|
-
<span id="token-regexp"></span><h3>7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code><a class="headerlink" href="#tokenregexp" title="このヘッドラインへのパーマリンク">¶</a></h3>
|
1219
|
-
<div class="versionadded">
|
1220
|
-
<p><span class="versionmodified">バージョン 5.0.1 で追加.</span></p>
|
1221
|
-
</div>
|
1222
|
-
<div class="admonition caution">
|
1223
|
-
<p class="first admonition-title">ご用心</p>
|
1224
|
-
<p class="last">このトークナイザーは実験的です。仕様が変わる可能性があります。</p>
|
1225
|
-
</div>
|
1226
|
-
<div class="admonition caution">
|
1227
|
-
<p class="first admonition-title">ご用心</p>
|
1228
|
-
<p class="last">このトークナイザーはUTF-8でしか使えません。EUC-JPやShift_JISなどと一緒には使えません。</p>
|
1229
|
-
</div>
|
1230
|
-
<p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はインデックスを使った正規表現検索をサポートするトークナイザーです。</p>
|
1231
|
-
<p>一般的に、正規表現検索は逐次検索で実行します。しかし、次のケースはインデックスを使って検索できます。</p>
|
1232
|
-
<blockquote>
|
1233
|
-
<div><ul class="simple">
|
1234
|
-
<li><code class="docutils literal"><span class="pre">hello</span></code> のようにリテラルしかないケース</li>
|
1235
|
-
<li><code class="docutils literal"><span class="pre">\A/home/alice</span></code> のようにテキストの最初でのマッチとリテラルのみのケース</li>
|
1236
|
-
<li><code class="docutils literal"><span class="pre">\.txt\z</span></code> のようにテキストの最後でのマッチとリテラルのみのケース</li>
|
1237
|
-
</ul>
|
1238
|
-
</div></blockquote>
|
1239
|
-
<p>多くのケースでは、逐次検索よりもインデックスを使った検索の方が高速です。</p>
|
1240
|
-
<p><code class="docutils literal"><span class="pre">TokenRegexp</span></code> はベースはバイグラムを使います。 <code class="docutils literal"><span class="pre">TokenRegexp</span></code> は、インデックス時に、テキストの先頭にテキストの先頭であるというマーク( <code class="docutils literal"><span class="pre">U+FFEF</span></code> )を入れ、テキストの最後にテキストの最後であるというマーク( <code class="docutils literal"><span class="pre">U+FFF0</span></code> )を入れます。</p>
|
1241
|
-
<p>実行例:</p>
|
1242
|
-
<div class="highlight-none"><div class="highlight"><pre><span></span>tokenize TokenRegexp "/home/alice/test.txt" NormalizerAuto --mode ADD
|
1243
|
-
# [
|
1244
|
-
# [
|
1245
|
-
# 0,
|
1246
|
-
# 1337566253.89858,
|
1247
|
-
# 0.000355720520019531
|
1248
|
-
# ],
|
1249
|
-
# [
|
1250
|
-
# {
|
1251
|
-
# "position": 0,
|
1252
|
-
# "force_prefix": false,
|
1253
|
-
# "value": ""
|
1254
|
-
# },
|
1255
|
-
# {
|
1256
|
-
# "position": 1,
|
1257
|
-
# "force_prefix": false,
|
1258
|
-
# "value": "/h"
|
1259
|
-
# },
|
1260
|
-
# {
|
1261
|
-
# "position": 2,
|
1262
|
-
# "force_prefix": false,
|
1263
|
-
# "value": "ho"
|
1264
|
-
# },
|
1265
|
-
# {
|
1266
|
-
# "position": 3,
|
1267
|
-
# "force_prefix": false,
|
1268
|
-
# "value": "om"
|
1269
|
-
# },
|
1270
|
-
# {
|
1271
|
-
# "position": 4,
|
1272
|
-
# "force_prefix": false,
|
1273
|
-
# "value": "me"
|
1274
|
-
# },
|
1275
|
-
# {
|
1276
|
-
# "position": 5,
|
1277
|
-
# "force_prefix": false,
|
1278
|
-
# "value": "e/"
|
1279
|
-
# },
|
1280
|
-
# {
|
1281
|
-
# "position": 6,
|
1282
|
-
# "force_prefix": false,
|
1283
|
-
# "value": "/a"
|
1284
|
-
# },
|
1285
|
-
# {
|
1286
|
-
# "position": 7,
|
1287
|
-
# "force_prefix": false,
|
1288
|
-
# "value": "al"
|
1289
|
-
# },
|
1290
|
-
# {
|
1291
|
-
# "position": 8,
|
1292
|
-
# "force_prefix": false,
|
1293
|
-
# "value": "li"
|
1294
|
-
# },
|
1295
|
-
# {
|
1296
|
-
# "position": 9,
|
1297
|
-
# "force_prefix": false,
|
1298
|
-
# "value": "ic"
|
1299
|
-
# },
|
1300
|
-
# {
|
1301
|
-
# "position": 10,
|
1302
|
-
# "force_prefix": false,
|
1303
|
-
# "value": "ce"
|
1304
|
-
# },
|
1305
|
-
# {
|
1306
|
-
# "position": 11,
|
1307
|
-
# "force_prefix": false,
|
1308
|
-
# "value": "e/"
|
1309
|
-
# },
|
1310
|
-
# {
|
1311
|
-
# "position": 12,
|
1312
|
-
# "force_prefix": false,
|
1313
|
-
# "value": "/t"
|
1314
|
-
# },
|
1315
|
-
# {
|
1316
|
-
# "position": 13,
|
1317
|
-
# "force_prefix": false,
|
1318
|
-
# "value": "te"
|
1319
|
-
# },
|
1320
|
-
# {
|
1321
|
-
# "position": 14,
|
1322
|
-
# "force_prefix": false,
|
1323
|
-
# "value": "es"
|
1324
|
-
# },
|
1325
|
-
# {
|
1326
|
-
# "position": 15,
|
1327
|
-
# "force_prefix": false,
|
1328
|
-
# "value": "st"
|
1329
|
-
# },
|
1330
|
-
# {
|
1331
|
-
# "position": 16,
|
1332
|
-
# "force_prefix": false,
|
1333
|
-
# "value": "t."
|
1334
|
-
# },
|
1335
|
-
# {
|
1336
|
-
# "position": 17,
|
1337
|
-
# "force_prefix": false,
|
1338
|
-
# "value": ".t"
|
1339
|
-
# },
|
1340
|
-
# {
|
1341
|
-
# "position": 18,
|
1342
|
-
# "force_prefix": false,
|
1343
|
-
# "value": "tx"
|
1344
|
-
# },
|
1345
|
-
# {
|
1346
|
-
# "position": 19,
|
1347
|
-
# "force_prefix": false,
|
1348
|
-
# "value": "xt"
|
1349
|
-
# },
|
1350
|
-
# {
|
1351
|
-
# "position": 20,
|
1352
|
-
# "force_prefix": false,
|
1353
|
-
# "value": "t"
|
1354
|
-
# },
|
1355
|
-
# {
|
1356
|
-
# "position": 21,
|
1357
|
-
# "force_prefix": false,
|
1358
|
-
# "value": ""
|
1359
|
-
# }
|
1360
|
-
# ]
|
1361
|
-
# ]
|
1362
|
-
</pre></div>
|
1363
|
-
</div>
|
1364
|
-
</div>
|
1365
84
|
</div>
|
1366
85
|
</div>
|
1367
86
|
|
@@ -1371,46 +90,20 @@
|
|
1371
90
|
</div>
|
1372
91
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
1373
92
|
<div class="sphinxsidebarwrapper">
|
1374
|
-
<h3><a href="../index.html">目次</a></h3>
|
1375
|
-
<ul>
|
1376
|
-
<li><a class="reference internal" href="#">7.8. トークナイザー</a><ul>
|
1377
|
-
<li><a class="reference internal" href="#summary">7.8.1. 概要</a></li>
|
1378
|
-
<li><a class="reference internal" href="#what-is-tokenize">7.8.2. 「トークナイズ」とはなにか</a></li>
|
1379
|
-
<li><a class="reference internal" href="#built-in-tokenizsers">7.8.3. 組み込みトークナイザー</a><ul>
|
1380
|
-
<li><a class="reference internal" href="#tokenbigram">7.8.3.1. <code class="docutils literal"><span class="pre">TokenBigram</span></code></a></li>
|
1381
|
-
<li><a class="reference internal" href="#tokenbigramsplitsymbol">7.8.3.2. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbol</span></code></a></li>
|
1382
|
-
<li><a class="reference internal" href="#tokenbigramsplitsymbolalpha">7.8.3.3. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlpha</span></code></a></li>
|
1383
|
-
<li><a class="reference internal" href="#tokenbigramsplitsymbolalphadigit">7.8.3.4. <code class="docutils literal"><span class="pre">TokenBigramSplitSymbolAlphaDigit</span></code></a></li>
|
1384
|
-
<li><a class="reference internal" href="#tokenbigramignoreblank">7.8.3.5. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlank</span></code></a></li>
|
1385
|
-
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbol">7.8.3.6. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbol</span></code></a></li>
|
1386
|
-
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalpha">7.8.3.7. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlpha</span></code></a></li>
|
1387
|
-
<li><a class="reference internal" href="#tokenbigramignoreblanksplitsymbolalphadigit">7.8.3.8. <code class="docutils literal"><span class="pre">TokenBigramIgnoreBlankSplitSymbolAlphaDigit</span></code></a></li>
|
1388
|
-
<li><a class="reference internal" href="#tokenunigram">7.8.3.9. <code class="docutils literal"><span class="pre">TokenUnigram</span></code></a></li>
|
1389
|
-
<li><a class="reference internal" href="#tokentrigram">7.8.3.10. <code class="docutils literal"><span class="pre">TokenTrigram</span></code></a></li>
|
1390
|
-
<li><a class="reference internal" href="#tokendelimit">7.8.3.11. <code class="docutils literal"><span class="pre">TokenDelimit</span></code></a></li>
|
1391
|
-
<li><a class="reference internal" href="#tokendelimitnull">7.8.3.12. <code class="docutils literal"><span class="pre">TokenDelimitNull</span></code></a></li>
|
1392
|
-
<li><a class="reference internal" href="#tokenmecab">7.8.3.13. <code class="docutils literal"><span class="pre">TokenMecab</span></code></a></li>
|
1393
|
-
<li><a class="reference internal" href="#tokenregexp">7.8.3.14. <code class="docutils literal"><span class="pre">TokenRegexp</span></code></a></li>
|
1394
|
-
</ul>
|
1395
|
-
</li>
|
1396
|
-
</ul>
|
1397
|
-
</li>
|
1398
|
-
</ul>
|
1399
|
-
|
1400
93
|
<h4>前のトピックへ</h4>
|
1401
|
-
<p class="topless"><a href="normalizers.html"
|
1402
|
-
title="前の章へ">7.7.
|
94
|
+
<p class="topless"><a href="normalizers/normalizer_nfkc51.html"
|
95
|
+
title="前の章へ">7.7.2.3. <code class="docutils literal notranslate"><span class="pre">NormalizerNFKC51</span></code></a></p>
|
1403
96
|
<h4>次のトピックへ</h4>
|
1404
|
-
<p class="topless"><a href="
|
1405
|
-
title="次の章へ">7.
|
97
|
+
<p class="topless"><a href="tokenizer/summary.html"
|
98
|
+
title="次の章へ">7.8.1. 概要</a></p>
|
1406
99
|
<div id="searchbox" style="display: none" role="search">
|
1407
100
|
<h3>クイック検索</h3>
|
101
|
+
<div class="searchformwrapper">
|
1408
102
|
<form class="search" action="../search.html" method="get">
|
1409
|
-
<
|
1410
|
-
<
|
1411
|
-
<input type="hidden" name="check_keywords" value="yes" />
|
1412
|
-
<input type="hidden" name="area" value="default" />
|
103
|
+
<input type="text" name="q" />
|
104
|
+
<input type="submit" value="検索" />
|
1413
105
|
</form>
|
106
|
+
</div>
|
1414
107
|
</div>
|
1415
108
|
<script type="text/javascript">$('#searchbox').show(0);</script>
|
1416
109
|
</div>
|
@@ -1424,17 +117,17 @@
|
|
1424
117
|
<a href="../genindex.html" title="総合索引"
|
1425
118
|
>索引</a></li>
|
1426
119
|
<li class="right" >
|
1427
|
-
<a href="
|
120
|
+
<a href="tokenizer/summary.html" title="7.8.1. 概要"
|
1428
121
|
>次へ</a> |</li>
|
1429
122
|
<li class="right" >
|
1430
|
-
<a href="normalizers.html" title="7.7.
|
123
|
+
<a href="normalizers/normalizer_nfkc51.html" title="7.7.2.3. NormalizerNFKC51"
|
1431
124
|
>前へ</a> |</li>
|
1432
|
-
<li class="nav-item nav-item-0"><a href="../index.html">Groonga
|
125
|
+
<li class="nav-item nav-item-0"><a href="../index.html">Groonga v9.0.2ドキュメント</a> »</li>
|
1433
126
|
<li class="nav-item nav-item-1"><a href="../reference.html" >7. リファレンスマニュアル</a> »</li>
|
1434
127
|
</ul>
|
1435
128
|
</div>
|
1436
129
|
<div class="footer" role="contentinfo">
|
1437
|
-
© Copyright 2009-
|
130
|
+
© Copyright 2009-2019, Brazil, Inc.
|
1438
131
|
</div>
|
1439
132
|
</body>
|
1440
133
|
</html>
|