text_ux 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +25 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +51 -0
- data/Rakefile +18 -0
- data/ext/text_ux/extconf.rb +5 -0
- data/ext/text_ux/text_ux.cpp +252 -0
- data/lib/text_ux.rb +5 -0
- data/lib/text_ux/version.rb +3 -0
- data/spec/fixtures/test.ux +0 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/text_ux_spec.rb +137 -0
- data/text_ux.gemspec +26 -0
- data/vendor/ux-trie/ux-0.1.9/.lock-waf_darwin_build +8 -0
- data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest-all.cc +9118 -0
- data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest.h +19537 -0
- data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest_main.cc +39 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.py +733 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.py +147 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.py +314 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.py +298 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.py +37 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.py +149 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.py +500 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.py +130 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.py +191 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.py +358 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.py +669 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.py +341 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.py +4 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.py +12 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/asm.py +25 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/bison.py +29 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c.py +27 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.py +56 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.py +708 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.py +121 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.py +606 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.py +110 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.py +372 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_c.py +39 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.py +39 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_d.py +30 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_fc.py +45 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cs.py +98 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.py +27 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d.py +51 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_config.py +47 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_scan.py +133 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dbus.py +30 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dmd.py +43 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/errcheck.py +153 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc.py +123 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_config.py +271 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_scan.py +68 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/flex.py +27 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/g95.py +55 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gas.py +10 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gcc.py +98 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gdc.py +34 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gfortran.py +69 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/glib2.py +174 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gnu_dirs.py +65 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.py +98 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icc.py +31 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icpc.py +30 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ifort.py +42 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/intltool.py +78 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/irixcc.py +49 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/javaw.py +272 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/kde4.py +49 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/lua.py +19 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/msvc.py +650 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/nasm.py +13 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/perl.py +78 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/python.py +303 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/qt4.py +424 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ruby.py +104 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncc.py +54 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncxx.py +55 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/tex.py +222 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/vala.py +215 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/waf_unit_test.py +79 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/winres.py +34 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlc.py +46 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlcxx.py +46 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.py +334 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.py +4 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.py +173 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.py +4 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.py +223 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.py +50 -0
- data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/src/bitVec.cpp +119 -0
- data/vendor/ux-trie/ux-0.1.9/src/bitVec.hpp +64 -0
- data/vendor/ux-trie/ux-0.1.9/src/bitVecTest.cpp +143 -0
- data/vendor/ux-trie/ux-0.1.9/src/cmdline.h +809 -0
- data/vendor/ux-trie/ux-0.1.9/src/rsDic.cpp +121 -0
- data/vendor/ux-trie/ux-0.1.9/src/rsDic.hpp +57 -0
- data/vendor/ux-trie/ux-0.1.9/src/ux.hpp +26 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxMain.cpp +206 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxMap.cpp +0 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxMap.hpp +248 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxMapTest.cpp +139 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxTest.cpp +229 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxTrie.cpp +529 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxTrie.hpp +220 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxUtil.cpp +92 -0
- data/vendor/ux-trie/ux-0.1.9/src/uxUtil.hpp +35 -0
- data/vendor/ux-trie/ux-0.1.9/src/wscript +43 -0
- data/vendor/ux-trie/ux-0.1.9/unittest_gtest.py +0 -0
- data/vendor/ux-trie/ux-0.1.9/unittest_gtest.pyc +0 -0
- data/vendor/ux-trie/ux-0.1.9/unittestt.py +166 -0
- data/vendor/ux-trie/ux-0.1.9/ux.pc.in +10 -0
- data/vendor/ux-trie/ux-0.1.9/waf +0 -0
- data/vendor/ux-trie/ux-0.1.9/wscript +32 -0
- metadata +249 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2010 Daisuke Okanohara
|
3
|
+
*
|
4
|
+
* Redistribution and use in source and binary forms, with or without
|
5
|
+
* modification, are permitted provided that the following conditions
|
6
|
+
* are met:
|
7
|
+
*
|
8
|
+
* 1. Redistributions of source code must retain the above Copyright
|
9
|
+
* notice, this list of conditions and the following disclaimer.
|
10
|
+
*
|
11
|
+
* 2. Redistributions in binary form must reproduce the above Copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* 3. Neither the name of the authors nor the names of its contributors
|
16
|
+
* may be used to endorse or promote products derived from this
|
17
|
+
* software without specific prior written permission.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <gtest/gtest.h>
|
21
|
+
#include <vector>
|
22
|
+
#include <string>
|
23
|
+
#include "uxMap.hpp"
|
24
|
+
|
25
|
+
using namespace std;
|
26
|
+
|
27
|
+
// Building a map from an empty key list must yield a map of size zero.
TEST(uxmap, trivial){
  ux::Map<int> uxm;

  vector<string> wordList;  // intentionally left empty
  uxm.build(wordList);
  ASSERT_EQ(0, uxm.size());
}
|
35
|
+
|
36
|
+
|
37
|
+
// Builds a map from a plain key list, assigns a value to every key with
// set(), and checks that get() hands back exactly the value that was stored.
TEST(uxmap, simple){
  static const char* const kKeys[] = {"i", "in", "to", "we", "inn", "tea", "ten"};
  const size_t kKeyNum = sizeof(kKeys) / sizeof(kKeys[0]);

  // Key number n (0-based) is paired with the value n + 1.
  vector<string> wordList;
  vector<int> valueList;
  for (size_t n = 0; n < kKeyNum; ++n){
    wordList.push_back(kKeys[n]);
    valueList.push_back(static_cast<int>(n) + 1);
  }

  // Keep an untouched copy of the keys in case build() reorders its argument.
  vector<string> origWordList(wordList);
  ux::Map<int> uxm;
  uxm.build(wordList);

  // Store one value per key; set() is expected to return 0.
  for (size_t n = 0; n < origWordList.size(); ++n){
    const string& key = origWordList[n];
    ASSERT_EQ(0, uxm.set(key.c_str(), key.size(), valueList[n]));
  }

  // Read every value back; get() is expected to return 0 and fill `found`.
  for (size_t n = 0; n < origWordList.size(); ++n){
    const string& key = origWordList[n];
    int found = -1;
    ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), found));
    ASSERT_EQ(valueList[n], found);
  }
}
|
71
|
+
|
72
|
+
// Builds a map directly from a vector of (key, value) pairs and checks that
// every key resolves to its paired value.
TEST(uxmap, pair){
  static const char* const kKeys[] = {"i", "in", "to", "we", "inn", "tea", "ten"};
  const size_t kKeyNum = sizeof(kKeys) / sizeof(kKeys[0]);

  // Key number n (0-based) is paired with the value n + 1.
  vector<pair<string, int> > kvs;
  for (size_t n = 0; n < kKeyNum; ++n){
    kvs.push_back(pair<string, int>(kKeys[n], static_cast<int>(n) + 1));
  }

  ux::Map<int> uxm;
  uxm.build(kvs);

  for (size_t n = 0; n < kvs.size(); ++n){
    const pair<string, int>& kv = kvs[n];
    int found = -1;
    ASSERT_EQ(0, uxm.get(kv.first.c_str(), kv.first.size(), found));
    ASSERT_EQ(kv.second, found);
  }
}
|
92
|
+
|
93
|
+
// Builds a map from a std::map<string, int> and checks that every entry of
// the source map can be looked up with its original value.
TEST(uxmap, map){
  static const char* const kKeys[] = {"i", "in", "to", "we", "inn", "tea", "ten"};
  const size_t kKeyNum = sizeof(kKeys) / sizeof(kKeys[0]);

  // Key number n (0-based) is stored with the value n + 1.
  map<string, int> kvs;
  for (size_t n = 0; n < kKeyNum; ++n){
    kvs[string(kKeys[n])] = static_cast<int>(n) + 1;
  }

  ux::Map<int> uxm;
  uxm.build(kvs);

  typedef map<string, int>::const_iterator KvIter;
  for (KvIter it = kvs.begin(); it != kvs.end(); ++it){
    const string& key = it->first;
    int found = -1;
    ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), found));
    ASSERT_EQ(it->second, found);
  }
}
|
113
|
+
|
114
|
+
// Round-trips a map through save()/load() on in-memory streams and checks
// that the reloaded map answers every lookup like the source data.
TEST(uxmap, save){
  static const char* const kKeys[] = {"i", "in", "to", "we", "inn", "tea", "ten"};
  const size_t kKeyNum = sizeof(kKeys) / sizeof(kKeys[0]);

  // Key number n (0-based) is stored with the value n + 1.
  map<string, int> kvs;
  for (size_t n = 0; n < kKeyNum; ++n){
    kvs[string(kKeys[n])] = static_cast<int>(n) + 1;
  }

  ux::Map<int> uxm;
  uxm.build(kvs);

  // Serialize into a string buffer ...
  ostringstream os;
  ASSERT_EQ(0, uxm.save(os));

  // ... and deserialize into a fresh map from that same buffer.
  istringstream is(os.str());
  ux::Map<int> uxm_load;
  ASSERT_EQ(0, uxm_load.load(is));

  typedef map<string, int>::const_iterator KvIter;
  for (KvIter it = kvs.begin(); it != kvs.end(); ++it){
    const string& key = it->first;
    int found = -1;
    ASSERT_EQ(0, uxm_load.get(key.c_str(), key.size(), found));
    ASSERT_EQ(it->second, found);
  }
}
|
@@ -0,0 +1,229 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2010 Daisuke Okanohara
|
3
|
+
*
|
4
|
+
* Redistribution and use in source and binary forms, with or without
|
5
|
+
* modification, are permitted provided that the following conditions
|
6
|
+
* are met:
|
7
|
+
*
|
8
|
+
* 1. Redistributions of source code must retain the above Copyright
|
9
|
+
* notice, this list of conditions and the following disclaimer.
|
10
|
+
*
|
11
|
+
* 2. Redistributions in binary form must reproduce the above Copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* 3. Neither the name of the authors nor the names of its contributors
|
16
|
+
* may be used to endorse or promote products derived from this
|
17
|
+
* software without specific prior written permission.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <gtest/gtest.h>
|
21
|
+
#include <vector>
|
22
|
+
#include <string>
|
23
|
+
#include <sstream>
|
24
|
+
#include <map>
|
25
|
+
#include "uxTrie.hpp"
|
26
|
+
|
27
|
+
using namespace std;
|
28
|
+
|
29
|
+
// A trie built from an empty key list must not find anything.
TEST(ux, trivial){
  vector<string> wordList;  // intentionally left empty
  ux::Trie trie;
  trie.build(wordList);

  const string query = "hoge";
  size_t matchedLen = 0;
  ASSERT_EQ(ux::NOTFOUND, trie.prefixSearch(query.c_str(), query.size(), matchedLen));
}
|
37
|
+
|
38
|
+
// After build(), decodeKey(i) must return the i-th key of the input list
// for this particular word set.
TEST(ux, simple){
  static const char* const kKeys[] = {"i", "in", "to", "we", "inn", "tea", "ten"};
  const size_t kKeyNum = sizeof(kKeys) / sizeof(kKeys[0]);

  vector<string> wordList(kKeys, kKeys + kKeyNum);
  // build() sorts the list it is given, so compare against a pristine copy.
  vector<string> origWordList(wordList);

  ux::Trie trie;
  trie.build(wordList);

  for (size_t id = 0; id < origWordList.size(); ++id){
    ASSERT_EQ(origWordList[id], trie.decodeKey(id));
  }
}
|
55
|
+
|
56
|
+
// A trie must decode the same keys after clear() followed by a rebuild as it
// did after the first build.
TEST(ux, clear){
  static const char* const kKeys[] = {"i", "in", "to", "we", "inn", "tea", "ten"};
  const size_t kKeyNum = sizeof(kKeys) / sizeof(kKeys[0]);

  vector<string> wordList(kKeys, kKeys + kKeyNum);
  // build() sorts the list it is given, so compare against a pristine copy.
  vector<string> origWordList(wordList);

  ux::Trie trie;

  // First build.
  trie.build(wordList);
  for (size_t id = 0; id < origWordList.size(); ++id){
    ASSERT_EQ(origWordList[id], trie.decodeKey(id));
  }

  // Reset and build again from the same list.
  trie.clear();
  trie.build(wordList);
  for (size_t id = 0; id < origWordList.size(); ++id){
    ASSERT_EQ(origWordList[id], trie.decodeKey(id));
  }
}
|
79
|
+
|
80
|
+
|
81
|
+
// Pins the id -> key assignment for a four-word dictionary.
TEST(ux, decodeKey){
  vector<string> wordList;
  wordList.push_back("tok");
  wordList.push_back("osak");
  wordList.push_back("okina");
  wordList.push_back("fukush");

  ux::Trie trie;
  trie.build(wordList);

  // Expected key for id 0, 1, 2, 3 in that order.
  static const char* const kExpected[] = {"fukush", "tok", "okina", "osak"};
  const int kExpectedNum = static_cast<int>(sizeof(kExpected) / sizeof(kExpected[0]));
  for (int id = 0; id < kExpectedNum; ++id){
    ASSERT_EQ(string(kExpected[id]), trie.decodeKey(id));
  }
}
|
97
|
+
|
98
|
+
// prefixSearch must hit for registered keys (reporting the matched length)
// and miss both for unrelated queries and for strict prefixes of keys.
TEST(ux, prefixSearch){
  vector<string> wordList;
  wordList.push_back("tea");
  wordList.push_back("top");
  wordList.push_back("bear");
  wordList.push_back("bep");
  wordList.push_back("東京都");

  ux::Trie trie;
  trie.build(wordList);

  size_t matchedLen = 0;

  // A registered ASCII key: found, matched length equals the key length.
  const string registered = "tea";
  ASSERT_NE(ux::NOTFOUND, trie.prefixSearch(registered.c_str(), registered.size(), matchedLen));
  ASSERT_EQ(3, matchedLen);

  // A query sharing nothing with the dictionary.
  const string unrelated = "hoge";
  ASSERT_EQ(ux::NOTFOUND, trie.prefixSearch(unrelated.c_str(), unrelated.size(), matchedLen));

  // A strict prefix of a key is not itself a key.
  const string tooShort = "te";
  ASSERT_EQ(ux::NOTFOUND, trie.prefixSearch(tooShort.c_str(), tooShort.size(), matchedLen));

  // A registered multi-byte key; the expected length 9 is a byte count
  // (presumably three 3-byte UTF-8 characters -- depends on source encoding).
  const string multiByte = "東京都";
  ASSERT_NE(ux::NOTFOUND, trie.prefixSearch(multiByte.c_str(), multiByte.size(), matchedLen));
  ASSERT_EQ(9, matchedLen);
}
|
120
|
+
|
121
|
+
// commonPrefixSearch must return every registered key that is a prefix of
// the query, here "bep" followed by "beppu".
TEST(ux, commonPrefixSearch){
  static const char* const kKeys[] = {"tea", "top", "bear", "bep", "beppu"};
  vector<string> wordList(kKeys, kKeys + sizeof(kKeys) / sizeof(kKeys[0]));

  ux::Trie trie;
  trie.build(wordList);

  const string query = "beppuhaiiyu";
  vector<ux::id_t> hits;
  ASSERT_EQ(2, trie.commonPrefixSearch(query.c_str(), query.size(), hits));
  ASSERT_EQ("bep", trie.decodeKey(hits[0]));
  ASSERT_EQ("beppu", trie.decodeKey(hits[1]));
}
|
137
|
+
|
138
|
+
// predictiveSearch must return every registered key that starts with the
// query, here "bear", "bep" and "beppu" in that order.
TEST(ux, predictiveSearch){
  static const char* const kKeys[] = {"tea", "top", "bear", "bep", "beppu"};
  vector<string> wordList(kKeys, kKeys + sizeof(kKeys) / sizeof(kKeys[0]));

  ux::Trie trie;
  trie.build(wordList);

  const string query = "be";
  vector<ux::id_t> hits;
  ASSERT_EQ(3, trie.predictiveSearch(query.c_str(), query.size(), hits));
  ASSERT_EQ("bear", trie.decodeKey(hits[0]));
  ASSERT_EQ("bep", trie.decodeKey(hits[1]));
  ASSERT_EQ("beppu", trie.decodeKey(hits[2]));
}
|
155
|
+
|
156
|
+
// predictiveSearch with a multi-byte query: the single registered key starts
// with the query, so exactly one id must be reported.
TEST(ux, predictiveSearch2){
  vector<string> wordList(1, string("東京都"));

  ux::Trie trie;
  trie.build(wordList);

  const string query = "東";
  vector<ux::id_t> hits;
  ASSERT_EQ(1, trie.predictiveSearch(query.c_str(), query.size(), hits));
  ASSERT_EQ(1, hits.size());
}
|
167
|
+
|
168
|
+
// Round-trips a trie through a file on disk and checks that the reloaded
// trie has the same size and still finds every original key.
TEST(ux, save){
  const char* fn = "uxTestSave.ind";
  ux::Trie ux;
  string q1 = "tea";
  string q2 = "top";
  string q3 = "bear";
  string q4 = "bep";
  string q5 = "beppu";

  vector<string> wordList;
  wordList.push_back(q1);
  wordList.push_back(q2);
  wordList.push_back(q3);
  wordList.push_back(q4);
  wordList.push_back(q5);
  ux.build(wordList);
  ASSERT_EQ(0, ux.save(fn));

  ux::Trie ux2;
  // Remove the temporary file before asserting on the load result: a failing
  // ASSERT_* returns from the test immediately, which would otherwise leave
  // the file behind in the working directory.
  const int loadErr = ux2.load(fn);
  const int removeErr = remove(fn);
  ASSERT_EQ(0, loadErr);
  ASSERT_EQ(0, removeErr);

  ASSERT_EQ(ux.size(), ux2.size());

  size_t retLen = 0;
  ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q1.c_str(), q1.size(), retLen));
  ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q2.c_str(), q2.size(), retLen));
  ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q3.c_str(), q3.size(), retLen));
  ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q4.c_str(), q4.size(), retLen));
  ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q5.c_str(), q5.size(), retLen));
}
|
199
|
+
|
200
|
+
// Registers the decimal strings "0" .. "9999" and checks that looking each
// one up yields as many distinct ids as the trie reports keys.
TEST(ux, large){
  const int kWordNum = 10000;

  vector<string> wordList;
  for (int value = 0; value < kWordNum; ++value){
    ostringstream formatter;
    formatter << value;
    wordList.push_back(formatter.str());
  }

  ux::Trie trie;
  trie.build(wordList);

  // Count how often each id is returned; only the number of distinct ids
  // (the map's size) is checked below.
  map<int, int> dic;
  for (size_t n = 0; n < wordList.size(); ++n){
    const string& word = wordList[n];
    size_t matchedLen = 0;
    dic[trie.prefixSearch(word.c_str(), word.size(), matchedLen)]++;
  }
  ASSERT_EQ(dic.size(), trie.size());
}
|
217
|
+
|
218
|
+
// With keys "xx", "xxy" and "xxxz", only "xxy" itself starts with the query
// "xxy", so predictiveSearch must report exactly one hit.
TEST(ux, predictiveTest){
  static const char* const kKeys[] = {"xx", "xxy", "xxxz"};
  vector<string> str(kKeys, kKeys + sizeof(kKeys) / sizeof(kKeys[0]));

  // Uses the constructor that builds directly from a key list.
  ux::Trie trie(str);

  const string query = "xxy";
  vector<ux::id_t> hits;
  ASSERT_EQ(1, trie.predictiveSearch(query.c_str(), query.size(), hits));
}
|
229
|
+
|
@@ -0,0 +1,529 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2010 Daisuke Okanohara
|
3
|
+
*
|
4
|
+
* Redistribution and use in source and binary forms, with or without
|
5
|
+
* modification, are permitted provided that the following conditions
|
6
|
+
* are met:
|
7
|
+
*
|
8
|
+
* 1. Redistributions of source code must retain the above Copyright
|
9
|
+
* notice, this list of conditions and the following disclaimer.
|
10
|
+
*
|
11
|
+
* 2. Redistributions in binary form must reproduce the above Copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* 3. Neither the name of the authors nor the names of its contributors
|
16
|
+
* may be used to endorse or promote products derived from this
|
17
|
+
* software without specific prior written permission.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <algorithm>
|
21
|
+
#include <queue>
|
22
|
+
#include <fstream>
|
23
|
+
#include <cassert>
|
24
|
+
#include <map>
|
25
|
+
#include <cmath>
|
26
|
+
#include "uxTrie.hpp"
|
27
|
+
|
28
|
+
using namespace std;
|
29
|
+
|
30
|
+
namespace ux{
|
31
|
+
|
32
|
+
// A pair of indices [left, right) delimiting a run of entries in the sorted
// key list; Trie::build() queues one RangeNode per trie node still to expand.
struct RangeNode{
  size_t left;   // index of the first key in the range
  size_t right;  // one past the index of the last key in the range

  RangeNode(size_t _left, size_t _right)
    : left(_left),
      right(_right) {}
};
|
38
|
+
|
39
|
+
// Creates an empty trie: no nested tail trie, no keys, not ready for search.
Trie::Trie()
  : vtailux_(NULL),
    tailIDLen_(0),
    keyNum_(0),
    isReady_(false) {}
|
41
|
+
|
42
|
+
// Creates a trie and immediately builds it from keyList.
// keyList is passed on to build(), which sorts and de-duplicates it in place.
// isTailUX is forwarded to build() unchanged.
Trie::Trie(vector<string>& keyList, const bool isTailUX)
  : vtailux_(NULL),
    tailIDLen_(0),
    keyNum_(0),
    isReady_(false)
{
  build(keyList, isTailUX);
}
|
45
|
+
|
46
|
+
// Releases the nested tail trie, if one was allocated.
Trie::~Trie()
{
  // Deleting a null pointer is a no-op, so no explicit check is needed.
  delete vtailux_;
}
|
49
|
+
|
50
|
+
// Builds the trie from keyList, replacing any previous content.
//
// keyList  - keys to register; sorted and de-duplicated IN PLACE.
// isTailUX - when true, buildTailUX() is called once the bit vectors are built.
//
// The keys are expanded level by level: every queue entry is a range of the
// sorted key list whose keys share the same first `depth` bytes. For each
// range one bit is appended to terminalBV and tailBV, its outgoing edge
// labels are appended to edges_, and loudBV receives one 0 per child
// followed by a closing 1.
void Trie::build(vector<string>& keyList, const bool isTailUX){
  clear();
  sort(keyList.begin(), keyList.end());
  keyList.erase(unique(keyList.begin(), keyList.end()), keyList.end());

  keyNum_ = keyList.size();

  queue<RangeNode> q;      // ranges of the level currently being expanded
  queue<RangeNode> nextQ;  // ranges discovered for the next level
  if (keyNum_ != 0){
    q.push(RangeNode(0, keyNum_));
  }

  BitVec terminalBV;
  BitVec tailBV;
  BitVec loudBV;
  loudBV.push_back(0); // super root
  loudBV.push_back(1);

  for (size_t depth = 0;;){
    if (q.empty()){
      // Current level exhausted: move on to the next one, or stop when
      // there is nothing left to expand.
      swap(q, nextQ);
      ++depth;
      if (q.empty()) break;
    }
    // Copy the bounds out before pop() invalidates the front element.
    const size_t left = q.front().left;
    const size_t right = q.front().right;
    q.pop();

    const string& cur = keyList[left];
    if (left + 1 == right &&
        depth + 1 < cur.size()){ // tail candidate
      // A single key with at least two bytes remaining: store the rest of
      // the key as a tail instead of expanding it byte by byte.
      loudBV.push_back(1);
      terminalBV.push_back(1);
      tailBV.push_back(1);
      vtails_.push_back(cur.substr(depth));
      continue;
    } else {
      tailBV.push_back(0);
    }

    assert(keyList.size() > left);
    size_t newLeft = left;
    if (depth == cur.size()){
      // The first key of the range ends exactly at this node.
      terminalBV.push_back(1);
      ++newLeft;
      if (newLeft == right){
        // No longer keys pass through this node: it is a leaf.
        loudBV.push_back(1);
        continue;
      }
    } else {
      terminalBV.push_back(0);
    }

    // Split the remaining keys into runs sharing the same byte at `depth`;
    // every run becomes one child edge and one range on the next level.
    size_t prev = newLeft;
    assert(keyList[prev].size() > depth);
    uint8_t prevC = (uint8_t)keyList[prev][depth];
    for (size_t i = prev+1; ; ++i){
      if (i < right &&
          prevC == (uint8_t)keyList[i][depth]){
        continue;
      }
      edges_.push_back(prevC);
      loudBV.push_back(0);
      nextQ.push(RangeNode(prev, i));
      if (i == right){
        break;
      }
      prev = i;
      assert(keyList[prev].size() > depth);
      prevC = (uint8_t)keyList[prev][depth];
    }
    loudBV.push_back(1);
  }

  loud_.build(loudBV);
  terminal_.build(terminalBV);
  tail_.build(tailBV);

  // An empty key list leaves the trie in the not-ready state.
  if (keyNum_ > 0){
    isReady_ = true;
  }

  if (isTailUX){
    buildTailUX();
  }
}
|
145
|
+
|
146
|
+
int Trie::save(const char* fn) const {
|
147
|
+
ofstream ofs(fn, ios::binary);
|
148
|
+
if (!ofs){
|
149
|
+
return FILE_OPEN_ERROR;
|
150
|
+
}
|
151
|
+
return save(ofs);
|
152
|
+
}
|
153
|
+
|
154
|
+
int Trie::load(const char* fn){
|
155
|
+
ifstream ifs(fn, ios::binary);
|
156
|
+
if (!ifs){
|
157
|
+
return FILE_OPEN_ERROR;
|
158
|
+
}
|
159
|
+
return load(ifs);
|
160
|
+
}
|
161
|
+
|
162
|
+
int Trie::save(std::ostream& os) const {
|
163
|
+
loud_.save(os);
|
164
|
+
terminal_.save(os);
|
165
|
+
tail_.save(os);
|
166
|
+
tailIDs_.save(os);
|
167
|
+
|
168
|
+
os.write((const char*)&keyNum_, sizeof(keyNum_));
|
169
|
+
size_t edgesSize = edges_.size();
|
170
|
+
os.write((const char*)&edgesSize, sizeof(edgesSize));
|
171
|
+
os.write((const char*)&edges_[0], sizeof(edges_[0]) * edges_.size());
|
172
|
+
|
173
|
+
int useUX = (vtailux_ != NULL);
|
174
|
+
os.write((const char*)&useUX, sizeof(useUX));
|
175
|
+
if (useUX){
|
176
|
+
int err = 0;
|
177
|
+
if ((err = vtailux_->save(os)) != 0){
|
178
|
+
return err;
|
179
|
+
}
|
180
|
+
} else {
|
181
|
+
size_t tailsNum = vtails_.size();
|
182
|
+
os.write((const char*)&tailsNum, sizeof(tailsNum));
|
183
|
+
for (size_t i = 0; i < vtails_.size(); ++i){
|
184
|
+
size_t tailSize = vtails_[i].size();
|
185
|
+
os.write((const char*)&tailSize, sizeof(tailSize));
|
186
|
+
os.write((const char*)&vtails_[i][0], sizeof(vtails_[i][0]) * vtails_[i].size());
|
187
|
+
}
|
188
|
+
}
|
189
|
+
|
190
|
+
if (!os){
|
191
|
+
return SAVE_ERROR;
|
192
|
+
}
|
193
|
+
return 0;
|
194
|
+
}
|
195
|
+
|
196
|
+
int Trie::load(std::istream& is){
|
197
|
+
clear();
|
198
|
+
loud_.load(is);
|
199
|
+
terminal_.load(is);
|
200
|
+
tail_.load(is);
|
201
|
+
tailIDs_.load(is);
|
202
|
+
|
203
|
+
is.read((char*)&keyNum_, sizeof(keyNum_));
|
204
|
+
size_t edgesSize = 0;
|
205
|
+
is.read((char*)&edgesSize, sizeof(edgesSize));
|
206
|
+
edges_.resize(edgesSize);
|
207
|
+
is.read((char*)&edges_[0], sizeof(edges_[0]) * edges_.size());
|
208
|
+
|
209
|
+
int useUX = 0;
|
210
|
+
is.read((char*)&useUX, sizeof(useUX));
|
211
|
+
if (useUX){
|
212
|
+
vtailux_ = new Trie;
|
213
|
+
int err = 0;
|
214
|
+
if ((err = vtailux_->load(is)) != 0){
|
215
|
+
return err;
|
216
|
+
}
|
217
|
+
size_t tailNum = vtailux_->size();
|
218
|
+
tailIDLen_ = lg2(tailNum);
|
219
|
+
|
220
|
+
} else {
|
221
|
+
size_t tailsNum = 0;
|
222
|
+
is.read((char*)&tailsNum, sizeof(tailsNum));
|
223
|
+
vtails_.resize(tailsNum);
|
224
|
+
for (size_t i = 0; i < tailsNum; ++i){
|
225
|
+
size_t tailSize = 0;
|
226
|
+
is.read((char*)&tailSize, sizeof(tailSize));
|
227
|
+
vtails_[i].resize(tailSize);
|
228
|
+
is.read((char*)&vtails_[i][0], sizeof(vtails_[i][0]) * vtails_[i].size());
|
229
|
+
}
|
230
|
+
}
|
231
|
+
|
232
|
+
if (!is){
|
233
|
+
return LOAD_ERROR;
|
234
|
+
}
|
235
|
+
isReady_ = true;
|
236
|
+
return 0;
|
237
|
+
}
|
238
|
+
|
239
|
+
// Looks up str[0, len) via traverse() and reports the last ID it collected.
//
// str:    query bytes.
// len:    number of bytes in str.
// retLen: receives the length reported by traverse() for the last match.
// Returns the last ID collected by traverse(), or NOTFOUND when it collected
// none.
id_t Trie::prefixSearch(const char* str, const size_t len, size_t& retLen) const{
  vector<id_t> matches;
  // 0xFFFFFFFF is passed as the result limit.
  traverse(str, len, retLen, matches, 0xFFFFFFFF);
  if (!matches.empty()){
    return matches.back();
  }
  return NOTFOUND;
}
|
247
|
+
|
248
|
+
// Collects into retIDs the IDs that traverse() finds for str[0, len).
//
// str:    query bytes.
// len:    number of bytes in str.
// retIDs: cleared, then filled with the collected IDs.
// limit:  forwarded to traverse() as the result limit.
// Returns the number of IDs stored in retIDs.
size_t Trie::commonPrefixSearch(const char* str, const size_t len, vector<id_t>& retIDs,
                                const size_t limit) const {
  size_t matchedLen = 0;  // required by traverse(); not reported to the caller
  retIDs.clear();
  traverse(str, len, matchedLen, retIDs, limit);
  return retIDs.size();
}
|
255
|
+
|
256
|
+
size_t Trie::predictiveSearch(const char* str, const size_t len, vector<id_t>& retIDs,
|
257
|
+
const size_t limit) const{
|
258
|
+
retIDs.clear();
|
259
|
+
if (!isReady_) return 0;
|
260
|
+
if (limit == 0) return 0;
|
261
|
+
|
262
|
+
uint64_t pos = 2;
|
263
|
+
uint64_t zeros = 2;
|
264
|
+
for (size_t i = 0; i < len; ++i){
|
265
|
+
uint64_t ones = pos - zeros;
|
266
|
+
|
267
|
+
if (tail_.getBit(ones)){
|
268
|
+
uint64_t tailID = tail_.rank(ones, 1) - 1;
|
269
|
+
string tail = getTail(tailID);
|
270
|
+
for (size_t j = i; j < len; ++j){
|
271
|
+
if (str[j] != tail[j-i]){
|
272
|
+
return 0;
|
273
|
+
}
|
274
|
+
}
|
275
|
+
retIDs.push_back(terminal_.rank(ones, 1) - 1);
|
276
|
+
|
277
|
+
return retIDs.size();
|
278
|
+
}
|
279
|
+
getChild((uint8_t)str[i], pos, zeros);
|
280
|
+
if (pos == NOTFOUND){
|
281
|
+
return 0;
|
282
|
+
}
|
283
|
+
}
|
284
|
+
|
285
|
+
// search all descendant nodes from curPos
|
286
|
+
enumerateAll(pos, zeros, retIDs, limit);
|
287
|
+
return retIDs.size();
|
288
|
+
}
|
289
|
+
|
290
|
+
void Trie::decodeKey(const id_t id, string& ret) const{
|
291
|
+
ret.clear();
|
292
|
+
if (!isReady_) return;
|
293
|
+
|
294
|
+
uint64_t nodeID = terminal_.select(id+1, 1);
|
295
|
+
|
296
|
+
uint64_t pos = loud_.select(nodeID+1, 1) + 1;
|
297
|
+
uint64_t zeros = pos - nodeID;
|
298
|
+
for (;;) {
|
299
|
+
uint8_t c = 0;
|
300
|
+
getParent(c, pos, zeros);
|
301
|
+
if (pos == 0) break;
|
302
|
+
ret += (char)c;
|
303
|
+
}
|
304
|
+
reverse(ret.begin(), ret.end());
|
305
|
+
if (tail_.getBit(nodeID)){
|
306
|
+
ret += getTail(tail_.rank(nodeID, 1) - 1);
|
307
|
+
}
|
308
|
+
}
|
309
|
+
|
310
|
+
// Convenience overload: decodes the key registered under id and returns it
// by value (see decodeKey(const id_t, string&)).
string Trie::decodeKey(const id_t id) const {
  string key;
  decodeKey(id, key);
  return key;
}
|
315
|
+
|
316
|
+
size_t Trie::size() const {
|
317
|
+
return keyNum_;
|
318
|
+
}
|
319
|
+
|
320
|
+
// Resets the trie to the empty, not-ready state: releases the nested tail
// trie, empties every container and zeroes the counters.
void Trie::clear() {
  // Nested tail trie (owned through a raw pointer).
  delete vtailux_;
  vtailux_ = NULL;

  // Bit vectors.
  loud_.clear();
  terminal_.clear();
  tail_.clear();
  tailIDs_.clear();

  // Edge labels and plain tails.
  edges_.clear();
  vtails_.clear();

  // Scalars.
  keyNum_ = 0;
  tailIDLen_ = 0;
  isReady_ = false;
}
|
333
|
+
|
334
|
+
std::string Trie::what(const int error){
|
335
|
+
switch(error) {
|
336
|
+
case 0:
|
337
|
+
return string("succeeded");
|
338
|
+
case FILE_OPEN_ERROR:
|
339
|
+
return string("file open error");
|
340
|
+
case FILE_WRITE_ERROR:
|
341
|
+
return string("file write error");
|
342
|
+
case FILE_READ_ERROR:
|
343
|
+
return string("file read error");
|
344
|
+
default:
|
345
|
+
return string("unknown error");
|
346
|
+
}
|
347
|
+
}
|
348
|
+
|
349
|
+
size_t Trie::getAllocSize() const{
|
350
|
+
size_t retSize = 0;
|
351
|
+
if (vtailux_) {
|
352
|
+
retSize += vtailux_->getAllocSize();
|
353
|
+
retSize += tailIDs_.getAllocSize();
|
354
|
+
} else {
|
355
|
+
size_t tailLenSum = 0;
|
356
|
+
for (size_t i = 0; i < vtails_.size(); ++i){
|
357
|
+
tailLenSum += vtails_[i].size();
|
358
|
+
}
|
359
|
+
retSize += tailLenSum + tailLenSum / 8; // length bit vector
|
360
|
+
}
|
361
|
+
return retSize + loud_.getAllocSize() + terminal_.getAllocSize() +
|
362
|
+
tail_.getAllocSize() + edges_.size();
|
363
|
+
}
|
364
|
+
|
365
|
+
void Trie::allocStat(size_t allocSize, ostream& os) const{
|
366
|
+
if (vtailux_) {
|
367
|
+
vtailux_->allocStat(allocSize, os);
|
368
|
+
size_t size = tailIDs_.getAllocSize();
|
369
|
+
os << "tailIDs:\t" << size << "\t" << (float)size / allocSize << endl;
|
370
|
+
} else {
|
371
|
+
size_t tailLenSum = 0;
|
372
|
+
for (size_t i = 0; i < vtails_.size(); ++i){
|
373
|
+
tailLenSum += vtails_[i].size();
|
374
|
+
}
|
375
|
+
os << " tails:\t" << tailLenSum << "\t" << (float)tailLenSum / allocSize << endl;
|
376
|
+
os << " tailLen:\t" << tailLenSum/8 << "\t" << (float)tailLenSum/8 / allocSize << endl;
|
377
|
+
}
|
378
|
+
os << " loud:\t" << loud_.getAllocSize() << "\t" << (float)loud_.getAllocSize() / allocSize << endl;
|
379
|
+
os << "terminal:\t" << terminal_.getAllocSize() << "\t" << (float)terminal_.getAllocSize() / allocSize << endl;
|
380
|
+
os << " tail:\t" << tail_.getAllocSize() << "\t" << (float)tail_.getAllocSize() / allocSize << endl;
|
381
|
+
os << " edge:\t" << edges_.size() << "\t" << (float)edges_.size() / allocSize << endl;
|
382
|
+
}
|
383
|
+
|
384
|
+
// Writes summary statistics to os, one "label<TAB>value" line each: key
// count, sizes of loud_, terminal_ and edges_, edges per key, summed tail
// length, tail count and tail length per key, followed by an empty line.
void Trie::stat(ostream & os) const {
  size_t totalTailLen = 0;
  for (size_t t = 0; t < vtails_.size(); ++t){
    totalTailLen += vtails_[t].size();
  }

  os << " keyNum\t" << keyNum_ << endl;
  os << " loud:\t" << loud_.size() << endl;
  os << "terminal:\t" << terminal_.size() << endl;
  os << " edge:\t" << edges_.size() << endl;
  os << " avgedge:\t" << static_cast<float>(edges_.size()) / keyNum_ << endl;
  os << " vtails:\t" << totalTailLen << endl;
  os << " tailnum:\t" << vtails_.size() << endl;
  os << " avgtail:\t" << static_cast<float>(totalTailLen) / keyNum_ << endl;
  os << endl;
}
|
400
|
+
|
401
|
+
|
402
|
+
void Trie::buildTailUX(){
|
403
|
+
vector<string> origTails = vtails_;
|
404
|
+
try {
|
405
|
+
vtailux_ = new Trie;
|
406
|
+
} catch (bad_alloc){
|
407
|
+
isReady_ = false;
|
408
|
+
return;
|
409
|
+
}
|
410
|
+
for (size_t i = 0; i < vtails_.size(); ++i){
|
411
|
+
reverse(vtails_[i].begin(), vtails_[i].end());
|
412
|
+
}
|
413
|
+
vtailux_->build(vtails_, false);
|
414
|
+
tailIDLen_ = lg2(vtailux_->size());
|
415
|
+
|
416
|
+
for (size_t i = 0; i < origTails.size(); ++i){
|
417
|
+
size_t retLen = 0;
|
418
|
+
reverse(origTails[i].begin(), origTails[i].end());
|
419
|
+
id_t id = vtailux_->prefixSearch(origTails[i].c_str(), origTails[i].size(), retLen);
|
420
|
+
assert(id != NOTFOUND);
|
421
|
+
assert(retLen == origTails[i].size());
|
422
|
+
tailIDs_.push_back_with_len(id, tailIDLen_);
|
423
|
+
}
|
424
|
+
vector<string>().swap(vtails_);
|
425
|
+
}
|
426
|
+
|
427
|
+
void Trie::getChild(const uint8_t c, uint64_t& pos, uint64_t& zeros) const {
|
428
|
+
for (;; ++pos, ++zeros){
|
429
|
+
if (loud_.getBit(pos)){
|
430
|
+
pos = NOTFOUND;
|
431
|
+
return;
|
432
|
+
}
|
433
|
+
assert(zeros >= 2);
|
434
|
+
assert(edges_.size() > zeros-2);
|
435
|
+
if (edges_[zeros-2] == c){
|
436
|
+
pos = loud_.select(zeros, 1)+1;
|
437
|
+
zeros = pos - zeros + 1;
|
438
|
+
return;
|
439
|
+
}
|
440
|
+
}
|
441
|
+
}
|
442
|
+
|
443
|
+
bool Trie::isLeaf(const uint64_t pos) const {
|
444
|
+
return loud_.getBit(pos);
|
445
|
+
}
|
446
|
+
|
447
|
+
// Moves (pos, zeros) one step towards the root.
//
// c:     receives the label edges_[zeros - 2] of the edge that was followed;
//        left untouched when the recomputed zeros is below 2.
// pos:   in/out position; set to loud_.select(zeros, 0) for the recomputed
//        zeros.
// zeros: in/out counter, recomputed as pos - zeros + 1 from the incoming
//        values.
void Trie::getParent(uint8_t& c, uint64_t& pos, uint64_t& zeros) const {
  zeros = pos - zeros + 1;
  pos = loud_.select(zeros, 0);
  if (zeros >= 2){
    assert(edges_.size() > zeros-2);
    c = edges_[zeros-2];
  }
}
|
454
|
+
|
455
|
+
|
456
|
+
void Trie::traverse(const char* str, const size_t len,
|
457
|
+
size_t& lastLen, std::vector<id_t>& retIDs, const size_t limit) const{
|
458
|
+
lastLen = 0;
|
459
|
+
if (!isReady_) return;
|
460
|
+
if (limit == 0) return;
|
461
|
+
|
462
|
+
uint64_t pos = 2;
|
463
|
+
uint64_t zeros = 2;
|
464
|
+
for (size_t depth = 0; pos != NOTFOUND; ++depth){
|
465
|
+
uint64_t ones = pos - zeros;
|
466
|
+
|
467
|
+
if (tail_.getBit(ones)){
|
468
|
+
size_t retLen = 0;
|
469
|
+
if (tailMatch(str, len, depth, tail_.rank(ones, 1)-1, retLen)){
|
470
|
+
lastLen = depth + retLen;
|
471
|
+
retIDs.push_back(terminal_.rank(ones, 1) - 1);
|
472
|
+
}
|
473
|
+
break;
|
474
|
+
} else if (terminal_.getBit(ones)){
|
475
|
+
lastLen = depth;
|
476
|
+
retIDs.push_back(terminal_.rank(ones, 1)-1);
|
477
|
+
if (retIDs.size() == limit) {
|
478
|
+
break;
|
479
|
+
}
|
480
|
+
}
|
481
|
+
if (depth == len) break;
|
482
|
+
getChild((uint8_t)str[depth], pos, zeros);
|
483
|
+
}
|
484
|
+
}
|
485
|
+
|
486
|
+
|
487
|
+
void Trie::enumerateAll(const uint64_t pos, const uint64_t zeros, vector<id_t>& retIDs, const size_t limit) const{
|
488
|
+
const uint64_t ones = pos - zeros;
|
489
|
+
if (terminal_.getBit(ones)){
|
490
|
+
retIDs.push_back(terminal_.rank(ones, 1) - 1);
|
491
|
+
}
|
492
|
+
|
493
|
+
for (uint64_t i = 0; loud_.getBit(pos + i) == 0 &&
|
494
|
+
retIDs.size() < limit; ++i){
|
495
|
+
uint64_t nextPos = loud_.select(zeros + i, 1)+1;
|
496
|
+
enumerateAll(nextPos, nextPos - zeros - i + 1, retIDs, limit);
|
497
|
+
}
|
498
|
+
}
|
499
|
+
|
500
|
+
|
501
|
+
|
502
|
+
bool Trie::tailMatch(const char* str, const size_t len, const size_t depth,
|
503
|
+
const uint64_t tailID, size_t& retLen) const{
|
504
|
+
string tail = getTail(tailID);
|
505
|
+
if (tail.size() > len-depth) {
|
506
|
+
return false;
|
507
|
+
}
|
508
|
+
|
509
|
+
for (size_t i = 0; i < tail.size(); ++i){
|
510
|
+
if (str[i+depth] != tail[i]) {
|
511
|
+
return false;
|
512
|
+
}
|
513
|
+
}
|
514
|
+
retLen = tail.size();
|
515
|
+
return true;
|
516
|
+
}
|
517
|
+
|
518
|
+
std::string Trie::getTail(const uint64_t i) const{
|
519
|
+
if (vtailux_) {
|
520
|
+
string ret;
|
521
|
+
vtailux_->decodeKey(tailIDs_.getBits(tailIDLen_ * i, tailIDLen_), ret);
|
522
|
+
reverse(ret.begin(), ret.end());
|
523
|
+
return ret;
|
524
|
+
} else {
|
525
|
+
return vtails_[i];
|
526
|
+
}
|
527
|
+
}
|
528
|
+
|
529
|
+
}
|