text_ux 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +25 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +51 -0
  7. data/Rakefile +18 -0
  8. data/ext/text_ux/extconf.rb +5 -0
  9. data/ext/text_ux/text_ux.cpp +252 -0
  10. data/lib/text_ux.rb +5 -0
  11. data/lib/text_ux/version.rb +3 -0
  12. data/spec/fixtures/test.ux +0 -0
  13. data/spec/spec_helper.rb +19 -0
  14. data/spec/text_ux_spec.rb +137 -0
  15. data/text_ux.gemspec +26 -0
  16. data/vendor/ux-trie/ux-0.1.9/.lock-waf_darwin_build +8 -0
  17. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest-all.cc +9118 -0
  18. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest.h +19537 -0
  19. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest_main.cc +39 -0
  20. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.py +733 -0
  21. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.pyc +0 -0
  22. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.py +147 -0
  23. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.pyc +0 -0
  24. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.py +314 -0
  25. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.pyc +0 -0
  26. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.py +298 -0
  27. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.pyc +0 -0
  28. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.py +37 -0
  29. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.pyc +0 -0
  30. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.py +149 -0
  31. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.pyc +0 -0
  32. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.py +500 -0
  33. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.pyc +0 -0
  34. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.py +130 -0
  35. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.pyc +0 -0
  36. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.py +191 -0
  37. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.pyc +0 -0
  38. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.py +358 -0
  39. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.pyc +0 -0
  40. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.py +669 -0
  41. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.pyc +0 -0
  42. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.py +341 -0
  43. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.pyc +0 -0
  44. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.py +4 -0
  45. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.pyc +0 -0
  46. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.py +12 -0
  47. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.pyc +0 -0
  48. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/asm.py +25 -0
  49. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/bison.py +29 -0
  50. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c.py +27 -0
  51. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.py +56 -0
  52. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.pyc +0 -0
  53. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.py +708 -0
  54. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.pyc +0 -0
  55. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.py +121 -0
  56. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.pyc +0 -0
  57. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.py +606 -0
  58. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.pyc +0 -0
  59. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.py +110 -0
  60. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.pyc +0 -0
  61. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.py +372 -0
  62. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.pyc +0 -0
  63. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_c.py +39 -0
  64. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.py +39 -0
  65. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.pyc +0 -0
  66. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_d.py +30 -0
  67. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_fc.py +45 -0
  68. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cs.py +98 -0
  69. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.py +27 -0
  70. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.pyc +0 -0
  71. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d.py +51 -0
  72. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_config.py +47 -0
  73. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_scan.py +133 -0
  74. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dbus.py +30 -0
  75. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dmd.py +43 -0
  76. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/errcheck.py +153 -0
  77. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc.py +123 -0
  78. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_config.py +271 -0
  79. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_scan.py +68 -0
  80. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/flex.py +27 -0
  81. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/g95.py +55 -0
  82. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gas.py +10 -0
  83. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gcc.py +98 -0
  84. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gdc.py +34 -0
  85. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gfortran.py +69 -0
  86. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/glib2.py +174 -0
  87. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gnu_dirs.py +65 -0
  88. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.py +98 -0
  89. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.pyc +0 -0
  90. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icc.py +31 -0
  91. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icpc.py +30 -0
  92. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ifort.py +42 -0
  93. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/intltool.py +78 -0
  94. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/irixcc.py +49 -0
  95. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/javaw.py +272 -0
  96. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/kde4.py +49 -0
  97. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/lua.py +19 -0
  98. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/msvc.py +650 -0
  99. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/nasm.py +13 -0
  100. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/perl.py +78 -0
  101. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/python.py +303 -0
  102. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/qt4.py +424 -0
  103. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ruby.py +104 -0
  104. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncc.py +54 -0
  105. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncxx.py +55 -0
  106. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/tex.py +222 -0
  107. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/vala.py +215 -0
  108. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/waf_unit_test.py +79 -0
  109. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/winres.py +34 -0
  110. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlc.py +46 -0
  111. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlcxx.py +46 -0
  112. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.py +334 -0
  113. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.pyc +0 -0
  114. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.py +4 -0
  115. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.pyc +0 -0
  116. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.py +173 -0
  117. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.pyc +0 -0
  118. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.py +4 -0
  119. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.pyc +0 -0
  120. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.py +223 -0
  121. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.pyc +0 -0
  122. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.py +50 -0
  123. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.pyc +0 -0
  124. data/vendor/ux-trie/ux-0.1.9/src/bitVec.cpp +119 -0
  125. data/vendor/ux-trie/ux-0.1.9/src/bitVec.hpp +64 -0
  126. data/vendor/ux-trie/ux-0.1.9/src/bitVecTest.cpp +143 -0
  127. data/vendor/ux-trie/ux-0.1.9/src/cmdline.h +809 -0
  128. data/vendor/ux-trie/ux-0.1.9/src/rsDic.cpp +121 -0
  129. data/vendor/ux-trie/ux-0.1.9/src/rsDic.hpp +57 -0
  130. data/vendor/ux-trie/ux-0.1.9/src/ux.hpp +26 -0
  131. data/vendor/ux-trie/ux-0.1.9/src/uxMain.cpp +206 -0
  132. data/vendor/ux-trie/ux-0.1.9/src/uxMap.cpp +0 -0
  133. data/vendor/ux-trie/ux-0.1.9/src/uxMap.hpp +248 -0
  134. data/vendor/ux-trie/ux-0.1.9/src/uxMapTest.cpp +139 -0
  135. data/vendor/ux-trie/ux-0.1.9/src/uxTest.cpp +229 -0
  136. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.cpp +529 -0
  137. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.hpp +220 -0
  138. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.cpp +92 -0
  139. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.hpp +35 -0
  140. data/vendor/ux-trie/ux-0.1.9/src/wscript +43 -0
  141. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.py +0 -0
  142. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.pyc +0 -0
  143. data/vendor/ux-trie/ux-0.1.9/unittestt.py +166 -0
  144. data/vendor/ux-trie/ux-0.1.9/ux.pc.in +10 -0
  145. data/vendor/ux-trie/ux-0.1.9/waf +0 -0
  146. data/vendor/ux-trie/ux-0.1.9/wscript +32 -0
  147. metadata +249 -0
@@ -0,0 +1,139 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <gtest/gtest.h>
21
+ #include <vector>
22
+ #include <string>
23
+ #include "uxMap.hpp"
24
+
25
+ using namespace std;
26
+
27
+ TEST(uxmap, trivial){
28
+ ux::Map<int> uxm;
29
+
30
+ vector<string> wordList;
31
+ vector<int> valueList;
32
+ uxm.build(wordList);
33
+ ASSERT_EQ(0, uxm.size());
34
+ }
35
+
36
+
37
+ TEST(uxmap, simple){
38
+ vector<string> wordList;
39
+ vector<int> valueList;
40
+ wordList.push_back("i");
41
+ valueList.push_back(1);
42
+ wordList.push_back("in");
43
+ valueList.push_back(2);
44
+ wordList.push_back("to");
45
+ valueList.push_back(3);
46
+ wordList.push_back("we");
47
+ valueList.push_back(4);
48
+ wordList.push_back("inn");
49
+ valueList.push_back(5);
50
+ wordList.push_back("tea");
51
+ valueList.push_back(6);
52
+ wordList.push_back("ten");
53
+ valueList.push_back(7);
54
+
55
+ vector<string> origWordList = wordList;
56
+ ux::Map<int> uxm;
57
+ uxm.build(wordList);
58
+
59
+ for (size_t i = 0; i < origWordList.size(); ++i){
60
+ string key = origWordList[i];
61
+ ASSERT_EQ(0, uxm.set(key.c_str(), key.size(), valueList[i]));
62
+ }
63
+
64
+ for (size_t i = 0; i < origWordList.size(); ++i){
65
+ string key = origWordList[i];
66
+ int ret = -1;
67
+ ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret));
68
+ ASSERT_EQ(valueList[i], ret);
69
+ }
70
+ }
71
+
72
+ TEST(uxmap, pair){
73
+ vector<pair<string, int> > kvs;
74
+ kvs.push_back(make_pair("i", 1));
75
+ kvs.push_back(make_pair("in", 2));
76
+ kvs.push_back(make_pair("to", 3));
77
+ kvs.push_back(make_pair("we", 4));
78
+ kvs.push_back(make_pair("inn", 5));
79
+ kvs.push_back(make_pair("tea", 6));
80
+ kvs.push_back(make_pair("ten", 7));
81
+
82
+ ux::Map<int> uxm;
83
+ uxm.build(kvs);
84
+
85
+ for (size_t i = 0; i < kvs.size(); ++i){
86
+ int ret = -1;
87
+ string key = kvs[i].first;
88
+ ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret));
89
+ ASSERT_EQ(kvs[i].second, ret);
90
+ }
91
+ }
92
+
93
+ TEST(uxmap, map){
94
+ map<string, int> kvs;
95
+ kvs[string("i")] = 1;
96
+ kvs[string("in")] = 2;
97
+ kvs[string("to")] = 3;
98
+ kvs[string("we")] = 4;
99
+ kvs[string("inn")] = 5;
100
+ kvs[string("tea")] = 6;
101
+ kvs[string("ten")] = 7;
102
+
103
+ ux::Map<int> uxm;
104
+ uxm.build(kvs);
105
+ for (map<string, int>::const_iterator it = kvs.begin();
106
+ it != kvs.end(); ++it){
107
+ string key = it->first;
108
+ int ret = -1;
109
+ ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret));
110
+ ASSERT_EQ(it->second, ret);
111
+ }
112
+ }
113
+
114
+ TEST(uxmap, save){
115
+ map<string, int> kvs;
116
+ kvs[string("i")] = 1;
117
+ kvs[string("in")] = 2;
118
+ kvs[string("to")] = 3;
119
+ kvs[string("we")] = 4;
120
+ kvs[string("inn")] = 5;
121
+ kvs[string("tea")] = 6;
122
+ kvs[string("ten")] = 7;
123
+
124
+ ux::Map<int> uxm;
125
+ uxm.build(kvs);
126
+
127
+ ostringstream os;
128
+ ASSERT_EQ(0, uxm.save(os));
129
+ istringstream is(os.str());
130
+ ux::Map<int> uxm_load;
131
+ ASSERT_EQ(0, uxm_load.load(is));
132
+ for (map<string, int>::const_iterator it = kvs.begin();
133
+ it != kvs.end(); ++it){
134
+ string key = it->first;
135
+ int ret = -1;
136
+ ASSERT_EQ(0, uxm_load.get(key.c_str(), key.size(), ret));
137
+ ASSERT_EQ(it->second, ret);
138
+ }
139
+ }
@@ -0,0 +1,229 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <gtest/gtest.h>
21
+ #include <vector>
22
+ #include <string>
23
+ #include <sstream>
24
+ #include <map>
25
+ #include "uxTrie.hpp"
26
+
27
+ using namespace std;
28
+
29
+ TEST(ux, trivial){
30
+ ux::Trie ux;
31
+ vector<string> wordList;
32
+ ux.build(wordList);
33
+ string q = "hoge";
34
+ size_t retLen = 0;
35
+ ASSERT_EQ(ux::NOTFOUND, ux.prefixSearch(q.c_str(), q.size(), retLen));
36
+ }
37
+
38
+ TEST(ux, simple){
39
+ vector<string> wordList;
40
+ wordList.push_back("i");
41
+ wordList.push_back("in");
42
+ wordList.push_back("to");
43
+ wordList.push_back("we");
44
+ wordList.push_back("inn");
45
+ wordList.push_back("tea");
46
+ wordList.push_back("ten");
47
+ vector<string> origWordList = wordList;
48
+ ux::Trie ux;
49
+ ux.build(wordList);
50
+
51
+ for (size_t i = 0; i < origWordList.size(); ++i){
52
+ ASSERT_EQ(origWordList[i], ux.decodeKey(i));
53
+ }
54
+ }
55
+
56
+ TEST(ux, clear){
57
+ vector<string> wordList;
58
+ wordList.push_back("i");
59
+ wordList.push_back("in");
60
+ wordList.push_back("to");
61
+ wordList.push_back("we");
62
+ wordList.push_back("inn");
63
+ wordList.push_back("tea");
64
+ wordList.push_back("ten");
65
+ vector<string> origWordList = wordList;
66
+ ux::Trie ux;
67
+ ux.build(wordList);
68
+
69
+ for (size_t i = 0; i < origWordList.size(); ++i){
70
+ ASSERT_EQ(origWordList[i], ux.decodeKey(i));
71
+ }
72
+
73
+ ux.clear();
74
+ ux.build(wordList);
75
+ for (size_t i = 0; i < origWordList.size(); ++i){
76
+ ASSERT_EQ(origWordList[i], ux.decodeKey(i));
77
+ }
78
+ }
79
+
80
+
81
+ TEST(ux, decodeKey){
82
+ ux::Trie ux;
83
+ vector<string> wordList;
84
+ wordList.push_back("tok");
85
+ wordList.push_back("osak");
86
+ wordList.push_back("okina");
87
+ wordList.push_back("fukush");
88
+ ux.build(wordList);
89
+
90
+ ASSERT_EQ("fukush", ux.decodeKey(0));
91
+ ASSERT_EQ("tok" , ux.decodeKey(1));
92
+ ASSERT_EQ("okina" , ux.decodeKey(2));
93
+ ASSERT_EQ("osak" , ux.decodeKey(3));
94
+
95
+
96
+ }
97
+
98
+ TEST(ux, prefixSearch){
99
+ ux::Trie ux;
100
+ vector<string> wordList;
101
+ wordList.push_back("tea");
102
+ wordList.push_back("top");
103
+ wordList.push_back("bear");
104
+ wordList.push_back("bep");
105
+ wordList.push_back("東京都");
106
+ ux.build(wordList);
107
+
108
+ size_t retLen = 0;
109
+ string q1 = "tea";
110
+ ASSERT_NE(ux::NOTFOUND, ux.prefixSearch(q1.c_str(), q1.size(), retLen));
111
+ ASSERT_EQ(3, retLen);
112
+ string q2 = "hoge";
113
+ ASSERT_EQ(ux::NOTFOUND, ux.prefixSearch(q2.c_str(), q2.size(), retLen));
114
+ string q3 = "te";
115
+ ASSERT_EQ(ux::NOTFOUND, ux.prefixSearch(q3.c_str(), q3.size(), retLen));
116
+ string q4 = "東京都";
117
+ ASSERT_NE(ux::NOTFOUND, ux.prefixSearch(q4.c_str(), q4.size(), retLen));
118
+ ASSERT_EQ(9, retLen);
119
+ }
120
+
121
+ TEST(ux, commonPrefixSearch){
122
+ ux::Trie ux;
123
+ vector<string> wordList;
124
+ wordList.push_back("tea");
125
+ wordList.push_back("top");
126
+ wordList.push_back("bear");
127
+ wordList.push_back("bep");
128
+ wordList.push_back("beppu");
129
+ ux.build(wordList);
130
+
131
+ vector<ux::id_t> retIDs;
132
+ string q1 = "beppuhaiiyu";
133
+ ASSERT_EQ(2, ux.commonPrefixSearch(q1.c_str(), q1.size(), retIDs));
134
+ ASSERT_EQ("bep", ux.decodeKey(retIDs[0]));
135
+ ASSERT_EQ("beppu", ux.decodeKey(retIDs[1]));
136
+ }
137
+
138
+ TEST(ux, predictiveSearch){
139
+ ux::Trie ux;
140
+ vector<string> wordList;
141
+ wordList.push_back("tea");
142
+ wordList.push_back("top");
143
+ wordList.push_back("bear");
144
+ wordList.push_back("bep");
145
+ wordList.push_back("beppu");
146
+ ux.build(wordList);
147
+
148
+ vector<ux::id_t> retIDs;
149
+ string q1 = "be";
150
+ ASSERT_EQ(3, ux.predictiveSearch(q1.c_str(), q1.size(), retIDs));
151
+ ASSERT_EQ("bear", ux.decodeKey(retIDs[0]));
152
+ ASSERT_EQ("bep", ux.decodeKey(retIDs[1]));
153
+ ASSERT_EQ("beppu", ux.decodeKey(retIDs[2]));
154
+ }
155
+
156
+ TEST(ux, predictiveSearch2){
157
+ ux::Trie ux;
158
+ vector<string> wordList;
159
+ wordList.push_back("東京都");
160
+ ux.build(wordList);
161
+
162
+ vector<ux::id_t> retIDs;
163
+ string q1 = "東";
164
+ ASSERT_EQ(1, ux.predictiveSearch(q1.c_str(), q1.size(), retIDs));
165
+ ASSERT_EQ(1, retIDs.size());
166
+ }
167
+
168
+ TEST(ux, save){
169
+ const char* fn = "uxTestSave.ind";
170
+ ux::Trie ux;
171
+ string q1 = "tea";
172
+ string q2 = "top";
173
+ string q3 = "bear";
174
+ string q4 = "bep";
175
+ string q5 = "beppu";
176
+
177
+ vector<string> wordList;
178
+ wordList.push_back(q1);
179
+ wordList.push_back(q2);
180
+ wordList.push_back(q3);
181
+ wordList.push_back(q4);
182
+ wordList.push_back(q5);
183
+ ux.build(wordList);
184
+ ASSERT_EQ(0, ux.save(fn));
185
+
186
+ ux::Trie ux2;
187
+ ASSERT_EQ(0, ux2.load(fn));
188
+ ASSERT_EQ(0, remove(fn));
189
+
190
+ ASSERT_EQ(ux.size(), ux2.size());
191
+
192
+ size_t retLen = 0;
193
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q1.c_str(), q1.size(), retLen));
194
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q2.c_str(), q2.size(), retLen));
195
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q3.c_str(), q3.size(), retLen));
196
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q4.c_str(), q4.size(), retLen));
197
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q5.c_str(), q5.size(), retLen));
198
+ }
199
+
200
+ TEST(ux, large){
201
+ ux::Trie trie;
202
+ vector<string> wordList;
203
+ for (int i = 0; i < 10000; ++i){
204
+ ostringstream os;
205
+ os << i;
206
+ wordList.push_back(os.str());
207
+ }
208
+
209
+ trie.build(wordList);
210
+ map<int, int> dic;
211
+ for (size_t i = 0; i < wordList.size(); ++i){
212
+ size_t retLen = 0;
213
+ dic[trie.prefixSearch(wordList[i].c_str(), wordList[i].size(), retLen)]++;
214
+ }
215
+ ASSERT_EQ(dic.size(), trie.size());
216
+ }
217
+
218
+ TEST(ux, predictiveTest){
219
+ vector<string> str;
220
+ str.push_back("xx");
221
+ str.push_back("xxy");
222
+ str.push_back("xxxz");
223
+ ux::Trie trie(str);
224
+
225
+ vector<ux::id_t> v;
226
+ string q = "xxy";
227
+ ASSERT_EQ(1, trie.predictiveSearch(q.c_str(), q.size(), v));
228
+ }
229
+
@@ -0,0 +1,529 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <algorithm>
21
+ #include <queue>
22
+ #include <fstream>
23
+ #include <cassert>
24
+ #include <map>
25
+ #include <cmath>
26
+ #include "uxTrie.hpp"
27
+
28
+ using namespace std;
29
+
30
+ namespace ux{
31
+
32
// A pair of indices (left, right) into the sorted key list; Trie::build
// queues one of these per trie node still to be expanded.
struct RangeNode{
  size_t left;
  size_t right;

  RangeNode(size_t _left, size_t _right)
    : left(_left),
      right(_right) {
  }
};
38
+
39
+ Trie::Trie() : vtailux_(NULL), tailIDLen_(0), keyNum_(0), isReady_(false) {
40
+ }
41
+
42
+ Trie::Trie(vector<string>& keyList, const bool isTailUX) : vtailux_(NULL), tailIDLen_(0), keyNum_(0), isReady_(false) {
43
+ build(keyList, isTailUX);
44
+ }
45
+
46
+ Trie::~Trie(){
47
+ delete vtailux_;
48
+ }
49
+
50
+ void Trie::build(vector<string>& keyList, const bool isTailUX){
51
+ clear();
52
+ sort(keyList.begin(), keyList.end());
53
+ keyList.erase(unique(keyList.begin(), keyList.end()), keyList.end());
54
+
55
+ keyNum_ = keyList.size();
56
+
57
+ queue<RangeNode> q;
58
+ queue<RangeNode> nextQ;
59
+ if (keyNum_ != 0){
60
+ q.push(RangeNode(0, keyNum_));
61
+ }
62
+
63
+ BitVec terminalBV;
64
+ BitVec tailBV;
65
+ BitVec loudBV;
66
+ loudBV.push_back(0); // super root
67
+ loudBV.push_back(1);
68
+
69
+ for (size_t depth = 0;;){
70
+ if (q.empty()){
71
+ swap(q, nextQ);
72
+ ++depth;
73
+ if (q.empty()) break;
74
+ }
75
+ RangeNode& rn = q.front();
76
+ const size_t left = rn.left;
77
+ const size_t right = rn.right;
78
+ q.pop();
79
+
80
+ string& cur = keyList[left];
81
+ if (left + 1 == right &&
82
+ depth + 1 < cur.size()){ // tail candidate
83
+ loudBV.push_back(1);
84
+ terminalBV.push_back(1);
85
+ tailBV.push_back(1);
86
+ string tail;
87
+ for (size_t i = depth; i < cur.size(); ++i){
88
+ tail += cur[i];
89
+ }
90
+ vtails_.push_back(tail);
91
+ continue;
92
+ } else {
93
+ tailBV.push_back(0);
94
+ }
95
+
96
+ assert(keyList.size() > left);
97
+ size_t newLeft = left;
98
+ if (depth == cur.size()){
99
+ terminalBV.push_back(1);
100
+ ++newLeft;
101
+ if (newLeft == right){
102
+ loudBV.push_back(1);
103
+ continue;
104
+ }
105
+ } else {
106
+ terminalBV.push_back(0);
107
+ }
108
+
109
+ size_t prev = newLeft;
110
+ assert(keyList[prev].size() > depth);
111
+ uint8_t prevC = (uint8_t)keyList[prev][depth];
112
+ uint64_t degree = 0;
113
+ for (size_t i = prev+1; ; ++i){
114
+ if (i < right &&
115
+ prevC == (uint8_t)keyList[i][depth]){
116
+ continue;
117
+ }
118
+ edges_.push_back(prevC);
119
+ loudBV.push_back(0);
120
+ degree++;
121
+ nextQ.push(RangeNode(prev, i));
122
+ if (i == right){
123
+ break;
124
+ }
125
+ prev = i;
126
+ assert(keyList[prev].size() > depth);
127
+ prevC = keyList[prev][depth];
128
+
129
+ }
130
+ loudBV.push_back(1);
131
+ }
132
+
133
+ loud_.build(loudBV);
134
+ terminal_.build(terminalBV);
135
+ tail_.build(tailBV);
136
+
137
+ if (keyNum_ > 0){
138
+ isReady_ = true;
139
+ }
140
+
141
+ if (isTailUX){
142
+ buildTailUX();
143
+ }
144
+ }
145
+
146
+ int Trie::save(const char* fn) const {
147
+ ofstream ofs(fn, ios::binary);
148
+ if (!ofs){
149
+ return FILE_OPEN_ERROR;
150
+ }
151
+ return save(ofs);
152
+ }
153
+
154
+ int Trie::load(const char* fn){
155
+ ifstream ifs(fn, ios::binary);
156
+ if (!ifs){
157
+ return FILE_OPEN_ERROR;
158
+ }
159
+ return load(ifs);
160
+ }
161
+
162
+ int Trie::save(std::ostream& os) const {
163
+ loud_.save(os);
164
+ terminal_.save(os);
165
+ tail_.save(os);
166
+ tailIDs_.save(os);
167
+
168
+ os.write((const char*)&keyNum_, sizeof(keyNum_));
169
+ size_t edgesSize = edges_.size();
170
+ os.write((const char*)&edgesSize, sizeof(edgesSize));
171
+ os.write((const char*)&edges_[0], sizeof(edges_[0]) * edges_.size());
172
+
173
+ int useUX = (vtailux_ != NULL);
174
+ os.write((const char*)&useUX, sizeof(useUX));
175
+ if (useUX){
176
+ int err = 0;
177
+ if ((err = vtailux_->save(os)) != 0){
178
+ return err;
179
+ }
180
+ } else {
181
+ size_t tailsNum = vtails_.size();
182
+ os.write((const char*)&tailsNum, sizeof(tailsNum));
183
+ for (size_t i = 0; i < vtails_.size(); ++i){
184
+ size_t tailSize = vtails_[i].size();
185
+ os.write((const char*)&tailSize, sizeof(tailSize));
186
+ os.write((const char*)&vtails_[i][0], sizeof(vtails_[i][0]) * vtails_[i].size());
187
+ }
188
+ }
189
+
190
+ if (!os){
191
+ return SAVE_ERROR;
192
+ }
193
+ return 0;
194
+ }
195
+
196
+ int Trie::load(std::istream& is){
197
+ clear();
198
+ loud_.load(is);
199
+ terminal_.load(is);
200
+ tail_.load(is);
201
+ tailIDs_.load(is);
202
+
203
+ is.read((char*)&keyNum_, sizeof(keyNum_));
204
+ size_t edgesSize = 0;
205
+ is.read((char*)&edgesSize, sizeof(edgesSize));
206
+ edges_.resize(edgesSize);
207
+ is.read((char*)&edges_[0], sizeof(edges_[0]) * edges_.size());
208
+
209
+ int useUX = 0;
210
+ is.read((char*)&useUX, sizeof(useUX));
211
+ if (useUX){
212
+ vtailux_ = new Trie;
213
+ int err = 0;
214
+ if ((err = vtailux_->load(is)) != 0){
215
+ return err;
216
+ }
217
+ size_t tailNum = vtailux_->size();
218
+ tailIDLen_ = lg2(tailNum);
219
+
220
+ } else {
221
+ size_t tailsNum = 0;
222
+ is.read((char*)&tailsNum, sizeof(tailsNum));
223
+ vtails_.resize(tailsNum);
224
+ for (size_t i = 0; i < tailsNum; ++i){
225
+ size_t tailSize = 0;
226
+ is.read((char*)&tailSize, sizeof(tailSize));
227
+ vtails_[i].resize(tailSize);
228
+ is.read((char*)&vtails_[i][0], sizeof(vtails_[i][0]) * vtails_[i].size());
229
+ }
230
+ }
231
+
232
+ if (!is){
233
+ return LOAD_ERROR;
234
+ }
235
+ isReady_ = true;
236
+ return 0;
237
+ }
238
+
239
+ id_t Trie::prefixSearch(const char* str, const size_t len, size_t& retLen) const{
240
+ vector<id_t> retIDs;
241
+ traverse(str, len, retLen, retIDs, 0xFFFFFFFF);
242
+ if (retIDs.size() == 0){
243
+ return NOTFOUND;
244
+ }
245
+ return retIDs.back();
246
+ }
247
+
248
+ size_t Trie::commonPrefixSearch(const char* str, const size_t len, vector<id_t>& retIDs,
249
+ const size_t limit) const {
250
+ retIDs.clear();
251
+ size_t lastLen = 0;
252
+ traverse(str, len, lastLen, retIDs, limit);
253
+ return retIDs.size();
254
+ }
255
+
256
+ size_t Trie::predictiveSearch(const char* str, const size_t len, vector<id_t>& retIDs,
257
+ const size_t limit) const{
258
+ retIDs.clear();
259
+ if (!isReady_) return 0;
260
+ if (limit == 0) return 0;
261
+
262
+ uint64_t pos = 2;
263
+ uint64_t zeros = 2;
264
+ for (size_t i = 0; i < len; ++i){
265
+ uint64_t ones = pos - zeros;
266
+
267
+ if (tail_.getBit(ones)){
268
+ uint64_t tailID = tail_.rank(ones, 1) - 1;
269
+ string tail = getTail(tailID);
270
+ for (size_t j = i; j < len; ++j){
271
+ if (str[j] != tail[j-i]){
272
+ return 0;
273
+ }
274
+ }
275
+ retIDs.push_back(terminal_.rank(ones, 1) - 1);
276
+
277
+ return retIDs.size();
278
+ }
279
+ getChild((uint8_t)str[i], pos, zeros);
280
+ if (pos == NOTFOUND){
281
+ return 0;
282
+ }
283
+ }
284
+
285
+ // search all descendant nodes from curPos
286
+ enumerateAll(pos, zeros, retIDs, limit);
287
+ return retIDs.size();
288
+ }
289
+
290
+ void Trie::decodeKey(const id_t id, string& ret) const{
291
+ ret.clear();
292
+ if (!isReady_) return;
293
+
294
+ uint64_t nodeID = terminal_.select(id+1, 1);
295
+
296
+ uint64_t pos = loud_.select(nodeID+1, 1) + 1;
297
+ uint64_t zeros = pos - nodeID;
298
+ for (;;) {
299
+ uint8_t c = 0;
300
+ getParent(c, pos, zeros);
301
+ if (pos == 0) break;
302
+ ret += (char)c;
303
+ }
304
+ reverse(ret.begin(), ret.end());
305
+ if (tail_.getBit(nodeID)){
306
+ ret += getTail(tail_.rank(nodeID, 1) - 1);
307
+ }
308
+ }
309
+
310
+ string Trie::decodeKey(const id_t id) const {
311
+ std::string ret;
312
+ decodeKey(id, ret);
313
+ return ret;
314
+ }
315
+
316
+ size_t Trie::size() const {
317
+ return keyNum_;
318
+ }
319
+
320
+ void Trie::clear() {
321
+ loud_.clear();
322
+ terminal_.clear();
323
+ tail_.clear();
324
+ vtails_.clear();
325
+ delete vtailux_;
326
+ vtailux_ = NULL;
327
+ edges_.clear();
328
+ tailIDs_.clear();
329
+ tailIDLen_ = 0;
330
+ keyNum_ = 0;
331
+ isReady_ = false;
332
+ }
333
+
334
+ std::string Trie::what(const int error){
335
+ switch(error) {
336
+ case 0:
337
+ return string("succeeded");
338
+ case FILE_OPEN_ERROR:
339
+ return string("file open error");
340
+ case FILE_WRITE_ERROR:
341
+ return string("file write error");
342
+ case FILE_READ_ERROR:
343
+ return string("file read error");
344
+ default:
345
+ return string("unknown error");
346
+ }
347
+ }
348
+
349
+ size_t Trie::getAllocSize() const{
350
+ size_t retSize = 0;
351
+ if (vtailux_) {
352
+ retSize += vtailux_->getAllocSize();
353
+ retSize += tailIDs_.getAllocSize();
354
+ } else {
355
+ size_t tailLenSum = 0;
356
+ for (size_t i = 0; i < vtails_.size(); ++i){
357
+ tailLenSum += vtails_[i].size();
358
+ }
359
+ retSize += tailLenSum + tailLenSum / 8; // length bit vector
360
+ }
361
+ return retSize + loud_.getAllocSize() + terminal_.getAllocSize() +
362
+ tail_.getAllocSize() + edges_.size();
363
+ }
364
+
365
+ void Trie::allocStat(size_t allocSize, ostream& os) const{
366
+ if (vtailux_) {
367
+ vtailux_->allocStat(allocSize, os);
368
+ size_t size = tailIDs_.getAllocSize();
369
+ os << "tailIDs:\t" << size << "\t" << (float)size / allocSize << endl;
370
+ } else {
371
+ size_t tailLenSum = 0;
372
+ for (size_t i = 0; i < vtails_.size(); ++i){
373
+ tailLenSum += vtails_[i].size();
374
+ }
375
+ os << " tails:\t" << tailLenSum << "\t" << (float)tailLenSum / allocSize << endl;
376
+ os << " tailLen:\t" << tailLenSum/8 << "\t" << (float)tailLenSum/8 / allocSize << endl;
377
+ }
378
+ os << " loud:\t" << loud_.getAllocSize() << "\t" << (float)loud_.getAllocSize() / allocSize << endl;
379
+ os << "terminal:\t" << terminal_.getAllocSize() << "\t" << (float)terminal_.getAllocSize() / allocSize << endl;
380
+ os << " tail:\t" << tail_.getAllocSize() << "\t" << (float)tail_.getAllocSize() / allocSize << endl;
381
+ os << " edge:\t" << edges_.size() << "\t" << (float)edges_.size() / allocSize << endl;
382
+ }
383
+
384
+ void Trie::stat(ostream & os) const {
385
+ size_t tailslen = 0;
386
+ for (size_t i = 0; i < vtails_.size(); ++i){
387
+ tailslen += vtails_[i].size();
388
+ }
389
+
390
+ os << " keyNum\t" << keyNum_ << endl
391
+ << " loud:\t" << loud_.size() << endl
392
+ << "terminal:\t" << terminal_.size() << endl
393
+ << " edge:\t" << edges_.size() << endl
394
+ << " avgedge:\t" << (float)edges_.size() / keyNum_ << endl
395
+ << " vtails:\t" << tailslen << endl
396
+ << " tailnum:\t" << vtails_.size() << endl
397
+ << " avgtail:\t" << (float)tailslen / keyNum_ << endl
398
+ << endl;
399
+ }
400
+
401
+
402
+ void Trie::buildTailUX(){
403
+ vector<string> origTails = vtails_;
404
+ try {
405
+ vtailux_ = new Trie;
406
+ } catch (bad_alloc){
407
+ isReady_ = false;
408
+ return;
409
+ }
410
+ for (size_t i = 0; i < vtails_.size(); ++i){
411
+ reverse(vtails_[i].begin(), vtails_[i].end());
412
+ }
413
+ vtailux_->build(vtails_, false);
414
+ tailIDLen_ = lg2(vtailux_->size());
415
+
416
+ for (size_t i = 0; i < origTails.size(); ++i){
417
+ size_t retLen = 0;
418
+ reverse(origTails[i].begin(), origTails[i].end());
419
+ id_t id = vtailux_->prefixSearch(origTails[i].c_str(), origTails[i].size(), retLen);
420
+ assert(id != NOTFOUND);
421
+ assert(retLen == origTails[i].size());
422
+ tailIDs_.push_back_with_len(id, tailIDLen_);
423
+ }
424
+ vector<string>().swap(vtails_);
425
+ }
426
+
427
+ void Trie::getChild(const uint8_t c, uint64_t& pos, uint64_t& zeros) const {
428
+ for (;; ++pos, ++zeros){
429
+ if (loud_.getBit(pos)){
430
+ pos = NOTFOUND;
431
+ return;
432
+ }
433
+ assert(zeros >= 2);
434
+ assert(edges_.size() > zeros-2);
435
+ if (edges_[zeros-2] == c){
436
+ pos = loud_.select(zeros, 1)+1;
437
+ zeros = pos - zeros + 1;
438
+ return;
439
+ }
440
+ }
441
+ }
442
+
443
+ bool Trie::isLeaf(const uint64_t pos) const {
444
+ return loud_.getBit(pos);
445
+ }
446
+
447
+ void Trie::getParent(uint8_t& c, uint64_t& pos, uint64_t& zeros) const {
448
+ zeros = pos - zeros + 1;
449
+ pos = loud_.select(zeros, 0);
450
+ if (zeros < 2) return;
451
+ assert(edges_.size() > zeros-2);
452
+ c = edges_[zeros-2];
453
+ }
454
+
455
+
456
+ void Trie::traverse(const char* str, const size_t len,
457
+ size_t& lastLen, std::vector<id_t>& retIDs, const size_t limit) const{
458
+ lastLen = 0;
459
+ if (!isReady_) return;
460
+ if (limit == 0) return;
461
+
462
+ uint64_t pos = 2;
463
+ uint64_t zeros = 2;
464
+ for (size_t depth = 0; pos != NOTFOUND; ++depth){
465
+ uint64_t ones = pos - zeros;
466
+
467
+ if (tail_.getBit(ones)){
468
+ size_t retLen = 0;
469
+ if (tailMatch(str, len, depth, tail_.rank(ones, 1)-1, retLen)){
470
+ lastLen = depth + retLen;
471
+ retIDs.push_back(terminal_.rank(ones, 1) - 1);
472
+ }
473
+ break;
474
+ } else if (terminal_.getBit(ones)){
475
+ lastLen = depth;
476
+ retIDs.push_back(terminal_.rank(ones, 1)-1);
477
+ if (retIDs.size() == limit) {
478
+ break;
479
+ }
480
+ }
481
+ if (depth == len) break;
482
+ getChild((uint8_t)str[depth], pos, zeros);
483
+ }
484
+ }
485
+
486
+
487
+ void Trie::enumerateAll(const uint64_t pos, const uint64_t zeros, vector<id_t>& retIDs, const size_t limit) const{
488
+ const uint64_t ones = pos - zeros;
489
+ if (terminal_.getBit(ones)){
490
+ retIDs.push_back(terminal_.rank(ones, 1) - 1);
491
+ }
492
+
493
+ for (uint64_t i = 0; loud_.getBit(pos + i) == 0 &&
494
+ retIDs.size() < limit; ++i){
495
+ uint64_t nextPos = loud_.select(zeros + i, 1)+1;
496
+ enumerateAll(nextPos, nextPos - zeros - i + 1, retIDs, limit);
497
+ }
498
+ }
499
+
500
+
501
+
502
+ bool Trie::tailMatch(const char* str, const size_t len, const size_t depth,
503
+ const uint64_t tailID, size_t& retLen) const{
504
+ string tail = getTail(tailID);
505
+ if (tail.size() > len-depth) {
506
+ return false;
507
+ }
508
+
509
+ for (size_t i = 0; i < tail.size(); ++i){
510
+ if (str[i+depth] != tail[i]) {
511
+ return false;
512
+ }
513
+ }
514
+ retLen = tail.size();
515
+ return true;
516
+ }
517
+
518
+ std::string Trie::getTail(const uint64_t i) const{
519
+ if (vtailux_) {
520
+ string ret;
521
+ vtailux_->decodeKey(tailIDs_.getBits(tailIDLen_ * i, tailIDLen_), ret);
522
+ reverse(ret.begin(), ret.end());
523
+ return ret;
524
+ } else {
525
+ return vtails_[i];
526
+ }
527
+ }
528
+
529
+ }