text_ux 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +25 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +51 -0
  7. data/Rakefile +18 -0
  8. data/ext/text_ux/extconf.rb +5 -0
  9. data/ext/text_ux/text_ux.cpp +252 -0
  10. data/lib/text_ux.rb +5 -0
  11. data/lib/text_ux/version.rb +3 -0
  12. data/spec/fixtures/test.ux +0 -0
  13. data/spec/spec_helper.rb +19 -0
  14. data/spec/text_ux_spec.rb +137 -0
  15. data/text_ux.gemspec +26 -0
  16. data/vendor/ux-trie/ux-0.1.9/.lock-waf_darwin_build +8 -0
  17. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest-all.cc +9118 -0
  18. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest.h +19537 -0
  19. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest_main.cc +39 -0
  20. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.py +733 -0
  21. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.pyc +0 -0
  22. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.py +147 -0
  23. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.pyc +0 -0
  24. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.py +314 -0
  25. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.pyc +0 -0
  26. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.py +298 -0
  27. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.pyc +0 -0
  28. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.py +37 -0
  29. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.pyc +0 -0
  30. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.py +149 -0
  31. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.pyc +0 -0
  32. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.py +500 -0
  33. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.pyc +0 -0
  34. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.py +130 -0
  35. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.pyc +0 -0
  36. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.py +191 -0
  37. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.pyc +0 -0
  38. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.py +358 -0
  39. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.pyc +0 -0
  40. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.py +669 -0
  41. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.pyc +0 -0
  42. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.py +341 -0
  43. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.pyc +0 -0
  44. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.py +4 -0
  45. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.pyc +0 -0
  46. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.py +12 -0
  47. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.pyc +0 -0
  48. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/asm.py +25 -0
  49. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/bison.py +29 -0
  50. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c.py +27 -0
  51. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.py +56 -0
  52. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.pyc +0 -0
  53. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.py +708 -0
  54. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.pyc +0 -0
  55. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.py +121 -0
  56. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.pyc +0 -0
  57. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.py +606 -0
  58. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.pyc +0 -0
  59. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.py +110 -0
  60. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.pyc +0 -0
  61. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.py +372 -0
  62. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.pyc +0 -0
  63. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_c.py +39 -0
  64. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.py +39 -0
  65. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.pyc +0 -0
  66. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_d.py +30 -0
  67. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_fc.py +45 -0
  68. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cs.py +98 -0
  69. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.py +27 -0
  70. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.pyc +0 -0
  71. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d.py +51 -0
  72. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_config.py +47 -0
  73. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_scan.py +133 -0
  74. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dbus.py +30 -0
  75. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dmd.py +43 -0
  76. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/errcheck.py +153 -0
  77. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc.py +123 -0
  78. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_config.py +271 -0
  79. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_scan.py +68 -0
  80. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/flex.py +27 -0
  81. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/g95.py +55 -0
  82. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gas.py +10 -0
  83. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gcc.py +98 -0
  84. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gdc.py +34 -0
  85. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gfortran.py +69 -0
  86. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/glib2.py +174 -0
  87. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gnu_dirs.py +65 -0
  88. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.py +98 -0
  89. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.pyc +0 -0
  90. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icc.py +31 -0
  91. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icpc.py +30 -0
  92. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ifort.py +42 -0
  93. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/intltool.py +78 -0
  94. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/irixcc.py +49 -0
  95. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/javaw.py +272 -0
  96. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/kde4.py +49 -0
  97. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/lua.py +19 -0
  98. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/msvc.py +650 -0
  99. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/nasm.py +13 -0
  100. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/perl.py +78 -0
  101. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/python.py +303 -0
  102. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/qt4.py +424 -0
  103. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ruby.py +104 -0
  104. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncc.py +54 -0
  105. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncxx.py +55 -0
  106. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/tex.py +222 -0
  107. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/vala.py +215 -0
  108. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/waf_unit_test.py +79 -0
  109. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/winres.py +34 -0
  110. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlc.py +46 -0
  111. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlcxx.py +46 -0
  112. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.py +334 -0
  113. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.pyc +0 -0
  114. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.py +4 -0
  115. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.pyc +0 -0
  116. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.py +173 -0
  117. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.pyc +0 -0
  118. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.py +4 -0
  119. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.pyc +0 -0
  120. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.py +223 -0
  121. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.pyc +0 -0
  122. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.py +50 -0
  123. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.pyc +0 -0
  124. data/vendor/ux-trie/ux-0.1.9/src/bitVec.cpp +119 -0
  125. data/vendor/ux-trie/ux-0.1.9/src/bitVec.hpp +64 -0
  126. data/vendor/ux-trie/ux-0.1.9/src/bitVecTest.cpp +143 -0
  127. data/vendor/ux-trie/ux-0.1.9/src/cmdline.h +809 -0
  128. data/vendor/ux-trie/ux-0.1.9/src/rsDic.cpp +121 -0
  129. data/vendor/ux-trie/ux-0.1.9/src/rsDic.hpp +57 -0
  130. data/vendor/ux-trie/ux-0.1.9/src/ux.hpp +26 -0
  131. data/vendor/ux-trie/ux-0.1.9/src/uxMain.cpp +206 -0
  132. data/vendor/ux-trie/ux-0.1.9/src/uxMap.cpp +0 -0
  133. data/vendor/ux-trie/ux-0.1.9/src/uxMap.hpp +248 -0
  134. data/vendor/ux-trie/ux-0.1.9/src/uxMapTest.cpp +139 -0
  135. data/vendor/ux-trie/ux-0.1.9/src/uxTest.cpp +229 -0
  136. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.cpp +529 -0
  137. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.hpp +220 -0
  138. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.cpp +92 -0
  139. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.hpp +35 -0
  140. data/vendor/ux-trie/ux-0.1.9/src/wscript +43 -0
  141. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.py +0 -0
  142. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.pyc +0 -0
  143. data/vendor/ux-trie/ux-0.1.9/unittestt.py +166 -0
  144. data/vendor/ux-trie/ux-0.1.9/ux.pc.in +10 -0
  145. data/vendor/ux-trie/ux-0.1.9/waf +0 -0
  146. data/vendor/ux-trie/ux-0.1.9/wscript +32 -0
  147. metadata +249 -0
@@ -0,0 +1,139 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <gtest/gtest.h>
21
+ #include <vector>
22
+ #include <string>
23
+ #include "uxMap.hpp"
24
+
25
+ using namespace std;
26
+
27
+ TEST(uxmap, trivial){
28
+ ux::Map<int> uxm;
29
+
30
+ vector<string> wordList;
31
+ vector<int> valueList;
32
+ uxm.build(wordList);
33
+ ASSERT_EQ(0, uxm.size());
34
+ }
35
+
36
+
37
+ TEST(uxmap, simple){
38
+ vector<string> wordList;
39
+ vector<int> valueList;
40
+ wordList.push_back("i");
41
+ valueList.push_back(1);
42
+ wordList.push_back("in");
43
+ valueList.push_back(2);
44
+ wordList.push_back("to");
45
+ valueList.push_back(3);
46
+ wordList.push_back("we");
47
+ valueList.push_back(4);
48
+ wordList.push_back("inn");
49
+ valueList.push_back(5);
50
+ wordList.push_back("tea");
51
+ valueList.push_back(6);
52
+ wordList.push_back("ten");
53
+ valueList.push_back(7);
54
+
55
+ vector<string> origWordList = wordList;
56
+ ux::Map<int> uxm;
57
+ uxm.build(wordList);
58
+
59
+ for (size_t i = 0; i < origWordList.size(); ++i){
60
+ string key = origWordList[i];
61
+ ASSERT_EQ(0, uxm.set(key.c_str(), key.size(), valueList[i]));
62
+ }
63
+
64
+ for (size_t i = 0; i < origWordList.size(); ++i){
65
+ string key = origWordList[i];
66
+ int ret = -1;
67
+ ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret));
68
+ ASSERT_EQ(valueList[i], ret);
69
+ }
70
+ }
71
+
72
+ TEST(uxmap, pair){
73
+ vector<pair<string, int> > kvs;
74
+ kvs.push_back(make_pair("i", 1));
75
+ kvs.push_back(make_pair("in", 2));
76
+ kvs.push_back(make_pair("to", 3));
77
+ kvs.push_back(make_pair("we", 4));
78
+ kvs.push_back(make_pair("inn", 5));
79
+ kvs.push_back(make_pair("tea", 6));
80
+ kvs.push_back(make_pair("ten", 7));
81
+
82
+ ux::Map<int> uxm;
83
+ uxm.build(kvs);
84
+
85
+ for (size_t i = 0; i < kvs.size(); ++i){
86
+ int ret = -1;
87
+ string key = kvs[i].first;
88
+ ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret));
89
+ ASSERT_EQ(kvs[i].second, ret);
90
+ }
91
+ }
92
+
93
+ TEST(uxmap, map){
94
+ map<string, int> kvs;
95
+ kvs[string("i")] = 1;
96
+ kvs[string("in")] = 2;
97
+ kvs[string("to")] = 3;
98
+ kvs[string("we")] = 4;
99
+ kvs[string("inn")] = 5;
100
+ kvs[string("tea")] = 6;
101
+ kvs[string("ten")] = 7;
102
+
103
+ ux::Map<int> uxm;
104
+ uxm.build(kvs);
105
+ for (map<string, int>::const_iterator it = kvs.begin();
106
+ it != kvs.end(); ++it){
107
+ string key = it->first;
108
+ int ret = -1;
109
+ ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret));
110
+ ASSERT_EQ(it->second, ret);
111
+ }
112
+ }
113
+
114
+ TEST(uxmap, save){
115
+ map<string, int> kvs;
116
+ kvs[string("i")] = 1;
117
+ kvs[string("in")] = 2;
118
+ kvs[string("to")] = 3;
119
+ kvs[string("we")] = 4;
120
+ kvs[string("inn")] = 5;
121
+ kvs[string("tea")] = 6;
122
+ kvs[string("ten")] = 7;
123
+
124
+ ux::Map<int> uxm;
125
+ uxm.build(kvs);
126
+
127
+ ostringstream os;
128
+ ASSERT_EQ(0, uxm.save(os));
129
+ istringstream is(os.str());
130
+ ux::Map<int> uxm_load;
131
+ ASSERT_EQ(0, uxm_load.load(is));
132
+ for (map<string, int>::const_iterator it = kvs.begin();
133
+ it != kvs.end(); ++it){
134
+ string key = it->first;
135
+ int ret = -1;
136
+ ASSERT_EQ(0, uxm_load.get(key.c_str(), key.size(), ret));
137
+ ASSERT_EQ(it->second, ret);
138
+ }
139
+ }
@@ -0,0 +1,229 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <gtest/gtest.h>
21
+ #include <vector>
22
+ #include <string>
23
+ #include <sstream>
24
+ #include <map>
25
+ #include "uxTrie.hpp"
26
+
27
+ using namespace std;
28
+
29
+ TEST(ux, trivial){
30
+ ux::Trie ux;
31
+ vector<string> wordList;
32
+ ux.build(wordList);
33
+ string q = "hoge";
34
+ size_t retLen = 0;
35
+ ASSERT_EQ(ux::NOTFOUND, ux.prefixSearch(q.c_str(), q.size(), retLen));
36
+ }
37
+
38
+ TEST(ux, simple){
39
+ vector<string> wordList;
40
+ wordList.push_back("i");
41
+ wordList.push_back("in");
42
+ wordList.push_back("to");
43
+ wordList.push_back("we");
44
+ wordList.push_back("inn");
45
+ wordList.push_back("tea");
46
+ wordList.push_back("ten");
47
+ vector<string> origWordList = wordList;
48
+ ux::Trie ux;
49
+ ux.build(wordList);
50
+
51
+ for (size_t i = 0; i < origWordList.size(); ++i){
52
+ ASSERT_EQ(origWordList[i], ux.decodeKey(i));
53
+ }
54
+ }
55
+
56
+ TEST(ux, clear){
57
+ vector<string> wordList;
58
+ wordList.push_back("i");
59
+ wordList.push_back("in");
60
+ wordList.push_back("to");
61
+ wordList.push_back("we");
62
+ wordList.push_back("inn");
63
+ wordList.push_back("tea");
64
+ wordList.push_back("ten");
65
+ vector<string> origWordList = wordList;
66
+ ux::Trie ux;
67
+ ux.build(wordList);
68
+
69
+ for (size_t i = 0; i < origWordList.size(); ++i){
70
+ ASSERT_EQ(origWordList[i], ux.decodeKey(i));
71
+ }
72
+
73
+ ux.clear();
74
+ ux.build(wordList);
75
+ for (size_t i = 0; i < origWordList.size(); ++i){
76
+ ASSERT_EQ(origWordList[i], ux.decodeKey(i));
77
+ }
78
+ }
79
+
80
+
81
+ TEST(ux, decodeKey){
82
+ ux::Trie ux;
83
+ vector<string> wordList;
84
+ wordList.push_back("tok");
85
+ wordList.push_back("osak");
86
+ wordList.push_back("okina");
87
+ wordList.push_back("fukush");
88
+ ux.build(wordList);
89
+
90
+ ASSERT_EQ("fukush", ux.decodeKey(0));
91
+ ASSERT_EQ("tok" , ux.decodeKey(1));
92
+ ASSERT_EQ("okina" , ux.decodeKey(2));
93
+ ASSERT_EQ("osak" , ux.decodeKey(3));
94
+
95
+
96
+ }
97
+
98
+ TEST(ux, prefixSearch){
99
+ ux::Trie ux;
100
+ vector<string> wordList;
101
+ wordList.push_back("tea");
102
+ wordList.push_back("top");
103
+ wordList.push_back("bear");
104
+ wordList.push_back("bep");
105
+ wordList.push_back("東京都");
106
+ ux.build(wordList);
107
+
108
+ size_t retLen = 0;
109
+ string q1 = "tea";
110
+ ASSERT_NE(ux::NOTFOUND, ux.prefixSearch(q1.c_str(), q1.size(), retLen));
111
+ ASSERT_EQ(3, retLen);
112
+ string q2 = "hoge";
113
+ ASSERT_EQ(ux::NOTFOUND, ux.prefixSearch(q2.c_str(), q2.size(), retLen));
114
+ string q3 = "te";
115
+ ASSERT_EQ(ux::NOTFOUND, ux.prefixSearch(q3.c_str(), q3.size(), retLen));
116
+ string q4 = "東京都";
117
+ ASSERT_NE(ux::NOTFOUND, ux.prefixSearch(q4.c_str(), q4.size(), retLen));
118
+ ASSERT_EQ(9, retLen);
119
+ }
120
+
121
+ TEST(ux, commonPrefixSearch){
122
+ ux::Trie ux;
123
+ vector<string> wordList;
124
+ wordList.push_back("tea");
125
+ wordList.push_back("top");
126
+ wordList.push_back("bear");
127
+ wordList.push_back("bep");
128
+ wordList.push_back("beppu");
129
+ ux.build(wordList);
130
+
131
+ vector<ux::id_t> retIDs;
132
+ string q1 = "beppuhaiiyu";
133
+ ASSERT_EQ(2, ux.commonPrefixSearch(q1.c_str(), q1.size(), retIDs));
134
+ ASSERT_EQ("bep", ux.decodeKey(retIDs[0]));
135
+ ASSERT_EQ("beppu", ux.decodeKey(retIDs[1]));
136
+ }
137
+
138
+ TEST(ux, predictiveSearch){
139
+ ux::Trie ux;
140
+ vector<string> wordList;
141
+ wordList.push_back("tea");
142
+ wordList.push_back("top");
143
+ wordList.push_back("bear");
144
+ wordList.push_back("bep");
145
+ wordList.push_back("beppu");
146
+ ux.build(wordList);
147
+
148
+ vector<ux::id_t> retIDs;
149
+ string q1 = "be";
150
+ ASSERT_EQ(3, ux.predictiveSearch(q1.c_str(), q1.size(), retIDs));
151
+ ASSERT_EQ("bear", ux.decodeKey(retIDs[0]));
152
+ ASSERT_EQ("bep", ux.decodeKey(retIDs[1]));
153
+ ASSERT_EQ("beppu", ux.decodeKey(retIDs[2]));
154
+ }
155
+
156
+ TEST(ux, predictiveSearch2){
157
+ ux::Trie ux;
158
+ vector<string> wordList;
159
+ wordList.push_back("東京都");
160
+ ux.build(wordList);
161
+
162
+ vector<ux::id_t> retIDs;
163
+ string q1 = "東";
164
+ ASSERT_EQ(1, ux.predictiveSearch(q1.c_str(), q1.size(), retIDs));
165
+ ASSERT_EQ(1, retIDs.size());
166
+ }
167
+
168
+ TEST(ux, save){
169
+ const char* fn = "uxTestSave.ind";
170
+ ux::Trie ux;
171
+ string q1 = "tea";
172
+ string q2 = "top";
173
+ string q3 = "bear";
174
+ string q4 = "bep";
175
+ string q5 = "beppu";
176
+
177
+ vector<string> wordList;
178
+ wordList.push_back(q1);
179
+ wordList.push_back(q2);
180
+ wordList.push_back(q3);
181
+ wordList.push_back(q4);
182
+ wordList.push_back(q5);
183
+ ux.build(wordList);
184
+ ASSERT_EQ(0, ux.save(fn));
185
+
186
+ ux::Trie ux2;
187
+ ASSERT_EQ(0, ux2.load(fn));
188
+ ASSERT_EQ(0, remove(fn));
189
+
190
+ ASSERT_EQ(ux.size(), ux2.size());
191
+
192
+ size_t retLen = 0;
193
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q1.c_str(), q1.size(), retLen));
194
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q2.c_str(), q2.size(), retLen));
195
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q3.c_str(), q3.size(), retLen));
196
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q4.c_str(), q4.size(), retLen));
197
+ ASSERT_NE(ux::NOTFOUND, ux2.prefixSearch(q5.c_str(), q5.size(), retLen));
198
+ }
199
+
200
+ TEST(ux, large){
201
+ ux::Trie trie;
202
+ vector<string> wordList;
203
+ for (int i = 0; i < 10000; ++i){
204
+ ostringstream os;
205
+ os << i;
206
+ wordList.push_back(os.str());
207
+ }
208
+
209
+ trie.build(wordList);
210
+ map<int, int> dic;
211
+ for (size_t i = 0; i < wordList.size(); ++i){
212
+ size_t retLen = 0;
213
+ dic[trie.prefixSearch(wordList[i].c_str(), wordList[i].size(), retLen)]++;
214
+ }
215
+ ASSERT_EQ(dic.size(), trie.size());
216
+ }
217
+
218
+ TEST(ux, predictiveTest){
219
+ vector<string> str;
220
+ str.push_back("xx");
221
+ str.push_back("xxy");
222
+ str.push_back("xxxz");
223
+ ux::Trie trie(str);
224
+
225
+ vector<ux::id_t> v;
226
+ string q = "xxy";
227
+ ASSERT_EQ(1, trie.predictiveSearch(q.c_str(), q.size(), v));
228
+ }
229
+
@@ -0,0 +1,529 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <algorithm>
21
+ #include <queue>
22
+ #include <fstream>
23
+ #include <cassert>
24
+ #include <map>
25
+ #include <cmath>
26
+ #include "uxTrie.hpp"
27
+
28
+ using namespace std;
29
+
30
+ namespace ux{
31
+
32
// Half-open index range [left, right) into the sorted key list; one such
// range is queued per trie node while the trie is being built.
struct RangeNode{
  size_t left;   // index of the first key in the range
  size_t right;  // one past the index of the last key in the range

  RangeNode(size_t _left, size_t _right)
    : left(_left),
      right(_right)
  {}
};
38
+
39
+ Trie::Trie() : vtailux_(NULL), tailIDLen_(0), keyNum_(0), isReady_(false) {
40
+ }
41
+
42
+ Trie::Trie(vector<string>& keyList, const bool isTailUX) : vtailux_(NULL), tailIDLen_(0), keyNum_(0), isReady_(false) {
43
+ build(keyList, isTailUX);
44
+ }
45
+
46
+ Trie::~Trie(){
47
+ delete vtailux_;
48
+ }
49
+
50
+ void Trie::build(vector<string>& keyList, const bool isTailUX){
51
+ clear();
52
+ sort(keyList.begin(), keyList.end());
53
+ keyList.erase(unique(keyList.begin(), keyList.end()), keyList.end());
54
+
55
+ keyNum_ = keyList.size();
56
+
57
+ queue<RangeNode> q;
58
+ queue<RangeNode> nextQ;
59
+ if (keyNum_ != 0){
60
+ q.push(RangeNode(0, keyNum_));
61
+ }
62
+
63
+ BitVec terminalBV;
64
+ BitVec tailBV;
65
+ BitVec loudBV;
66
+ loudBV.push_back(0); // super root
67
+ loudBV.push_back(1);
68
+
69
+ for (size_t depth = 0;;){
70
+ if (q.empty()){
71
+ swap(q, nextQ);
72
+ ++depth;
73
+ if (q.empty()) break;
74
+ }
75
+ RangeNode& rn = q.front();
76
+ const size_t left = rn.left;
77
+ const size_t right = rn.right;
78
+ q.pop();
79
+
80
+ string& cur = keyList[left];
81
+ if (left + 1 == right &&
82
+ depth + 1 < cur.size()){ // tail candidate
83
+ loudBV.push_back(1);
84
+ terminalBV.push_back(1);
85
+ tailBV.push_back(1);
86
+ string tail;
87
+ for (size_t i = depth; i < cur.size(); ++i){
88
+ tail += cur[i];
89
+ }
90
+ vtails_.push_back(tail);
91
+ continue;
92
+ } else {
93
+ tailBV.push_back(0);
94
+ }
95
+
96
+ assert(keyList.size() > left);
97
+ size_t newLeft = left;
98
+ if (depth == cur.size()){
99
+ terminalBV.push_back(1);
100
+ ++newLeft;
101
+ if (newLeft == right){
102
+ loudBV.push_back(1);
103
+ continue;
104
+ }
105
+ } else {
106
+ terminalBV.push_back(0);
107
+ }
108
+
109
+ size_t prev = newLeft;
110
+ assert(keyList[prev].size() > depth);
111
+ uint8_t prevC = (uint8_t)keyList[prev][depth];
112
+ uint64_t degree = 0;
113
+ for (size_t i = prev+1; ; ++i){
114
+ if (i < right &&
115
+ prevC == (uint8_t)keyList[i][depth]){
116
+ continue;
117
+ }
118
+ edges_.push_back(prevC);
119
+ loudBV.push_back(0);
120
+ degree++;
121
+ nextQ.push(RangeNode(prev, i));
122
+ if (i == right){
123
+ break;
124
+ }
125
+ prev = i;
126
+ assert(keyList[prev].size() > depth);
127
+ prevC = keyList[prev][depth];
128
+
129
+ }
130
+ loudBV.push_back(1);
131
+ }
132
+
133
+ loud_.build(loudBV);
134
+ terminal_.build(terminalBV);
135
+ tail_.build(tailBV);
136
+
137
+ if (keyNum_ > 0){
138
+ isReady_ = true;
139
+ }
140
+
141
+ if (isTailUX){
142
+ buildTailUX();
143
+ }
144
+ }
145
+
146
+ int Trie::save(const char* fn) const {
147
+ ofstream ofs(fn, ios::binary);
148
+ if (!ofs){
149
+ return FILE_OPEN_ERROR;
150
+ }
151
+ return save(ofs);
152
+ }
153
+
154
+ int Trie::load(const char* fn){
155
+ ifstream ifs(fn, ios::binary);
156
+ if (!ifs){
157
+ return FILE_OPEN_ERROR;
158
+ }
159
+ return load(ifs);
160
+ }
161
+
162
+ int Trie::save(std::ostream& os) const {
163
+ loud_.save(os);
164
+ terminal_.save(os);
165
+ tail_.save(os);
166
+ tailIDs_.save(os);
167
+
168
+ os.write((const char*)&keyNum_, sizeof(keyNum_));
169
+ size_t edgesSize = edges_.size();
170
+ os.write((const char*)&edgesSize, sizeof(edgesSize));
171
+ os.write((const char*)&edges_[0], sizeof(edges_[0]) * edges_.size());
172
+
173
+ int useUX = (vtailux_ != NULL);
174
+ os.write((const char*)&useUX, sizeof(useUX));
175
+ if (useUX){
176
+ int err = 0;
177
+ if ((err = vtailux_->save(os)) != 0){
178
+ return err;
179
+ }
180
+ } else {
181
+ size_t tailsNum = vtails_.size();
182
+ os.write((const char*)&tailsNum, sizeof(tailsNum));
183
+ for (size_t i = 0; i < vtails_.size(); ++i){
184
+ size_t tailSize = vtails_[i].size();
185
+ os.write((const char*)&tailSize, sizeof(tailSize));
186
+ os.write((const char*)&vtails_[i][0], sizeof(vtails_[i][0]) * vtails_[i].size());
187
+ }
188
+ }
189
+
190
+ if (!os){
191
+ return SAVE_ERROR;
192
+ }
193
+ return 0;
194
+ }
195
+
196
+ int Trie::load(std::istream& is){
197
+ clear();
198
+ loud_.load(is);
199
+ terminal_.load(is);
200
+ tail_.load(is);
201
+ tailIDs_.load(is);
202
+
203
+ is.read((char*)&keyNum_, sizeof(keyNum_));
204
+ size_t edgesSize = 0;
205
+ is.read((char*)&edgesSize, sizeof(edgesSize));
206
+ edges_.resize(edgesSize);
207
+ is.read((char*)&edges_[0], sizeof(edges_[0]) * edges_.size());
208
+
209
+ int useUX = 0;
210
+ is.read((char*)&useUX, sizeof(useUX));
211
+ if (useUX){
212
+ vtailux_ = new Trie;
213
+ int err = 0;
214
+ if ((err = vtailux_->load(is)) != 0){
215
+ return err;
216
+ }
217
+ size_t tailNum = vtailux_->size();
218
+ tailIDLen_ = lg2(tailNum);
219
+
220
+ } else {
221
+ size_t tailsNum = 0;
222
+ is.read((char*)&tailsNum, sizeof(tailsNum));
223
+ vtails_.resize(tailsNum);
224
+ for (size_t i = 0; i < tailsNum; ++i){
225
+ size_t tailSize = 0;
226
+ is.read((char*)&tailSize, sizeof(tailSize));
227
+ vtails_[i].resize(tailSize);
228
+ is.read((char*)&vtails_[i][0], sizeof(vtails_[i][0]) * vtails_[i].size());
229
+ }
230
+ }
231
+
232
+ if (!is){
233
+ return LOAD_ERROR;
234
+ }
235
+ isReady_ = true;
236
+ return 0;
237
+ }
238
+
239
+ id_t Trie::prefixSearch(const char* str, const size_t len, size_t& retLen) const{
240
+ vector<id_t> retIDs;
241
+ traverse(str, len, retLen, retIDs, 0xFFFFFFFF);
242
+ if (retIDs.size() == 0){
243
+ return NOTFOUND;
244
+ }
245
+ return retIDs.back();
246
+ }
247
+
248
+ size_t Trie::commonPrefixSearch(const char* str, const size_t len, vector<id_t>& retIDs,
249
+ const size_t limit) const {
250
+ retIDs.clear();
251
+ size_t lastLen = 0;
252
+ traverse(str, len, lastLen, retIDs, limit);
253
+ return retIDs.size();
254
+ }
255
+
256
+ size_t Trie::predictiveSearch(const char* str, const size_t len, vector<id_t>& retIDs,
257
+ const size_t limit) const{
258
+ retIDs.clear();
259
+ if (!isReady_) return 0;
260
+ if (limit == 0) return 0;
261
+
262
+ uint64_t pos = 2;
263
+ uint64_t zeros = 2;
264
+ for (size_t i = 0; i < len; ++i){
265
+ uint64_t ones = pos - zeros;
266
+
267
+ if (tail_.getBit(ones)){
268
+ uint64_t tailID = tail_.rank(ones, 1) - 1;
269
+ string tail = getTail(tailID);
270
+ for (size_t j = i; j < len; ++j){
271
+ if (str[j] != tail[j-i]){
272
+ return 0;
273
+ }
274
+ }
275
+ retIDs.push_back(terminal_.rank(ones, 1) - 1);
276
+
277
+ return retIDs.size();
278
+ }
279
+ getChild((uint8_t)str[i], pos, zeros);
280
+ if (pos == NOTFOUND){
281
+ return 0;
282
+ }
283
+ }
284
+
285
+ // search all descendant nodes from curPos
286
+ enumerateAll(pos, zeros, retIDs, limit);
287
+ return retIDs.size();
288
+ }
289
+
290
+ void Trie::decodeKey(const id_t id, string& ret) const{
291
+ ret.clear();
292
+ if (!isReady_) return;
293
+
294
+ uint64_t nodeID = terminal_.select(id+1, 1);
295
+
296
+ uint64_t pos = loud_.select(nodeID+1, 1) + 1;
297
+ uint64_t zeros = pos - nodeID;
298
+ for (;;) {
299
+ uint8_t c = 0;
300
+ getParent(c, pos, zeros);
301
+ if (pos == 0) break;
302
+ ret += (char)c;
303
+ }
304
+ reverse(ret.begin(), ret.end());
305
+ if (tail_.getBit(nodeID)){
306
+ ret += getTail(tail_.rank(nodeID, 1) - 1);
307
+ }
308
+ }
309
+
310
+ string Trie::decodeKey(const id_t id) const {
311
+ std::string ret;
312
+ decodeKey(id, ret);
313
+ return ret;
314
+ }
315
+
316
+ size_t Trie::size() const {
317
+ return keyNum_;
318
+ }
319
+
320
+ void Trie::clear() {
321
+ loud_.clear();
322
+ terminal_.clear();
323
+ tail_.clear();
324
+ vtails_.clear();
325
+ delete vtailux_;
326
+ vtailux_ = NULL;
327
+ edges_.clear();
328
+ tailIDs_.clear();
329
+ tailIDLen_ = 0;
330
+ keyNum_ = 0;
331
+ isReady_ = false;
332
+ }
333
+
334
+ std::string Trie::what(const int error){
335
+ switch(error) {
336
+ case 0:
337
+ return string("succeeded");
338
+ case FILE_OPEN_ERROR:
339
+ return string("file open error");
340
+ case FILE_WRITE_ERROR:
341
+ return string("file write error");
342
+ case FILE_READ_ERROR:
343
+ return string("file read error");
344
+ default:
345
+ return string("unknown error");
346
+ }
347
+ }
348
+
349
+ size_t Trie::getAllocSize() const{
350
+ size_t retSize = 0;
351
+ if (vtailux_) {
352
+ retSize += vtailux_->getAllocSize();
353
+ retSize += tailIDs_.getAllocSize();
354
+ } else {
355
+ size_t tailLenSum = 0;
356
+ for (size_t i = 0; i < vtails_.size(); ++i){
357
+ tailLenSum += vtails_[i].size();
358
+ }
359
+ retSize += tailLenSum + tailLenSum / 8; // length bit vector
360
+ }
361
+ return retSize + loud_.getAllocSize() + terminal_.getAllocSize() +
362
+ tail_.getAllocSize() + edges_.size();
363
+ }
364
+
365
+ void Trie::allocStat(size_t allocSize, ostream& os) const{
366
+ if (vtailux_) {
367
+ vtailux_->allocStat(allocSize, os);
368
+ size_t size = tailIDs_.getAllocSize();
369
+ os << "tailIDs:\t" << size << "\t" << (float)size / allocSize << endl;
370
+ } else {
371
+ size_t tailLenSum = 0;
372
+ for (size_t i = 0; i < vtails_.size(); ++i){
373
+ tailLenSum += vtails_[i].size();
374
+ }
375
+ os << " tails:\t" << tailLenSum << "\t" << (float)tailLenSum / allocSize << endl;
376
+ os << " tailLen:\t" << tailLenSum/8 << "\t" << (float)tailLenSum/8 / allocSize << endl;
377
+ }
378
+ os << " loud:\t" << loud_.getAllocSize() << "\t" << (float)loud_.getAllocSize() / allocSize << endl;
379
+ os << "terminal:\t" << terminal_.getAllocSize() << "\t" << (float)terminal_.getAllocSize() / allocSize << endl;
380
+ os << " tail:\t" << tail_.getAllocSize() << "\t" << (float)tail_.getAllocSize() / allocSize << endl;
381
+ os << " edge:\t" << edges_.size() << "\t" << (float)edges_.size() / allocSize << endl;
382
+ }
383
+
384
+ void Trie::stat(ostream & os) const {
385
+ size_t tailslen = 0;
386
+ for (size_t i = 0; i < vtails_.size(); ++i){
387
+ tailslen += vtails_[i].size();
388
+ }
389
+
390
+ os << " keyNum\t" << keyNum_ << endl
391
+ << " loud:\t" << loud_.size() << endl
392
+ << "terminal:\t" << terminal_.size() << endl
393
+ << " edge:\t" << edges_.size() << endl
394
+ << " avgedge:\t" << (float)edges_.size() / keyNum_ << endl
395
+ << " vtails:\t" << tailslen << endl
396
+ << " tailnum:\t" << vtails_.size() << endl
397
+ << " avgtail:\t" << (float)tailslen / keyNum_ << endl
398
+ << endl;
399
+ }
400
+
401
+
402
+ void Trie::buildTailUX(){
403
+ vector<string> origTails = vtails_;
404
+ try {
405
+ vtailux_ = new Trie;
406
+ } catch (bad_alloc){
407
+ isReady_ = false;
408
+ return;
409
+ }
410
+ for (size_t i = 0; i < vtails_.size(); ++i){
411
+ reverse(vtails_[i].begin(), vtails_[i].end());
412
+ }
413
+ vtailux_->build(vtails_, false);
414
+ tailIDLen_ = lg2(vtailux_->size());
415
+
416
+ for (size_t i = 0; i < origTails.size(); ++i){
417
+ size_t retLen = 0;
418
+ reverse(origTails[i].begin(), origTails[i].end());
419
+ id_t id = vtailux_->prefixSearch(origTails[i].c_str(), origTails[i].size(), retLen);
420
+ assert(id != NOTFOUND);
421
+ assert(retLen == origTails[i].size());
422
+ tailIDs_.push_back_with_len(id, tailIDLen_);
423
+ }
424
+ vector<string>().swap(vtails_);
425
+ }
426
+
427
+ void Trie::getChild(const uint8_t c, uint64_t& pos, uint64_t& zeros) const {
428
+ for (;; ++pos, ++zeros){
429
+ if (loud_.getBit(pos)){
430
+ pos = NOTFOUND;
431
+ return;
432
+ }
433
+ assert(zeros >= 2);
434
+ assert(edges_.size() > zeros-2);
435
+ if (edges_[zeros-2] == c){
436
+ pos = loud_.select(zeros, 1)+1;
437
+ zeros = pos - zeros + 1;
438
+ return;
439
+ }
440
+ }
441
+ }
442
+
443
+ bool Trie::isLeaf(const uint64_t pos) const {
444
+ return loud_.getBit(pos);
445
+ }
446
+
447
+ void Trie::getParent(uint8_t& c, uint64_t& pos, uint64_t& zeros) const {
448
+ zeros = pos - zeros + 1;
449
+ pos = loud_.select(zeros, 0);
450
+ if (zeros < 2) return;
451
+ assert(edges_.size() > zeros-2);
452
+ c = edges_[zeros-2];
453
+ }
454
+
455
+
456
+ void Trie::traverse(const char* str, const size_t len,
457
+ size_t& lastLen, std::vector<id_t>& retIDs, const size_t limit) const{
458
+ lastLen = 0;
459
+ if (!isReady_) return;
460
+ if (limit == 0) return;
461
+
462
+ uint64_t pos = 2;
463
+ uint64_t zeros = 2;
464
+ for (size_t depth = 0; pos != NOTFOUND; ++depth){
465
+ uint64_t ones = pos - zeros;
466
+
467
+ if (tail_.getBit(ones)){
468
+ size_t retLen = 0;
469
+ if (tailMatch(str, len, depth, tail_.rank(ones, 1)-1, retLen)){
470
+ lastLen = depth + retLen;
471
+ retIDs.push_back(terminal_.rank(ones, 1) - 1);
472
+ }
473
+ break;
474
+ } else if (terminal_.getBit(ones)){
475
+ lastLen = depth;
476
+ retIDs.push_back(terminal_.rank(ones, 1)-1);
477
+ if (retIDs.size() == limit) {
478
+ break;
479
+ }
480
+ }
481
+ if (depth == len) break;
482
+ getChild((uint8_t)str[depth], pos, zeros);
483
+ }
484
+ }
485
+
486
+
487
+ void Trie::enumerateAll(const uint64_t pos, const uint64_t zeros, vector<id_t>& retIDs, const size_t limit) const{
488
+ const uint64_t ones = pos - zeros;
489
+ if (terminal_.getBit(ones)){
490
+ retIDs.push_back(terminal_.rank(ones, 1) - 1);
491
+ }
492
+
493
+ for (uint64_t i = 0; loud_.getBit(pos + i) == 0 &&
494
+ retIDs.size() < limit; ++i){
495
+ uint64_t nextPos = loud_.select(zeros + i, 1)+1;
496
+ enumerateAll(nextPos, nextPos - zeros - i + 1, retIDs, limit);
497
+ }
498
+ }
499
+
500
+
501
+
502
+ bool Trie::tailMatch(const char* str, const size_t len, const size_t depth,
503
+ const uint64_t tailID, size_t& retLen) const{
504
+ string tail = getTail(tailID);
505
+ if (tail.size() > len-depth) {
506
+ return false;
507
+ }
508
+
509
+ for (size_t i = 0; i < tail.size(); ++i){
510
+ if (str[i+depth] != tail[i]) {
511
+ return false;
512
+ }
513
+ }
514
+ retLen = tail.size();
515
+ return true;
516
+ }
517
+
518
+ std::string Trie::getTail(const uint64_t i) const{
519
+ if (vtailux_) {
520
+ string ret;
521
+ vtailux_->decodeKey(tailIDs_.getBits(tailIDLen_ * i, tailIDLen_), ret);
522
+ reverse(ret.begin(), ret.end());
523
+ return ret;
524
+ } else {
525
+ return vtails_[i];
526
+ }
527
+ }
528
+
529
+ }