text_ux 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +25 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +51 -0
  7. data/Rakefile +18 -0
  8. data/ext/text_ux/extconf.rb +5 -0
  9. data/ext/text_ux/text_ux.cpp +252 -0
  10. data/lib/text_ux.rb +5 -0
  11. data/lib/text_ux/version.rb +3 -0
  12. data/spec/fixtures/test.ux +0 -0
  13. data/spec/spec_helper.rb +19 -0
  14. data/spec/text_ux_spec.rb +137 -0
  15. data/text_ux.gemspec +26 -0
  16. data/vendor/ux-trie/ux-0.1.9/.lock-waf_darwin_build +8 -0
  17. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest-all.cc +9118 -0
  18. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest.h +19537 -0
  19. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest_main.cc +39 -0
  20. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.py +733 -0
  21. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.pyc +0 -0
  22. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.py +147 -0
  23. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.pyc +0 -0
  24. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.py +314 -0
  25. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.pyc +0 -0
  26. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.py +298 -0
  27. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.pyc +0 -0
  28. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.py +37 -0
  29. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.pyc +0 -0
  30. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.py +149 -0
  31. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.pyc +0 -0
  32. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.py +500 -0
  33. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.pyc +0 -0
  34. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.py +130 -0
  35. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.pyc +0 -0
  36. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.py +191 -0
  37. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.pyc +0 -0
  38. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.py +358 -0
  39. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.pyc +0 -0
  40. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.py +669 -0
  41. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.pyc +0 -0
  42. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.py +341 -0
  43. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.pyc +0 -0
  44. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.py +4 -0
  45. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.pyc +0 -0
  46. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.py +12 -0
  47. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.pyc +0 -0
  48. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/asm.py +25 -0
  49. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/bison.py +29 -0
  50. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c.py +27 -0
  51. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.py +56 -0
  52. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.pyc +0 -0
  53. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.py +708 -0
  54. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.pyc +0 -0
  55. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.py +121 -0
  56. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.pyc +0 -0
  57. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.py +606 -0
  58. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.pyc +0 -0
  59. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.py +110 -0
  60. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.pyc +0 -0
  61. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.py +372 -0
  62. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.pyc +0 -0
  63. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_c.py +39 -0
  64. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.py +39 -0
  65. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.pyc +0 -0
  66. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_d.py +30 -0
  67. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_fc.py +45 -0
  68. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cs.py +98 -0
  69. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.py +27 -0
  70. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.pyc +0 -0
  71. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d.py +51 -0
  72. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_config.py +47 -0
  73. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_scan.py +133 -0
  74. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dbus.py +30 -0
  75. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dmd.py +43 -0
  76. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/errcheck.py +153 -0
  77. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc.py +123 -0
  78. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_config.py +271 -0
  79. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_scan.py +68 -0
  80. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/flex.py +27 -0
  81. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/g95.py +55 -0
  82. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gas.py +10 -0
  83. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gcc.py +98 -0
  84. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gdc.py +34 -0
  85. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gfortran.py +69 -0
  86. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/glib2.py +174 -0
  87. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gnu_dirs.py +65 -0
  88. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.py +98 -0
  89. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.pyc +0 -0
  90. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icc.py +31 -0
  91. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icpc.py +30 -0
  92. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ifort.py +42 -0
  93. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/intltool.py +78 -0
  94. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/irixcc.py +49 -0
  95. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/javaw.py +272 -0
  96. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/kde4.py +49 -0
  97. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/lua.py +19 -0
  98. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/msvc.py +650 -0
  99. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/nasm.py +13 -0
  100. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/perl.py +78 -0
  101. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/python.py +303 -0
  102. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/qt4.py +424 -0
  103. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ruby.py +104 -0
  104. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncc.py +54 -0
  105. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncxx.py +55 -0
  106. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/tex.py +222 -0
  107. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/vala.py +215 -0
  108. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/waf_unit_test.py +79 -0
  109. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/winres.py +34 -0
  110. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlc.py +46 -0
  111. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlcxx.py +46 -0
  112. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.py +334 -0
  113. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.pyc +0 -0
  114. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.py +4 -0
  115. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.pyc +0 -0
  116. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.py +173 -0
  117. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.pyc +0 -0
  118. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.py +4 -0
  119. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.pyc +0 -0
  120. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.py +223 -0
  121. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.pyc +0 -0
  122. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.py +50 -0
  123. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.pyc +0 -0
  124. data/vendor/ux-trie/ux-0.1.9/src/bitVec.cpp +119 -0
  125. data/vendor/ux-trie/ux-0.1.9/src/bitVec.hpp +64 -0
  126. data/vendor/ux-trie/ux-0.1.9/src/bitVecTest.cpp +143 -0
  127. data/vendor/ux-trie/ux-0.1.9/src/cmdline.h +809 -0
  128. data/vendor/ux-trie/ux-0.1.9/src/rsDic.cpp +121 -0
  129. data/vendor/ux-trie/ux-0.1.9/src/rsDic.hpp +57 -0
  130. data/vendor/ux-trie/ux-0.1.9/src/ux.hpp +26 -0
  131. data/vendor/ux-trie/ux-0.1.9/src/uxMain.cpp +206 -0
  132. data/vendor/ux-trie/ux-0.1.9/src/uxMap.cpp +0 -0
  133. data/vendor/ux-trie/ux-0.1.9/src/uxMap.hpp +248 -0
  134. data/vendor/ux-trie/ux-0.1.9/src/uxMapTest.cpp +139 -0
  135. data/vendor/ux-trie/ux-0.1.9/src/uxTest.cpp +229 -0
  136. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.cpp +529 -0
  137. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.hpp +220 -0
  138. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.cpp +92 -0
  139. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.hpp +35 -0
  140. data/vendor/ux-trie/ux-0.1.9/src/wscript +43 -0
  141. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.py +0 -0
  142. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.pyc +0 -0
  143. data/vendor/ux-trie/ux-0.1.9/unittestt.py +166 -0
  144. data/vendor/ux-trie/ux-0.1.9/ux.pc.in +10 -0
  145. data/vendor/ux-trie/ux-0.1.9/waf +0 -0
  146. data/vendor/ux-trie/ux-0.1.9/wscript +32 -0
  147. metadata +249 -0
@@ -0,0 +1,121 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <iostream>
21
+ #include <cassert>
22
+ #include "rsDic.hpp"
23
+
24
+ using namespace std;
25
+
26
+ namespace ux {
27
+
28
+ RSDic::RSDic() : size_(0) {
29
+ }
30
+
31
+ RSDic::~RSDic() {
32
+ }
33
+
34
+ void RSDic::build(BitVec& bv){
35
+ size_ = bv.size();
36
+ swap(bitVec_, bv);
37
+ L_.resize((size_ + L_BLOCK-1) / L_BLOCK);
38
+ size_t sum = 0;
39
+ for (uint64_t il = 0; il < size_; il += L_BLOCK){
40
+ L_[il/L_BLOCK] = sum;
41
+ for (uint64_t is = 0; is < L_BLOCK && il + is < size_; is += S_BLOCK){
42
+ sum += popCount(bitVec_.lookupBlock((il + is)/S_BLOCK));
43
+ }
44
+ }
45
+ L_.push_back(sum);
46
+ }
47
+
48
+ uint64_t RSDic::rank(const uint64_t pos, const uint8_t b) const{
49
+ uint64_t pos1 = pos+1;
50
+ uint64_t rank1 = L_[pos1 >> L_SHIFT];
51
+ uint64_t bpos = (pos1 >> L_SHIFT) << (L_SHIFT - S_SHIFT);
52
+ uint64_t epos = pos1 >> S_SHIFT;
53
+ for (uint64_t i = bpos; i < epos; ++i){
54
+ rank1 += popCount(bitVec_.lookupBlock(i));
55
+ }
56
+ rank1 += popCountMasked(bitVec_.lookupBlock(epos), pos1 % S_BLOCK);
57
+
58
+ if (b == 1) return rank1;
59
+ else return pos1 - rank1;
60
+ }
61
+
62
+ uint64_t RSDic::select(const uint64_t pos, const uint8_t b) const{
63
+ uint64_t retPos = 0;
64
+ uint64_t posS = selectOverL(pos, b, retPos);
65
+ return posS * S_BLOCK + selectBlock(retPos, bitVec_.lookupBlock(posS), b);
66
+ }
67
+
68
+ uint64_t RSDic::selectOverL(const uint64_t pos, const uint8_t b, uint64_t& retPos) const {
69
+ uint64_t left = 0;
70
+ uint64_t right = L_.size();
71
+
72
+ retPos = pos;
73
+ while (left < right){
74
+ uint64_t mid = (left + right)/2;
75
+ assert(mid < L_.size());
76
+ if (getBitNum(L_[mid], L_BLOCK * mid, b) < retPos) left = mid+1;
77
+ else right = mid;
78
+ }
79
+ uint64_t posL = (left != 0) ? left - 1 : 0;
80
+ uint64_t posS = posL * S_RATIO;
81
+
82
+ assert(retPos >= getBitNum(L_[posL], L_BLOCK * posL, b));
83
+
84
+ retPos -= getBitNum(L_[posL], L_BLOCK * posL, b);
85
+ for (;;posS++){
86
+ if (posS >= bitVec_.size()) break;
87
+ uint64_t num = getBitNum(popCount(bitVec_.lookupBlock(posS)), S_BLOCK, b);
88
+ if (retPos <= num) break;
89
+ retPos -= num;
90
+ }
91
+ return posS;
92
+ }
93
+
94
+ void RSDic::save(ostream& ofs) const{
95
+ bitVec_.save(ofs);
96
+ }
97
+
98
+ void RSDic::load(istream& ifs) {
99
+ bitVec_.load(ifs);
100
+ build(bitVec_);
101
+ }
102
+
103
+ size_t RSDic::getAllocSize() const {
104
+ return bitVec_.getAllocSize() + sizeof(L_[0]) * L_.size();
105
+ }
106
+
107
+ uint8_t RSDic::getBit(const uint64_t pos) const{
108
+ return bitVec_.getBit(pos);
109
+ }
110
+
111
+ size_t RSDic::size() const {
112
+ return bitVec_.size();
113
+ }
114
+
115
+ void RSDic::clear() {
116
+ bitVec_.clear();
117
+ L_.clear();
118
+ size_ = 0;
119
+ }
120
+
121
+ }
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #ifndef RSDIC_HPP__
21
+ #define RSDIC_HPP__
22
+
23
+ #include <stdint.h>
24
+ #include <vector>
25
+ #include <iostream>
26
+ #include "bitVec.hpp"
27
+ #include "uxUtil.hpp"
28
+
29
+ namespace ux {
30
+
31
+ class RSDic {
32
+ public:
33
+ RSDic();
34
+ ~RSDic();
35
+
36
+ void build(BitVec& bv);
37
+ uint64_t rank(uint64_t pos, uint8_t b) const;
38
+ uint64_t select(uint64_t pos, uint8_t b) const;
39
+
40
+ void save(std::ostream& os) const;
41
+ void load(std::istream& is);
42
+ size_t getAllocSize() const;
43
+ uint8_t getBit(uint64_t pos) const;
44
+ size_t size() const;
45
+ void clear();
46
+
47
+ private:
48
+ uint64_t selectOverL(uint64_t pos, uint8_t b, uint64_t& retPos) const;
49
+
50
+ BitVec bitVec_;
51
+ std::vector<uint64_t> L_;
52
+ size_t size_;
53
+ };
54
+
55
+ }
56
+
57
+ #endif // RSDIC_HPP__
@@ -0,0 +1,26 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #ifndef UX_HPP__
21
+ #define UX_HPP__
22
+
23
+ #include "uxTrie.hpp"
24
+ #include "uxMap.hpp"
25
+
26
+ #endif // UX_HPP__
@@ -0,0 +1,206 @@
1
+ #include <iostream>
2
+ #include <fstream>
3
+ #include <string>
4
+ #include "cmdline.h"
5
+ #include "uxTrie.hpp"
6
+
7
+ using namespace std;
8
+
9
+ #include <time.h>
10
+ #include <sys/time.h>
11
+ #include <stdio.h>
12
+
13
// Current wall-clock time from gettimeofday(), expressed in seconds
// (whole seconds plus the microsecond part as a fraction).
double gettimeofday_sec()
{
  struct timeval now;
  gettimeofday(&now, NULL);
  const double fraction = (double)now.tv_usec * 1e-6;
  return now.tv_sec + fraction;
}
19
+
20
// Print the average length of the common prefix shared by each pair of
// consecutive keys (sum of the shared prefix lengths divided by the number
// of keys).
//
// keyList: the keys, in the order they should be compared.
//
// An empty list prints 0 rather than the NaN produced by dividing 0 by 0.
void analyzeKeyList(const std::vector<std::string>& keyList){
  if (keyList.empty()){
    std::cout << " avelcs:\t" << 0.0f << std::endl;
    return;
  }

  size_t lcs = 0;
  for (size_t i = 1; i < keyList.size(); ++i){
    const std::string& prev = keyList[i-1];
    const std::string& cur  = keyList[i];
    // Length of the common prefix of the two neighbouring keys.
    size_t j = 0;
    while (j < prev.size() && j < cur.size() && prev[j] == cur[j]){
      ++j;
    }
    lcs += j;
  }
  std::cout << " avelcs:\t" << (float)lcs / keyList.size() << std::endl;
}
31
+
32
+
33
// Sum of the lengths (in bytes) of every key in keyList.
size_t allKeySize(const std::vector<std::string>& keyList){
  size_t total = 0;
  for (std::vector<std::string>::const_iterator it = keyList.begin();
       it != keyList.end(); ++it){
    total += it->size();
  }
  return total;
}
40
+
41
+ void reportStat(const ux::Trie& ux, const vector<string>& keyList){
42
+ ux.allocStat(ux.getAllocSize(), cout);
43
+ ux.stat(cout);
44
+ analyzeKeyList(keyList);
45
+ size_t originalSize = allKeySize(keyList);
46
+ cout << "originalSize:\t" << allKeySize(keyList) << endl
47
+ << " indexSize:\t" << ux.getAllocSize() << " (" << (float)ux.getAllocSize() / originalSize << ")" << endl
48
+ << " keyNum:\t" << keyList.size() << endl;
49
+ }
50
+
51
+ void printQuery(const ux::Trie& ux,
52
+ const std::string& query,
53
+ const int limit){
54
+ cout << "query:[" << query << "]" << endl;
55
+
56
+ // prefixSearch
57
+ size_t retLen = 0;
58
+ cout << "prefixSearch: ";
59
+ ux::id_t id = ux.prefixSearch(query.c_str(), query.size(), retLen);
60
+ if (id == ux::NOTFOUND){
61
+ cout << "not found." << endl;
62
+ } else {
63
+ cout << ux.decodeKey(id) << "\t(id=" << id << ")" << endl;
64
+ }
65
+
66
+ vector<ux::id_t> retIDs;
67
+ // commonPrefixSearch
68
+ ux.commonPrefixSearch(query.c_str(), query.size(), retIDs, (size_t)limit);
69
+ cout << "commonPrefixSearch: " << retIDs.size() << " found." << endl;
70
+ for (size_t i = 0; i < retIDs.size(); ++i){
71
+ cout << ux.decodeKey(retIDs[i]) << "\t(id=" << retIDs[i] << ")" << endl;
72
+ }
73
+
74
+ // predictiveSearch
75
+ ux.predictiveSearch(query.c_str(), query.size(), retIDs, (size_t)limit);
76
+ cout << "predictiveSearch: " << retIDs.size() << " found." << endl;
77
+ for (size_t i = 0; i < retIDs.size(); ++i){
78
+ cout << ux.decodeKey(retIDs[i]) << "\t(id=" << retIDs[i] << ")" << endl;
79
+ }
80
+ }
81
+
82
+
83
// Read a key list file, one key per line, appending each line to keyList.
// A trailing '\r' (CRLF line ending) is removed from a line before it is
// stored; empty lines are stored as empty keys.
//
// fn:      path of the file to read
// keyList: receives the keys (existing content is kept)
// Returns 0 on success, or -1 (after reporting on stderr) when the file
// cannot be opened.
int readKeyList(const std::string& fn, std::vector<std::string>& keyList){
  std::ifstream in(fn.c_str());
  if (in.fail()){
    std::cerr << "cannot open " << fn << std::endl;
    return -1;
  }

  std::string line;
  while (std::getline(in, line)){
    const bool endsWithCR = !line.empty() && line[line.size() - 1] == '\r';
    if (endsWithCR){
      line.erase(line.size() - 1);
    }
    keyList.push_back(line);
  }
  return 0;
}
99
+
100
+ void performanceTest(ux::Trie& ux, vector<string>& keyList){
101
+ random_shuffle(keyList.begin(), keyList.end());
102
+
103
+ double start = gettimeofday_sec();
104
+ size_t dummy = 0;
105
+ for (size_t i = 0; i < keyList.size() && i < 1000; ++i){
106
+ size_t retLen = 0;
107
+ dummy += ux.prefixSearch(keyList[i].c_str(), keyList[i].size(), retLen);
108
+ }
109
+ double end = gettimeofday_sec();
110
+ cout << " query time:\t" << end - start << endl;
111
+ cout << " check keys:\t" << min((int)keyList.size(), 1000) << endl;
112
+
113
+ if (dummy == 777){
114
+ cerr << "luckey" << endl;
115
+ }
116
+ }
117
+
118
+ int buildUX(const string& fn, const string& index, const bool uncompress, const int verbose){
119
+ vector<string> keyList;
120
+ if (readKeyList(fn, keyList) == -1){
121
+ return -1;
122
+ }
123
+ ux::Trie ux;
124
+ double start = gettimeofday_sec();
125
+ ux.build(keyList, !uncompress);
126
+ double elapsedTime = gettimeofday_sec() - start;
127
+ if (verbose >= 1){
128
+ cout << " index time:\t" << elapsedTime << endl;
129
+ reportStat(ux, keyList);
130
+ }
131
+ if (verbose >= 2){
132
+ performanceTest(ux, keyList);
133
+ }
134
+
135
+ if (index == "") return 0;
136
+ int err = ux.save(index.c_str());
137
+ if (err != ux::Trie::SUCCESS){
138
+ cerr << ux.what(err) << " " << index << endl;
139
+ return -1;
140
+ }
141
+ return 0;
142
+ }
143
+
144
+ int searchUX(const string& index, const int limit){
145
+ ux::Trie ux;
146
+ int err = ux.load(index.c_str());
147
+ if (err != ux::Trie::SUCCESS){
148
+ cerr << ux.what(err) << " " << index << endl;
149
+ return -1;
150
+ }
151
+ cout << "read:" << ux.size() << " keys" << endl;
152
+
153
+ string query;
154
+ for (;;){
155
+ putchar('>');
156
+ getline(cin, query);
157
+ if (query.size() == 0){
158
+ break;
159
+ }
160
+ printQuery(ux, query, limit);
161
+ }
162
+
163
+ return 0;
164
+ }
165
+
166
+ int listUX(const string& index){
167
+ ux::Trie ux;
168
+ int err = ux.load(index.c_str());
169
+ if (err != ux::Trie::SUCCESS){
170
+ cerr << ux.what(err) << " " << index << endl;
171
+ return -1;
172
+ }
173
+
174
+ for (size_t i = 0; i < ux.size(); ++i){
175
+ cout << ux.decodeKey(i) << endl;
176
+ }
177
+ return 0;
178
+ }
179
+
180
+
181
+ int main(int argc, char* argv[]){
182
+ cmdline::parser p;
183
+ p.add<string>("keylist", 'k', "key list", false);
184
+ p.add<string>("index", 'i', "index", true);
185
+ p.add<int> ("limit", 'l', "limit at search", false, 10);
186
+ p.add ("uncompress", 'u', "tail is uncompressed");
187
+ p.add ("enumerate", 'e', "enumerate all keywords");
188
+ p.add<int> ("verbose", 'v', "verbose mode", 0);
189
+ p.add("help", 'h', "this message");
190
+ p.set_program_name("ux");
191
+
192
+ if (!p.parse(argc, argv) || p.exist("help")){
193
+ cerr << p.usage() << endl;
194
+ return -1;
195
+ }
196
+
197
+ if (p.exist("keylist")){
198
+ return buildUX(p.get<string>("keylist"), p.get<string>("index"), p.exist("uncompress"), p.get<int>("verbose"));
199
+ } else if (p.exist("enumerate")){
200
+ return listUX(p.get<string>("index"));
201
+ } else {
202
+ return searchUX(p.get<string>("index"), p.get<int>("limit"));
203
+ }
204
+
205
+ return 0; // NOT COME
206
+ }
File without changes
@@ -0,0 +1,248 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #ifndef UX_MAP_HPP__
21
+ #define UX_MAP_HPP__
22
+
23
+ #include <vector>
24
+ #include <cassert>
25
+ #include <iostream>
26
+ #include <map>
27
+ #include <string>
28
+ #include "uxTrie.hpp"
29
+
30
+ namespace ux{
31
+
32
+ /**
33
+ * Succict Map using UX
34
+ */
35
+ template <class V>
36
+ class Map{
37
+ public:
38
+ /**
39
+ * Constructor
40
+ */
41
+ Map() : size_(0){}
42
+
43
+ /**
44
+ * Destructor
45
+ */
46
+ ~Map() {}
47
+
48
+ /**
49
+ * Build a map without values
50
+ * @param keys keys to be associated
51
+ */
52
+ void build(std::vector<std::string>& keys){
53
+ trie_.build(keys);
54
+ vs_.resize(trie_.size());
55
+ }
56
+
57
+ /**
58
+ * Build a map from std::map
59
+ * @param m A std::map as an input
60
+ */
61
+ void build(const std::map<std::string, V>& m){
62
+ std::vector<std::string> wordList;
63
+ for (typename std::map<std::string, V>::const_iterator it =
64
+ m.begin(); it != m.end(); ++it){
65
+ wordList.push_back(it->first);
66
+ }
67
+ trie_.build(wordList);
68
+ vs_.resize(wordList.size());
69
+ for (typename std::map<std::string, V>::const_iterator it =
70
+ m.begin(); it != m.end(); ++it){
71
+ const std::string key = it->first;
72
+ if (set(key.c_str(), key.size(), it->second) != 0){
73
+ return;
74
+ }
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Build a map from the vector of the pair of a key and a value
80
+ * @param kvs A vector of the pair of a key and vlaue
81
+ */
82
+ void build(const std::vector< std::pair<std::string, V> >& kvs){
83
+ std::vector<std::string> wordList;
84
+ for (size_t i = 0; i < kvs.size(); ++i){
85
+ wordList.push_back(kvs[i].first);
86
+ }
87
+
88
+ trie_.build(wordList);
89
+ vs_.resize(wordList.size());
90
+
91
+ for (size_t i = 0; i < kvs.size(); ++i){
92
+ const std::string key = kvs[i].first;
93
+ assert(set(key.c_str(), key.size(), kvs[i].second) == 0);
94
+ }
95
+ }
96
+
97
+ /**
98
+ * Get a value for a given key
99
+ * @param str the key
100
+ * @param len the length of str
101
+ * @param v An associated value for a key
102
+ * @return 0 on success and -1 if not found
103
+ */
104
+ int get(const char* str, size_t len, V& v) const {
105
+ size_t retLen = 0;
106
+ id_t id = trie_.prefixSearch(str, len, retLen);
107
+ if (id == NOTFOUND){
108
+ return -1;
109
+ }
110
+ v = vs_[id];
111
+ return 0;
112
+ }
113
+
114
+ /**
115
+ * Set a value for a given key
116
+ * @param str the key
117
+ * @param len the length of str
118
+ * @param v A value to be associated for a key
119
+ * @return 0 on success and -1 if not found
120
+ */
121
+ int set(const char* str, size_t len, const V& v){
122
+ size_t retLen = 0;
123
+ id_t id = trie_.prefixSearch(str, len, retLen);
124
+ if (id == NOTFOUND){
125
+ return -1;
126
+ }
127
+ vs_[id] = v;
128
+ return 0;
129
+ }
130
+
131
+ /**
132
+ * Return the longest key that matches the prefix of the query in the dictionary
133
+ * @param str the query
134
+ * @param len the length of the query
135
+ * @param retLen The length of the matched key in the dictionary
136
+ * @param v The associated value for the key
137
+ * @return 0 if found and -1 if not found
138
+ */
139
+ int prefixSearch(const char* str, size_t len, size_t& retLen, V& v) const {
140
+ id_t id = trie_.prefixSearch(str, len, retLen);
141
+ if (id == NOTFOUND){
142
+ return -1;
143
+ }
144
+ v = vs_[id];
145
+ return 0;
146
+ }
147
+
148
+ /**
149
+ * Return the all associated values that match the prefix of the query in the dictionary
150
+ * @param str the query
151
+ * @param len the length of the query
152
+ * @param vs The returned values associated for the input key
153
+ * @param limit The maximum number of matched keys
154
+ * @return The number of matched keys
155
+ */
156
+ size_t commonPrefixSearch(const char* str, size_t len, std::vector<V>& vs, size_t limit = LIMIT_DEFAULT) const {
157
+ vs.clear();
158
+ std::vector<id_t> retIDs;
159
+ commonPrefixSearch(str, len, retIDs, limit);
160
+ vs.resize(retIDs.size());
161
+ for (size_t i = 0; i < retIDs.size(); ++i){
162
+ vs[i] = vs_[retIDs[i]];
163
+ }
164
+ return vs.size();
165
+ }
166
+
167
+ /**
168
+ * Return the all keys whose their prefixes match the query
169
+ * @param str the query
170
+ * @param len the length of the query
171
+ * @param vs The associated values for the input key
172
+ * @param limit The maximum number of matched keys
173
+ * @return The number of matched keys
174
+ */
175
+ size_t predictiveSearch(const char* str, size_t len, std::vector<V>& vs, size_t limit = LIMIT_DEFAULT) const {
176
+ vs.clear();
177
+ std::vector<id_t> retIDs;
178
+ predictiveSearch(str, len, retIDs, limit);
179
+ vs.resize(retIDs.size());
180
+ for (size_t i = 0; i < retIDs.size(); ++i){
181
+ vs[i] = vs_[retIDs[i]];
182
+ }
183
+ return vs.size();
184
+ }
185
+
186
+ /**
187
+ * Return the key for the given ID
188
+ * @param id The ID of the key
189
+ * @param ret The key for the given ID or empty if such ID does not exist
190
+ */
191
+ void decodeKey(const size_t ind, std::string& ret) const {
192
+ trie_.decodeKey(ind, ret);
193
+ }
194
+
195
+ /**
196
+ * Save the map in ostream
197
+ * @param os The ostream as an output
198
+ * @return 0 on success, -1 on failure
199
+ */
200
+ int save(std::ostream& os) const {
201
+ trie_.save(os);
202
+ size_t vsSize = vs_.size();
203
+ os.write((const char*)&vsSize, sizeof(vsSize));
204
+ os.write((const char*)&vs_[0], sizeof(vs_[0]) * vs_.size());
205
+ if (!os){
206
+ return -1;
207
+ } else {
208
+ return 0;
209
+ }
210
+ }
211
+
212
+ /**
213
+ * Load the map from istream
214
+ * @param is The istream as an input
215
+ * @return 0 on success, -1 on failure
216
+ */
217
+ int load(std::istream& is){
218
+ trie_.load(is);
219
+ size_t vsSize = 0;
220
+ is.read((char*)&vsSize, sizeof(vsSize));
221
+ vs_.resize(vsSize);
222
+ is.read((char*)&vs_[0], sizeof(vs_[0]) * vs_.size());
223
+ if (!is){
224
+ return -1;
225
+ } else {
226
+ return 0;
227
+ }
228
+ }
229
+
230
+ /**
231
+ * Get the number of keys
232
+ * @return the number of keys
233
+ */
234
+ size_t size() const {
235
+ return trie_.size();
236
+ }
237
+
238
+ private:
239
+ Trie trie_;
240
+ std::vector<V> vs_;
241
+ size_t size_;
242
+ };
243
+
244
+
245
+ }
246
+
247
+
248
+ #endif // UX_MAP_HPP__