text_ux 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +25 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +51 -0
  7. data/Rakefile +18 -0
  8. data/ext/text_ux/extconf.rb +5 -0
  9. data/ext/text_ux/text_ux.cpp +252 -0
  10. data/lib/text_ux.rb +5 -0
  11. data/lib/text_ux/version.rb +3 -0
  12. data/spec/fixtures/test.ux +0 -0
  13. data/spec/spec_helper.rb +19 -0
  14. data/spec/text_ux_spec.rb +137 -0
  15. data/text_ux.gemspec +26 -0
  16. data/vendor/ux-trie/ux-0.1.9/.lock-waf_darwin_build +8 -0
  17. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest-all.cc +9118 -0
  18. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest.h +19537 -0
  19. data/vendor/ux-trie/ux-0.1.9/.unittest-gtest/gtest-1.6.0/fused-src/gtest/gtest_main.cc +39 -0
  20. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.py +733 -0
  21. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Build.pyc +0 -0
  22. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.py +147 -0
  23. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ConfigSet.pyc +0 -0
  24. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.py +314 -0
  25. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Configure.pyc +0 -0
  26. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.py +298 -0
  27. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Context.pyc +0 -0
  28. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.py +37 -0
  29. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Errors.pyc +0 -0
  30. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.py +149 -0
  31. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Logs.pyc +0 -0
  32. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.py +500 -0
  33. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Node.pyc +0 -0
  34. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.py +130 -0
  35. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Options.pyc +0 -0
  36. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.py +191 -0
  37. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Runner.pyc +0 -0
  38. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.py +358 -0
  39. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Scripting.pyc +0 -0
  40. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.py +669 -0
  41. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Task.pyc +0 -0
  42. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.py +341 -0
  43. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/TaskGen.pyc +0 -0
  44. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.py +4 -0
  45. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/__init__.pyc +0 -0
  46. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.py +12 -0
  47. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ar.pyc +0 -0
  48. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/asm.py +25 -0
  49. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/bison.py +29 -0
  50. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c.py +27 -0
  51. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.py +56 -0
  52. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_aliases.pyc +0 -0
  53. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.py +708 -0
  54. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_config.pyc +0 -0
  55. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.py +121 -0
  56. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_osx.pyc +0 -0
  57. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.py +606 -0
  58. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_preproc.pyc +0 -0
  59. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.py +110 -0
  60. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/c_tests.pyc +0 -0
  61. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.py +372 -0
  62. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ccroot.pyc +0 -0
  63. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_c.py +39 -0
  64. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.py +39 -0
  65. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_cxx.pyc +0 -0
  66. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_d.py +30 -0
  67. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/compiler_fc.py +45 -0
  68. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cs.py +98 -0
  69. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.py +27 -0
  70. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/cxx.pyc +0 -0
  71. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d.py +51 -0
  72. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_config.py +47 -0
  73. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/d_scan.py +133 -0
  74. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dbus.py +30 -0
  75. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/dmd.py +43 -0
  76. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/errcheck.py +153 -0
  77. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc.py +123 -0
  78. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_config.py +271 -0
  79. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/fc_scan.py +68 -0
  80. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/flex.py +27 -0
  81. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/g95.py +55 -0
  82. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gas.py +10 -0
  83. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gcc.py +98 -0
  84. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gdc.py +34 -0
  85. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gfortran.py +69 -0
  86. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/glib2.py +174 -0
  87. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gnu_dirs.py +65 -0
  88. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.py +98 -0
  89. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/gxx.pyc +0 -0
  90. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icc.py +31 -0
  91. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/icpc.py +30 -0
  92. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ifort.py +42 -0
  93. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/intltool.py +78 -0
  94. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/irixcc.py +49 -0
  95. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/javaw.py +272 -0
  96. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/kde4.py +49 -0
  97. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/lua.py +19 -0
  98. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/msvc.py +650 -0
  99. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/nasm.py +13 -0
  100. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/perl.py +78 -0
  101. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/python.py +303 -0
  102. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/qt4.py +424 -0
  103. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/ruby.py +104 -0
  104. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncc.py +54 -0
  105. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/suncxx.py +55 -0
  106. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/tex.py +222 -0
  107. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/vala.py +215 -0
  108. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/waf_unit_test.py +79 -0
  109. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/winres.py +34 -0
  110. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlc.py +46 -0
  111. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Tools/xlcxx.py +46 -0
  112. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.py +334 -0
  113. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/Utils.pyc +0 -0
  114. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.py +4 -0
  115. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/__init__.pyc +0 -0
  116. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.py +173 -0
  117. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/ansiterm.pyc +0 -0
  118. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.py +4 -0
  119. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/__init__.pyc +0 -0
  120. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.py +223 -0
  121. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/extras/compat15.pyc +0 -0
  122. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.py +50 -0
  123. data/vendor/ux-trie/ux-0.1.9/.waf-1.6.8-3e3391c5f23fbabad81e6d17c63a1b1e/waflib/fixpy2.pyc +0 -0
  124. data/vendor/ux-trie/ux-0.1.9/src/bitVec.cpp +119 -0
  125. data/vendor/ux-trie/ux-0.1.9/src/bitVec.hpp +64 -0
  126. data/vendor/ux-trie/ux-0.1.9/src/bitVecTest.cpp +143 -0
  127. data/vendor/ux-trie/ux-0.1.9/src/cmdline.h +809 -0
  128. data/vendor/ux-trie/ux-0.1.9/src/rsDic.cpp +121 -0
  129. data/vendor/ux-trie/ux-0.1.9/src/rsDic.hpp +57 -0
  130. data/vendor/ux-trie/ux-0.1.9/src/ux.hpp +26 -0
  131. data/vendor/ux-trie/ux-0.1.9/src/uxMain.cpp +206 -0
  132. data/vendor/ux-trie/ux-0.1.9/src/uxMap.cpp +0 -0
  133. data/vendor/ux-trie/ux-0.1.9/src/uxMap.hpp +248 -0
  134. data/vendor/ux-trie/ux-0.1.9/src/uxMapTest.cpp +139 -0
  135. data/vendor/ux-trie/ux-0.1.9/src/uxTest.cpp +229 -0
  136. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.cpp +529 -0
  137. data/vendor/ux-trie/ux-0.1.9/src/uxTrie.hpp +220 -0
  138. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.cpp +92 -0
  139. data/vendor/ux-trie/ux-0.1.9/src/uxUtil.hpp +35 -0
  140. data/vendor/ux-trie/ux-0.1.9/src/wscript +43 -0
  141. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.py +0 -0
  142. data/vendor/ux-trie/ux-0.1.9/unittest_gtest.pyc +0 -0
  143. data/vendor/ux-trie/ux-0.1.9/unittestt.py +166 -0
  144. data/vendor/ux-trie/ux-0.1.9/ux.pc.in +10 -0
  145. data/vendor/ux-trie/ux-0.1.9/waf +0 -0
  146. data/vendor/ux-trie/ux-0.1.9/wscript +32 -0
  147. metadata +249 -0
@@ -0,0 +1,121 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #include <iostream>
21
+ #include <cassert>
22
+ #include "rsDic.hpp"
23
+
24
+ using namespace std;
25
+
26
+ namespace ux {
27
+
28
+ RSDic::RSDic() : size_(0) {
29
+ }
30
+
31
+ RSDic::~RSDic() {
32
+ }
33
+
34
+ void RSDic::build(BitVec& bv){
35
+ size_ = bv.size();
36
+ swap(bitVec_, bv);
37
+ L_.resize((size_ + L_BLOCK-1) / L_BLOCK);
38
+ size_t sum = 0;
39
+ for (uint64_t il = 0; il < size_; il += L_BLOCK){
40
+ L_[il/L_BLOCK] = sum;
41
+ for (uint64_t is = 0; is < L_BLOCK && il + is < size_; is += S_BLOCK){
42
+ sum += popCount(bitVec_.lookupBlock((il + is)/S_BLOCK));
43
+ }
44
+ }
45
+ L_.push_back(sum);
46
+ }
47
+
48
+ uint64_t RSDic::rank(const uint64_t pos, const uint8_t b) const{
49
+ uint64_t pos1 = pos+1;
50
+ uint64_t rank1 = L_[pos1 >> L_SHIFT];
51
+ uint64_t bpos = (pos1 >> L_SHIFT) << (L_SHIFT - S_SHIFT);
52
+ uint64_t epos = pos1 >> S_SHIFT;
53
+ for (uint64_t i = bpos; i < epos; ++i){
54
+ rank1 += popCount(bitVec_.lookupBlock(i));
55
+ }
56
+ rank1 += popCountMasked(bitVec_.lookupBlock(epos), pos1 % S_BLOCK);
57
+
58
+ if (b == 1) return rank1;
59
+ else return pos1 - rank1;
60
+ }
61
+
62
+ uint64_t RSDic::select(const uint64_t pos, const uint8_t b) const{
63
+ uint64_t retPos = 0;
64
+ uint64_t posS = selectOverL(pos, b, retPos);
65
+ return posS * S_BLOCK + selectBlock(retPos, bitVec_.lookupBlock(posS), b);
66
+ }
67
+
68
+ uint64_t RSDic::selectOverL(const uint64_t pos, const uint8_t b, uint64_t& retPos) const {
69
+ uint64_t left = 0;
70
+ uint64_t right = L_.size();
71
+
72
+ retPos = pos;
73
+ while (left < right){
74
+ uint64_t mid = (left + right)/2;
75
+ assert(mid < L_.size());
76
+ if (getBitNum(L_[mid], L_BLOCK * mid, b) < retPos) left = mid+1;
77
+ else right = mid;
78
+ }
79
+ uint64_t posL = (left != 0) ? left - 1 : 0;
80
+ uint64_t posS = posL * S_RATIO;
81
+
82
+ assert(retPos >= getBitNum(L_[posL], L_BLOCK * posL, b));
83
+
84
+ retPos -= getBitNum(L_[posL], L_BLOCK * posL, b);
85
+ for (;;posS++){
86
+ if (posS >= bitVec_.size()) break;
87
+ uint64_t num = getBitNum(popCount(bitVec_.lookupBlock(posS)), S_BLOCK, b);
88
+ if (retPos <= num) break;
89
+ retPos -= num;
90
+ }
91
+ return posS;
92
+ }
93
+
94
+ void RSDic::save(ostream& ofs) const{
95
+ bitVec_.save(ofs);
96
+ }
97
+
98
+ void RSDic::load(istream& ifs) {
99
+ bitVec_.load(ifs);
100
+ build(bitVec_);
101
+ }
102
+
103
+ size_t RSDic::getAllocSize() const {
104
+ return bitVec_.getAllocSize() + sizeof(L_[0]) * L_.size();
105
+ }
106
+
107
+ uint8_t RSDic::getBit(const uint64_t pos) const{
108
+ return bitVec_.getBit(pos);
109
+ }
110
+
111
+ size_t RSDic::size() const {
112
+ return bitVec_.size();
113
+ }
114
+
115
+ void RSDic::clear() {
116
+ bitVec_.clear();
117
+ L_.clear();
118
+ size_ = 0;
119
+ }
120
+
121
+ }
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #ifndef RSDIC_HPP__
21
+ #define RSDIC_HPP__
22
+
23
+ #include <stdint.h>
24
+ #include <vector>
25
+ #include <iostream>
26
+ #include "bitVec.hpp"
27
+ #include "uxUtil.hpp"
28
+
29
+ namespace ux {
30
+
31
+ class RSDic {
32
+ public:
33
+ RSDic();
34
+ ~RSDic();
35
+
36
+ void build(BitVec& bv);
37
+ uint64_t rank(uint64_t pos, uint8_t b) const;
38
+ uint64_t select(uint64_t pos, uint8_t b) const;
39
+
40
+ void save(std::ostream& os) const;
41
+ void load(std::istream& is);
42
+ size_t getAllocSize() const;
43
+ uint8_t getBit(uint64_t pos) const;
44
+ size_t size() const;
45
+ void clear();
46
+
47
+ private:
48
+ uint64_t selectOverL(uint64_t pos, uint8_t b, uint64_t& retPos) const;
49
+
50
+ BitVec bitVec_;
51
+ std::vector<uint64_t> L_;
52
+ size_t size_;
53
+ };
54
+
55
+ }
56
+
57
+ #endif // RSDIC_HPP__
@@ -0,0 +1,26 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #ifndef UX_HPP__
21
+ #define UX_HPP__
22
+
23
+ #include "uxTrie.hpp"
24
+ #include "uxMap.hpp"
25
+
26
+ #endif // UX_HPP__
@@ -0,0 +1,206 @@
1
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include "cmdline.h"
#include "uxTrie.hpp"
6
+
7
+ using namespace std;
8
+
9
+ #include <time.h>
10
+ #include <sys/time.h>
11
+ #include <stdio.h>
12
+
13
/**
 * Current wall-clock time from gettimeofday(), as seconds with a fractional
 * microsecond part.
 */
double gettimeofday_sec()
{
  struct timeval now;
  gettimeofday(&now, NULL);
  const double fraction = (double)now.tv_usec*1e-6;
  return now.tv_sec + fraction;
}
19
+
20
/**
 * Print the average length of the common prefix shared by neighbouring keys.
 *
 * The prefix lengths of all adjacent pairs (i-1, i) are summed and divided by
 * the number of keys (not the number of pairs). An empty list prints 0
 * instead of the NaN that a 0/0 division would produce.
 *
 * @param keyList keys in the order in which neighbours are compared
 */
void analyzeKeyList(const std::vector<std::string>& keyList){
  size_t lcs = 0;
  for (size_t i = 1; i < keyList.size(); ++i){
    const std::string& s1 = keyList[i-1];
    const std::string& s2 = keyList[i];
    size_t j = 0;
    while (j < s1.size() && j < s2.size() && s1[j] == s2[j]){
      ++j;
    }
    lcs += j;
  }
  // Guard the division: with no keys there is nothing to average.
  const float average = keyList.empty() ? 0.0f : (float)lcs / keyList.size();
  std::cout << "      avelcs:\t" << average << std::endl;
}
31
+
32
+
33
/**
 * Sum of the lengths of all keys.
 *
 * @param keyList keys to measure
 * @return total number of characters over all keys
 */
size_t allKeySize(const std::vector<std::string>& keyList){
  size_t total = 0;
  for (std::vector<std::string>::const_iterator it = keyList.begin();
       it != keyList.end(); ++it){
    total += it->size();
  }
  return total;
}
40
+
41
+ void reportStat(const ux::Trie& ux, const vector<string>& keyList){
42
+ ux.allocStat(ux.getAllocSize(), cout);
43
+ ux.stat(cout);
44
+ analyzeKeyList(keyList);
45
+ size_t originalSize = allKeySize(keyList);
46
+ cout << "originalSize:\t" << allKeySize(keyList) << endl
47
+ << " indexSize:\t" << ux.getAllocSize() << " (" << (float)ux.getAllocSize() / originalSize << ")" << endl
48
+ << " keyNum:\t" << keyList.size() << endl;
49
+ }
50
+
51
+ void printQuery(const ux::Trie& ux,
52
+ const std::string& query,
53
+ const int limit){
54
+ cout << "query:[" << query << "]" << endl;
55
+
56
+ // prefixSearch
57
+ size_t retLen = 0;
58
+ cout << "prefixSearch: ";
59
+ ux::id_t id = ux.prefixSearch(query.c_str(), query.size(), retLen);
60
+ if (id == ux::NOTFOUND){
61
+ cout << "not found." << endl;
62
+ } else {
63
+ cout << ux.decodeKey(id) << "\t(id=" << id << ")" << endl;
64
+ }
65
+
66
+ vector<ux::id_t> retIDs;
67
+ // commonPrefixSearch
68
+ ux.commonPrefixSearch(query.c_str(), query.size(), retIDs, (size_t)limit);
69
+ cout << "commonPrefixSearch: " << retIDs.size() << " found." << endl;
70
+ for (size_t i = 0; i < retIDs.size(); ++i){
71
+ cout << ux.decodeKey(retIDs[i]) << "\t(id=" << retIDs[i] << ")" << endl;
72
+ }
73
+
74
+ // predictiveSearch
75
+ ux.predictiveSearch(query.c_str(), query.size(), retIDs, (size_t)limit);
76
+ cout << "predictiveSearch: " << retIDs.size() << " found." << endl;
77
+ for (size_t i = 0; i < retIDs.size(); ++i){
78
+ cout << ux.decodeKey(retIDs[i]) << "\t(id=" << retIDs[i] << ")" << endl;
79
+ }
80
+ }
81
+
82
+
83
/**
 * Append every line of a text file to keyList, one key per line.
 *
 * A trailing carriage return is stripped from each line, so files with
 * Windows line endings yield the same keys. Empty lines are kept as empty keys.
 *
 * @param fn      path of the file to read
 * @param keyList receives the keys (existing entries are kept)
 * @return 0 on success, -1 if the file cannot be opened
 */
int readKeyList(const std::string& fn, std::vector<std::string>& keyList){
  std::ifstream ifs(fn.c_str());
  if (!ifs){
    std::cerr << "cannot open " << fn << std::endl;
    return -1;
  }

  std::string line;
  while (std::getline(ifs, line)){
    const bool endsWithCR = !line.empty() && line[line.size()-1] == '\r';
    if (endsWithCR){
      line.erase(line.size()-1);
    }
    keyList.push_back(line);
  }
  return 0;
}
99
+
100
+ void performanceTest(ux::Trie& ux, vector<string>& keyList){
101
+ random_shuffle(keyList.begin(), keyList.end());
102
+
103
+ double start = gettimeofday_sec();
104
+ size_t dummy = 0;
105
+ for (size_t i = 0; i < keyList.size() && i < 1000; ++i){
106
+ size_t retLen = 0;
107
+ dummy += ux.prefixSearch(keyList[i].c_str(), keyList[i].size(), retLen);
108
+ }
109
+ double end = gettimeofday_sec();
110
+ cout << " query time:\t" << end - start << endl;
111
+ cout << " check keys:\t" << min((int)keyList.size(), 1000) << endl;
112
+
113
+ if (dummy == 777){
114
+ cerr << "luckey" << endl;
115
+ }
116
+ }
117
+
118
+ int buildUX(const string& fn, const string& index, const bool uncompress, const int verbose){
119
+ vector<string> keyList;
120
+ if (readKeyList(fn, keyList) == -1){
121
+ return -1;
122
+ }
123
+ ux::Trie ux;
124
+ double start = gettimeofday_sec();
125
+ ux.build(keyList, !uncompress);
126
+ double elapsedTime = gettimeofday_sec() - start;
127
+ if (verbose >= 1){
128
+ cout << " index time:\t" << elapsedTime << endl;
129
+ reportStat(ux, keyList);
130
+ }
131
+ if (verbose >= 2){
132
+ performanceTest(ux, keyList);
133
+ }
134
+
135
+ if (index == "") return 0;
136
+ int err = ux.save(index.c_str());
137
+ if (err != ux::Trie::SUCCESS){
138
+ cerr << ux.what(err) << " " << index << endl;
139
+ return -1;
140
+ }
141
+ return 0;
142
+ }
143
+
144
+ int searchUX(const string& index, const int limit){
145
+ ux::Trie ux;
146
+ int err = ux.load(index.c_str());
147
+ if (err != ux::Trie::SUCCESS){
148
+ cerr << ux.what(err) << " " << index << endl;
149
+ return -1;
150
+ }
151
+ cout << "read:" << ux.size() << " keys" << endl;
152
+
153
+ string query;
154
+ for (;;){
155
+ putchar('>');
156
+ getline(cin, query);
157
+ if (query.size() == 0){
158
+ break;
159
+ }
160
+ printQuery(ux, query, limit);
161
+ }
162
+
163
+ return 0;
164
+ }
165
+
166
+ int listUX(const string& index){
167
+ ux::Trie ux;
168
+ int err = ux.load(index.c_str());
169
+ if (err != ux::Trie::SUCCESS){
170
+ cerr << ux.what(err) << " " << index << endl;
171
+ return -1;
172
+ }
173
+
174
+ for (size_t i = 0; i < ux.size(); ++i){
175
+ cout << ux.decodeKey(i) << endl;
176
+ }
177
+ return 0;
178
+ }
179
+
180
+
181
+ int main(int argc, char* argv[]){
182
+ cmdline::parser p;
183
+ p.add<string>("keylist", 'k', "key list", false);
184
+ p.add<string>("index", 'i', "index", true);
185
+ p.add<int> ("limit", 'l', "limit at search", false, 10);
186
+ p.add ("uncompress", 'u', "tail is uncompressed");
187
+ p.add ("enumerate", 'e', "enumerate all keywords");
188
+ p.add<int> ("verbose", 'v', "verbose mode", 0);
189
+ p.add("help", 'h', "this message");
190
+ p.set_program_name("ux");
191
+
192
+ if (!p.parse(argc, argv) || p.exist("help")){
193
+ cerr << p.usage() << endl;
194
+ return -1;
195
+ }
196
+
197
+ if (p.exist("keylist")){
198
+ return buildUX(p.get<string>("keylist"), p.get<string>("index"), p.exist("uncompress"), p.get<int>("verbose"));
199
+ } else if (p.exist("enumerate")){
200
+ return listUX(p.get<string>("index"));
201
+ } else {
202
+ return searchUX(p.get<string>("index"), p.get<int>("limit"));
203
+ }
204
+
205
+ return 0; // NOT COME
206
+ }
File without changes
@@ -0,0 +1,248 @@
1
+ /*
2
+ * Copyright (c) 2010 Daisuke Okanohara
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions
6
+ * are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above Copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above Copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ *
15
+ * 3. Neither the name of the authors nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this
17
+ * software without specific prior written permission.
18
+ */
19
+
20
+ #ifndef UX_MAP_HPP__
21
+ #define UX_MAP_HPP__
22
+
23
+ #include <vector>
24
+ #include <cassert>
25
+ #include <iostream>
26
+ #include <map>
27
+ #include <string>
28
+ #include "uxTrie.hpp"
29
+
30
+ namespace ux{
31
+
32
+ /**
33
+ * Succinct Map using UX
34
+ */
35
+ template <class V>
36
+ class Map{
37
+ public:
38
+ /**
39
+ * Constructor
40
+ */
41
+ Map() : size_(0){}
42
+
43
+ /**
44
+ * Destructor
45
+ */
46
+ ~Map() {}
47
+
48
+ /**
49
+ * Build a map without values
50
+ * @param keys keys to be associated
51
+ */
52
+ void build(std::vector<std::string>& keys){
53
+ trie_.build(keys);
54
+ vs_.resize(trie_.size());
55
+ }
56
+
57
+ /**
58
+ * Build a map from std::map
59
+ * @param m A std::map as an input
60
+ */
61
+ void build(const std::map<std::string, V>& m){
62
+ std::vector<std::string> wordList;
63
+ for (typename std::map<std::string, V>::const_iterator it =
64
+ m.begin(); it != m.end(); ++it){
65
+ wordList.push_back(it->first);
66
+ }
67
+ trie_.build(wordList);
68
+ vs_.resize(wordList.size());
69
+ for (typename std::map<std::string, V>::const_iterator it =
70
+ m.begin(); it != m.end(); ++it){
71
+ const std::string key = it->first;
72
+ if (set(key.c_str(), key.size(), it->second) != 0){
73
+ return;
74
+ }
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Build a map from the vector of the pair of a key and a value
80
+ * @param kvs A vector of the pair of a key and vlaue
81
+ */
82
+ void build(const std::vector< std::pair<std::string, V> >& kvs){
83
+ std::vector<std::string> wordList;
84
+ for (size_t i = 0; i < kvs.size(); ++i){
85
+ wordList.push_back(kvs[i].first);
86
+ }
87
+
88
+ trie_.build(wordList);
89
+ vs_.resize(wordList.size());
90
+
91
+ for (size_t i = 0; i < kvs.size(); ++i){
92
+ const std::string key = kvs[i].first;
93
+ assert(set(key.c_str(), key.size(), kvs[i].second) == 0);
94
+ }
95
+ }
96
+
97
+ /**
98
+ * Get a value for a given key
99
+ * @param str the key
100
+ * @param len the length of str
101
+ * @param v An associated value for a key
102
+ * @return 0 on success and -1 if not found
103
+ */
104
+ int get(const char* str, size_t len, V& v) const {
105
+ size_t retLen = 0;
106
+ id_t id = trie_.prefixSearch(str, len, retLen);
107
+ if (id == NOTFOUND){
108
+ return -1;
109
+ }
110
+ v = vs_[id];
111
+ return 0;
112
+ }
113
+
114
+ /**
115
+ * Set a value for a given key
116
+ * @param str the key
117
+ * @param len the length of str
118
+ * @param v A value to be associated for a key
119
+ * @return 0 on success and -1 if not found
120
+ */
121
+ int set(const char* str, size_t len, const V& v){
122
+ size_t retLen = 0;
123
+ id_t id = trie_.prefixSearch(str, len, retLen);
124
+ if (id == NOTFOUND){
125
+ return -1;
126
+ }
127
+ vs_[id] = v;
128
+ return 0;
129
+ }
130
+
131
+ /**
132
+ * Return the longest key that matches the prefix of the query in the dictionary
133
+ * @param str the query
134
+ * @param len the length of the query
135
+ * @param retLen The length of the matched key in the dictionary
136
+ * @param v The associated value for the key
137
+ * @return 0 if found and -1 if not found
138
+ */
139
+ int prefixSearch(const char* str, size_t len, size_t& retLen, V& v) const {
140
+ id_t id = trie_.prefixSearch(str, len, retLen);
141
+ if (id == NOTFOUND){
142
+ return -1;
143
+ }
144
+ v = vs_[id];
145
+ return 0;
146
+ }
147
+
148
+ /**
149
+ * Return the all associated values that match the prefix of the query in the dictionary
150
+ * @param str the query
151
+ * @param len the length of the query
152
+ * @param vs The returned values associated for the input key
153
+ * @param limit The maximum number of matched keys
154
+ * @return The number of matched keys
155
+ */
156
+ size_t commonPrefixSearch(const char* str, size_t len, std::vector<V>& vs, size_t limit = LIMIT_DEFAULT) const {
157
+ vs.clear();
158
+ std::vector<id_t> retIDs;
159
+ commonPrefixSearch(str, len, retIDs, limit);
160
+ vs.resize(retIDs.size());
161
+ for (size_t i = 0; i < retIDs.size(); ++i){
162
+ vs[i] = vs_[retIDs[i]];
163
+ }
164
+ return vs.size();
165
+ }
166
+
167
+ /**
168
+ * Return the all keys whose their prefixes match the query
169
+ * @param str the query
170
+ * @param len the length of the query
171
+ * @param vs The associated values for the input key
172
+ * @param limit The maximum number of matched keys
173
+ * @return The number of matched keys
174
+ */
175
+ size_t predictiveSearch(const char* str, size_t len, std::vector<V>& vs, size_t limit = LIMIT_DEFAULT) const {
176
+ vs.clear();
177
+ std::vector<id_t> retIDs;
178
+ predictiveSearch(str, len, retIDs, limit);
179
+ vs.resize(retIDs.size());
180
+ for (size_t i = 0; i < retIDs.size(); ++i){
181
+ vs[i] = vs_[retIDs[i]];
182
+ }
183
+ return vs.size();
184
+ }
185
+
186
+ /**
187
+ * Return the key for the given ID
188
+ * @param id The ID of the key
189
+ * @param ret The key for the given ID or empty if such ID does not exist
190
+ */
191
+ void decodeKey(const size_t ind, std::string& ret) const {
192
+ trie_.decodeKey(ind, ret);
193
+ }
194
+
195
+ /**
196
+ * Save the map in ostream
197
+ * @param os The ostream as an output
198
+ * @return 0 on success, -1 on failure
199
+ */
200
+ int save(std::ostream& os) const {
201
+ trie_.save(os);
202
+ size_t vsSize = vs_.size();
203
+ os.write((const char*)&vsSize, sizeof(vsSize));
204
+ os.write((const char*)&vs_[0], sizeof(vs_[0]) * vs_.size());
205
+ if (!os){
206
+ return -1;
207
+ } else {
208
+ return 0;
209
+ }
210
+ }
211
+
212
+ /**
213
+ * Load the map from istream
214
+ * @param is The istream as an input
215
+ * @return 0 on success, -1 on failure
216
+ */
217
+ int load(std::istream& is){
218
+ trie_.load(is);
219
+ size_t vsSize = 0;
220
+ is.read((char*)&vsSize, sizeof(vsSize));
221
+ vs_.resize(vsSize);
222
+ is.read((char*)&vs_[0], sizeof(vs_[0]) * vs_.size());
223
+ if (!is){
224
+ return -1;
225
+ } else {
226
+ return 0;
227
+ }
228
+ }
229
+
230
+ /**
231
+ * Get the number of keys
232
+ * @return the number of keys
233
+ */
234
+ size_t size() const {
235
+ return trie_.size();
236
+ }
237
+
238
+ private:
239
+ Trie trie_;
240
+ std::vector<V> vs_;
241
+ size_t size_;
242
+ };
243
+
244
+
245
+ }
246
+
247
+
248
+ #endif // UX_MAP_HPP__