melisa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
@@ -0,0 +1,72 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_TAIL_H_
2
+ #define MARISA_GRIMOIRE_TRIE_TAIL_H_
3
+
4
+ #include "marisa/agent.h"
5
+ #include "marisa/grimoire/vector.h"
6
+ #include "marisa/grimoire/trie/entry.h"
7
+
8
+ namespace marisa {
9
+ namespace grimoire {
10
+ namespace trie {
11
+
12
+ class Tail {
13
+ public:
14
+ Tail();
15
+
16
+ void build(Vector<Entry> &entries, Vector<UInt32> *offsets,
17
+ TailMode mode);
18
+
19
+ void map(Mapper &mapper);
20
+ void read(Reader &reader);
21
+ void write(Writer &writer) const;
22
+
23
+ void restore(Agent &agent, std::size_t offset) const;
24
+ bool match(Agent &agent, std::size_t offset) const;
25
+ bool prefix_match(Agent &agent, std::size_t offset) const;
26
+
27
+ const char &operator[](std::size_t offset) const {
28
+ MARISA_DEBUG_IF(offset >= buf_.size(), MARISA_BOUND_ERROR);
29
+ return buf_[offset];
30
+ }
31
+
32
+ TailMode mode() const {
33
+ return end_flags_.empty() ? MARISA_TEXT_TAIL : MARISA_BINARY_TAIL;
34
+ }
35
+
36
+ bool empty() const {
37
+ return buf_.empty();
38
+ }
39
+ std::size_t size() const {
40
+ return buf_.size();
41
+ }
42
+ std::size_t total_size() const {
43
+ return buf_.total_size() + end_flags_.total_size();
44
+ }
45
+ std::size_t io_size() const {
46
+ return buf_.io_size() + end_flags_.io_size();
47
+ }
48
+
49
+ void clear();
50
+ void swap(Tail &rhs);
51
+
52
+ private:
53
+ Vector<char> buf_;
54
+ BitVector end_flags_;
55
+
56
+ void build_(Vector<Entry> &entries, Vector<UInt32> *offsets,
57
+ TailMode mode);
58
+
59
+ void map_(Mapper &mapper);
60
+ void read_(Reader &reader);
61
+ void write_(Writer &writer) const;
62
+
63
+ // Disallows copy and assignment.
64
+ Tail(const Tail &);
65
+ Tail &operator=(const Tail &);
66
+ };
67
+
68
+ } // namespace trie
69
+ } // namespace grimoire
70
+ } // namespace marisa
71
+
72
+ #endif // MARISA_GRIMOIRE_TRIE_TAIL_H_
@@ -0,0 +1,18 @@
1
+ #ifndef MARISA_GRIMOIRE_VECTOR_H_
2
+ #define MARISA_GRIMOIRE_VECTOR_H_
3
+
4
+ #include "marisa/grimoire/vector/vector.h"
5
+ #include "marisa/grimoire/vector/flat-vector.h"
6
+ #include "marisa/grimoire/vector/bit-vector.h"
7
+
8
+ namespace marisa {
9
+ namespace grimoire {
10
+
11
// Make the vector-layer types available directly in marisa::grimoire, so
// code in nested namespaces can refer to Vector<T>, FlatVector and BitVector
// without the vector:: qualifier.
// Vector is used as a template (Vector<T>), so it is brought in with a
// using-declaration; the other two names are introduced as typedef aliases.
+ using vector::Vector;
12
+ typedef vector::FlatVector FlatVector;
13
+ typedef vector::BitVector BitVector;
14
+
15
+ } // namespace grimoire
16
+ } // namespace marisa
17
+
18
+ #endif // MARISA_GRIMOIRE_VECTOR_H_
@@ -0,0 +1,826 @@
1
+ #include "marisa/grimoire/vector/pop-count.h"
2
+ #include "marisa/grimoire/vector/bit-vector.h"
3
+
4
+ namespace marisa {
5
+ namespace grimoire {
6
+ namespace vector {
7
+ namespace {
8
+
9
// SELECT_TABLE[k][b] is the 0-based position of the (k + 1)-th set bit of
// the byte value b, counted from the least significant bit.
// When b has fewer than k + 1 set bits the entry holds the filler value 7.
// Each sub-table below has 256 entries laid out as 16 rows of 16 values.
+ const UInt8 SELECT_TABLE[8][256] = {
10
// k = 0: position of the 1st set bit.
+ {
11
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
12
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
13
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
14
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
15
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
16
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
17
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
18
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
19
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
20
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
21
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
22
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
23
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
24
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
25
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
26
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
27
+ },
28
// k = 1: position of the 2nd set bit.
+ {
29
+ 7, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
30
+ 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
31
+ 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
32
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
33
+ 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
34
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
35
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
36
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
37
+ 7, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
38
+ 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
39
+ 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
40
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
41
+ 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
42
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
43
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
44
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1
45
+ },
46
// k = 2: position of the 3rd set bit.
+ {
47
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 3, 7, 3, 3, 2,
48
+ 7, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2,
49
+ 7, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2,
50
+ 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
51
+ 7, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2,
52
+ 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2,
53
+ 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2,
54
+ 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
55
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 3, 7, 3, 3, 2,
56
+ 7, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2,
57
+ 7, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2,
58
+ 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
59
+ 7, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2,
60
+ 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2,
61
+ 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2,
62
+ 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2
63
+ },
64
// k = 3: position of the 4th set bit.
+ {
65
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
66
+ 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 4, 4, 3,
67
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 3,
68
+ 7, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3,
69
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 3,
70
+ 7, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3,
71
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3,
72
+ 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3,
73
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
74
+ 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 4, 4, 3,
75
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 3,
76
+ 7, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3,
77
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 3,
78
+ 7, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3,
79
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3,
80
+ 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3
81
+ },
82
// k = 4: position of the 5th set bit.
+ {
83
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
84
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4,
85
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
86
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 4,
87
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
88
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 4,
89
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5,
90
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4,
91
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
92
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4,
93
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
94
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 4,
95
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
96
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 4,
97
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5,
98
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4
99
+ },
100
// k = 5: position of the 6th set bit.
+ {
101
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
102
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
103
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
104
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
105
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
106
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
107
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
108
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5,
109
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
110
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
111
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
112
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
113
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
114
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
115
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
116
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5
117
+ },
118
// k = 6: position of the 7th set bit.
+ {
119
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
120
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
121
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
122
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
123
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
124
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
125
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
126
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
127
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
128
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
129
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
130
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
131
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
132
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
133
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
134
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6
135
+ },
136
// k = 7: position of the 8th set bit (only 0xFF has one, at position 7).
+ {
137
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
138
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
139
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
140
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
141
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
142
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
143
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
144
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
145
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
146
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
147
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
148
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
149
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
150
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
151
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
152
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
153
+ }
154
+ };
155
+
156
+ #if MARISA_WORD_SIZE == 64
157
+ const UInt64 MASK_55 = 0x5555555555555555ULL;
158
+ const UInt64 MASK_33 = 0x3333333333333333ULL;
159
+ const UInt64 MASK_0F = 0x0F0F0F0F0F0F0F0FULL;
160
+ const UInt64 MASK_01 = 0x0101010101010101ULL;
161
+ const UInt64 MASK_80 = 0x8080808080808080ULL;
162
+
163
+ std::size_t select_bit(std::size_t i, std::size_t bit_id, UInt64 unit) {
164
+ UInt64 counts;
165
+ {
166
+ #if defined(MARISA_X64) && defined(MARISA_USE_SSSE3)
167
+ __m128i lower_nibbles = _mm_cvtsi64_si128(unit & 0x0F0F0F0F0F0F0F0FULL);
168
+ __m128i upper_nibbles = _mm_cvtsi64_si128(unit & 0xF0F0F0F0F0F0F0F0ULL);
169
+ upper_nibbles = _mm_srli_epi32(upper_nibbles, 4);
170
+
171
+ __m128i lower_counts =
172
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
173
+ lower_counts = _mm_shuffle_epi8(lower_counts, lower_nibbles);
174
+ __m128i upper_counts =
175
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
176
+ upper_counts = _mm_shuffle_epi8(upper_counts, upper_nibbles);
177
+
178
+ counts = _mm_cvtsi128_si64(_mm_add_epi8(lower_counts, upper_counts));
179
+ #else // defined(MARISA_X64) && defined(MARISA_USE_SSSE3)
180
+ counts = unit - ((unit >> 1) & MASK_55);
181
+ counts = (counts & MASK_33) + ((counts >> 2) & MASK_33);
182
+ counts = (counts + (counts >> 4)) & MASK_0F;
183
+ #endif // defined(MARISA_X64) && defined(MARISA_USE_SSSE3)
184
+ counts *= MASK_01;
185
+ }
186
+
187
+ #if defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
188
+ UInt8 skip;
189
+ {
190
+ __m128i x = _mm_cvtsi64_si128((i + 1) * MASK_01);
191
+ __m128i y = _mm_cvtsi64_si128(counts);
192
+ x = _mm_cmpgt_epi8(x, y);
193
+ skip = (UInt8)PopCount::count(_mm_cvtsi128_si64(x));
194
+ }
195
+ #else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
196
+ const UInt64 x = (counts | MASK_80) - ((i + 1) * MASK_01);
197
+ #ifdef _MSC_VER
198
+ unsigned long skip;
199
+ ::_BitScanForward64(&skip, (x & MASK_80) >> 7);
200
+ --skip;
201
+ #else // _MSC_VER
202
+ const int skip = ::__builtin_ctzll((x & MASK_80) >> 7);
203
+ #endif // _MSC_VER
204
+ #endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
205
+
206
+ bit_id += skip;
207
+ unit >>= skip;
208
+ i -= ((counts << 8) >> skip) & 0xFF;
209
+
210
+ return bit_id + SELECT_TABLE[i][unit & 0xFF];
211
+ }
212
+ #else // MARISA_WORD_SIZE == 64
213
+ #ifdef MARISA_USE_SSE2
214
// POPCNT_TABLE[m] is 8 times the number of set bits in the byte value m.
// The SSE2 select_bit() below indexes it with a byte-compare movemask, so
// the looked-up value is a bit offset that is a multiple of 8.
+ const UInt8 POPCNT_TABLE[256] = {
215
+ 0, 8, 8, 16, 8, 16, 16, 24, 8, 16, 16, 24, 16, 24, 24, 32,
216
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
217
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
218
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
219
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
220
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
221
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
222
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
223
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
224
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
225
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
226
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
227
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
228
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
229
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
230
+ 32, 40, 40, 48, 40, 48, 48, 56, 40, 48, 48, 56, 48, 56, 56, 64
231
+ };
232
+
233
+ std::size_t select_bit(std::size_t i, std::size_t bit_id,
234
+ UInt32 unit_lo, UInt32 unit_hi) {
235
+ __m128i unit;
236
+ {
237
+ __m128i lower_dword = _mm_cvtsi32_si128(unit_lo);
238
+ __m128i upper_dword = _mm_cvtsi32_si128(unit_hi);
239
+ upper_dword = _mm_slli_si128(upper_dword, 4);
240
+ unit = _mm_or_si128(lower_dword, upper_dword);
241
+ }
242
+
243
+ __m128i counts;
244
+ {
245
+ #ifdef MARISA_USE_SSSE3
246
+ __m128i lower_nibbles = _mm_set1_epi8(0x0F);
247
+ lower_nibbles = _mm_and_si128(lower_nibbles, unit);
248
+ __m128i upper_nibbles = _mm_set1_epi8((UInt8)0xF0);
249
+ upper_nibbles = _mm_and_si128(upper_nibbles, unit);
250
+ upper_nibbles = _mm_srli_epi32(upper_nibbles, 4);
251
+
252
+ __m128i lower_counts =
253
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
254
+ lower_counts = _mm_shuffle_epi8(lower_counts, lower_nibbles);
255
+ __m128i upper_counts =
256
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
257
+ upper_counts = _mm_shuffle_epi8(upper_counts, upper_nibbles);
258
+
259
+ counts = _mm_add_epi8(lower_counts, upper_counts);
260
+ #else // MARISA_USE_SSSE3
261
+ __m128i x = _mm_srli_epi32(unit, 1);
262
+ x = _mm_and_si128(x, _mm_set1_epi8(0x55));
263
+ x = _mm_sub_epi8(unit, x);
264
+
265
+ __m128i y = _mm_srli_epi32(x, 2);
266
+ y = _mm_and_si128(y, _mm_set1_epi8(0x33));
267
+ x = _mm_and_si128(x, _mm_set1_epi8(0x33));
268
+ x = _mm_add_epi8(x, y);
269
+
270
+ y = _mm_srli_epi32(x, 4);
271
+ x = _mm_add_epi8(x, y);
272
+ counts = _mm_and_si128(x, _mm_set1_epi8(0x0F));
273
+ #endif // MARISA_USE_SSSE3
274
+ }
275
+
276
+ __m128i accumulated_counts;
277
+ {
278
+ __m128i x = counts;
279
+ x = _mm_slli_si128(x, 1);
280
+ __m128i y = counts;
281
+ y = _mm_add_epi32(y, x);
282
+
283
+ x = y;
284
+ y = _mm_slli_si128(y, 2);
285
+ x = _mm_add_epi32(x, y);
286
+
287
+ y = x;
288
+ x = _mm_slli_si128(x, 4);
289
+ y = _mm_add_epi32(y, x);
290
+
291
+ accumulated_counts = _mm_set_epi32(0x7F7F7F7FU, 0x7F7F7F7FU, 0, 0);
292
+ accumulated_counts = _mm_or_si128(accumulated_counts, y);
293
+ }
294
+
295
+ UInt8 skip;
296
+ {
297
+ __m128i x = _mm_set1_epi8((UInt8)(i + 1));
298
+ x = _mm_cmpgt_epi8(x, accumulated_counts);
299
+ skip = POPCNT_TABLE[_mm_movemask_epi8(x)];
300
+ }
301
+
302
+ UInt8 byte;
303
+ {
304
+ #ifdef _MSC_VER
305
+ __declspec(align(16)) UInt8 unit_bytes[16];
306
+ __declspec(align(16)) UInt8 accumulated_counts_bytes[16];
307
+ #else // _MSC_VER
308
+ UInt8 unit_bytes[16] __attribute__ ((aligned (16)));
309
+ UInt8 accumulated_counts_bytes[16] __attribute__ ((aligned (16)));
310
+ #endif // _MSC_VER
311
+ accumulated_counts = _mm_slli_si128(accumulated_counts, 1);
312
+ _mm_store_si128(reinterpret_cast<__m128i *>(unit_bytes), unit);
313
+ _mm_store_si128(reinterpret_cast<__m128i *>(accumulated_counts_bytes),
314
+ accumulated_counts);
315
+
316
+ bit_id += skip;
317
+ byte = unit_bytes[skip / 8];
318
+ i -= accumulated_counts_bytes[skip / 8];
319
+ }
320
+
321
+ return bit_id + SELECT_TABLE[i][byte];
322
+ }
323
+ #endif // MARISA_USE_SSE2
324
+ #endif // MARISA_WORD_SIZE == 64
325
+
326
+ } // namespace
327
+
328
+ #if MARISA_WORD_SIZE == 64
329
+
330
+ std::size_t BitVector::rank1(std::size_t i) const {
331
+ MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR);
332
+ MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR);
333
+
334
+ const RankIndex &rank = ranks_[i / 512];
335
+ std::size_t offset = rank.abs();
336
+ switch ((i / 64) % 8) {
337
+ case 1: {
338
+ offset += rank.rel1();
339
+ break;
340
+ }
341
+ case 2: {
342
+ offset += rank.rel2();
343
+ break;
344
+ }
345
+ case 3: {
346
+ offset += rank.rel3();
347
+ break;
348
+ }
349
+ case 4: {
350
+ offset += rank.rel4();
351
+ break;
352
+ }
353
+ case 5: {
354
+ offset += rank.rel5();
355
+ break;
356
+ }
357
+ case 6: {
358
+ offset += rank.rel6();
359
+ break;
360
+ }
361
+ case 7: {
362
+ offset += rank.rel7();
363
+ break;
364
+ }
365
+ }
366
+ offset += PopCount::count(units_[i / 64] & ((1ULL << (i % 64)) - 1));
367
+ return offset;
368
+ }
369
+
370
+ std::size_t BitVector::select0(std::size_t i) const {
371
+ MARISA_DEBUG_IF(select0s_.empty(), MARISA_STATE_ERROR);
372
+ MARISA_DEBUG_IF(i >= num_0s(), MARISA_BOUND_ERROR);
373
+
374
+ const std::size_t select_id = i / 512;
375
+ MARISA_DEBUG_IF((select_id + 1) >= select0s_.size(), MARISA_BOUND_ERROR);
376
+ if ((i % 512) == 0) {
377
+ return select0s_[select_id];
378
+ }
379
+ std::size_t begin = select0s_[select_id] / 512;
380
+ std::size_t end = (select0s_[select_id + 1] + 511) / 512;
381
+ if (begin + 10 >= end) {
382
+ while (i >= ((begin + 1) * 512) - ranks_[begin + 1].abs()) {
383
+ ++begin;
384
+ }
385
+ } else {
386
+ while (begin + 1 < end) {
387
+ const std::size_t middle = (begin + end) / 2;
388
+ if (i < (middle * 512) - ranks_[middle].abs()) {
389
+ end = middle;
390
+ } else {
391
+ begin = middle;
392
+ }
393
+ }
394
+ }
395
+ const std::size_t rank_id = begin;
396
+ i -= (rank_id * 512) - ranks_[rank_id].abs();
397
+
398
+ const RankIndex &rank = ranks_[rank_id];
399
+ std::size_t unit_id = rank_id * 8;
400
+ if (i < (256U - rank.rel4())) {
401
+ if (i < (128U - rank.rel2())) {
402
+ if (i >= (64U - rank.rel1())) {
403
+ unit_id += 1;
404
+ i -= 64 - rank.rel1();
405
+ }
406
+ } else if (i < (192U - rank.rel3())) {
407
+ unit_id += 2;
408
+ i -= 128 - rank.rel2();
409
+ } else {
410
+ unit_id += 3;
411
+ i -= 192 - rank.rel3();
412
+ }
413
+ } else if (i < (384U - rank.rel6())) {
414
+ if (i < (320U - rank.rel5())) {
415
+ unit_id += 4;
416
+ i -= 256 - rank.rel4();
417
+ } else {
418
+ unit_id += 5;
419
+ i -= 320 - rank.rel5();
420
+ }
421
+ } else if (i < (448U - rank.rel7())) {
422
+ unit_id += 6;
423
+ i -= 384 - rank.rel6();
424
+ } else {
425
+ unit_id += 7;
426
+ i -= 448 - rank.rel7();
427
+ }
428
+
429
+ return select_bit(i, unit_id * 64, ~units_[unit_id]);
430
+ }
431
+
432
+ std::size_t BitVector::select1(std::size_t i) const {
433
+ MARISA_DEBUG_IF(select1s_.empty(), MARISA_STATE_ERROR);
434
+ MARISA_DEBUG_IF(i >= num_1s(), MARISA_BOUND_ERROR);
435
+
436
+ const std::size_t select_id = i / 512;
437
+ MARISA_DEBUG_IF((select_id + 1) >= select1s_.size(), MARISA_BOUND_ERROR);
438
+ if ((i % 512) == 0) {
439
+ return select1s_[select_id];
440
+ }
441
+ std::size_t begin = select1s_[select_id] / 512;
442
+ std::size_t end = (select1s_[select_id + 1] + 511) / 512;
443
+ if (begin + 10 >= end) {
444
+ while (i >= ranks_[begin + 1].abs()) {
445
+ ++begin;
446
+ }
447
+ } else {
448
+ while (begin + 1 < end) {
449
+ const std::size_t middle = (begin + end) / 2;
450
+ if (i < ranks_[middle].abs()) {
451
+ end = middle;
452
+ } else {
453
+ begin = middle;
454
+ }
455
+ }
456
+ }
457
+ const std::size_t rank_id = begin;
458
+ i -= ranks_[rank_id].abs();
459
+
460
+ const RankIndex &rank = ranks_[rank_id];
461
+ std::size_t unit_id = rank_id * 8;
462
+ if (i < rank.rel4()) {
463
+ if (i < rank.rel2()) {
464
+ if (i >= rank.rel1()) {
465
+ unit_id += 1;
466
+ i -= rank.rel1();
467
+ }
468
+ } else if (i < rank.rel3()) {
469
+ unit_id += 2;
470
+ i -= rank.rel2();
471
+ } else {
472
+ unit_id += 3;
473
+ i -= rank.rel3();
474
+ }
475
+ } else if (i < rank.rel6()) {
476
+ if (i < rank.rel5()) {
477
+ unit_id += 4;
478
+ i -= rank.rel4();
479
+ } else {
480
+ unit_id += 5;
481
+ i -= rank.rel5();
482
+ }
483
+ } else if (i < rank.rel7()) {
484
+ unit_id += 6;
485
+ i -= rank.rel6();
486
+ } else {
487
+ unit_id += 7;
488
+ i -= rank.rel7();
489
+ }
490
+
491
+ return select_bit(i, unit_id * 64, units_[unit_id]);
492
+ }
493
+
494
+ #else // MARISA_WORD_SIZE == 64
495
+
496
+ std::size_t BitVector::rank1(std::size_t i) const {
497
+ MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR);
498
+ MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR);
499
+
500
+ const RankIndex &rank = ranks_[i / 512];
501
+ std::size_t offset = rank.abs();
502
+ switch ((i / 64) % 8) {
503
+ case 1: {
504
+ offset += rank.rel1();
505
+ break;
506
+ }
507
+ case 2: {
508
+ offset += rank.rel2();
509
+ break;
510
+ }
511
+ case 3: {
512
+ offset += rank.rel3();
513
+ break;
514
+ }
515
+ case 4: {
516
+ offset += rank.rel4();
517
+ break;
518
+ }
519
+ case 5: {
520
+ offset += rank.rel5();
521
+ break;
522
+ }
523
+ case 6: {
524
+ offset += rank.rel6();
525
+ break;
526
+ }
527
+ case 7: {
528
+ offset += rank.rel7();
529
+ break;
530
+ }
531
+ }
532
+ if (((i / 32) & 1) == 1) {
533
+ offset += PopCount::count(units_[(i / 32) - 1]);
534
+ }
535
+ offset += PopCount::count(units_[i / 32] & ((1U << (i % 32)) - 1));
536
+ return offset;
537
+ }
538
+
539
+ std::size_t BitVector::select0(std::size_t i) const {
540
+ MARISA_DEBUG_IF(select0s_.empty(), MARISA_STATE_ERROR);
541
+ MARISA_DEBUG_IF(i >= num_0s(), MARISA_BOUND_ERROR);
542
+
543
+ const std::size_t select_id = i / 512;
544
+ MARISA_DEBUG_IF((select_id + 1) >= select0s_.size(), MARISA_BOUND_ERROR);
545
+ if ((i % 512) == 0) {
546
+ return select0s_[select_id];
547
+ }
548
+ std::size_t begin = select0s_[select_id] / 512;
549
+ std::size_t end = (select0s_[select_id + 1] + 511) / 512;
550
+ if (begin + 10 >= end) {
551
+ while (i >= ((begin + 1) * 512) - ranks_[begin + 1].abs()) {
552
+ ++begin;
553
+ }
554
+ } else {
555
+ while (begin + 1 < end) {
556
+ const std::size_t middle = (begin + end) / 2;
557
+ if (i < (middle * 512) - ranks_[middle].abs()) {
558
+ end = middle;
559
+ } else {
560
+ begin = middle;
561
+ }
562
+ }
563
+ }
564
+ const std::size_t rank_id = begin;
565
+ i -= (rank_id * 512) - ranks_[rank_id].abs();
566
+
567
+ const RankIndex &rank = ranks_[rank_id];
568
+ std::size_t unit_id = rank_id * 16;
569
+ if (i < (256U - rank.rel4())) {
570
+ if (i < (128U - rank.rel2())) {
571
+ if (i >= (64U - rank.rel1())) {
572
+ unit_id += 2;
573
+ i -= 64 - rank.rel1();
574
+ }
575
+ } else if (i < (192U - rank.rel3())) {
576
+ unit_id += 4;
577
+ i -= 128 - rank.rel2();
578
+ } else {
579
+ unit_id += 6;
580
+ i -= 192 - rank.rel3();
581
+ }
582
+ } else if (i < (384U - rank.rel6())) {
583
+ if (i < (320U - rank.rel5())) {
584
+ unit_id += 8;
585
+ i -= 256 - rank.rel4();
586
+ } else {
587
+ unit_id += 10;
588
+ i -= 320 - rank.rel5();
589
+ }
590
+ } else if (i < (448U - rank.rel7())) {
591
+ unit_id += 12;
592
+ i -= 384 - rank.rel6();
593
+ } else {
594
+ unit_id += 14;
595
+ i -= 448 - rank.rel7();
596
+ }
597
+
598
+ #ifdef MARISA_USE_SSE2
599
+ return select_bit(i, unit_id * 32, ~units_[unit_id], ~units_[unit_id + 1]);
600
+ #else // MARISA_USE_SSE2
601
+ UInt32 unit = ~units_[unit_id];
602
+ PopCount count(unit);
603
+ if (i >= count.lo32()) {
604
+ ++unit_id;
605
+ i -= count.lo32();
606
+ unit = ~units_[unit_id];
607
+ count = PopCount(unit);
608
+ }
609
+
610
+ std::size_t bit_id = unit_id * 32;
611
+ if (i < count.lo16()) {
612
+ if (i >= count.lo8()) {
613
+ bit_id += 8;
614
+ unit >>= 8;
615
+ i -= count.lo8();
616
+ }
617
+ } else if (i < count.lo24()) {
618
+ bit_id += 16;
619
+ unit >>= 16;
620
+ i -= count.lo16();
621
+ } else {
622
+ bit_id += 24;
623
+ unit >>= 24;
624
+ i -= count.lo24();
625
+ }
626
+ return bit_id + SELECT_TABLE[i][unit & 0xFF];
627
+ #endif // MARISA_USE_SSE2
628
+ }
629
+
630
+ std::size_t BitVector::select1(std::size_t i) const {
631
+ MARISA_DEBUG_IF(select1s_.empty(), MARISA_STATE_ERROR);
632
+ MARISA_DEBUG_IF(i >= num_1s(), MARISA_BOUND_ERROR);
633
+
634
+ const std::size_t select_id = i / 512;
635
+ MARISA_DEBUG_IF((select_id + 1) >= select1s_.size(), MARISA_BOUND_ERROR);
636
+ if ((i % 512) == 0) {
637
+ return select1s_[select_id];
638
+ }
639
+ std::size_t begin = select1s_[select_id] / 512;
640
+ std::size_t end = (select1s_[select_id + 1] + 511) / 512;
641
+ if (begin + 10 >= end) {
642
+ while (i >= ranks_[begin + 1].abs()) {
643
+ ++begin;
644
+ }
645
+ } else {
646
+ while (begin + 1 < end) {
647
+ const std::size_t middle = (begin + end) / 2;
648
+ if (i < ranks_[middle].abs()) {
649
+ end = middle;
650
+ } else {
651
+ begin = middle;
652
+ }
653
+ }
654
+ }
655
+ const std::size_t rank_id = begin;
656
+ i -= ranks_[rank_id].abs();
657
+
658
+ const RankIndex &rank = ranks_[rank_id];
659
+ std::size_t unit_id = rank_id * 16;
660
+ if (i < rank.rel4()) {
661
+ if (i < rank.rel2()) {
662
+ if (i >= rank.rel1()) {
663
+ unit_id += 2;
664
+ i -= rank.rel1();
665
+ }
666
+ } else if (i < rank.rel3()) {
667
+ unit_id += 4;
668
+ i -= rank.rel2();
669
+ } else {
670
+ unit_id += 6;
671
+ i -= rank.rel3();
672
+ }
673
+ } else if (i < rank.rel6()) {
674
+ if (i < rank.rel5()) {
675
+ unit_id += 8;
676
+ i -= rank.rel4();
677
+ } else {
678
+ unit_id += 10;
679
+ i -= rank.rel5();
680
+ }
681
+ } else if (i < rank.rel7()) {
682
+ unit_id += 12;
683
+ i -= rank.rel6();
684
+ } else {
685
+ unit_id += 14;
686
+ i -= rank.rel7();
687
+ }
688
+
689
+ #ifdef MARISA_USE_SSE2
690
+ return select_bit(i, unit_id * 32, units_[unit_id], units_[unit_id + 1]);
691
+ #else // MARISA_USE_SSE2
692
+ UInt32 unit = units_[unit_id];
693
+ PopCount count(unit);
694
+ if (i >= count.lo32()) {
695
+ ++unit_id;
696
+ i -= count.lo32();
697
+ unit = units_[unit_id];
698
+ count = PopCount(unit);
699
+ }
700
+
701
+ std::size_t bit_id = unit_id * 32;
702
+ if (i < count.lo16()) {
703
+ if (i >= count.lo8()) {
704
+ bit_id += 8;
705
+ unit >>= 8;
706
+ i -= count.lo8();
707
+ }
708
+ } else if (i < count.lo24()) {
709
+ bit_id += 16;
710
+ unit >>= 16;
711
+ i -= count.lo16();
712
+ } else {
713
+ bit_id += 24;
714
+ unit >>= 24;
715
+ i -= count.lo24();
716
+ }
717
+ return bit_id + SELECT_TABLE[i][unit & 0xFF];
718
+ #endif // MARISA_USE_SSE2
719
+ }
720
+
721
+ #endif // MARISA_WORD_SIZE == 64
722
+
723
+ void BitVector::build_index(const BitVector &bv,
724
+ bool enables_select0, bool enables_select1) {
725
+ ranks_.resize((bv.size() / 512) + (((bv.size() % 512) != 0) ? 1 : 0) + 1);
726
+
727
+ std::size_t num_0s = 0;
728
+ std::size_t num_1s = 0;
729
+
730
+ for (std::size_t i = 0; i < bv.size(); ++i) {
731
+ if ((i % 64) == 0) {
732
+ const std::size_t rank_id = i / 512;
733
+ switch ((i / 64) % 8) {
734
+ case 0: {
735
+ ranks_[rank_id].set_abs(num_1s);
736
+ break;
737
+ }
738
+ case 1: {
739
+ ranks_[rank_id].set_rel1(num_1s - ranks_[rank_id].abs());
740
+ break;
741
+ }
742
+ case 2: {
743
+ ranks_[rank_id].set_rel2(num_1s - ranks_[rank_id].abs());
744
+ break;
745
+ }
746
+ case 3: {
747
+ ranks_[rank_id].set_rel3(num_1s - ranks_[rank_id].abs());
748
+ break;
749
+ }
750
+ case 4: {
751
+ ranks_[rank_id].set_rel4(num_1s - ranks_[rank_id].abs());
752
+ break;
753
+ }
754
+ case 5: {
755
+ ranks_[rank_id].set_rel5(num_1s - ranks_[rank_id].abs());
756
+ break;
757
+ }
758
+ case 6: {
759
+ ranks_[rank_id].set_rel6(num_1s - ranks_[rank_id].abs());
760
+ break;
761
+ }
762
+ case 7: {
763
+ ranks_[rank_id].set_rel7(num_1s - ranks_[rank_id].abs());
764
+ break;
765
+ }
766
+ }
767
+ }
768
+
769
+ if (bv[i]) {
770
+ if (enables_select1 && ((num_1s % 512) == 0)) {
771
+ select1s_.push_back(i);
772
+ }
773
+ ++num_1s;
774
+ } else {
775
+ if (enables_select0 && ((num_0s % 512) == 0)) {
776
+ select0s_.push_back(i);
777
+ }
778
+ ++num_0s;
779
+ }
780
+ }
781
+
782
+ if ((bv.size() % 512) != 0) {
783
+ const std::size_t rank_id = (bv.size() - 1) / 512;
784
+ switch (((bv.size() - 1) / 64) % 8) {
785
+ case 0: {
786
+ ranks_[rank_id].set_rel1(num_1s - ranks_[rank_id].abs());
787
+ }
788
+ case 1: {
789
+ ranks_[rank_id].set_rel2(num_1s - ranks_[rank_id].abs());
790
+ }
791
+ case 2: {
792
+ ranks_[rank_id].set_rel3(num_1s - ranks_[rank_id].abs());
793
+ }
794
+ case 3: {
795
+ ranks_[rank_id].set_rel4(num_1s - ranks_[rank_id].abs());
796
+ }
797
+ case 4: {
798
+ ranks_[rank_id].set_rel5(num_1s - ranks_[rank_id].abs());
799
+ }
800
+ case 5: {
801
+ ranks_[rank_id].set_rel6(num_1s - ranks_[rank_id].abs());
802
+ }
803
+ case 6: {
804
+ ranks_[rank_id].set_rel7(num_1s - ranks_[rank_id].abs());
805
+ break;
806
+ }
807
+ }
808
+ }
809
+
810
+ size_ = bv.size();
811
+ num_1s_ = bv.num_1s();
812
+
813
+ ranks_.back().set_abs(num_1s);
814
+ if (enables_select0) {
815
+ select0s_.push_back(bv.size());
816
+ select0s_.shrink();
817
+ }
818
+ if (enables_select1) {
819
+ select1s_.push_back(bv.size());
820
+ select1s_.shrink();
821
+ }
822
+ }
823
+
824
+ } // namespace vector
825
+ } // namespace grimoire
826
+ } // namespace marisa