melisa 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
@@ -0,0 +1,72 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_TAIL_H_
2
+ #define MARISA_GRIMOIRE_TRIE_TAIL_H_
3
+
4
+ #include "marisa/agent.h"
5
+ #include "marisa/grimoire/vector.h"
6
+ #include "marisa/grimoire/trie/entry.h"
7
+
8
+ namespace marisa {
9
+ namespace grimoire {
10
+ namespace trie {
11
+
12
+ class Tail {
13
+ public:
14
+ Tail();
15
+
16
+ void build(Vector<Entry> &entries, Vector<UInt32> *offsets,
17
+ TailMode mode);
18
+
19
+ void map(Mapper &mapper);
20
+ void read(Reader &reader);
21
+ void write(Writer &writer) const;
22
+
23
+ void restore(Agent &agent, std::size_t offset) const;
24
+ bool match(Agent &agent, std::size_t offset) const;
25
+ bool prefix_match(Agent &agent, std::size_t offset) const;
26
+
27
+ const char &operator[](std::size_t offset) const {
28
+ MARISA_DEBUG_IF(offset >= buf_.size(), MARISA_BOUND_ERROR);
29
+ return buf_[offset];
30
+ }
31
+
32
+ TailMode mode() const {
33
+ return end_flags_.empty() ? MARISA_TEXT_TAIL : MARISA_BINARY_TAIL;
34
+ }
35
+
36
+ bool empty() const {
37
+ return buf_.empty();
38
+ }
39
+ std::size_t size() const {
40
+ return buf_.size();
41
+ }
42
+ std::size_t total_size() const {
43
+ return buf_.total_size() + end_flags_.total_size();
44
+ }
45
+ std::size_t io_size() const {
46
+ return buf_.io_size() + end_flags_.io_size();
47
+ }
48
+
49
+ void clear();
50
+ void swap(Tail &rhs);
51
+
52
+ private:
53
+ Vector<char> buf_;
54
+ BitVector end_flags_;
55
+
56
+ void build_(Vector<Entry> &entries, Vector<UInt32> *offsets,
57
+ TailMode mode);
58
+
59
+ void map_(Mapper &mapper);
60
+ void read_(Reader &reader);
61
+ void write_(Writer &writer) const;
62
+
63
+ // Disallows copy and assignment.
64
+ Tail(const Tail &);
65
+ Tail &operator=(const Tail &);
66
+ };
67
+
68
+ } // namespace trie
69
+ } // namespace grimoire
70
+ } // namespace marisa
71
+
72
+ #endif // MARISA_GRIMOIRE_TRIE_TAIL_H_
@@ -0,0 +1,18 @@
1
+ #ifndef MARISA_GRIMOIRE_VECTOR_H_
2
+ #define MARISA_GRIMOIRE_VECTOR_H_
3
+
4
+ #include "marisa/grimoire/vector/vector.h"
5
+ #include "marisa/grimoire/vector/flat-vector.h"
6
+ #include "marisa/grimoire/vector/bit-vector.h"
7
+
8
+ namespace marisa {
9
+ namespace grimoire {
10
+
11
// Re-export the container types of the nested `vector` namespace so that
// code in marisa::grimoire can name them without the `vector::` qualifier.
+ using vector::Vector;
12
+ typedef vector::FlatVector FlatVector;
13
+ typedef vector::BitVector BitVector;
14
+
15
+ } // namespace grimoire
16
+ } // namespace marisa
17
+
18
+ #endif // MARISA_GRIMOIRE_VECTOR_H_
@@ -0,0 +1,826 @@
1
+ #include "marisa/grimoire/vector/pop-count.h"
2
+ #include "marisa/grimoire/vector/bit-vector.h"
3
+
4
+ namespace marisa {
5
+ namespace grimoire {
6
+ namespace vector {
7
+ namespace {
8
+
9
+ const UInt8 SELECT_TABLE[8][256] = {
10
+ {
11
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
12
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
13
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
14
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
15
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
16
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
17
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
18
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
19
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
20
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
21
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
22
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
23
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
24
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
25
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
26
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
27
+ },
28
+ {
29
+ 7, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
30
+ 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
31
+ 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
32
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
33
+ 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
34
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
35
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
36
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
37
+ 7, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
38
+ 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
39
+ 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
40
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
41
+ 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
42
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
43
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
44
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1
45
+ },
46
+ {
47
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 3, 7, 3, 3, 2,
48
+ 7, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2,
49
+ 7, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2,
50
+ 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
51
+ 7, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2,
52
+ 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2,
53
+ 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2,
54
+ 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
55
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 3, 7, 3, 3, 2,
56
+ 7, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2,
57
+ 7, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2,
58
+ 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
59
+ 7, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2,
60
+ 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2,
61
+ 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2,
62
+ 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2
63
+ },
64
+ {
65
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
66
+ 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 4, 4, 3,
67
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 3,
68
+ 7, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3,
69
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 3,
70
+ 7, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3,
71
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3,
72
+ 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3,
73
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
74
+ 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 4, 4, 3,
75
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 3,
76
+ 7, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3,
77
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 3,
78
+ 7, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3,
79
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3,
80
+ 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3
81
+ },
82
+ {
83
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
84
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4,
85
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
86
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 4,
87
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
88
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 4,
89
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5,
90
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4,
91
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
92
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4,
93
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
94
+ 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 4,
95
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
96
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 4,
97
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5,
98
+ 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4
99
+ },
100
+ {
101
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
102
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
103
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
104
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
105
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
106
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
107
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
108
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5,
109
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
110
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
111
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
112
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5,
113
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
114
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
115
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
116
+ 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5
117
+ },
118
+ {
119
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
120
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
121
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
122
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
123
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
124
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
125
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
126
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6,
127
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
128
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
129
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
130
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
131
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
132
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
133
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
134
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6
135
+ },
136
+ {
137
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
138
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
139
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
140
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
141
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
142
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
143
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
144
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
145
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
146
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
147
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
148
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
149
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
150
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
151
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
152
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
153
+ }
154
+ };
155
+
156
+ #if MARISA_WORD_SIZE == 64
157
// Each mask repeats a single byte pattern (0x55, 0x33, 0x0F, 0x01, 0x80)
// across all eight bytes of a 64-bit word. select_bit() uses MASK_55,
// MASK_33 and MASK_0F for its per-byte bit counting, MASK_01 to broadcast a
// value into every byte, and MASK_80 to test the top bit of every byte.
+ const UInt64 MASK_55 = 0x5555555555555555ULL;
158
+ const UInt64 MASK_33 = 0x3333333333333333ULL;
159
+ const UInt64 MASK_0F = 0x0F0F0F0F0F0F0F0FULL;
160
+ const UInt64 MASK_01 = 0x0101010101010101ULL;
161
+ const UInt64 MASK_80 = 0x8080808080808080ULL;
162
+
163
+ std::size_t select_bit(std::size_t i, std::size_t bit_id, UInt64 unit) {
164
+ UInt64 counts;
165
+ {
166
+ #if defined(MARISA_X64) && defined(MARISA_USE_SSSE3)
167
+ __m128i lower_nibbles = _mm_cvtsi64_si128(unit & 0x0F0F0F0F0F0F0F0FULL);
168
+ __m128i upper_nibbles = _mm_cvtsi64_si128(unit & 0xF0F0F0F0F0F0F0F0ULL);
169
+ upper_nibbles = _mm_srli_epi32(upper_nibbles, 4);
170
+
171
+ __m128i lower_counts =
172
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
173
+ lower_counts = _mm_shuffle_epi8(lower_counts, lower_nibbles);
174
+ __m128i upper_counts =
175
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
176
+ upper_counts = _mm_shuffle_epi8(upper_counts, upper_nibbles);
177
+
178
+ counts = _mm_cvtsi128_si64(_mm_add_epi8(lower_counts, upper_counts));
179
+ #else // defined(MARISA_X64) && defined(MARISA_USE_SSSE3)
180
+ counts = unit - ((unit >> 1) & MASK_55);
181
+ counts = (counts & MASK_33) + ((counts >> 2) & MASK_33);
182
+ counts = (counts + (counts >> 4)) & MASK_0F;
183
+ #endif // defined(MARISA_X64) && defined(MARISA_USE_SSSE3)
184
+ counts *= MASK_01;
185
+ }
186
+
187
+ #if defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
188
+ UInt8 skip;
189
+ {
190
+ __m128i x = _mm_cvtsi64_si128((i + 1) * MASK_01);
191
+ __m128i y = _mm_cvtsi64_si128(counts);
192
+ x = _mm_cmpgt_epi8(x, y);
193
+ skip = (UInt8)PopCount::count(_mm_cvtsi128_si64(x));
194
+ }
195
+ #else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
196
+ const UInt64 x = (counts | MASK_80) - ((i + 1) * MASK_01);
197
+ #ifdef _MSC_VER
198
+ unsigned long skip;
199
+ ::_BitScanForward64(&skip, (x & MASK_80) >> 7);
200
+ --skip;
201
+ #else // _MSC_VER
202
+ const int skip = ::__builtin_ctzll((x & MASK_80) >> 7);
203
+ #endif // _MSC_VER
204
+ #endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
205
+
206
+ bit_id += skip;
207
+ unit >>= skip;
208
+ i -= ((counts << 8) >> skip) & 0xFF;
209
+
210
+ return bit_id + SELECT_TABLE[i][unit & 0xFF];
211
+ }
212
+ #else // MARISA_WORD_SIZE == 64
213
+ #ifdef MARISA_USE_SSE2
214
// POPCNT_TABLE[m] is 8 times the number of set bits in the 8-bit value m
// (0 -> 0, 1 -> 8, 3 -> 16, ..., 255 -> 64). The SSE2 select_bit() indexes it
// with a byte-comparison mask to obtain a bit offset that is a multiple of 8.
+ const UInt8 POPCNT_TABLE[256] = {
215
+ 0, 8, 8, 16, 8, 16, 16, 24, 8, 16, 16, 24, 16, 24, 24, 32,
216
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
217
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
218
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
219
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
220
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
221
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
222
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
223
+ 8, 16, 16, 24, 16, 24, 24, 32, 16, 24, 24, 32, 24, 32, 32, 40,
224
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
225
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
226
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
227
+ 16, 24, 24, 32, 24, 32, 32, 40, 24, 32, 32, 40, 32, 40, 40, 48,
228
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
229
+ 24, 32, 32, 40, 32, 40, 40, 48, 32, 40, 40, 48, 40, 48, 48, 56,
230
+ 32, 40, 40, 48, 40, 48, 48, 56, 40, 48, 48, 56, 48, 56, 56, 64
231
+ };
232
+
233
+ std::size_t select_bit(std::size_t i, std::size_t bit_id,
234
+ UInt32 unit_lo, UInt32 unit_hi) {
235
+ __m128i unit;
236
+ {
237
+ __m128i lower_dword = _mm_cvtsi32_si128(unit_lo);
238
+ __m128i upper_dword = _mm_cvtsi32_si128(unit_hi);
239
+ upper_dword = _mm_slli_si128(upper_dword, 4);
240
+ unit = _mm_or_si128(lower_dword, upper_dword);
241
+ }
242
+
243
+ __m128i counts;
244
+ {
245
+ #ifdef MARISA_USE_SSSE3
246
+ __m128i lower_nibbles = _mm_set1_epi8(0x0F);
247
+ lower_nibbles = _mm_and_si128(lower_nibbles, unit);
248
+ __m128i upper_nibbles = _mm_set1_epi8((UInt8)0xF0);
249
+ upper_nibbles = _mm_and_si128(upper_nibbles, unit);
250
+ upper_nibbles = _mm_srli_epi32(upper_nibbles, 4);
251
+
252
+ __m128i lower_counts =
253
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
254
+ lower_counts = _mm_shuffle_epi8(lower_counts, lower_nibbles);
255
+ __m128i upper_counts =
256
+ _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
257
+ upper_counts = _mm_shuffle_epi8(upper_counts, upper_nibbles);
258
+
259
+ counts = _mm_add_epi8(lower_counts, upper_counts);
260
+ #else // MARISA_USE_SSSE3
261
+ __m128i x = _mm_srli_epi32(unit, 1);
262
+ x = _mm_and_si128(x, _mm_set1_epi8(0x55));
263
+ x = _mm_sub_epi8(unit, x);
264
+
265
+ __m128i y = _mm_srli_epi32(x, 2);
266
+ y = _mm_and_si128(y, _mm_set1_epi8(0x33));
267
+ x = _mm_and_si128(x, _mm_set1_epi8(0x33));
268
+ x = _mm_add_epi8(x, y);
269
+
270
+ y = _mm_srli_epi32(x, 4);
271
+ x = _mm_add_epi8(x, y);
272
+ counts = _mm_and_si128(x, _mm_set1_epi8(0x0F));
273
+ #endif // MARISA_USE_SSSE3
274
+ }
275
+
276
+ __m128i accumulated_counts;
277
+ {
278
+ __m128i x = counts;
279
+ x = _mm_slli_si128(x, 1);
280
+ __m128i y = counts;
281
+ y = _mm_add_epi32(y, x);
282
+
283
+ x = y;
284
+ y = _mm_slli_si128(y, 2);
285
+ x = _mm_add_epi32(x, y);
286
+
287
+ y = x;
288
+ x = _mm_slli_si128(x, 4);
289
+ y = _mm_add_epi32(y, x);
290
+
291
+ accumulated_counts = _mm_set_epi32(0x7F7F7F7FU, 0x7F7F7F7FU, 0, 0);
292
+ accumulated_counts = _mm_or_si128(accumulated_counts, y);
293
+ }
294
+
295
+ UInt8 skip;
296
+ {
297
+ __m128i x = _mm_set1_epi8((UInt8)(i + 1));
298
+ x = _mm_cmpgt_epi8(x, accumulated_counts);
299
+ skip = POPCNT_TABLE[_mm_movemask_epi8(x)];
300
+ }
301
+
302
+ UInt8 byte;
303
+ {
304
+ #ifdef _MSC_VER
305
+ __declspec(align(16)) UInt8 unit_bytes[16];
306
+ __declspec(align(16)) UInt8 accumulated_counts_bytes[16];
307
+ #else // _MSC_VER
308
+ UInt8 unit_bytes[16] __attribute__ ((aligned (16)));
309
+ UInt8 accumulated_counts_bytes[16] __attribute__ ((aligned (16)));
310
+ #endif // _MSC_VER
311
+ accumulated_counts = _mm_slli_si128(accumulated_counts, 1);
312
+ _mm_store_si128(reinterpret_cast<__m128i *>(unit_bytes), unit);
313
+ _mm_store_si128(reinterpret_cast<__m128i *>(accumulated_counts_bytes),
314
+ accumulated_counts);
315
+
316
+ bit_id += skip;
317
+ byte = unit_bytes[skip / 8];
318
+ i -= accumulated_counts_bytes[skip / 8];
319
+ }
320
+
321
+ return bit_id + SELECT_TABLE[i][byte];
322
+ }
323
+ #endif // MARISA_USE_SSE2
324
+ #endif // MARISA_WORD_SIZE == 64
325
+
326
+ } // namespace
327
+
328
+ #if MARISA_WORD_SIZE == 64
329
+
330
+ std::size_t BitVector::rank1(std::size_t i) const {
331
+ MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR);
332
+ MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR);
333
+
334
+ const RankIndex &rank = ranks_[i / 512];
335
+ std::size_t offset = rank.abs();
336
+ switch ((i / 64) % 8) {
337
+ case 1: {
338
+ offset += rank.rel1();
339
+ break;
340
+ }
341
+ case 2: {
342
+ offset += rank.rel2();
343
+ break;
344
+ }
345
+ case 3: {
346
+ offset += rank.rel3();
347
+ break;
348
+ }
349
+ case 4: {
350
+ offset += rank.rel4();
351
+ break;
352
+ }
353
+ case 5: {
354
+ offset += rank.rel5();
355
+ break;
356
+ }
357
+ case 6: {
358
+ offset += rank.rel6();
359
+ break;
360
+ }
361
+ case 7: {
362
+ offset += rank.rel7();
363
+ break;
364
+ }
365
+ }
366
+ offset += PopCount::count(units_[i / 64] & ((1ULL << (i % 64)) - 1));
367
+ return offset;
368
+ }
369
+
370
+ std::size_t BitVector::select0(std::size_t i) const {
371
+ MARISA_DEBUG_IF(select0s_.empty(), MARISA_STATE_ERROR);
372
+ MARISA_DEBUG_IF(i >= num_0s(), MARISA_BOUND_ERROR);
373
+
374
+ const std::size_t select_id = i / 512;
375
+ MARISA_DEBUG_IF((select_id + 1) >= select0s_.size(), MARISA_BOUND_ERROR);
376
+ if ((i % 512) == 0) {
377
+ return select0s_[select_id];
378
+ }
379
+ std::size_t begin = select0s_[select_id] / 512;
380
+ std::size_t end = (select0s_[select_id + 1] + 511) / 512;
381
+ if (begin + 10 >= end) {
382
+ while (i >= ((begin + 1) * 512) - ranks_[begin + 1].abs()) {
383
+ ++begin;
384
+ }
385
+ } else {
386
+ while (begin + 1 < end) {
387
+ const std::size_t middle = (begin + end) / 2;
388
+ if (i < (middle * 512) - ranks_[middle].abs()) {
389
+ end = middle;
390
+ } else {
391
+ begin = middle;
392
+ }
393
+ }
394
+ }
395
+ const std::size_t rank_id = begin;
396
+ i -= (rank_id * 512) - ranks_[rank_id].abs();
397
+
398
+ const RankIndex &rank = ranks_[rank_id];
399
+ std::size_t unit_id = rank_id * 8;
400
+ if (i < (256U - rank.rel4())) {
401
+ if (i < (128U - rank.rel2())) {
402
+ if (i >= (64U - rank.rel1())) {
403
+ unit_id += 1;
404
+ i -= 64 - rank.rel1();
405
+ }
406
+ } else if (i < (192U - rank.rel3())) {
407
+ unit_id += 2;
408
+ i -= 128 - rank.rel2();
409
+ } else {
410
+ unit_id += 3;
411
+ i -= 192 - rank.rel3();
412
+ }
413
+ } else if (i < (384U - rank.rel6())) {
414
+ if (i < (320U - rank.rel5())) {
415
+ unit_id += 4;
416
+ i -= 256 - rank.rel4();
417
+ } else {
418
+ unit_id += 5;
419
+ i -= 320 - rank.rel5();
420
+ }
421
+ } else if (i < (448U - rank.rel7())) {
422
+ unit_id += 6;
423
+ i -= 384 - rank.rel6();
424
+ } else {
425
+ unit_id += 7;
426
+ i -= 448 - rank.rel7();
427
+ }
428
+
429
+ return select_bit(i, unit_id * 64, ~units_[unit_id]);
430
+ }
431
+
432
+ std::size_t BitVector::select1(std::size_t i) const {
433
+ MARISA_DEBUG_IF(select1s_.empty(), MARISA_STATE_ERROR);
434
+ MARISA_DEBUG_IF(i >= num_1s(), MARISA_BOUND_ERROR);
435
+
436
+ const std::size_t select_id = i / 512;
437
+ MARISA_DEBUG_IF((select_id + 1) >= select1s_.size(), MARISA_BOUND_ERROR);
438
+ if ((i % 512) == 0) {
439
+ return select1s_[select_id];
440
+ }
441
+ std::size_t begin = select1s_[select_id] / 512;
442
+ std::size_t end = (select1s_[select_id + 1] + 511) / 512;
443
+ if (begin + 10 >= end) {
444
+ while (i >= ranks_[begin + 1].abs()) {
445
+ ++begin;
446
+ }
447
+ } else {
448
+ while (begin + 1 < end) {
449
+ const std::size_t middle = (begin + end) / 2;
450
+ if (i < ranks_[middle].abs()) {
451
+ end = middle;
452
+ } else {
453
+ begin = middle;
454
+ }
455
+ }
456
+ }
457
+ const std::size_t rank_id = begin;
458
+ i -= ranks_[rank_id].abs();
459
+
460
+ const RankIndex &rank = ranks_[rank_id];
461
+ std::size_t unit_id = rank_id * 8;
462
+ if (i < rank.rel4()) {
463
+ if (i < rank.rel2()) {
464
+ if (i >= rank.rel1()) {
465
+ unit_id += 1;
466
+ i -= rank.rel1();
467
+ }
468
+ } else if (i < rank.rel3()) {
469
+ unit_id += 2;
470
+ i -= rank.rel2();
471
+ } else {
472
+ unit_id += 3;
473
+ i -= rank.rel3();
474
+ }
475
+ } else if (i < rank.rel6()) {
476
+ if (i < rank.rel5()) {
477
+ unit_id += 4;
478
+ i -= rank.rel4();
479
+ } else {
480
+ unit_id += 5;
481
+ i -= rank.rel5();
482
+ }
483
+ } else if (i < rank.rel7()) {
484
+ unit_id += 6;
485
+ i -= rank.rel6();
486
+ } else {
487
+ unit_id += 7;
488
+ i -= rank.rel7();
489
+ }
490
+
491
+ return select_bit(i, unit_id * 64, units_[unit_id]);
492
+ }
493
+
494
+ #else // MARISA_WORD_SIZE == 64
495
+
496
+ std::size_t BitVector::rank1(std::size_t i) const {
497
+ MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR);
498
+ MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR);
499
+
500
+ const RankIndex &rank = ranks_[i / 512];
501
+ std::size_t offset = rank.abs();
502
+ switch ((i / 64) % 8) {
503
+ case 1: {
504
+ offset += rank.rel1();
505
+ break;
506
+ }
507
+ case 2: {
508
+ offset += rank.rel2();
509
+ break;
510
+ }
511
+ case 3: {
512
+ offset += rank.rel3();
513
+ break;
514
+ }
515
+ case 4: {
516
+ offset += rank.rel4();
517
+ break;
518
+ }
519
+ case 5: {
520
+ offset += rank.rel5();
521
+ break;
522
+ }
523
+ case 6: {
524
+ offset += rank.rel6();
525
+ break;
526
+ }
527
+ case 7: {
528
+ offset += rank.rel7();
529
+ break;
530
+ }
531
+ }
532
+ if (((i / 32) & 1) == 1) {
533
+ offset += PopCount::count(units_[(i / 32) - 1]);
534
+ }
535
+ offset += PopCount::count(units_[i / 32] & ((1U << (i % 32)) - 1));
536
+ return offset;
537
+ }
538
+
539
+ std::size_t BitVector::select0(std::size_t i) const {
540
+ MARISA_DEBUG_IF(select0s_.empty(), MARISA_STATE_ERROR);
541
+ MARISA_DEBUG_IF(i >= num_0s(), MARISA_BOUND_ERROR);
542
+
543
+ const std::size_t select_id = i / 512;
544
+ MARISA_DEBUG_IF((select_id + 1) >= select0s_.size(), MARISA_BOUND_ERROR);
545
+ if ((i % 512) == 0) {
546
+ return select0s_[select_id];
547
+ }
548
+ std::size_t begin = select0s_[select_id] / 512;
549
+ std::size_t end = (select0s_[select_id + 1] + 511) / 512;
550
+ if (begin + 10 >= end) {
551
+ while (i >= ((begin + 1) * 512) - ranks_[begin + 1].abs()) {
552
+ ++begin;
553
+ }
554
+ } else {
555
+ while (begin + 1 < end) {
556
+ const std::size_t middle = (begin + end) / 2;
557
+ if (i < (middle * 512) - ranks_[middle].abs()) {
558
+ end = middle;
559
+ } else {
560
+ begin = middle;
561
+ }
562
+ }
563
+ }
564
+ const std::size_t rank_id = begin;
565
+ i -= (rank_id * 512) - ranks_[rank_id].abs();
566
+
567
+ const RankIndex &rank = ranks_[rank_id];
568
+ std::size_t unit_id = rank_id * 16;
569
+ if (i < (256U - rank.rel4())) {
570
+ if (i < (128U - rank.rel2())) {
571
+ if (i >= (64U - rank.rel1())) {
572
+ unit_id += 2;
573
+ i -= 64 - rank.rel1();
574
+ }
575
+ } else if (i < (192U - rank.rel3())) {
576
+ unit_id += 4;
577
+ i -= 128 - rank.rel2();
578
+ } else {
579
+ unit_id += 6;
580
+ i -= 192 - rank.rel3();
581
+ }
582
+ } else if (i < (384U - rank.rel6())) {
583
+ if (i < (320U - rank.rel5())) {
584
+ unit_id += 8;
585
+ i -= 256 - rank.rel4();
586
+ } else {
587
+ unit_id += 10;
588
+ i -= 320 - rank.rel5();
589
+ }
590
+ } else if (i < (448U - rank.rel7())) {
591
+ unit_id += 12;
592
+ i -= 384 - rank.rel6();
593
+ } else {
594
+ unit_id += 14;
595
+ i -= 448 - rank.rel7();
596
+ }
597
+
598
+ #ifdef MARISA_USE_SSE2
599
+ return select_bit(i, unit_id * 32, ~units_[unit_id], ~units_[unit_id + 1]);
600
+ #else // MARISA_USE_SSE2
601
+ UInt32 unit = ~units_[unit_id];
602
+ PopCount count(unit);
603
+ if (i >= count.lo32()) {
604
+ ++unit_id;
605
+ i -= count.lo32();
606
+ unit = ~units_[unit_id];
607
+ count = PopCount(unit);
608
+ }
609
+
610
+ std::size_t bit_id = unit_id * 32;
611
+ if (i < count.lo16()) {
612
+ if (i >= count.lo8()) {
613
+ bit_id += 8;
614
+ unit >>= 8;
615
+ i -= count.lo8();
616
+ }
617
+ } else if (i < count.lo24()) {
618
+ bit_id += 16;
619
+ unit >>= 16;
620
+ i -= count.lo16();
621
+ } else {
622
+ bit_id += 24;
623
+ unit >>= 24;
624
+ i -= count.lo24();
625
+ }
626
+ return bit_id + SELECT_TABLE[i][unit & 0xFF];
627
+ #endif // MARISA_USE_SSE2
628
+ }
629
+
630
+ std::size_t BitVector::select1(std::size_t i) const {
631
+ MARISA_DEBUG_IF(select1s_.empty(), MARISA_STATE_ERROR);
632
+ MARISA_DEBUG_IF(i >= num_1s(), MARISA_BOUND_ERROR);
633
+
634
+ const std::size_t select_id = i / 512;
635
+ MARISA_DEBUG_IF((select_id + 1) >= select1s_.size(), MARISA_BOUND_ERROR);
636
+ if ((i % 512) == 0) {
637
+ return select1s_[select_id];
638
+ }
639
+ std::size_t begin = select1s_[select_id] / 512;
640
+ std::size_t end = (select1s_[select_id + 1] + 511) / 512;
641
+ if (begin + 10 >= end) {
642
+ while (i >= ranks_[begin + 1].abs()) {
643
+ ++begin;
644
+ }
645
+ } else {
646
+ while (begin + 1 < end) {
647
+ const std::size_t middle = (begin + end) / 2;
648
+ if (i < ranks_[middle].abs()) {
649
+ end = middle;
650
+ } else {
651
+ begin = middle;
652
+ }
653
+ }
654
+ }
655
+ const std::size_t rank_id = begin;
656
+ i -= ranks_[rank_id].abs();
657
+
658
+ const RankIndex &rank = ranks_[rank_id];
659
+ std::size_t unit_id = rank_id * 16;
660
+ if (i < rank.rel4()) {
661
+ if (i < rank.rel2()) {
662
+ if (i >= rank.rel1()) {
663
+ unit_id += 2;
664
+ i -= rank.rel1();
665
+ }
666
+ } else if (i < rank.rel3()) {
667
+ unit_id += 4;
668
+ i -= rank.rel2();
669
+ } else {
670
+ unit_id += 6;
671
+ i -= rank.rel3();
672
+ }
673
+ } else if (i < rank.rel6()) {
674
+ if (i < rank.rel5()) {
675
+ unit_id += 8;
676
+ i -= rank.rel4();
677
+ } else {
678
+ unit_id += 10;
679
+ i -= rank.rel5();
680
+ }
681
+ } else if (i < rank.rel7()) {
682
+ unit_id += 12;
683
+ i -= rank.rel6();
684
+ } else {
685
+ unit_id += 14;
686
+ i -= rank.rel7();
687
+ }
688
+
689
+ #ifdef MARISA_USE_SSE2
690
+ return select_bit(i, unit_id * 32, units_[unit_id], units_[unit_id + 1]);
691
+ #else // MARISA_USE_SSE2
692
+ UInt32 unit = units_[unit_id];
693
+ PopCount count(unit);
694
+ if (i >= count.lo32()) {
695
+ ++unit_id;
696
+ i -= count.lo32();
697
+ unit = units_[unit_id];
698
+ count = PopCount(unit);
699
+ }
700
+
701
+ std::size_t bit_id = unit_id * 32;
702
+ if (i < count.lo16()) {
703
+ if (i >= count.lo8()) {
704
+ bit_id += 8;
705
+ unit >>= 8;
706
+ i -= count.lo8();
707
+ }
708
+ } else if (i < count.lo24()) {
709
+ bit_id += 16;
710
+ unit >>= 16;
711
+ i -= count.lo16();
712
+ } else {
713
+ bit_id += 24;
714
+ unit >>= 24;
715
+ i -= count.lo24();
716
+ }
717
+ return bit_id + SELECT_TABLE[i][unit & 0xFF];
718
+ #endif // MARISA_USE_SSE2
719
+ }
720
+
721
+ #endif // MARISA_WORD_SIZE == 64
722
+
723
+ void BitVector::build_index(const BitVector &bv,
724
+ bool enables_select0, bool enables_select1) {
725
+ ranks_.resize((bv.size() / 512) + (((bv.size() % 512) != 0) ? 1 : 0) + 1);
726
+
727
+ std::size_t num_0s = 0;
728
+ std::size_t num_1s = 0;
729
+
730
+ for (std::size_t i = 0; i < bv.size(); ++i) {
731
+ if ((i % 64) == 0) {
732
+ const std::size_t rank_id = i / 512;
733
+ switch ((i / 64) % 8) {
734
+ case 0: {
735
+ ranks_[rank_id].set_abs(num_1s);
736
+ break;
737
+ }
738
+ case 1: {
739
+ ranks_[rank_id].set_rel1(num_1s - ranks_[rank_id].abs());
740
+ break;
741
+ }
742
+ case 2: {
743
+ ranks_[rank_id].set_rel2(num_1s - ranks_[rank_id].abs());
744
+ break;
745
+ }
746
+ case 3: {
747
+ ranks_[rank_id].set_rel3(num_1s - ranks_[rank_id].abs());
748
+ break;
749
+ }
750
+ case 4: {
751
+ ranks_[rank_id].set_rel4(num_1s - ranks_[rank_id].abs());
752
+ break;
753
+ }
754
+ case 5: {
755
+ ranks_[rank_id].set_rel5(num_1s - ranks_[rank_id].abs());
756
+ break;
757
+ }
758
+ case 6: {
759
+ ranks_[rank_id].set_rel6(num_1s - ranks_[rank_id].abs());
760
+ break;
761
+ }
762
+ case 7: {
763
+ ranks_[rank_id].set_rel7(num_1s - ranks_[rank_id].abs());
764
+ break;
765
+ }
766
+ }
767
+ }
768
+
769
+ if (bv[i]) {
770
+ if (enables_select1 && ((num_1s % 512) == 0)) {
771
+ select1s_.push_back(i);
772
+ }
773
+ ++num_1s;
774
+ } else {
775
+ if (enables_select0 && ((num_0s % 512) == 0)) {
776
+ select0s_.push_back(i);
777
+ }
778
+ ++num_0s;
779
+ }
780
+ }
781
+
782
+ if ((bv.size() % 512) != 0) {
783
+ const std::size_t rank_id = (bv.size() - 1) / 512;
784
+ switch (((bv.size() - 1) / 64) % 8) {
785
+ case 0: {
786
+ ranks_[rank_id].set_rel1(num_1s - ranks_[rank_id].abs());
787
+ }
788
+ case 1: {
789
+ ranks_[rank_id].set_rel2(num_1s - ranks_[rank_id].abs());
790
+ }
791
+ case 2: {
792
+ ranks_[rank_id].set_rel3(num_1s - ranks_[rank_id].abs());
793
+ }
794
+ case 3: {
795
+ ranks_[rank_id].set_rel4(num_1s - ranks_[rank_id].abs());
796
+ }
797
+ case 4: {
798
+ ranks_[rank_id].set_rel5(num_1s - ranks_[rank_id].abs());
799
+ }
800
+ case 5: {
801
+ ranks_[rank_id].set_rel6(num_1s - ranks_[rank_id].abs());
802
+ }
803
+ case 6: {
804
+ ranks_[rank_id].set_rel7(num_1s - ranks_[rank_id].abs());
805
+ break;
806
+ }
807
+ }
808
+ }
809
+
810
+ size_ = bv.size();
811
+ num_1s_ = bv.num_1s();
812
+
813
+ ranks_.back().set_abs(num_1s);
814
+ if (enables_select0) {
815
+ select0s_.push_back(bv.size());
816
+ select0s_.shrink();
817
+ }
818
+ if (enables_select1) {
819
+ select1s_.push_back(bv.size());
820
+ select1s_.shrink();
821
+ }
822
+ }
823
+
824
+ } // namespace vector
825
+ } // namespace grimoire
826
+ } // namespace marisa