melisa 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
@@ -0,0 +1,134 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_
2
+ #define MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_
3
+
4
+ #include "marisa/keyset.h"
5
+ #include "marisa/agent.h"
6
+ #include "marisa/grimoire/vector.h"
7
+ #include "marisa/grimoire/trie/config.h"
8
+ #include "marisa/grimoire/trie/key.h"
9
+ #include "marisa/grimoire/trie/tail.h"
10
+ #include "marisa/grimoire/trie/cache.h"
11
+
12
+ namespace marisa {
13
+ namespace grimoire {
14
+ namespace trie {
15
+
16
+ class LoudsTrie {
17
+ public:
18
+ LoudsTrie();
19
+ ~LoudsTrie();
20
+
21
+ void build(Keyset &keyset, int flags);
22
+
23
+ void map(Mapper &mapper);
24
+ void read(Reader &reader);
25
+ void write(Writer &writer) const;
26
+
27
+ bool lookup(Agent &agent) const;
28
+ void reverse_lookup(Agent &agent) const;
29
+ bool common_prefix_search(Agent &agent) const;
30
+ bool predictive_search(Agent &agent) const;
31
+
32
+ std::size_t num_tries() const {
33
+ return config_.num_tries();
34
+ }
35
+ std::size_t num_keys() const {
36
+ return size();
37
+ }
38
+ std::size_t num_nodes() const {
39
+ return (louds_.size() / 2) - 1;
40
+ }
41
+
42
+ CacheLevel cache_level() const {
43
+ return config_.cache_level();
44
+ }
45
+ TailMode tail_mode() const {
46
+ return config_.tail_mode();
47
+ }
48
+ NodeOrder node_order() const {
49
+ return config_.node_order();
50
+ }
51
+
52
+ bool empty() const {
53
+ return size() == 0;
54
+ }
55
+ std::size_t size() const {
56
+ return terminal_flags_.num_1s();
57
+ }
58
+ std::size_t total_size() const;
59
+ std::size_t io_size() const;
60
+
61
+ void clear();
62
+ void swap(LoudsTrie &rhs);
63
+
64
+ private:
65
+ BitVector louds_;
66
+ BitVector terminal_flags_;
67
+ BitVector link_flags_;
68
+ Vector<UInt8> bases_;
69
+ FlatVector extras_;
70
+ Tail tail_;
71
+ scoped_ptr<LoudsTrie> next_trie_;
72
+ Vector<Cache> cache_;
73
+ std::size_t cache_mask_;
74
+ std::size_t num_l1_nodes_;
75
+ Config config_;
76
+ Mapper mapper_;
77
+
78
+ void build_(Keyset &keyset, const Config &config);
79
+
80
+ template <typename T>
81
+ void build_trie(Vector<T> &keys,
82
+ Vector<UInt32> *terminals, const Config &config, std::size_t trie_id);
83
+ template <typename T>
84
+ void build_current_trie(Vector<T> &keys,
85
+ Vector<UInt32> *terminals, const Config &config, std::size_t trie_id);
86
+ template <typename T>
87
+ void build_next_trie(Vector<T> &keys,
88
+ Vector<UInt32> *terminals, const Config &config, std::size_t trie_id);
89
+ template <typename T>
90
+ void build_terminals(const Vector<T> &keys,
91
+ Vector<UInt32> *terminals) const;
92
+
93
+ void reserve_cache(const Config &config, std::size_t trie_id,
94
+ std::size_t num_keys);
95
+ template <typename T>
96
+ void cache(std::size_t parent, std::size_t child,
97
+ float weight, char label);
98
+ void fill_cache();
99
+
100
+ void map_(Mapper &mapper);
101
+ void read_(Reader &reader);
102
+ void write_(Writer &writer) const;
103
+
104
+ inline bool find_child(Agent &agent) const;
105
+ inline bool predictive_find_child(Agent &agent) const;
106
+
107
+ inline void restore(Agent &agent, std::size_t node_id) const;
108
+ inline bool match(Agent &agent, std::size_t node_id) const;
109
+ inline bool prefix_match(Agent &agent, std::size_t node_id) const;
110
+
111
+ void restore_(Agent &agent, std::size_t node_id) const;
112
+ bool match_(Agent &agent, std::size_t node_id) const;
113
+ bool prefix_match_(Agent &agent, std::size_t node_id) const;
114
+
115
+ inline std::size_t get_cache_id(std::size_t node_id, char label) const;
116
+ inline std::size_t get_cache_id(std::size_t node_id) const;
117
+
118
+ inline std::size_t get_link(std::size_t node_id) const;
119
+ inline std::size_t get_link(std::size_t node_id,
120
+ std::size_t link_id) const;
121
+
122
+ inline std::size_t update_link_id(std::size_t link_id,
123
+ std::size_t node_id) const;
124
+
125
+ // Disallows copy and assignment.
126
+ LoudsTrie(const LoudsTrie &);
127
+ LoudsTrie &operator=(const LoudsTrie &);
128
+ };
129
+
130
+ } // namespace trie
131
+ } // namespace grimoire
132
+ } // namespace marisa
133
+
134
+ #endif // MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_
@@ -0,0 +1,115 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_RANGE_H_
2
+ #define MARISA_GRIMOIRE_TRIE_RANGE_H_
3
+
4
+ #include "marisa/base.h"
5
+
6
+ namespace marisa {
7
+ namespace grimoire {
8
+ namespace trie {
9
+
10
+ class Range {
11
+ public:
12
+ Range() : begin_(0), end_(0), key_pos_(0) {}
13
+
14
+ void set_begin(std::size_t begin) {
15
+ MARISA_DEBUG_IF(begin > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
16
+ begin_ = begin;
17
+ }
18
+ void set_end(std::size_t end) {
19
+ MARISA_DEBUG_IF(end > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
20
+ end_ = end;
21
+ }
22
+ void set_key_pos(std::size_t key_pos) {
23
+ MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
24
+ key_pos_ = key_pos;
25
+ }
26
+
27
+ std::size_t begin() const {
28
+ return begin_;
29
+ }
30
+ std::size_t end() const {
31
+ return end_;
32
+ }
33
+ std::size_t key_pos() const {
34
+ return key_pos_;
35
+ }
36
+
37
+ private:
38
+ UInt32 begin_;
39
+ UInt32 end_;
40
+ UInt32 key_pos_;
41
+ };
42
+
43
+ inline Range make_range(std::size_t begin, std::size_t end,
44
+ std::size_t key_pos) {
45
+ Range range;
46
+ range.set_begin(begin);
47
+ range.set_end(end);
48
+ range.set_key_pos(key_pos);
49
+ return range;
50
+ }
51
+
52
+ class WeightedRange {
53
+ public:
54
+ WeightedRange() : range_(), weight_(0.0F) {}
55
+
56
+ void set_range(const Range &range) {
57
+ range_ = range;
58
+ }
59
+ void set_begin(std::size_t begin) {
60
+ range_.set_begin(begin);
61
+ }
62
+ void set_end(std::size_t end) {
63
+ range_.set_end(end);
64
+ }
65
+ void set_key_pos(std::size_t key_pos) {
66
+ range_.set_key_pos(key_pos);
67
+ }
68
+ void set_weight(float weight) {
69
+ weight_ = weight;
70
+ }
71
+
72
+ const Range &range() const {
73
+ return range_;
74
+ }
75
+ std::size_t begin() const {
76
+ return range_.begin();
77
+ }
78
+ std::size_t end() const {
79
+ return range_.end();
80
+ }
81
+ std::size_t key_pos() const {
82
+ return range_.key_pos();
83
+ }
84
+ float weight() const {
85
+ return weight_;
86
+ }
87
+
88
+ private:
89
+ Range range_;
90
+ float weight_;
91
+ };
92
+
93
+ inline bool operator<(const WeightedRange &lhs, const WeightedRange &rhs) {
94
+ return lhs.weight() < rhs.weight();
95
+ }
96
+
97
+ inline bool operator>(const WeightedRange &lhs, const WeightedRange &rhs) {
98
+ return lhs.weight() > rhs.weight();
99
+ }
100
+
101
+ inline WeightedRange make_weighted_range(std::size_t begin, std::size_t end,
102
+ std::size_t key_pos, float weight) {
103
+ WeightedRange range;
104
+ range.set_begin(begin);
105
+ range.set_end(end);
106
+ range.set_key_pos(key_pos);
107
+ range.set_weight(weight);
108
+ return range;
109
+ }
110
+
111
+ } // namespace trie
112
+ } // namespace grimoire
113
+ } // namespace marisa
114
+
115
+ #endif // MARISA_GRIMOIRE_TRIE_RANGE_H_
@@ -0,0 +1,117 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_STATE_H_
2
+ #define MARISA_GRIMOIRE_TRIE_STATE_H_
3
+
4
+ #include "marisa/grimoire/vector.h"
5
+ #include "marisa/grimoire/trie/history.h"
6
+
7
+ namespace marisa {
8
+ namespace grimoire {
9
+ namespace trie {
10
+
11
// A search agent has an internal state; the status codes it can take are
// defined below. The enumerators keep their implicit values 0..4.
// Note: no comma after the last enumerator, because a trailing comma in an
// enumerator list is only accepted from C++11 onwards.
typedef enum StatusCode {
  MARISA_READY_TO_ALL,
  MARISA_READY_TO_COMMON_PREFIX_SEARCH,
  MARISA_READY_TO_PREDICTIVE_SEARCH,
  MARISA_END_OF_COMMON_PREFIX_SEARCH,
  MARISA_END_OF_PREDICTIVE_SEARCH
} StatusCode;
20
+
21
+ class State {
22
+ public:
23
+ State()
24
+ : key_buf_(), history_(), node_id_(0), query_pos_(0),
25
+ history_pos_(0), status_code_(MARISA_READY_TO_ALL) {}
26
+
27
+ void set_node_id(std::size_t node_id) {
28
+ MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
29
+ node_id_ = (UInt32)node_id;
30
+ }
31
+ void set_query_pos(std::size_t query_pos) {
32
+ MARISA_DEBUG_IF(query_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
33
+ query_pos_ = (UInt32)query_pos;
34
+ }
35
+ void set_history_pos(std::size_t history_pos) {
36
+ MARISA_DEBUG_IF(history_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
37
+ history_pos_ = (UInt32)history_pos;
38
+ }
39
+ void set_status_code(StatusCode status_code) {
40
+ status_code_ = status_code;
41
+ }
42
+
43
+ std::size_t node_id() const {
44
+ return node_id_;
45
+ }
46
+ std::size_t query_pos() const {
47
+ return query_pos_;
48
+ }
49
+ std::size_t history_pos() const {
50
+ return history_pos_;
51
+ }
52
+ StatusCode status_code() const {
53
+ return status_code_;
54
+ }
55
+
56
+ const Vector<char> &key_buf() const {
57
+ return key_buf_;
58
+ }
59
+ const Vector<History> &history() const {
60
+ return history_;
61
+ }
62
+
63
+ Vector<char> &key_buf() {
64
+ return key_buf_;
65
+ }
66
+ Vector<History> &history() {
67
+ return history_;
68
+ }
69
+
70
+ void reset() {
71
+ status_code_ = MARISA_READY_TO_ALL;
72
+ }
73
+
74
+ void lookup_init() {
75
+ node_id_ = 0;
76
+ query_pos_ = 0;
77
+ status_code_ = MARISA_READY_TO_ALL;
78
+ }
79
+ void reverse_lookup_init() {
80
+ key_buf_.resize(0);
81
+ key_buf_.reserve(32);
82
+ status_code_ = MARISA_READY_TO_ALL;
83
+ }
84
+ void common_prefix_search_init() {
85
+ node_id_ = 0;
86
+ query_pos_ = 0;
87
+ status_code_ = MARISA_READY_TO_COMMON_PREFIX_SEARCH;
88
+ }
89
+ void predictive_search_init() {
90
+ key_buf_.resize(0);
91
+ key_buf_.reserve(64);
92
+ history_.resize(0);
93
+ history_.reserve(4);
94
+ node_id_ = 0;
95
+ query_pos_ = 0;
96
+ history_pos_ = 0;
97
+ status_code_ = MARISA_READY_TO_PREDICTIVE_SEARCH;
98
+ }
99
+
100
+ private:
101
+ Vector<char> key_buf_;
102
+ Vector<History> history_;
103
+ UInt32 node_id_;
104
+ UInt32 query_pos_;
105
+ UInt32 history_pos_;
106
+ StatusCode status_code_;
107
+
108
+ // Disallows copy and assignment.
109
+ State(const State &);
110
+ State &operator=(const State &);
111
+ };
112
+
113
+ } // namespace trie
114
+ } // namespace grimoire
115
+ } // namespace marisa
116
+
117
+ #endif // MARISA_GRIMOIRE_TRIE_STATE_H_
@@ -0,0 +1,218 @@
1
+ #include "marisa/grimoire/algorithm.h"
2
+ #include "marisa/grimoire/trie/state.h"
3
+ #include "marisa/grimoire/trie/tail.h"
4
+
5
+ namespace marisa {
6
+ namespace grimoire {
7
+ namespace trie {
8
+
9
+ Tail::Tail() : buf_(), end_flags_() {}
10
+
11
+ void Tail::build(Vector<Entry> &entries, Vector<UInt32> *offsets,
12
+ TailMode mode) {
13
+ MARISA_THROW_IF(offsets == NULL, MARISA_NULL_ERROR);
14
+
15
+ switch (mode) {
16
+ case MARISA_TEXT_TAIL: {
17
+ for (std::size_t i = 0; i < entries.size(); ++i) {
18
+ const char * const ptr = entries[i].ptr();
19
+ const std::size_t length = entries[i].length();
20
+ for (std::size_t j = 0; j < length; ++j) {
21
+ if (ptr[j] == '\0') {
22
+ mode = MARISA_BINARY_TAIL;
23
+ break;
24
+ }
25
+ }
26
+ if (mode == MARISA_BINARY_TAIL) {
27
+ break;
28
+ }
29
+ }
30
+ break;
31
+ }
32
+ case MARISA_BINARY_TAIL: {
33
+ break;
34
+ }
35
+ default: {
36
+ MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode");
37
+ }
38
+ }
39
+
40
+ Tail temp;
41
+ temp.build_(entries, offsets, mode);
42
+ swap(temp);
43
+ }
44
+
45
+ void Tail::map(Mapper &mapper) {
46
+ Tail temp;
47
+ temp.map_(mapper);
48
+ swap(temp);
49
+ }
50
+
51
+ void Tail::read(Reader &reader) {
52
+ Tail temp;
53
+ temp.read_(reader);
54
+ swap(temp);
55
+ }
56
+
57
+ void Tail::write(Writer &writer) const {
58
+ write_(writer);
59
+ }
60
+
61
+ void Tail::restore(Agent &agent, std::size_t offset) const {
62
+ MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
63
+
64
+ State &state = agent.state();
65
+ if (end_flags_.empty()) {
66
+ for (const char *ptr = &buf_[offset]; *ptr != '\0'; ++ptr) {
67
+ state.key_buf().push_back(*ptr);
68
+ }
69
+ } else {
70
+ do {
71
+ state.key_buf().push_back(buf_[offset]);
72
+ } while (!end_flags_[offset++]);
73
+ }
74
+ }
75
+
76
+ bool Tail::match(Agent &agent, std::size_t offset) const {
77
+ MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
78
+ MARISA_DEBUG_IF(agent.state().query_pos() >= agent.query().length(),
79
+ MARISA_BOUND_ERROR);
80
+
81
+ State &state = agent.state();
82
+ if (end_flags_.empty()) {
83
+ const char * const ptr = &buf_[offset] - state.query_pos();
84
+ do {
85
+ if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
86
+ return false;
87
+ }
88
+ state.set_query_pos(state.query_pos() + 1);
89
+ if (ptr[state.query_pos()] == '\0') {
90
+ return true;
91
+ }
92
+ } while (state.query_pos() < agent.query().length());
93
+ return false;
94
+ } else {
95
+ do {
96
+ if (buf_[offset] != agent.query()[state.query_pos()]) {
97
+ return false;
98
+ }
99
+ state.set_query_pos(state.query_pos() + 1);
100
+ if (end_flags_[offset++]) {
101
+ return true;
102
+ }
103
+ } while (state.query_pos() < agent.query().length());
104
+ return false;
105
+ }
106
+ }
107
+
108
+ bool Tail::prefix_match(Agent &agent, std::size_t offset) const {
109
+ MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
110
+
111
+ State &state = agent.state();
112
+ if (end_flags_.empty()) {
113
+ const char *ptr = &buf_[offset] - state.query_pos();
114
+ do {
115
+ if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
116
+ return false;
117
+ }
118
+ state.key_buf().push_back(ptr[state.query_pos()]);
119
+ state.set_query_pos(state.query_pos() + 1);
120
+ if (ptr[state.query_pos()] == '\0') {
121
+ return true;
122
+ }
123
+ } while (state.query_pos() < agent.query().length());
124
+ ptr += state.query_pos();
125
+ do {
126
+ state.key_buf().push_back(*ptr);
127
+ } while (*++ptr != '\0');
128
+ return true;
129
+ } else {
130
+ do {
131
+ if (buf_[offset] != agent.query()[state.query_pos()]) {
132
+ return false;
133
+ }
134
+ state.key_buf().push_back(buf_[offset]);
135
+ state.set_query_pos(state.query_pos() + 1);
136
+ if (end_flags_[offset++]) {
137
+ return true;
138
+ }
139
+ } while (state.query_pos() < agent.query().length());
140
+ do {
141
+ state.key_buf().push_back(buf_[offset]);
142
+ } while (!end_flags_[offset++]);
143
+ return true;
144
+ }
145
+ }
146
+
147
+ void Tail::clear() {
148
+ Tail().swap(*this);
149
+ }
150
+
151
+ void Tail::swap(Tail &rhs) {
152
+ buf_.swap(rhs.buf_);
153
+ end_flags_.swap(rhs.end_flags_);
154
+ }
155
+
156
+ void Tail::build_(Vector<Entry> &entries, Vector<UInt32> *offsets,
157
+ TailMode mode) {
158
+ for (std::size_t i = 0; i < entries.size(); ++i) {
159
+ entries[i].set_id(i);
160
+ }
161
+ Algorithm().sort(entries.begin(), entries.end());
162
+
163
+ Vector<UInt32> temp_offsets;
164
+ temp_offsets.resize(entries.size(), 0);
165
+
166
+ const Entry dummy;
167
+ const Entry *last = &dummy;
168
+ for (std::size_t i = entries.size(); i > 0; --i) {
169
+ const Entry &current = entries[i - 1];
170
+ MARISA_THROW_IF(current.length() == 0, MARISA_RANGE_ERROR);
171
+ std::size_t match = 0;
172
+ while ((match < current.length()) && (match < last->length()) &&
173
+ ((*last)[match] == current[match])) {
174
+ ++match;
175
+ }
176
+ if ((match == current.length()) && (last->length() != 0)) {
177
+ temp_offsets[current.id()] = (UInt32)(
178
+ temp_offsets[last->id()] + (last->length() - match));
179
+ } else {
180
+ temp_offsets[current.id()] = (UInt32)buf_.size();
181
+ for (std::size_t j = 1; j <= current.length(); ++j) {
182
+ buf_.push_back(current[current.length() - j]);
183
+ }
184
+ if (mode == MARISA_TEXT_TAIL) {
185
+ buf_.push_back('\0');
186
+ } else {
187
+ for (std::size_t j = 1; j < current.length(); ++j) {
188
+ end_flags_.push_back(false);
189
+ }
190
+ end_flags_.push_back(true);
191
+ }
192
+ MARISA_THROW_IF(buf_.size() > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
193
+ }
194
+ last = &current;
195
+ }
196
+ buf_.shrink();
197
+
198
+ offsets->swap(temp_offsets);
199
+ }
200
+
201
+ void Tail::map_(Mapper &mapper) {
202
+ buf_.map(mapper);
203
+ end_flags_.map(mapper);
204
+ }
205
+
206
+ void Tail::read_(Reader &reader) {
207
+ buf_.read(reader);
208
+ end_flags_.read(reader);
209
+ }
210
+
211
+ void Tail::write_(Writer &writer) const {
212
+ buf_.write(writer);
213
+ end_flags_.write(writer);
214
+ }
215
+
216
+ } // namespace trie
217
+ } // namespace grimoire
218
+ } // namespace marisa