melisa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
@@ -0,0 +1,134 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_
2
+ #define MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_
3
+
4
+ #include "marisa/keyset.h"
5
+ #include "marisa/agent.h"
6
+ #include "marisa/grimoire/vector.h"
7
+ #include "marisa/grimoire/trie/config.h"
8
+ #include "marisa/grimoire/trie/key.h"
9
+ #include "marisa/grimoire/trie/tail.h"
10
+ #include "marisa/grimoire/trie/cache.h"
11
+
12
+ namespace marisa {
13
+ namespace grimoire {
14
+ namespace trie {
15
+
16
+ class LoudsTrie {  // Trie holding a LOUDS bit vector, terminal/link flag vectors, a Tail, a cache, and an optional chained next_trie_.
17
+ public:
18
+ LoudsTrie();
19
+ ~LoudsTrie();
20
+
21
+ void build(Keyset &keyset, int flags);  // Public build entry point; definition is outside this header.
22
+
23
+ void map(Mapper &mapper);  // Loads the trie through a Mapper.
24
+ void read(Reader &reader);  // Loads the trie through a Reader.
25
+ void write(Writer &writer) const;  // Saves the trie through a Writer.
26
+
27
+ bool lookup(Agent &agent) const;  // The four search entry points take the Agent by non-const reference;
28
+ void reverse_lookup(Agent &agent) const;  // presumably the query is read from it and results are stored in it
29
+ bool common_prefix_search(Agent &agent) const;  // (definitions are not visible in this header -- confirm in
30
+ bool predictive_search(Agent &agent) const;  // louds-trie.cc).
31
+
32
+ std::size_t num_tries() const {  // Forwarded from config_.
33
+ return config_.num_tries();
34
+ }
35
+ std::size_t num_keys() const {  // Alias of size().
36
+ return size();
37
+ }
38
+ std::size_t num_nodes() const {
39
+ return (louds_.size() / 2) - 1;  // NOTE(review): wraps around if louds_.size() is unsigned and < 2 (e.g. an unbuilt trie) -- confirm callers guard.
40
+ }
41
+
42
+ CacheLevel cache_level() const {  // The next three accessors forward to config_.
43
+ return config_.cache_level();
44
+ }
45
+ TailMode tail_mode() const {
46
+ return config_.tail_mode();
47
+ }
48
+ NodeOrder node_order() const {
49
+ return config_.node_order();
50
+ }
51
+
52
+ bool empty() const {  // True when size() is zero.
53
+ return size() == 0;
54
+ }
55
+ std::size_t size() const {  // Number of 1s in terminal_flags_; num_keys() reports the same value.
56
+ return terminal_flags_.num_1s();
57
+ }
58
+ std::size_t total_size() const;
59
+ std::size_t io_size() const;
60
+
61
+ void clear();
62
+ void swap(LoudsTrie &rhs);
63
+
64
+ private:
65
+ BitVector louds_;  // Its size drives num_nodes().
66
+ BitVector terminal_flags_;  // Its count of 1s is the key count (see size()).
67
+ BitVector link_flags_;
68
+ Vector<UInt8> bases_;
69
+ FlatVector extras_;
70
+ Tail tail_;
71
+ scoped_ptr<LoudsTrie> next_trie_;  // Owning pointer to another LoudsTrie (presumably the next level -- confirm).
72
+ Vector<Cache> cache_;
73
+ std::size_t cache_mask_;
74
+ std::size_t num_l1_nodes_;
75
+ Config config_;  // Source of num_tries / cache_level / tail_mode / node_order.
76
+ Mapper mapper_;
77
+
78
+ void build_(Keyset &keyset, const Config &config);
79
+
80
+ template <typename T>  // Build helpers are templated on the key element type T.
81
+ void build_trie(Vector<T> &keys,
82
+ Vector<UInt32> *terminals, const Config &config, std::size_t trie_id);
83
+ template <typename T>
84
+ void build_current_trie(Vector<T> &keys,
85
+ Vector<UInt32> *terminals, const Config &config, std::size_t trie_id);
86
+ template <typename T>
87
+ void build_next_trie(Vector<T> &keys,
88
+ Vector<UInt32> *terminals, const Config &config, std::size_t trie_id);
89
+ template <typename T>
90
+ void build_terminals(const Vector<T> &keys,
91
+ Vector<UInt32> *terminals) const;
92
+
93
+ void reserve_cache(const Config &config, std::size_t trie_id,
94
+ std::size_t num_keys);
95
+ template <typename T>  // T does not appear in the parameter list, so callers must name it explicitly.
96
+ void cache(std::size_t parent, std::size_t child,
97
+ float weight, char label);
98
+ void fill_cache();
99
+
100
+ void map_(Mapper &mapper);  // Underscore-suffixed counterparts of map/read/write.
101
+ void read_(Reader &reader);
102
+ void write_(Writer &writer) const;
103
+
104
+ inline bool find_child(Agent &agent) const;  // Declared inline but defined elsewhere; the definition must be visible at each use.
105
+ inline bool predictive_find_child(Agent &agent) const;
106
+
107
+ inline void restore(Agent &agent, std::size_t node_id) const;
108
+ inline bool match(Agent &agent, std::size_t node_id) const;
109
+ inline bool prefix_match(Agent &agent, std::size_t node_id) const;
110
+
111
+ void restore_(Agent &agent, std::size_t node_id) const;  // Non-inline counterparts of restore/match/prefix_match.
112
+ bool match_(Agent &agent, std::size_t node_id) const;
113
+ bool prefix_match_(Agent &agent, std::size_t node_id) const;
114
+
115
+ inline std::size_t get_cache_id(std::size_t node_id, char label) const;
116
+ inline std::size_t get_cache_id(std::size_t node_id) const;
117
+
118
+ inline std::size_t get_link(std::size_t node_id) const;
119
+ inline std::size_t get_link(std::size_t node_id,
120
+ std::size_t link_id) const;
121
+
122
+ inline std::size_t update_link_id(std::size_t link_id,
123
+ std::size_t node_id) const;
124
+
125
+ // Disallows copy and assignment.
126
+ LoudsTrie(const LoudsTrie &);  // Declared private and not defined here.
127
+ LoudsTrie &operator=(const LoudsTrie &);
128
+ };
129
+
130
+ } // namespace trie
131
+ } // namespace grimoire
132
+ } // namespace marisa
133
+
134
+ #endif // MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_
@@ -0,0 +1,115 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_RANGE_H_
2
+ #define MARISA_GRIMOIRE_TRIE_RANGE_H_
3
+
4
+ #include "marisa/base.h"
5
+
6
+ namespace marisa {
7
+ namespace grimoire {
8
+ namespace trie {
9
+
10
+ class Range {  // Half-open-style (begin, end) pair plus a key position, each stored as UInt32 and exposed as std::size_t.
11
+ public:
12
+ Range() : begin_(0), end_(0), key_pos_(0) {}  // All three fields start at zero.
13
+
14
+ void set_begin(std::size_t begin) {  // Stores begin; values above MARISA_UINT32_MAX are rejected by MARISA_DEBUG_IF.
15
+ MARISA_DEBUG_IF(begin > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
16
+ begin_ = (UInt32)begin;  // Explicit narrowing, matching the casts used by State's setters.
17
+ }
18
+ void set_end(std::size_t end) {  // Stores end with the same range check.
19
+ MARISA_DEBUG_IF(end > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
20
+ end_ = (UInt32)end;
21
+ }
22
+ void set_key_pos(std::size_t key_pos) {  // Stores key_pos with the same range check.
23
+ MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
24
+ key_pos_ = (UInt32)key_pos;
25
+ }
26
+
27
+ std::size_t begin() const {  // Getters widen the stored UInt32 back to std::size_t.
28
+ return begin_;
29
+ }
30
+ std::size_t end() const {
31
+ return end_;
32
+ }
33
+ std::size_t key_pos() const {
34
+ return key_pos_;
35
+ }
36
+
37
+ private:
38
+ UInt32 begin_;
39
+ UInt32 end_;
40
+ UInt32 key_pos_;
41
+ };
42
+
43
+ inline Range make_range(std::size_t begin, std::size_t end,  // Convenience factory: returns a Range with all three fields set.
44
+ std::size_t key_pos) {
45
+ Range range;
46
+ range.set_begin(begin);  // Each setter applies Range's own UInt32 range check.
47
+ range.set_end(end);
48
+ range.set_key_pos(key_pos);
49
+ return range;
50
+ }
51
+
52
+ class WeightedRange {  // A Range paired with a float weight; comparison operators below order by weight only.
53
+ public:
54
+ WeightedRange() : range_(), weight_(0.0F) {}  // Default Range and zero weight.
55
+
56
+ void set_range(const Range &range) {  // Replaces the whole embedded Range.
57
+ range_ = range;
58
+ }
59
+ void set_begin(std::size_t begin) {  // The next three setters forward to the embedded Range.
60
+ range_.set_begin(begin);
61
+ }
62
+ void set_end(std::size_t end) {
63
+ range_.set_end(end);
64
+ }
65
+ void set_key_pos(std::size_t key_pos) {
66
+ range_.set_key_pos(key_pos);
67
+ }
68
+ void set_weight(float weight) {
69
+ weight_ = weight;
70
+ }
71
+
72
+ const Range &range() const {  // Read-only access to the embedded Range.
73
+ return range_;
74
+ }
75
+ std::size_t begin() const {  // The next three getters forward to the embedded Range.
76
+ return range_.begin();
77
+ }
78
+ std::size_t end() const {
79
+ return range_.end();
80
+ }
81
+ std::size_t key_pos() const {
82
+ return range_.key_pos();
83
+ }
84
+ float weight() const {
85
+ return weight_;
86
+ }
87
+
88
+ private:
89
+ Range range_;
90
+ float weight_;
91
+ };
92
+
93
+ inline bool operator<(const WeightedRange &lhs, const WeightedRange &rhs) {  // Orders by weight alone; the range fields are ignored.
94
+ return lhs.weight() < rhs.weight();
95
+ }
96
+
97
+ inline bool operator>(const WeightedRange &lhs, const WeightedRange &rhs) {  // Mirror of operator<: compares weights only.
98
+ return lhs.weight() > rhs.weight();
99
+ }
100
+
101
+ inline WeightedRange make_weighted_range(std::size_t begin, std::size_t end,  // Convenience factory: returns a WeightedRange with every field set.
102
+ std::size_t key_pos, float weight) {
103
+ WeightedRange range;
104
+ range.set_begin(begin);  // Forwarded to the embedded Range's setters.
105
+ range.set_end(end);
106
+ range.set_key_pos(key_pos);
107
+ range.set_weight(weight);
108
+ return range;
109
+ }
110
+
111
+ } // namespace trie
112
+ } // namespace grimoire
113
+ } // namespace marisa
114
+
115
+ #endif // MARISA_GRIMOIRE_TRIE_RANGE_H_
@@ -0,0 +1,117 @@
1
+ #ifndef MARISA_GRIMOIRE_TRIE_STATE_H_
2
+ #define MARISA_GRIMOIRE_TRIE_STATE_H_
3
+
4
+ #include "marisa/grimoire/vector.h"
5
+ #include "marisa/grimoire/trie/history.h"
6
+
7
+ namespace marisa {
8
+ namespace grimoire {
9
+ namespace trie {
10
+
11
+ // A search agent has its internal state and the status codes are defined
12
+ // below.
13
+ typedef enum StatusCode {
14
+ MARISA_READY_TO_ALL,  // Initial value; also set by State::reset(), lookup_init() and reverse_lookup_init().
15
+ MARISA_READY_TO_COMMON_PREFIX_SEARCH,  // Set by State::common_prefix_search_init().
16
+ MARISA_READY_TO_PREDICTIVE_SEARCH,  // Set by State::predictive_search_init().
17
+ MARISA_END_OF_COMMON_PREFIX_SEARCH,  // Not assigned in this header; presumably set by the search code when it finishes -- confirm.
18
+ MARISA_END_OF_PREDICTIVE_SEARCH,  // Likewise not assigned in this header.
19
+ } StatusCode;
20
+
21
+ class State {  // Per-search scratch state: key buffer, history stack, three UInt32 positions and a StatusCode.
22
+ public:
23
+ State()  // Empty buffers, all positions zero, status MARISA_READY_TO_ALL.
24
+ : key_buf_(), history_(), node_id_(0), query_pos_(0),
25
+ history_pos_(0), status_code_(MARISA_READY_TO_ALL) {}
26
+
27
+ void set_node_id(std::size_t node_id) {  // Setters range-check with MARISA_DEBUG_IF, then narrow to UInt32.
28
+ MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
29
+ node_id_ = (UInt32)node_id;
30
+ }
31
+ void set_query_pos(std::size_t query_pos) {
32
+ MARISA_DEBUG_IF(query_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
33
+ query_pos_ = (UInt32)query_pos;
34
+ }
35
+ void set_history_pos(std::size_t history_pos) {
36
+ MARISA_DEBUG_IF(history_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
37
+ history_pos_ = (UInt32)history_pos;
38
+ }
39
+ void set_status_code(StatusCode status_code) {
40
+ status_code_ = status_code;
41
+ }
42
+
43
+ std::size_t node_id() const {  // Getters widen the stored UInt32 back to std::size_t.
44
+ return node_id_;
45
+ }
46
+ std::size_t query_pos() const {
47
+ return query_pos_;
48
+ }
49
+ std::size_t history_pos() const {
50
+ return history_pos_;
51
+ }
52
+ StatusCode status_code() const {
53
+ return status_code_;
54
+ }
55
+
56
+ const Vector<char> &key_buf() const {  // Read-only views of the two buffers.
57
+ return key_buf_;
58
+ }
59
+ const Vector<History> &history() const {
60
+ return history_;
61
+ }
62
+
63
+ Vector<char> &key_buf() {  // Mutable access; Tail appends restored characters through key_buf().
64
+ return key_buf_;
65
+ }
66
+ Vector<History> &history() {
67
+ return history_;
68
+ }
69
+
70
+ void reset() {  // Only the status code is reset; buffers and positions are left untouched.
71
+ status_code_ = MARISA_READY_TO_ALL;
72
+ }
73
+
74
+ void lookup_init() {  // Rewinds node and query positions; buffers are not touched.
75
+ node_id_ = 0;
76
+ query_pos_ = 0;
77
+ status_code_ = MARISA_READY_TO_ALL;
78
+ }
79
+ void reverse_lookup_init() {  // Empties key_buf_ and reserves room for 32 characters; positions are not touched.
80
+ key_buf_.resize(0);
81
+ key_buf_.reserve(32);
82
+ status_code_ = MARISA_READY_TO_ALL;
83
+ }
84
+ void common_prefix_search_init() {  // Same rewind as lookup_init() but with the common-prefix status.
85
+ node_id_ = 0;
86
+ query_pos_ = 0;
87
+ status_code_ = MARISA_READY_TO_COMMON_PREFIX_SEARCH;
88
+ }
89
+ void predictive_search_init() {  // Full reset: both buffers emptied and re-reserved, all three positions zeroed.
90
+ key_buf_.resize(0);
91
+ key_buf_.reserve(64);
92
+ history_.resize(0);
93
+ history_.reserve(4);
94
+ node_id_ = 0;
95
+ query_pos_ = 0;
96
+ history_pos_ = 0;
97
+ status_code_ = MARISA_READY_TO_PREDICTIVE_SEARCH;
98
+ }
99
+
100
+ private:
101
+ Vector<char> key_buf_;
102
+ Vector<History> history_;
103
+ UInt32 node_id_;
104
+ UInt32 query_pos_;
105
+ UInt32 history_pos_;
106
+ StatusCode status_code_;
107
+
108
+ // Disallows copy and assignment.
109
+ State(const State &);  // Declared private and not defined here.
110
+ State &operator=(const State &);
111
+ };
112
+
113
+ } // namespace trie
114
+ } // namespace grimoire
115
+ } // namespace marisa
116
+
117
+ #endif // MARISA_GRIMOIRE_TRIE_STATE_H_
@@ -0,0 +1,218 @@
1
+ #include "marisa/grimoire/algorithm.h"
2
+ #include "marisa/grimoire/trie/state.h"
3
+ #include "marisa/grimoire/trie/tail.h"
4
+
5
+ namespace marisa {
6
+ namespace grimoire {
7
+ namespace trie {
8
+
9
+ Tail::Tail() : buf_(), end_flags_() {}  // Starts with an empty character buffer and empty end flags.
10
+
11
+ void Tail::build(Vector<Entry> &entries, Vector<UInt32> *offsets,  // Validates the mode, builds into a temporary Tail, then swaps it into *this.
12
+ TailMode mode) {
13
+ MARISA_THROW_IF(offsets == NULL, MARISA_NULL_ERROR);  // offsets is a required output.
14
+
15
+ switch (mode) {
16
+ case MARISA_TEXT_TAIL: {  // Text mode uses '\0' as terminator, so any entry containing '\0' forces binary mode.
17
+ for (std::size_t i = 0; i < entries.size(); ++i) {
18
+ const char * const ptr = entries[i].ptr();
19
+ const std::size_t length = entries[i].length();
20
+ for (std::size_t j = 0; j < length; ++j) {
21
+ if (ptr[j] == '\0') {
22
+ mode = MARISA_BINARY_TAIL;
23
+ break;
24
+ }
25
+ }
26
+ if (mode == MARISA_BINARY_TAIL) {  // One embedded NUL is enough; stop scanning the remaining entries.
27
+ break;
28
+ }
29
+ }
30
+ break;
31
+ }
32
+ case MARISA_BINARY_TAIL: {  // Nothing to check for binary mode.
33
+ break;
34
+ }
35
+ default: {  // Any other mode value is rejected.
36
+ MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode");
37
+ }
38
+ }
39
+
40
+ Tail temp;  // Build aside so *this is only replaced after build_ returns.
41
+ temp.build_(entries, offsets, mode);
42
+ swap(temp);
43
+ }
44
+
45
+ void Tail::map(Mapper &mapper) {  // Maps into a temporary Tail, then swaps it into *this.
46
+ Tail temp;
47
+ temp.map_(mapper);
48
+ swap(temp);  // *this is replaced only after map_ has returned.
49
+ }
50
+
51
+ void Tail::read(Reader &reader) {  // Reads into a temporary Tail, then swaps it into *this.
52
+ Tail temp;
53
+ temp.read_(reader);
54
+ swap(temp);  // *this is replaced only after read_ has returned.
55
+ }
56
+
57
+ void Tail::write(Writer &writer) const {  // Public wrapper that forwards to write_.
58
+ write_(writer);
59
+ }
60
+
61
+ void Tail::restore(Agent &agent, std::size_t offset) const {  // Appends the tail string starting at offset to the agent's key buffer.
62
+ MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
63
+
64
+ State &state = agent.state();
65
+ if (end_flags_.empty()) {  // No end flags: strings in buf_ are '\0'-terminated.
66
+ for (const char *ptr = &buf_[offset]; *ptr != '\0'; ++ptr) {
67
+ state.key_buf().push_back(*ptr);
68
+ }
69
+ } else {  // End flags present: copy up to and including the byte whose flag is set.
70
+ do {
71
+ state.key_buf().push_back(buf_[offset]);
72
+ } while (!end_flags_[offset++]);
73
+ }
74
+ }
75
+
76
+ bool Tail::match(Agent &agent, std::size_t offset) const {  // True if the whole tail string at offset matches the query from query_pos; advances query_pos as it goes.
77
+ MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);
78
+ MARISA_DEBUG_IF(agent.state().query_pos() >= agent.query().length(),  // At least one query character must remain.
79
+ MARISA_BOUND_ERROR);
80
+
81
+ State &state = agent.state();
82
+ if (end_flags_.empty()) {  // '\0'-terminated strings.
83
+ const char * const ptr = &buf_[offset] - state.query_pos();  // Rebased so that ptr[query_pos] lines up with query[query_pos].
84
+ do {
85
+ if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
86
+ return false;
87
+ }
88
+ state.set_query_pos(state.query_pos() + 1);
89
+ if (ptr[state.query_pos()] == '\0') {  // Reached the end of the tail string: full match.
90
+ return true;
91
+ }
92
+ } while (state.query_pos() < agent.query().length());
93
+ return false;  // Query ran out before the tail string did.
94
+ } else {  // Strings delimited by end_flags_.
95
+ do {
96
+ if (buf_[offset] != agent.query()[state.query_pos()]) {
97
+ return false;
98
+ }
99
+ state.set_query_pos(state.query_pos() + 1);
100
+ if (end_flags_[offset++]) {  // The byte just compared was the last one of the tail string.
101
+ return true;
102
+ }
103
+ } while (state.query_pos() < agent.query().length());
104
+ return false;  // Query ran out before the tail string did.
105
+ }
106
+ }
107
+
108
+ bool Tail::prefix_match(Agent &agent, std::size_t offset) const {  // Like match(), but a query that ends inside the tail string also succeeds; matched and remaining bytes are appended to key_buf.
109
+ MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR);  // NOTE(review): unlike match(), no debug check that query_pos < query length -- presumably guaranteed by callers; confirm.
110
+
111
+ State &state = agent.state();
112
+ if (end_flags_.empty()) {  // '\0'-terminated strings.
113
+ const char *ptr = &buf_[offset] - state.query_pos();  // Rebased so that ptr[query_pos] lines up with query[query_pos].
114
+ do {
115
+ if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) {
116
+ return false;  // Bytes already appended to key_buf are left in place.
117
+ }
118
+ state.key_buf().push_back(ptr[state.query_pos()]);
119
+ state.set_query_pos(state.query_pos() + 1);
120
+ if (ptr[state.query_pos()] == '\0') {  // Tail string fully consumed.
121
+ return true;
122
+ }
123
+ } while (state.query_pos() < agent.query().length());
124
+ ptr += state.query_pos();  // Query exhausted: ptr now addresses the first unmatched tail byte (known non-'\0' from the check above).
125
+ do {
126
+ state.key_buf().push_back(*ptr);  // Copy the rest of the tail string.
127
+ } while (*++ptr != '\0');
128
+ return true;
129
+ } else {  // Strings delimited by end_flags_.
130
+ do {
131
+ if (buf_[offset] != agent.query()[state.query_pos()]) {
132
+ return false;
133
+ }
134
+ state.key_buf().push_back(buf_[offset]);
135
+ state.set_query_pos(state.query_pos() + 1);
136
+ if (end_flags_[offset++]) {  // The byte just compared was the last one of the tail string.
137
+ return true;
138
+ }
139
+ } while (state.query_pos() < agent.query().length());
140
+ do {  // Query exhausted: copy the remaining bytes up to and including the flagged one.
141
+ state.key_buf().push_back(buf_[offset]);
142
+ } while (!end_flags_[offset++]);
143
+ return true;
144
+ }
145
+ }
146
+
147
+ void Tail::clear() {  // Resets to the default-constructed state.
148
+ Tail().swap(*this);  // Swap with a temporary; the old contents are destroyed with it.
149
+ }
150
+
151
+ void Tail::swap(Tail &rhs) {  // Exchanges both members with rhs.
152
+ buf_.swap(rhs.buf_);
153
+ end_flags_.swap(rhs.end_flags_);
154
+ }
155
+
156
+ void Tail::build_(Vector<Entry> &entries, Vector<UInt32> *offsets,  // Packs the entries into buf_ and returns, per original entry index, its start offset in buf_.
157
+ TailMode mode) {
158
+ for (std::size_t i = 0; i < entries.size(); ++i) {
159
+ entries[i].set_id(i);  // Remember the original index, because the sort below reorders entries.
160
+ }
161
+ Algorithm().sort(entries.begin(), entries.end());
162
+
163
+ Vector<UInt32> temp_offsets;  // Filled locally and swapped into *offsets at the end.
164
+ temp_offsets.resize(entries.size(), 0);
165
+
166
+ const Entry dummy;  // Stand-in "previous entry" for the first iteration; the code relies on its length() being 0 (see the check below).
167
+ const Entry *last = &dummy;
168
+ for (std::size_t i = entries.size(); i > 0; --i) {  // Walk the sorted entries from last to first.
169
+ const Entry &current = entries[i - 1];
170
+ MARISA_THROW_IF(current.length() == 0, MARISA_RANGE_ERROR);  // Empty entries are not allowed.
171
+ std::size_t match = 0;  // Number of leading positions (in Entry index order) shared with the previous entry.
172
+ while ((match < current.length()) && (match < last->length()) &&
173
+ ((*last)[match] == current[match])) {
174
+ ++match;
175
+ }
176
+ if ((match == current.length()) && (last->length() != 0)) {  // current is wholly contained in the previous entry: reuse its stored bytes.
177
+ temp_offsets[current.id()] = (UInt32)(
178
+ temp_offsets[last->id()] + (last->length() - match));  // Skip the part of the previous entry that current does not share.
179
+ } else {  // Otherwise append current as a new string.
180
+ temp_offsets[current.id()] = (UInt32)buf_.size();
181
+ for (std::size_t j = 1; j <= current.length(); ++j) {
182
+ buf_.push_back(current[current.length() - j]);  // Written from Entry index length-1 down to 0.
183
+ }
184
+ if (mode == MARISA_TEXT_TAIL) {
185
+ buf_.push_back('\0');  // Text mode: terminate with NUL.
186
+ } else {
187
+ for (std::size_t j = 1; j < current.length(); ++j) {
188
+ end_flags_.push_back(false);
189
+ }
190
+ end_flags_.push_back(true);  // Binary mode: flag only the last byte of the string.
191
+ }
192
+ MARISA_THROW_IF(buf_.size() > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);  // Offsets are stored as UInt32.
193
+ }
194
+ last = &current;
195
+ }
196
+ buf_.shrink();
197
+
198
+ offsets->swap(temp_offsets);  // Publish the offsets only once the whole build has finished.
199
+ }
200
+
201
+ void Tail::map_(Mapper &mapper) {  // Maps buf_ first, then end_flags_; write_ emits them in the same order.
202
+ buf_.map(mapper);
203
+ end_flags_.map(mapper);
204
+ }
205
+
206
+ void Tail::read_(Reader &reader) {  // Reads buf_ first, then end_flags_; write_ emits them in the same order.
207
+ buf_.read(reader);
208
+ end_flags_.read(reader);
209
+ }
210
+
211
+ void Tail::write_(Writer &writer) const {  // Writes buf_ first, then end_flags_; map_ and read_ consume them in the same order.
212
+ buf_.write(writer);
213
+ end_flags_.write(writer);
214
+ }
215
+
216
+ } // namespace trie
217
+ } // namespace grimoire
218
+ } // namespace marisa