tx 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,93 @@
1
+ #ifndef __SSV_HPP__
2
+ #define __SSV_HPP__
3
+
4
+ #include <memory.h>
5
+ #include <vector>
6
+ #include <cassert>
7
+ #include <stdio.h>
8
+
9
+ namespace tx_tool{
10
+
11
+ typedef unsigned int uint; // 32bit
12
+ typedef unsigned short ushort; // 16bit
13
+ typedef unsigned char uchar; // 8bit
14
+
15
+
16
+ #define SSV_BLOCK_SHIFT (5)
17
+ #define SSV_BLOCK (1U << SSV_BLOCK_SHIFT)
18
+
19
+ #define SSV_LBLOCK_SHIFT (8)
20
+ #define SSV_LBLOCK (1U << SSV_LBLOCK_SHIFT)
21
+ #define SSV_MBLOCK_SHIFT (5)
22
+ #define SSV_MBLOCK (1U << SSV_MBLOCK_SHIFT)
23
+
24
+ #define logLL (16)
25
+ #define LL (1U << logLL)
26
+ #define logLLL (5)
27
+ #define LLL (1U << logLLL)
28
+ #define logL (logLL-1-5)
29
+ #define L (1U << logL)
30
+
31
// Bit vector kept in an array of uint blocks, with member functions for
// bit access, rank/select queries and FILE*-based (de)serialization.
// Only getBit is defined in this header; every other member is implemented
// elsewhere.
// NOTE(review): the class holds raw pointer members and declares a
// destructor but no copy constructor / copy assignment - confirm that
// instances are never copied, or that the implementation copes with it.
class ssv{

public:
  // ---- construction / teardown ----
  ssv(const uint _size = 0);
  ssv(std::vector<bool>& bv);
  ~ssv();

  int resize(const uint _size);
  void free();

  // ---- bit access ----
  // Returns bit `pos` as 0 or 1. No bounds check is performed.
  inline uint getBit(const uint pos) const {
    const uint word   = B[pos >> SSV_BLOCK_SHIFT];  // block that holds the bit
    const uint offset = pos & (SSV_BLOCK - 1);      // bit index inside the block
    return (word >> offset) & 1;
  }
  void setBit(uint pos, uint x);
  uint getBits(const uint pos, uint width) const;
  void setBits(const uint pos, const uint width, const uint x);

  // ---- block access ----
  uint getBlock(const uint blockPos) const;
  void setBlock(const uint blockPos, const uint x);
  uint getSize() const;
  uint getBlockSize() const;
  uint getAllocate() const;

  // ---- rank / select ----
  uint rank(uint pos, const uint bit) const;
  uint select(uint pos, const uint bit) const;

  // ---- index construction ----
  void build();
  uint rankBuild(const uint t_size); // return oneNum
  void selectBuild(const uint t_size);

  // ---- serialization ----
  int write(FILE* outfp);
  int read(FILE* infp);
  // Presumably attaches the vector to an in-memory image at `ptr` and
  // returns the number of bytes consumed - confirm against the definition.
  size_t set_array(void* ptr);

private:
  uint popCount(uint r) const;
  uint _rank1(const uint pos) const;
  uint _select1(uint x) const;
  uint _select0(uint x) const;

  uint* B;          // bit blocks, indexed by pos / SSV_BLOCK (see getBit)
  uint size;
  uint oneNum;
  uint blockSize;   // (size+SSV_BLOCK-1)/SSV_BLOCK

  uint LBlockSize;
  uint MBlockSize;

  // for rank
  uint* levelL;
  uchar* levelM;

  bool isBuild;
  bool no_delete;
};
90
+
91
+ }
92
+
93
+ #endif
@@ -0,0 +1,192 @@
1
+ Index: Lib/ruby/rubycontainer.swg
2
+ ===================================================================
3
+ --- Lib/ruby/rubycontainer.swg (revision 11423)
4
+ +++ Lib/ruby/rubycontainer.swg (working copy)
5
+ @@ -446,11 +446,10 @@
6
+ %typemap(out,noblock=1,fragment="RubySequence_Cont")
7
+ std::pair<const_iterator, const_iterator> {
8
+ $result = rb_ary_new2(2);
9
+ - RARRAY_PTR($result)[0] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
10
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
11
+ - RARRAY_PTR($result)[1] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
12
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
13
+ - RARRAY_LEN($result) = 2;
14
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
15
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
16
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
17
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
18
+ }
19
+
20
+ // std::map/multimap/set allow returning std::pair< iterator, iterator > from
21
+ @@ -459,11 +458,10 @@
22
+ %typemap(out,noblock=1,fragment="RubySequence_Cont")
23
+ std::pair<iterator, iterator> {
24
+ $result = rb_ary_new2(2);
25
+ - RARRAY_PTR($result)[0] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
26
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
27
+ - RARRAY_PTR($result)[1] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
28
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
29
+ - RARRAY_LEN($result) = 2;
30
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
31
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
32
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
33
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
34
+ }
35
+
36
+
37
+ @@ -566,7 +564,8 @@
38
+ {
39
+ Sequence::const_iterator i = $self->begin();
40
+ Sequence::const_iterator e = $self->end();
41
+ - VALUE str = rb_str_new2( swig::type_name< Sequence >() );
42
+ + // Double parentheses to avoid macro expansion error in case Sequence contains comma.
43
+ + VALUE str = rb_str_new2(( swig::type_name< Sequence >() ));
44
+ str = rb_str_cat2( str, " [" );
45
+ bool comma = false;
46
+ VALUE tmp;
47
+ @@ -1112,9 +1111,8 @@
48
+ int i = 0;
49
+ for (const_iterator it = seq.begin();
50
+ it != seq.end(); ++it, ++i) {
51
+ - RARRAY_PTR(obj)[i] = swig::from< value_type >(*it);
52
+ + rb_ary_push(obj, swig::from< value_type >(*it));
53
+ }
54
+ - RARRAY_LEN(obj) = size;
55
+ rb_obj_freeze(obj); // treat as immutable result
56
+ return obj;
57
+ } else {
58
+ Index: Lib/ruby/std_set.i
59
+ ===================================================================
60
+ --- Lib/ruby/std_set.i (revision 11423)
61
+ +++ Lib/ruby/std_set.i (working copy)
62
+ @@ -170,10 +170,9 @@
63
+ %typemap(out,noblock=1,fragment="RubyPairBoolOutputIterator")
64
+ std::pair<iterator, bool> {
65
+ $result = rb_ary_new2(2);
66
+ - RARRAY_PTR($result)[0] = SWIG_NewPointerObj(swig::make_set_nonconst_iterator(%static_cast($1,$type &).first),
67
+ - swig::Iterator::descriptor(),SWIG_POINTER_OWN);
68
+ - RARRAY_PTR($result)[1] = SWIG_From(bool)(%static_cast($1,const $type &).second);
69
+ - RARRAY_LEN($result) = 2;
70
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_set_nonconst_iterator(%static_cast($1,$type &).first),
71
+ + swig::Iterator::descriptor(),SWIG_POINTER_OWN));
72
+ + rb_ary_push($result, SWIG_From(bool)(%static_cast($1,const $type &).second));
73
+ }
74
+
75
+ %extend {
76
+ Index: Lib/ruby/std_multimap.i
77
+ ===================================================================
78
+ --- Lib/ruby/std_multimap.i (revision 11423)
79
+ +++ Lib/ruby/std_multimap.i (working copy)
80
+ @@ -115,7 +115,8 @@
81
+ {
82
+ MultiMap::iterator i = $self->begin();
83
+ MultiMap::iterator e = $self->end();
84
+ - VALUE str = rb_str_new2( swig::type_name< MultiMap >() );
85
+ + // Double parentheses to avoid macro expansion error in case MultiMap contains comma.
86
+ + VALUE str = rb_str_new2(( swig::type_name< MultiMap >() ));
87
+ str = rb_str_cat2( str, " {" );
88
+ VALUE tmp;
89
+ while ( i != e )
90
+ Index: Lib/ruby/std_pair.i
91
+ ===================================================================
92
+ --- Lib/ruby/std_pair.i (revision 11423)
93
+ +++ Lib/ruby/std_pair.i (working copy)
94
+ @@ -118,11 +118,9 @@
95
+
96
+ static VALUE from(const std::pair<T,U>& val) {
97
+ VALUE obj = rb_ary_new2(2);
98
+ - RARRAY_PTR(obj)[0] = swig::from<
99
+ - typename swig::noconst_traits<T >::noconst_type>(val.first);
100
+ - RARRAY_PTR(obj)[1] = swig::from(val.second);
101
+ - RARRAY_LEN(obj) = 2;
102
+ - rb_define_singleton_method(obj, "second",
103
+ + rb_ary_push(obj, swig::from<typename swig::noconst_traits<T >::noconst_type>(val.first));
104
+ + rb_ary_push(obj, swig::from(val.second));
105
+ + rb_define_singleton_method(obj, "second",
106
+ VALUEFUNC(_wrap_pair_second), 0 );
107
+ rb_define_singleton_method(obj, "second=",
108
+ VALUEFUNC(_wrap_pair_second_eq), 1 );
109
+ @@ -148,7 +146,8 @@
110
+ VALUE inspect() const
111
+ {
112
+ VALUE tmp;
113
+ - VALUE str = rb_str_new2( swig::type_name< pair >() );
114
+ + // Double parentheses to avoid macro expansion error in case pair contains comma.
115
+ + VALUE str = rb_str_new2(( swig::type_name< pair >() ));
116
+ str = rb_str_cat2( str, " (" );
117
+ tmp = swig::from( $self->first );
118
+ tmp = rb_obj_as_string( tmp );
119
+ Index: Lib/ruby/rubyclasses.swg
120
+ ===================================================================
121
+ --- Lib/ruby/rubyclasses.swg (revision 11423)
122
+ +++ Lib/ruby/rubyclasses.swg (working copy)
123
+ @@ -315,30 +315,36 @@
124
+
125
+ };
126
+
127
+ - ID GC_VALUE::hash_id = rb_intern("hash");
128
+ - ID GC_VALUE::lt_id = rb_intern("<");
129
+ - ID GC_VALUE::gt_id = rb_intern(">");
130
+ - ID GC_VALUE::eq_id = rb_intern("==");
131
+ - ID GC_VALUE::le_id = rb_intern("<=");
132
+ - ID GC_VALUE::ge_id = rb_intern(">=");
133
+ + // We need this because rb_intern macro uses statement-expression and
134
+ + // statement-expression is allowed only inside functions.
135
+ + static ID rb_intern_wrapper(const char* str) {
136
+ + return rb_intern(str);
137
+ + }
138
+
139
+ - ID GC_VALUE::pos_id = rb_intern("+@");
140
+ - ID GC_VALUE::neg_id = rb_intern("-@");
141
+ - ID GC_VALUE::inv_id = rb_intern("~");
142
+ + ID GC_VALUE::hash_id = rb_intern_wrapper("hash");
143
+ + ID GC_VALUE::lt_id = rb_intern_wrapper("<");
144
+ + ID GC_VALUE::gt_id = rb_intern_wrapper(">");
145
+ + ID GC_VALUE::eq_id = rb_intern_wrapper("==");
146
+ + ID GC_VALUE::le_id = rb_intern_wrapper("<=");
147
+ + ID GC_VALUE::ge_id = rb_intern_wrapper(">=");
148
+
149
+ - ID GC_VALUE::add_id = rb_intern("+");
150
+ - ID GC_VALUE::sub_id = rb_intern("-");
151
+ - ID GC_VALUE::mul_id = rb_intern("*");
152
+ - ID GC_VALUE::div_id = rb_intern("/");
153
+ - ID GC_VALUE::mod_id = rb_intern("%");
154
+ + ID GC_VALUE::pos_id = rb_intern_wrapper("+@");
155
+ + ID GC_VALUE::neg_id = rb_intern_wrapper("-@");
156
+ + ID GC_VALUE::inv_id = rb_intern_wrapper("~");
157
+
158
+ - ID GC_VALUE::and_id = rb_intern("&");
159
+ - ID GC_VALUE::or_id = rb_intern("|");
160
+ - ID GC_VALUE::xor_id = rb_intern("^");
161
+ + ID GC_VALUE::add_id = rb_intern_wrapper("+");
162
+ + ID GC_VALUE::sub_id = rb_intern_wrapper("-");
163
+ + ID GC_VALUE::mul_id = rb_intern_wrapper("*");
164
+ + ID GC_VALUE::div_id = rb_intern_wrapper("/");
165
+ + ID GC_VALUE::mod_id = rb_intern_wrapper("%");
166
+
167
+ - ID GC_VALUE::lshift_id = rb_intern("<<");
168
+ - ID GC_VALUE::rshift_id = rb_intern(">>");
169
+ + ID GC_VALUE::and_id = rb_intern_wrapper("&");
170
+ + ID GC_VALUE::or_id = rb_intern_wrapper("|");
171
+ + ID GC_VALUE::xor_id = rb_intern_wrapper("^");
172
+
173
+ + ID GC_VALUE::lshift_id = rb_intern_wrapper("<<");
174
+ + ID GC_VALUE::rshift_id = rb_intern_wrapper(">>");
175
+ +
176
+ VALUE GC_VALUE::_hash = Qnil;
177
+
178
+ typedef GC_VALUE LANGUAGE_OBJ;
179
+ Index: Lib/ruby/std_map.i
180
+ ===================================================================
181
+ --- Lib/ruby/std_map.i (revision 11423)
182
+ +++ Lib/ruby/std_map.i (working copy)
183
+ @@ -345,7 +345,8 @@
184
+ {
185
+ Map::const_iterator i = $self->begin();
186
+ Map::const_iterator e = $self->end();
187
+ - VALUE str = rb_str_new2( swig::type_name< Map >() );
188
+ + // Double parentheses to avoid macro expansion error in case Map contains comma.
189
+ + VALUE str = rb_str_new2(( swig::type_name< Map >() ));
190
+ str = rb_str_cat2( str, " {" );
191
+ bool comma = false;
192
+ VALUE tmp;
@@ -0,0 +1,442 @@
1
+ #include "tx.hpp"
2
+
3
+ #include <climits>
4
+ #include "ssv.hpp"
5
+ namespace tx_tool{
6
+
7
+ uint tx::NOTFOUND = UINT_MAX;
8
+
9
// Work item of the breadth-first trie construction in tx::build:
// the half-open range [left, right) of the sorted word list, together with
// the character depth at which that range is examined.
struct queue_elem{
  size_t left;
  size_t right;
  int depth;

  queue_elem(size_t _left, size_t _right, int _depth)
    : left(_left),
      right(_right),
      depth(_depth)
  {}
};
15
+
16
// Default constructor: no edge-label array yet, zero keys, and
// no_delete == false so that the destructor will delete[] `edge`.
tx::tx()
  : edge(NULL),
    keyNum(0),
    no_delete(false)
{}
17
+
18
// Destructor: frees the edge-label array unless no_delete marks it as
// borrowed (setArray sets that flag when pointing into caller memory).
tx::~tx(){
  if (no_delete) return;

  delete[] edge;
  edge = NULL;
  no_delete = false;
}
25
+
26
// Builds a trie from `wordList` and writes it to `fileName`.
//
// wordList is sorted and de-duplicated IN PLACE. The file layout, in
// writing order, is:
//   int keyNum
//   one edge-label byte per non-root node, in breadth-first order
//   ssv holding the child-list bits (one 0 per child, then a 1, per node,
//       preceded by the two "super root" bits 0,1)
//   ssv holding one terminal flag per node
//   int nodeNum
//
// Returns 0 on success and -1 on failure; a message is appended to
// errorLog on failure and a summary to resultLog on success. The output
// file is closed on every path. Note that the member keyNum is not updated
// here; only the count written to the file is.
int tx::build(std::vector<std::string>& wordList, const char* fileName) {
  sort(wordList.begin(),wordList.end());
  const size_t origWordNum = wordList.size();
  wordList.erase(unique(wordList.begin(),wordList.end()),wordList.end());
  int wordNum = (int)wordList.size();
  if ((size_t)wordNum != origWordNum){
    resultLog << "shrink word list " << origWordNum << " -> " << wordNum << std::endl;
  } else {
    resultLog << "word list " << wordNum << " elements" << std::endl;
  }

  uint totalSize = 0;
  for (size_t i = 0; i < wordList.size(); i++){
    totalSize += (uint)wordList[i].size();
  }

  FILE* outfp = fopen(fileName,"wb");
  if (outfp == NULL){
    errorLog << "cannot open " << fileName << std::endl;
    return -1;
  }

  std::queue<queue_elem> q;
  if (wordNum != 0){
    q.push(queue_elem(0,wordNum,0));
  }
  if (fwrite(&wordNum,sizeof(int),1,outfp) != 1){
    errorLog << "fwrite error " << std::endl;
    fclose(outfp);
    return -1;
  }

  std::vector<bool> vb_loud;
  std::vector<bool> vb_terminal;

  vb_loud.push_back(0); // super root
  vb_loud.push_back(1);

  uint nodeNum = 0;

  // Breadth-first traversal: each queue element is one trie node, described
  // by the range of words that pass through it and the node's depth.
  while (!q.empty()){
    const queue_elem elem = q.front(); // copy: pop() destroys the front element
    q.pop();
    const int depth = elem.depth;
    const size_t left = elem.left;
    const size_t right = elem.right;

    nodeNum++;
    size_t newLeft = left;
    // In a sorted, unique list only the first word of the range can end
    // exactly at this node.
    if (wordList[left].size() == (size_t)depth){
      vb_terminal.push_back(1); // this node has terminate
      newLeft++;
      if (newLeft == right){
        vb_loud.push_back(1);   // leaf: empty child list
        continue;
      }
    } else {
      vb_terminal.push_back(0);
    }

    // Split [newLeft, right) into runs sharing the same character at `depth`;
    // every run becomes one child.
    size_t prev = newLeft;
    char prev_c = wordList[prev][depth];
    for (size_t i = newLeft+1; i < right; i++){
      if (prev_c != wordList[i][depth]){
        fputc(prev_c,outfp);
        vb_loud.push_back(0);
        q.push(queue_elem(prev,i,depth+1));
        prev = i;
        prev_c = wordList[prev][depth];
      }
    }
    // The last run is never empty here (prev < right always holds), so it
    // is emitted unconditionally.
    fputc(prev_c,outfp);
    vb_loud.push_back(0);
    q.push(queue_elem(prev,right,depth+1));

    vb_loud.push_back(1);
  }

  {
    ssv sv(vb_loud);
    sv.build();
    if (sv.write(outfp) == -1){
      errorLog << "fwrite error " << std::endl;
      fclose(outfp);
      return -1;
    }
  }
  {
    ssv sv(vb_terminal);
    sv.build();
    if (sv.write(outfp) == -1){
      errorLog << "fwrite error " << std::endl;
      fclose(outfp);
      return -1;
    }
  }

  if (fwrite(&nodeNum,sizeof(int),1,outfp) != 1){
    errorLog << "fwrite error " << std::endl;
    fclose(outfp);
    return -1;
  }

  size_t outfpSize = ftell(outfp);
  resultLog << "outputSize:" << outfpSize << " inputSize:" << totalSize << " ratio:" << (float)outfpSize/totalSize << std::endl;

  // fclose flushes buffered data, so a failure here means the file on disk
  // may be incomplete.
  if (fclose(outfp) != 0){
    errorLog << "fclose error " << std::endl;
    return -1;
  }
  return 0;
}
130
+
131
// Loads a trie previously written by tx::build from `fileName`.
//
// Reads keyNum from the start of the file and nodeNum from its last
// sizeof(int) bytes, then the nodeNum-1 edge labels and the two ssv bit
// vectors (loud, terminal) that follow keyNum.
//
// Returns 0 on success, -1 on failure (a message is appended to errorLog).
// After a successful header read the edge array is owned by this object;
// any array it owned before is released first.
int tx::read(const char* fileName){
  FILE* infp = fopen(fileName,"rb");
  if (infp == NULL){
    errorLog << "cannot open " << fileName << std::endl;
    return -1;
  }

  keyNum = 0;
  if (fread(&keyNum,sizeof(int),1,infp) != 1){
    errorLog << "keyNum read error" << std::endl;
    fclose(infp);
    return -1;
  }

  // nodeNum is stored in the trailing sizeof(int) bytes. A file that cannot
  // hold both keyNum and nodeNum (or whose size cannot be determined) is
  // rejected instead of seeking to a bogus offset.
  if (fseek(infp,0,SEEK_END) != 0){
    errorLog << "fseek error" << std::endl;
    fclose(infp);
    return -1;
  }
  const long fileSize = ftell(infp);
  if (fileSize < (long)(2*sizeof(int)) ||
      fseek(infp,fileSize-(long)sizeof(int),SEEK_SET) != 0){
    errorLog << "fseek error" << std::endl;
    fclose(infp);
    return -1;
  }

  int nodeNum = -1;
  if (fread(&nodeNum,sizeof(int),1,infp) != 1){
    errorLog << "nodeNum read error" << std::endl;
    fclose(infp);
    return -1;
  }
  resultLog << "keyNum:" << (int)keyNum << " nodeNum:" << nodeNum << std::endl;

  // Back to the first byte after keyNum.
  if (fseek(infp,sizeof(int)*1,SEEK_SET) != 0){
    errorLog << "fseek error" << std::endl;
    fclose(infp);
    return -1;
  }

  // Drop whatever edge array was held before: delete it if owned, forget it
  // if it was borrowed through setArray. From here on `edge` is owned.
  if (!no_delete){
    delete[] edge;
  }
  edge = NULL;
  no_delete = false;

  if (nodeNum > 0){
    const size_t edgeNum = (size_t)(nodeNum-1);
    edge = new char [edgeNum];
    if (fread(edge,sizeof(char),edgeNum,infp) != edgeNum){
      errorLog << "edge read error" << std::endl;
      fclose(infp);
      return -1;
    }
  }

  // ssv::read is treated like ssv::write in build(): -1 signals failure.
  if (loud.read(infp) == -1){
    errorLog << "loud read error" << std::endl;
    fclose(infp);
    return -1;
  }
  if (terminal.read(infp) == -1){
    errorLog << "terminal read error" << std::endl;
    fclose(infp);
    return -1;
  }

  fclose(infp);
  return 0;
}
182
+
183
// Attaches the trie to an in-memory image of a file written by tx::build
// (`ptr` .. `ptr + readSize`), without copying the edge labels.
//
// Image layout: uint keyNum | edge labels | loud ssv | terminal ssv |
// uint nodeNum. The number of edge labels is nodeNum-1, or 0 when nodeNum
// is 0 (an empty dictionary has no edges).
//
// Returns 0 on success and -1 when the image is inconsistent with
// readSize; "setArray error" is appended to errorLog in that case and
// `edge` / no_delete are left untouched, so the destructor never frees
// caller-owned memory. On success the caller must keep the buffer alive
// for as long as this object uses it.
// NOTE(review): the uint loads assume `ptr` is suitably aligned - confirm
// with callers.
int tx::setArray(void* ptr, size_t readSize){
  // Need at least the leading keyNum and the trailing nodeNum.
  if (ptr == NULL || readSize < 2*sizeof(uint)){
    errorLog << "setArray error" << std::endl;
    return -1;
  }
  uchar* const base = (uchar*)ptr;

  keyNum = *(uint*)(ptr);
  printf("keyNum:%d\n", keyNum);
  int nodeNum = *(uint*)(base+readSize-sizeof(uint));
  printf("nodeNum:%d\n", nodeNum);

  // Bytes available between the two header fields.
  const size_t payload = readSize - 2*sizeof(uint);
  const size_t edgeNum = (nodeNum > 0) ? (size_t)(nodeNum-1) : 0;
  if (nodeNum < 0 || edgeNum > payload){
    errorLog << "setArray error" << std::endl;
    return -1;
  }

  uchar* const ssvStart = base + sizeof(uint) + edgeNum;
  const size_t readNum = loud.set_array((void*)ssvStart);
  if (readNum > payload - edgeNum){
    errorLog << "setArray error" << std::endl;
    return -1;
  }
  const size_t readNum2 = terminal.set_array((void*)(ssvStart + readNum));
  if (readNum2 != payload - edgeNum - readNum){
    errorLog << "setArray error" << std::endl;
    return -1;
  }

  // Commit only after validation: release an owned array, then borrow.
  if (!no_delete){
    delete[] edge;
  }
  edge = (char*)ptr + sizeof(uint);
  no_delete = true;
  return 0;
}
198
+
199
// Walks the trie along str[0..len) and reports the last terminal node met.
//
// Returns the id of the longest key that is a prefix of `str` and stores
// its length in retLen; both are NOTFOUND when there is none.
// With matchPrefix set, reaching a non-terminal node also updates retLen to
// the number of characters matched so far and resets the id to NOTFOUND, so
// the result describes the deepest node reached rather than the last key.
uint tx::prefixSearch(const char* str, const size_t len, size_t& retLen, bool matchPrefix) const {
  retLen = NOTFOUND;
  uint foundId = NOTFOUND;
  if (terminal.getSize() <= 2) return foundId;

  uint cursor = 2;       // first position after the two super-root bits
  size_t matched = 0;    // characters of str consumed so far
  while (true){
    const uint node = loud.rank(cursor-1,1)-1;
    if (terminal.getBit(node)){
      retLen = matched;
      foundId = terminal.rank(node,1)-1;
    } else if (matchPrefix){
      retLen = matched;
      foundId = NOTFOUND;
    }

    if (matched == len) break;

    const uint child = getChild(cursor,str[matched]);
    if (child == UINT_MAX) break;

    cursor = child;
    ++matched;
  }
  return foundId;
}
223
+
224
// Collects keys related to str[0..len) into `ret` and returns ret.size().
//
// First, every key that is a proper prefix of `str` found while descending
// is appended (these are not counted against `limit`). If the whole of
// `str` is present in the trie, the keys below that node are appended as
// well, shortest first, at most `limit` of them.
uint tx::expandSearch(const char* str, const size_t len, std::vector<std::string>& ret, const uint limit) const {
  ret.clear();
  if (limit == 0 || terminal.getSize() <= 2) return 0;

  uint cursor = 2;
  bool fullyMatched = true;
  for (size_t k = 0; k < len; ++k){
    const uint child = getChild(cursor,str[k]);
    const uint node = loud.rank(cursor-1,1)-1;
    if (terminal.getBit(node)){
      ret.push_back(std::string(str,str+k));
    }
    if (child == UINT_MAX){
      fullyMatched = false;
      break;
    }
    cursor = child;
  }

  if (fullyMatched){
    std::vector<std::pair<size_t, std::pair<std::string, uint> > > found;
    enumerateAll(cursor,std::string(str, len),found);
    sort(found.begin(),found.end());
    const size_t take = (found.size() < limit) ? found.size() : (size_t)limit;
    for (size_t k = 0; k < take; ++k){
      ret.push_back(found[k].second.first);
    }
  }
  return (uint)ret.size();
}
256
+
257
// Finds the keys that are prefixes of str[0..len), shortest first.
// Each match appends the key text to `ret` and its id to `retID`; the walk
// stops after `limit` matches, at the end of `str`, or when the trie has no
// matching child. Returns the number of matches.
uint tx::commonPrefixSearch(const char* str, const size_t len, std::vector<std::string>& ret, std::vector<uint>& retID, const uint limit) const{
  ret.clear();
  retID.clear();
  if (limit == 0 || terminal.getSize() <= 2) return 0;

  uint cursor = 2;
  size_t matched = 0;
  while (true){
    const uint node = loud.rank(cursor-1,1)-1;
    if (terminal.getBit(node)){
      ret.push_back(std::string(str, str+matched));
      retID.push_back(terminal.rank(node,1)-1);
      if (ret.size() == limit) break;
    }
    if (matched == len) break;

    const uint child = getChild(cursor,str[matched]);
    if (child == UINT_MAX) break;

    cursor = child;
    ++matched;
  }
  return (uint)ret.size();
}
282
+
283
// Same walk as the string-returning overload, but reports each matching
// prefix by its length (appended to `retLen`) instead of its text; the key
// id goes to `retID`. Stops after `limit` matches. Returns the number of
// matches.
uint tx::commonPrefixSearch(const char* str, const size_t len, std::vector<uint>& retLen, std::vector<uint>& retID, const uint limit) const{
  retLen.clear();
  retID.clear();
  if (limit == 0 || terminal.getSize() <= 2) return 0;

  uint cursor = 2;
  size_t matched = 0;
  while (true){
    const uint node = loud.rank(cursor-1,1)-1;
    if (terminal.getBit(node)){
      retLen.push_back(matched);
      retID.push_back(terminal.rank(node,1)-1);
      if (retLen.size() == limit) break;
    }
    if (matched == len) break;

    const uint child = getChild(cursor,str[matched]);
    if (child == UINT_MAX) break;

    cursor = child;
    ++matched;
  }

  return (uint)retLen.size();
}
308
+
309
+
310
// Finds keys that start with str[0..len). Results are ordered by key length
// (then by the pair ordering of text and id) and capped at `limit`; key
// texts go to `ret`, ids to `retID`. Returns the number of results, which
// is 0 when `str` itself is not a path in the trie.
uint tx::predictiveSearch(const char* str, const size_t len, std::vector<std::string>& ret, std::vector<uint>& retID, const uint limit) const{
  ret.clear();
  retID.clear();
  if (limit == 0 || terminal.getSize() <= 2) return 0;

  // Descend to the node spelled by str; give up if a character is missing.
  uint cursor = 2;
  for (size_t k = 0; k < len; ++k){
    const uint child = getChild(cursor,str[k]);
    if (child == UINT_MAX){
      return (uint)ret.size();
    }
    cursor = child;
  }

  std::vector<std::pair<size_t, std::pair<std::string, uint> > > found;
  enumerateAll(cursor, std::string(str, len), found);
  sort(found.begin(),found.end());

  const size_t take = (found.size() < limit) ? found.size() : (size_t)limit;
  for (size_t k = 0; k < take; ++k){
    ret.push_back(found[k].second.first);
    retID.push_back(found[k].second.second);
  }
  return (uint)ret.size();
}
339
+
340
// Same search as the string-returning overload, but each result is reported
// as the key's length (appended to `retLen`) plus its id (`retID`).
// Returns the number of results, capped at `limit`.
uint tx::predictiveSearch(const char* str, const size_t len, std::vector<uint>& retLen, std::vector<uint>& retID, const uint limit) const{
  retLen.clear();
  retID.clear();
  if (limit == 0 || terminal.getSize() <= 2) return 0;

  // Descend to the node spelled by str; give up if a character is missing.
  uint cursor = 2;
  for (size_t k = 0; k < len; ++k){
    const uint child = getChild(cursor,str[k]);
    if (child == UINT_MAX){
      return (uint)retLen.size();
    }
    cursor = child;
  }

  std::vector<std::pair<size_t, std::pair<std::string, uint> > > found;
  enumerateAll(cursor, std::string(str, len), found);
  sort(found.begin(),found.end());

  const size_t take = (found.size() < limit) ? found.size() : (size_t)limit;
  for (size_t k = 0; k < take; ++k){
    retLen.push_back(found[k].second.first.size());
    retID.push_back(found[k].second.second);
  }
  return (uint)retLen.size();
}
369
+
370
+
371
+
372
// Recursively appends every key stored at or below the node whose child
// list starts at bit position `pos` of `loud`.
//
// `str` is the text spelled by the path to that node. Each hit is appended
// to `ret` as (key length, (key text, key id)), so sorting `ret` orders the
// keys by length first.
void tx::enumerateAll(const uint pos, const std::string str, std::vector<std::pair<size_t, std::pair<std::string, uint> > >& ret) const{
  const uint nodeId = loud.rank(pos-1,1)-1;
  if (terminal.getBit(nodeId)){
    const std::pair<std::string, uint> keyAndId(str, terminal.rank(nodeId,1)-1);
    // Construct the pair directly: std::make_pair with explicit template
    // arguments rejects lvalue arguments from C++11 on.
    ret.push_back(std::pair<size_t, std::pair<std::string, uint> >(str.size(), keyAndId));
  }

  // Visit the children: one 0-bit per child, terminated by a 1-bit, with
  // the matching edge labels stored consecutively in `edge`.
  uint curPos = pos;
  uint edgePos = loud.rank(pos,0)-2;
  while (loud.getBit(curPos) == 0){
    const uint nextPos = loud.select(loud.rank(curPos,0),1)+1;
    enumerateAll(nextPos,str + edge[edgePos],ret);
    curPos++;
    edgePos++;
  }
}
388
+
389
// Looks for the child reached by edge label `c` among the children whose
// list starts at bit position `pos` of `loud`.
// Returns the bit position where that child's own child list starts, or
// UINT_MAX when no child carries the label.
uint tx::getChild(const uint pos, const char c) const{
  uint labelIdx = loud.rank(pos,0)-2;   // index into `edge` of the first child's label
  // Scan the run of 0-bits (one per child) until the terminating 1-bit.
  for (uint bitPos = pos; loud.getBit(bitPos) != 1; ++bitPos, ++labelIdx){
    if (edge[labelIdx] == c){
      return loud.select(loud.rank(bitPos,0),1)+1;
    }
  }
  return UINT_MAX;
}
405
+
406
// Steps from bit position `pos` of `loud` towards the root.
// Stores in `c` the edge label indexed by the number of 0-bits up to `pos`
// (minus the super-root offset of 2), and returns the position of the 0-bit
// selected by the count of 1-bits before `pos`.
uint tx::getParent(const uint pos, char& c) const{
  const uint labelIdx = loud.rank(pos,0)-2;
  c = edge[labelIdx];

  const uint onesBefore = loud.rank(pos-1, 1);
  return loud.select(onesBefore, 0);
}
410
+
411
+
412
// Reconstructs the key with the given id into `ret` and returns its length.
// Returns 0 (with `ret` empty) when `id` is not below keyNum or the trie is
// too small to search.
uint tx::reverseLookup(const uint id, std::string& ret) const {
  ret.clear();
  if (id >= keyNum || terminal.getSize() <= 2) return 0;

  // Node of the id-th terminal, then its position in `loud`.
  const uint nodeId = terminal.select(id + 1, 1);
  char ignored = 0;
  uint cursor = getParent(loud.select(nodeId+1,1)+1, ignored);

  // Climb to the root, collecting edge labels leaf-to-root.
  while (!(cursor < 2)){
    char label = 0;
    cursor = getParent(cursor, label);
    ret += label;
  }

  // Labels were gathered backwards; flip them into reading order.
  reverse(ret.begin(), ret.end());
  return ret.size();
}
428
+
429
// Returns a copy of everything written to resultLog so far.
std::string tx::getResultLog() const {
  const std::string text = resultLog.str();
  return text;
}
432
+
433
// Returns a copy of everything written to errorLog so far.
std::string tx::getErrorLog() const{
  const std::string text = errorLog.str();
  return text;
}
436
+
437
// Returns the key count held in keyNum (set by read / setArray).
uint tx::getKeyNum() const {
  const uint count = keyNum;
  return count;
}
440
+
441
+
442
+ } // namespace tx_tool