tx 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ #ifndef __SSV_HPP__
2
+ #define __SSV_HPP__
3
+
4
+ #include <memory.h>
5
+ #include <vector>
6
+ #include <cassert>
7
+ #include <stdio.h>
8
+
9
+ namespace tx_tool{
10
+
11
+ typedef unsigned int uint; // 32bit
12
+ typedef unsigned short ushort; // 16bit
13
+ typedef unsigned char uchar; // 8bit
14
+
15
+
16
+ #define SSV_BLOCK_SHIFT (5)
17
+ #define SSV_BLOCK (1U << SSV_BLOCK_SHIFT)
18
+
19
+ #define SSV_LBLOCK_SHIFT (8)
20
+ #define SSV_LBLOCK (1U << SSV_LBLOCK_SHIFT)
21
+ #define SSV_MBLOCK_SHIFT (5)
22
+ #define SSV_MBLOCK (1U << SSV_MBLOCK_SHIFT)
23
+
24
+ #define logLL (16)
25
+ #define LL (1U << logLL)
26
+ #define logLLL (5)
27
+ #define LLL (1U << logLLL)
28
+ #define logL (logLL-1-5)
29
+ #define L (1U << logL)
30
+
31
+ class ssv{
32
+
33
+ public:
34
+ ssv(const uint _size = 0);
35
+ ssv(std::vector<bool>& bv);
36
+
37
+ int resize(const uint _size);
38
+
39
+ void free();
40
+
41
+ ~ssv();
42
+
43
+ inline uint getBit(const uint pos) const {
44
+ return (B[pos / SSV_BLOCK] >> (pos % SSV_BLOCK)) & 1;
45
+ }
46
+
47
+ uint getBits(const uint pos, uint width) const;
48
+ void setBits(const uint pos, const uint width, const uint x);
49
+ uint rank(uint pos, const uint bit) const;
50
+ uint select(uint pos, const uint bit) const;
51
+
52
+ void setBit(uint pos, uint x);
53
+ uint getAllocate() const;
54
+
55
+ void build();
56
+
57
+ uint rankBuild(const uint t_size); // return oneNum
58
+ void selectBuild(const uint t_size);
59
+
60
+ int write(FILE* outfp);
61
+ int read(FILE* infp);
62
+
63
+ uint getBlock(const uint blockPos) const;
64
+ void setBlock(const uint blockPos, const uint x);
65
+ uint getSize() const;
66
+ uint getBlockSize() const;
67
+
68
+ size_t set_array(void* ptr);
69
+
70
+ private:
71
+ uint popCount(uint r) const;
72
+ uint _rank1(const uint pos) const;
73
+ uint _select1(uint x) const;
74
+ uint _select0(uint x) const;
75
+ uint* B;
76
+ uint size;
77
+ uint oneNum;
78
+ uint blockSize; // (size+SSV_BLOCK-1)/SSV_BLOCKSZIE
79
+
80
+ uint LBlockSize;
81
+ uint MBlockSize;
82
+
83
+ // for rank
84
+ uint* levelL;
85
+ uchar* levelM;
86
+
87
+ bool isBuild;
88
+ bool no_delete;
89
+ };
90
+
91
+ }
92
+
93
+ #endif
@@ -0,0 +1,192 @@
1
+ Index: Lib/ruby/rubycontainer.swg
2
+ ===================================================================
3
+ --- Lib/ruby/rubycontainer.swg (revision 11423)
4
+ +++ Lib/ruby/rubycontainer.swg (working copy)
5
+ @@ -446,11 +446,10 @@
6
+ %typemap(out,noblock=1,fragment="RubySequence_Cont")
7
+ std::pair<const_iterator, const_iterator> {
8
+ $result = rb_ary_new2(2);
9
+ - RARRAY_PTR($result)[0] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
10
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
11
+ - RARRAY_PTR($result)[1] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
12
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
13
+ - RARRAY_LEN($result) = 2;
14
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
15
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
16
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
17
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
18
+ }
19
+
20
+ // std::map/multimap/set allow returning std::pair< iterator, iterator > from
21
+ @@ -459,11 +458,10 @@
22
+ %typemap(out,noblock=1,fragment="RubySequence_Cont")
23
+ std::pair<iterator, iterator> {
24
+ $result = rb_ary_new2(2);
25
+ - RARRAY_PTR($result)[0] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
26
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
27
+ - RARRAY_PTR($result)[1] = SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
28
+ - swig::ConstIterator::descriptor(),SWIG_POINTER_OWN);
29
+ - RARRAY_LEN($result) = 2;
30
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).first),
31
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
32
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_const_iterator(%static_cast($1,const $type &).second),
33
+ + swig::ConstIterator::descriptor(),SWIG_POINTER_OWN));
34
+ }
35
+
36
+
37
+ @@ -566,7 +564,8 @@
38
+ {
39
+ Sequence::const_iterator i = $self->begin();
40
+ Sequence::const_iterator e = $self->end();
41
+ - VALUE str = rb_str_new2( swig::type_name< Sequence >() );
42
+ + // Double parentheses to avoid macro expansion error in case Sequence contains comma.
43
+ + VALUE str = rb_str_new2(( swig::type_name< Sequence >() ));
44
+ str = rb_str_cat2( str, " [" );
45
+ bool comma = false;
46
+ VALUE tmp;
47
+ @@ -1112,9 +1111,8 @@
48
+ int i = 0;
49
+ for (const_iterator it = seq.begin();
50
+ it != seq.end(); ++it, ++i) {
51
+ - RARRAY_PTR(obj)[i] = swig::from< value_type >(*it);
52
+ + rb_ary_push(obj, swig::from< value_type >(*it));
53
+ }
54
+ - RARRAY_LEN(obj) = size;
55
+ rb_obj_freeze(obj); // treat as immutable result
56
+ return obj;
57
+ } else {
58
+ Index: Lib/ruby/std_set.i
59
+ ===================================================================
60
+ --- Lib/ruby/std_set.i (revision 11423)
61
+ +++ Lib/ruby/std_set.i (working copy)
62
+ @@ -170,10 +170,9 @@
63
+ %typemap(out,noblock=1,fragment="RubyPairBoolOutputIterator")
64
+ std::pair<iterator, bool> {
65
+ $result = rb_ary_new2(2);
66
+ - RARRAY_PTR($result)[0] = SWIG_NewPointerObj(swig::make_set_nonconst_iterator(%static_cast($1,$type &).first),
67
+ - swig::Iterator::descriptor(),SWIG_POINTER_OWN);
68
+ - RARRAY_PTR($result)[1] = SWIG_From(bool)(%static_cast($1,const $type &).second);
69
+ - RARRAY_LEN($result) = 2;
70
+ + rb_ary_push($result, SWIG_NewPointerObj(swig::make_set_nonconst_iterator(%static_cast($1,$type &).first),
71
+ + swig::Iterator::descriptor(),SWIG_POINTER_OWN));
72
+ + rb_ary_push($result, SWIG_From(bool)(%static_cast($1,const $type &).second));
73
+ }
74
+
75
+ %extend {
76
+ Index: Lib/ruby/std_multimap.i
77
+ ===================================================================
78
+ --- Lib/ruby/std_multimap.i (revision 11423)
79
+ +++ Lib/ruby/std_multimap.i (working copy)
80
+ @@ -115,7 +115,8 @@
81
+ {
82
+ MultiMap::iterator i = $self->begin();
83
+ MultiMap::iterator e = $self->end();
84
+ - VALUE str = rb_str_new2( swig::type_name< MultiMap >() );
85
+ + // Double parentheses to avoid macro expansion error in case MultiMap contains comma.
86
+ + VALUE str = rb_str_new2(( swig::type_name< MultiMap >() ));
87
+ str = rb_str_cat2( str, " {" );
88
+ VALUE tmp;
89
+ while ( i != e )
90
+ Index: Lib/ruby/std_pair.i
91
+ ===================================================================
92
+ --- Lib/ruby/std_pair.i (revision 11423)
93
+ +++ Lib/ruby/std_pair.i (working copy)
94
+ @@ -118,11 +118,9 @@
95
+
96
+ static VALUE from(const std::pair<T,U>& val) {
97
+ VALUE obj = rb_ary_new2(2);
98
+ - RARRAY_PTR(obj)[0] = swig::from<
99
+ - typename swig::noconst_traits<T >::noconst_type>(val.first);
100
+ - RARRAY_PTR(obj)[1] = swig::from(val.second);
101
+ - RARRAY_LEN(obj) = 2;
102
+ - rb_define_singleton_method(obj, "second",
103
+ + rb_ary_push(obj, swig::from<typename swig::noconst_traits<T >::noconst_type>(val.first));
104
+ + rb_ary_push(obj, swig::from(val.second));
105
+ + rb_define_singleton_method(obj, "second",
106
+ VALUEFUNC(_wrap_pair_second), 0 );
107
+ rb_define_singleton_method(obj, "second=",
108
+ VALUEFUNC(_wrap_pair_second_eq), 1 );
109
+ @@ -148,7 +146,8 @@
110
+ VALUE inspect() const
111
+ {
112
+ VALUE tmp;
113
+ - VALUE str = rb_str_new2( swig::type_name< pair >() );
114
+ + // Double parentheses to avoid macro expansion error in case pair contains comma.
115
+ + VALUE str = rb_str_new2(( swig::type_name< pair >() ));
116
+ str = rb_str_cat2( str, " (" );
117
+ tmp = swig::from( $self->first );
118
+ tmp = rb_obj_as_string( tmp );
119
+ Index: Lib/ruby/rubyclasses.swg
120
+ ===================================================================
121
+ --- Lib/ruby/rubyclasses.swg (revision 11423)
122
+ +++ Lib/ruby/rubyclasses.swg (working copy)
123
+ @@ -315,30 +315,36 @@
124
+
125
+ };
126
+
127
+ - ID GC_VALUE::hash_id = rb_intern("hash");
128
+ - ID GC_VALUE::lt_id = rb_intern("<");
129
+ - ID GC_VALUE::gt_id = rb_intern(">");
130
+ - ID GC_VALUE::eq_id = rb_intern("==");
131
+ - ID GC_VALUE::le_id = rb_intern("<=");
132
+ - ID GC_VALUE::ge_id = rb_intern(">=");
133
+ + // We need this because rb_intern macro uses statement-expression and
134
+ + // statement-expression is allowed only inside functions.
135
+ + static ID rb_intern_wrapper(const char* str) {
136
+ + return rb_intern(str);
137
+ + }
138
+
139
+ - ID GC_VALUE::pos_id = rb_intern("+@");
140
+ - ID GC_VALUE::neg_id = rb_intern("-@");
141
+ - ID GC_VALUE::inv_id = rb_intern("~");
142
+ + ID GC_VALUE::hash_id = rb_intern_wrapper("hash");
143
+ + ID GC_VALUE::lt_id = rb_intern_wrapper("<");
144
+ + ID GC_VALUE::gt_id = rb_intern_wrapper(">");
145
+ + ID GC_VALUE::eq_id = rb_intern_wrapper("==");
146
+ + ID GC_VALUE::le_id = rb_intern_wrapper("<=");
147
+ + ID GC_VALUE::ge_id = rb_intern_wrapper(">=");
148
+
149
+ - ID GC_VALUE::add_id = rb_intern("+");
150
+ - ID GC_VALUE::sub_id = rb_intern("-");
151
+ - ID GC_VALUE::mul_id = rb_intern("*");
152
+ - ID GC_VALUE::div_id = rb_intern("/");
153
+ - ID GC_VALUE::mod_id = rb_intern("%");
154
+ + ID GC_VALUE::pos_id = rb_intern_wrapper("+@");
155
+ + ID GC_VALUE::neg_id = rb_intern_wrapper("-@");
156
+ + ID GC_VALUE::inv_id = rb_intern_wrapper("~");
157
+
158
+ - ID GC_VALUE::and_id = rb_intern("&");
159
+ - ID GC_VALUE::or_id = rb_intern("|");
160
+ - ID GC_VALUE::xor_id = rb_intern("^");
161
+ + ID GC_VALUE::add_id = rb_intern_wrapper("+");
162
+ + ID GC_VALUE::sub_id = rb_intern_wrapper("-");
163
+ + ID GC_VALUE::mul_id = rb_intern_wrapper("*");
164
+ + ID GC_VALUE::div_id = rb_intern_wrapper("/");
165
+ + ID GC_VALUE::mod_id = rb_intern_wrapper("%");
166
+
167
+ - ID GC_VALUE::lshift_id = rb_intern("<<");
168
+ - ID GC_VALUE::rshift_id = rb_intern(">>");
169
+ + ID GC_VALUE::and_id = rb_intern_wrapper("&");
170
+ + ID GC_VALUE::or_id = rb_intern_wrapper("|");
171
+ + ID GC_VALUE::xor_id = rb_intern_wrapper("^");
172
+
173
+ + ID GC_VALUE::lshift_id = rb_intern_wrapper("<<");
174
+ + ID GC_VALUE::rshift_id = rb_intern_wrapper(">>");
175
+ +
176
+ VALUE GC_VALUE::_hash = Qnil;
177
+
178
+ typedef GC_VALUE LANGUAGE_OBJ;
179
+ Index: Lib/ruby/std_map.i
180
+ ===================================================================
181
+ --- Lib/ruby/std_map.i (revision 11423)
182
+ +++ Lib/ruby/std_map.i (working copy)
183
+ @@ -345,7 +345,8 @@
184
+ {
185
+ Map::const_iterator i = $self->begin();
186
+ Map::const_iterator e = $self->end();
187
+ - VALUE str = rb_str_new2( swig::type_name< Map >() );
188
+ + // Double parentheses to avoid macro expansion error in case Map contains comma.
189
+ + VALUE str = rb_str_new2(( swig::type_name< Map >() ));
190
+ str = rb_str_cat2( str, " {" );
191
+ bool comma = false;
192
+ VALUE tmp;
@@ -0,0 +1,442 @@
1
+ #include "tx.hpp"
2
+
3
+ #include <climits>
4
+ #include "ssv.hpp"
5
+ namespace tx_tool{
6
+
7
+ uint tx::NOTFOUND = UINT_MAX;
8
+
9
// Work item for the breadth-first trie construction in tx::build(): the
// half-open range [left, right) of the sorted word list, plus the character
// depth at which that range is examined.
struct queue_elem{
    size_t left;
    size_t right;
    int depth;

    queue_elem(size_t _left, size_t _right, int _depth)
        : left(_left),
          right(_right),
          depth(_depth)
    {
    }
};
15
+
16
+ tx::tx():edge(NULL), keyNum(0), no_delete(false) {}
17
+
18
+ tx::~tx(){
19
+ if (!no_delete) {
20
+ delete[] edge;
21
+ edge = NULL;
22
+ no_delete = false;
23
+ }
24
+ }
25
+
26
+ int tx::build(std::vector<std::string>& wordList, const char* fileName) {
27
+ sort(wordList.begin(),wordList.end());
28
+ const size_t origWordNum = wordList.size();
29
+ wordList.erase(unique(wordList.begin(),wordList.end()),wordList.end());
30
+ int keyNum = (int)wordList.size();
31
+ if (keyNum != origWordNum){
32
+ resultLog << "shrink word list " << origWordNum << " -> " << keyNum << std::endl;
33
+ } else {
34
+ resultLog << "word list " << keyNum << " elements" << std::endl;
35
+ }
36
+
37
+ uint totalSize = 0;
38
+ for (size_t i = 0; i < wordList.size(); i++){
39
+ totalSize += (uint)wordList[i].size();
40
+ }
41
+
42
+ FILE* outfp = fopen(fileName,"wb");
43
+ if (outfp == NULL){
44
+ errorLog << "cannot open " << fileName << std::endl;
45
+ return -1;
46
+ }
47
+
48
+ std::queue<queue_elem> q;
49
+ if (keyNum != 0){
50
+ q.push(queue_elem(0,keyNum,0));
51
+ }
52
+ if (fwrite(&keyNum,sizeof(int),1,outfp) != 1){
53
+ errorLog << "fwrite error " << std::endl;
54
+ return -1;
55
+ }
56
+
57
+ std::vector<bool> vb_loud;
58
+ std::vector<bool> vb_terminal;
59
+
60
+ vb_loud.push_back(0); // super root
61
+ vb_loud.push_back(1);
62
+
63
+ uint nodeNum = 0;
64
+
65
+ while (!q.empty()){
66
+ queue_elem& elem = q.front();
67
+ const int depth = elem.depth;
68
+ const size_t left = elem.left;
69
+ const size_t right = elem.right;
70
+ q.pop();
71
+
72
+ nodeNum++;
73
+ size_t newLeft = left;
74
+ if (wordList[left].size() == depth){
75
+ vb_terminal.push_back(1); // this node has terminate
76
+ newLeft++;
77
+ if (newLeft == right){
78
+ vb_loud.push_back(1);
79
+ continue;
80
+ }
81
+ } else {
82
+ vb_terminal.push_back(0);
83
+ }
84
+ size_t prev = newLeft;
85
+ char prev_c = wordList[prev][depth];
86
+ for (size_t i = newLeft+1; i < right; i++){
87
+ if (prev_c != wordList[i][depth]){
88
+ fputc(prev_c,outfp);
89
+ vb_loud.push_back(0);
90
+ q.push(queue_elem(prev,i,depth+1));
91
+ prev = i;
92
+ prev_c = wordList[prev][depth];
93
+ }
94
+ }
95
+ if (prev != right){
96
+ fputc(prev_c,outfp);
97
+ vb_loud.push_back(0);
98
+ q.push(queue_elem(prev,right,depth+1));
99
+ }
100
+ vb_loud.push_back(1);
101
+ }
102
+
103
+ {
104
+ ssv sv(vb_loud);
105
+ sv.build();
106
+ if (sv.write(outfp) == -1){
107
+ errorLog << "fwrite error " << std::endl;
108
+ return -1;
109
+ }
110
+ }
111
+ {
112
+ ssv sv(vb_terminal);
113
+ sv.build();
114
+ if (sv.write(outfp) == -1){
115
+ errorLog << "fwrite error " << std::endl;
116
+ return -1;
117
+ }
118
+ }
119
+
120
+ if (fwrite(&nodeNum,sizeof(int),1,outfp) != 1){
121
+ errorLog << "fwrite error " << std::endl;
122
+ return -1;
123
+ }
124
+
125
+ size_t outfpSize = ftell(outfp);
126
+ resultLog << "outputSize:" << outfpSize << " inputSize:" << totalSize << " ratio:" << (float)outfpSize/totalSize << std::endl;
127
+ if (outfp) fclose(outfp);
128
+ return 0;
129
+ }
130
+
131
+ int tx::read(const char* fileName){
132
+ FILE* infp = fopen(fileName,"rb");
133
+ if (infp == NULL){
134
+ errorLog << "cannot open " << fileName << std::endl;
135
+ return -1;
136
+ }
137
+
138
+ keyNum = 0;
139
+ if (fread(&keyNum,sizeof(int),1,infp) != 1){
140
+ errorLog << "keyNum read error" << std::endl;
141
+ fclose(infp);
142
+ return -1;
143
+ }
144
+
145
+ fseek(infp,0,SEEK_END);
146
+ size_t fileSize = ftell(infp);
147
+ if (fseek(infp,fileSize-(1*sizeof(int)),SEEK_SET) == -1){
148
+ errorLog << "fseek error" << std::endl;
149
+ fclose(infp);
150
+ return -1;
151
+ }
152
+
153
+ int nodeNum = -1;
154
+ if (fread(&nodeNum,sizeof(int),1,infp) != 1){
155
+ errorLog << "nodeNum read error" << std::endl;
156
+ fclose(infp);
157
+ return -1;
158
+ }
159
+ resultLog << "keyNum:" << (int)keyNum << " nodeNum:" << nodeNum << std::endl;
160
+
161
+ if (fseek(infp,sizeof(int)*1,SEEK_SET) == -1){
162
+ errorLog << "fseek error" << std::endl;
163
+ fclose(infp);
164
+ return -1;
165
+ }
166
+
167
+ if (nodeNum > 0){
168
+ edge = new char [nodeNum-1];
169
+ if (fread(edge,sizeof(char),nodeNum-1,infp) != nodeNum-1){
170
+ errorLog << "fseek error" << std::endl;
171
+ fclose(infp);
172
+ return -1;
173
+ }
174
+ }
175
+
176
+ loud.read(infp);
177
+ terminal.read(infp);
178
+
179
+ if (infp) fclose(infp);
180
+ return 0;
181
+ }
182
+
183
+ int tx::setArray(void* ptr, size_t readSize){
184
+ keyNum = *(uint*)(ptr);
185
+ printf("keyNum:%d\n", keyNum);
186
+ int nodeNum = *(uint*)((uchar*)ptr+readSize-sizeof(uint));
187
+ printf("nodeNum:%d\n", nodeNum);
188
+ edge = (char*)ptr + sizeof(uint);
189
+ size_t readNum = loud.set_array((void*)((uchar*)ptr + sizeof(uint) + nodeNum - 1));
190
+ size_t readNum2 = terminal.set_array((void*)((uchar*)ptr + sizeof(uint) + nodeNum - 1 + readNum));
191
+ if (sizeof(uint) + nodeNum + - 1 + readNum + readNum2 + sizeof(uint) != readSize){
192
+ errorLog << "setArray error" << std::endl;
193
+ return -1;
194
+ }
195
+ no_delete = true;
196
+ return 0;
197
+ }
198
+
199
+ uint tx::prefixSearch(const char* str, const size_t len, size_t& retLen, bool matchPrefix) const {
200
+ uint curPos = 2;
201
+ uint retId = NOTFOUND;
202
+ retLen = NOTFOUND;
203
+ if (terminal.getSize() <= 2) return retId;
204
+
205
+ for (size_t i = 0 ; ; i++){
206
+ const uint nodeId = loud.rank(curPos-1,1)-1;
207
+ if (terminal.getBit(nodeId)){
208
+ retLen = i;
209
+ retId = terminal.rank(nodeId,1)-1;
210
+ } else if (matchPrefix){
211
+ retLen = i;
212
+ retId = NOTFOUND;
213
+ }
214
+ if (i == len) break;
215
+ uint nextPos = getChild(curPos,str[i]);
216
+ if (nextPos == UINT_MAX){
217
+ break;
218
+ }
219
+ curPos = nextPos;
220
+ }
221
+ return retId;
222
+ }
223
+
224
+ uint tx::expandSearch(const char* str, const size_t len, std::vector<std::string>& ret, const uint limit) const {
225
+ ret.clear();
226
+ if (limit == 0) return 0;
227
+ if (terminal.getSize() <= 2) return 0;
228
+
229
+ bool prefix = false;
230
+ uint curPos = 2;
231
+ for (size_t i = 0; i < len; i++){
232
+ uint nextPos = getChild(curPos,str[i]);
233
+ const uint nodeId = loud.rank(curPos-1,1)-1;
234
+ if (terminal.getBit(nodeId)){
235
+ ret.push_back(std::string(str,str+i));
236
+ }
237
+
238
+ if (nextPos == UINT_MAX){
239
+ prefix = true;
240
+ break;
241
+ }
242
+ curPos = nextPos;
243
+ }
244
+
245
+ if (!prefix){
246
+ std::string curStr(str, len);
247
+ std::vector<std::pair<size_t, std::pair<std::string, uint> > > ret_p;
248
+ enumerateAll(curPos,curStr,ret_p);
249
+ sort(ret_p.begin(),ret_p.end());
250
+ for (size_t i = 0; i < ret_p.size() && i < limit; i++){
251
+ ret.push_back(ret_p[i].second.first);
252
+ }
253
+ }
254
+ return (uint)ret.size();
255
+ }
256
+
257
+ uint tx::commonPrefixSearch(const char* str, const size_t len, std::vector<std::string>& ret, std::vector<uint>& retID, const uint limit) const{
258
+ ret.clear();
259
+ retID.clear();
260
+ if (limit == 0) return 0;
261
+ if (terminal.getSize() <= 2) return 0;
262
+
263
+ uint curPos = 2;
264
+
265
+ for (size_t i = 0; ; i++){
266
+ const uint nodeId = loud.rank(curPos-1,1)-1;
267
+ if (terminal.getBit(nodeId)){
268
+ ret.push_back(std::string(str, str+i));
269
+ retID.push_back(terminal.rank(nodeId,1)-1);
270
+ if (ret.size() == limit) break;
271
+ }
272
+ if (i == len) break;
273
+
274
+ uint nextPos = getChild(curPos,str[i]);
275
+ if (nextPos == UINT_MAX){
276
+ break;
277
+ }
278
+ curPos = nextPos;
279
+ }
280
+ return (uint)ret.size();
281
+ }
282
+
283
+ uint tx::commonPrefixSearch(const char* str, const size_t len, std::vector<uint>& retLen, std::vector<uint>& retID, const uint limit) const{
284
+ retLen.clear();
285
+ retID.clear();
286
+ if (limit == 0) return 0;
287
+ if (terminal.getSize() <= 2) return 0;
288
+
289
+ uint curPos = 2;
290
+ for (size_t i = 0; ; i++){
291
+ const uint nodeId = loud.rank(curPos-1,1)-1;
292
+ if (terminal.getBit(nodeId)){
293
+ retLen.push_back(i);
294
+ retID.push_back(terminal.rank(nodeId,1)-1);
295
+ if (retLen.size() == limit) break;
296
+ }
297
+ if (i == len) break;
298
+
299
+ uint nextPos = getChild(curPos,str[i]);
300
+ if (nextPos == UINT_MAX){
301
+ break;
302
+ }
303
+ curPos = nextPos;
304
+ }
305
+
306
+ return (uint)retLen.size();
307
+ }
308
+
309
+
310
+ uint tx::predictiveSearch(const char* str, const size_t len, std::vector<std::string>& ret, std::vector<uint>& retID, const uint limit) const{
311
+ ret.clear();
312
+ retID.clear();
313
+ if (limit == 0) return 0;
314
+ if (terminal.getSize() <= 2) return 0;
315
+
316
+ bool prefix = false;
317
+ uint curPos = 2;
318
+ for (size_t i = 0; i < len; i++){
319
+ uint nextPos = getChild(curPos,str[i]);
320
+ if (nextPos == UINT_MAX){
321
+ prefix = true;
322
+ break;
323
+ }
324
+ curPos = nextPos;
325
+ }
326
+
327
+ if (!prefix){
328
+ std::string curStr(str, len);
329
+ std::vector<std::pair<size_t, std::pair<std::string, uint> > > ret_p;
330
+ enumerateAll(curPos, curStr, ret_p);
331
+ sort(ret_p.begin(),ret_p.end());
332
+ for (size_t i = 0; i < ret_p.size() && i < limit; i++){
333
+ ret.push_back(ret_p[i].second.first);
334
+ retID.push_back(ret_p[i].second.second);
335
+ }
336
+ }
337
+ return (uint)ret.size();
338
+ }
339
+
340
+ uint tx::predictiveSearch(const char* str, const size_t len, std::vector<uint>& retLen, std::vector<uint>& retID, const uint limit) const{
341
+ retLen.clear();
342
+ retID.clear();
343
+ if (limit == 0) return 0;
344
+ if (terminal.getSize() <= 2) return 0;
345
+
346
+ bool prefix = false;
347
+ uint curPos = 2;
348
+ for (size_t i = 0; i < len; i++){
349
+ uint nextPos = getChild(curPos,str[i]);
350
+ if (nextPos == UINT_MAX){
351
+ prefix = true;
352
+ break;
353
+ }
354
+ curPos = nextPos;
355
+ }
356
+
357
+ if (!prefix){
358
+ std::string curStr(str, len);
359
+ std::vector<std::pair<size_t, std::pair<std::string, uint> > > ret_p;
360
+ enumerateAll(curPos, curStr, ret_p);
361
+ sort(ret_p.begin(),ret_p.end());
362
+ for (size_t i = 0; i < ret_p.size() && i < limit; i++){
363
+ retLen.push_back(ret_p[i].second.first.size());
364
+ retID.push_back(ret_p[i].second.second);
365
+ }
366
+ }
367
+ return (uint)retLen.size();
368
+ }
369
+
370
+
371
+
372
+ void tx::enumerateAll(const uint pos, const std::string str, std::vector<std::pair<size_t, std::pair<std::string, uint> > >& ret) const{
373
+ const uint nodeId = loud.rank(pos-1,1)-1;
374
+ if (terminal.getBit(nodeId)){
375
+ std::pair<std::string, uint> tmp(str, terminal.rank(nodeId,1)-1);
376
+ ret.push_back(std::make_pair<size_t, std::pair<std::string, uint> >(str.size(), tmp));
377
+ }
378
+
379
+ uint curPos = pos;
380
+ uint edgePos = loud.rank(pos,0)-2;
381
+ while (loud.getBit(curPos) == 0){
382
+ const uint nextPos = loud.select(loud.rank(curPos,0),1)+1;
383
+ enumerateAll(nextPos,str + edge[edgePos],ret);
384
+ curPos++;
385
+ edgePos++;
386
+ }
387
+ }
388
+
389
+ uint tx::getChild(const uint pos, const char c) const{
390
+ uint curPos = pos;
391
+ uint edgePos = loud.rank(pos,0)-2;
392
+ for (;;){
393
+ if (loud.getBit(curPos) == 1) {
394
+ curPos = UINT_MAX;
395
+ return curPos;
396
+ }
397
+ if (edge[edgePos] == c){
398
+ uint nextPos = loud.select(loud.rank(curPos,0),1)+1;
399
+ return nextPos;
400
+ }
401
+ curPos++;
402
+ edgePos++;
403
+ }
404
+ }
405
+
406
+ uint tx::getParent(const uint pos, char& c) const{
407
+ c = edge[loud.rank(pos,0)-2];
408
+ return loud.select(loud.rank(pos-1, 1), 0);
409
+ }
410
+
411
+
412
+ uint tx::reverseLookup(const uint id, std::string& ret) const {
413
+ ret.clear();
414
+ if (id >= keyNum) return 0;
415
+ if (terminal.getSize() <= 2) return 0;
416
+
417
+ const uint nodeId = terminal.select(id + 1, 1);
418
+ char unused_c = 0;
419
+ uint curPos = getParent(loud.select(nodeId+1,1)+1, unused_c);
420
+ while (curPos >= 2){
421
+ char c = 0;
422
+ curPos = getParent(curPos, c);
423
+ ret += c;
424
+ }
425
+ reverse(ret.begin(), ret.end());
426
+ return ret.size();
427
+ }
428
+
429
+ std::string tx::getResultLog() const {
430
+ return resultLog.str();
431
+ }
432
+
433
+ std::string tx::getErrorLog() const{
434
+ return errorLog.str();
435
+ }
436
+
437
+ uint tx::getKeyNum() const {
438
+ return keyNum;
439
+ }
440
+
441
+
442
+ } // namespace tx_tool