ruby_da 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.gitmodules +6 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +38 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/ext/ruby_da/RubyDaService.java +153 -0
- data/ext/ruby_da/extconf.rb +9 -0
- data/ext/ruby_da/ruby_da.cpp +171 -0
- data/ext/ruby_da/ruby_da.h +6 -0
- data/lib/ruby_da/version.rb +3 -0
- data/lib/ruby_da.rb +12 -0
- data/libda/README.md +1 -0
- data/libda/include/double_array.hpp +138 -0
- data/libda/include/double_array_internal.hpp +358 -0
- data/libda/include/utf8.hpp +117 -0
- data/libda/src/double_array.cc +114 -0
- data/libda/src/double_array_static.cc +228 -0
- data/libda/src/profile.hpp +38 -0
- data/ruby_da.gemspec +36 -0
- metadata +124 -0
@@ -0,0 +1,358 @@
|
|
1
|
+
// Copyright (C) 2015 Masahiko Higashiyama
|
2
|
+
//
|
3
|
+
// Permission is hereby granted, free of charge, to any person obtaining a
|
4
|
+
// copy of this software and associated documentation files (the "Software"),
|
5
|
+
// to deal in the Software without restriction, including without limitation
|
6
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
7
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
8
|
+
// Software is furnished to do so, subject to the following conditions:
|
9
|
+
//
|
10
|
+
// The above copyright notice and this permission notice shall be included in
|
11
|
+
// all copies or substantial portions of the Software.
|
12
|
+
//
|
13
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
// SOFTWARE.
|
20
|
+
#ifndef _DOUBLE_ARRAY_INTERNAL_H_
|
21
|
+
#define _DOUBLE_ARRAY_INTERNAL_H_
|
22
|
+
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
28
|
+
|
29
|
+
|
30
|
+
/**
 * Dynamic double-array trie keyed by NUL-terminated byte strings.
 *
 * Storage layout (all in `array_`):
 *  - slot 0 is a header: `base` holds the insert counter (entry_num()),
 *    `check` holds the index of the first free slot (empty_head()).
 *  - slot 1 is the root state; every lookup starts there.
 *  - a used slot `t` reached from state `s` on byte `c` satisfies
 *    `t == array_[s].base + c` and `array_[t].check == s`.
 *  - a free slot stores `-(index of the next free slot)` in `check`, so the
 *    free slots form a singly linked list ordered by index.
 *  - the end of a key is the transition on byte 0; the slot it leads to
 *    stores `-id` in `base`.
 */
class DoubleArrayInternal {
  struct node {
    int base;
    int check;
  };

  /** Head of the free-slot list (stored in the header slot). */
  int& empty_head() {
    return array_[0].check;
  }

  /** Number of keys inserted so far; erase() never decrements it. */
  int& entry_num() {
    return array_[0].base;
  }

  /**
   * Marks slot `pos` as a child of state `base`.
   *
   * With `overwrite` set and the slot already in use, only the owner is
   * replaced. Otherwise the slot is unlinked from the free list first.
   * Throws std::runtime_error if `pos` is not found in the free list.
   */
  void set_check(int pos, int base, bool overwrite = false){
    if(overwrite && array_[pos].check > 0){
      array_[pos].check = base;
    }else if(pos == empty_head()){
      empty_head() = -array_[pos].check;
      array_[pos].check = base;
    }else{
      // Walk the free list to find the predecessor of `pos`.
      int i = empty_head();
      int n = array_.size();
      while(i < n){
        if(pos == -array_[i].check) break;
        i = -array_[i].check;
      }
      if(i >= n) throw std::runtime_error("failed set check");
      array_[i].check = array_[pos].check;
      array_[pos].check = base;
    }
  }

  /**
   * Returns slot `pos` to the free list, keeping the list sorted by index.
   * Throws std::runtime_error if no insertion point is found.
   */
  void delete_check(int pos){
    if(pos < empty_head()){
      array_[pos].check = -empty_head();
      empty_head() = pos;
    }else{
      // Find the free slot `i` with i < pos < next(i) and link pos after it.
      int i = empty_head();
      int n = array_.size();
      while(i < n){
        if(i < pos && pos < -array_[i].check) break;
        i = -array_[i].check;
      }
      if(i >= n) throw std::runtime_error("failed delete check");
      array_[pos].check = array_[i].check;
      array_[i].check = -pos;
    }
  }

  /**
   * Grows the array so that index `pos` is valid. New slots get base 0 and
   * are chained onto the free list (`check = -(index + 1)`).
   */
  void expand(size_t pos){
    if(pos < array_.size()) return;
    size_t i = array_.size();
    size_t n = array_.capacity();
    if(n == 0) n = 1;          // doubling from zero would never terminate
    while(pos >= n) n <<= 1;   // need room for pos + 1 elements
    array_.reserve(n);
    array_.resize(pos+1);
    for(; i <= pos; i++){
      array_[i].check = -static_cast<int>(i+1);
    }
  }

  /**
   * Follows `str` from the root, including its terminating NUL.
   *
   * Returns (state, p) with a positive state when the whole key including
   * the terminator matched; `p` then points at the terminator and `state`
   * is the state reached before the terminal transition. Otherwise returns
   * (-state, p) where `state` is the last state reached and `p` points at
   * the first byte that has no transition.
   */
  std::pair<int,const char *> fetch(const char *str) const {
    const char *p = str;
    int state = 1;
    const int n = static_cast<int>(array_.size());
    while(1){
      unsigned char c = *p;
      int t = array_[state].base + c;
      // t > 0: slot 0 is the header, never a transition target. Without
      // this guard an empty trie "matches" the empty string because
      // array_[0].check (the free-list head) equals the root index.
      if(t > 0 && t < n && array_[t].check == state){
        if(c == 0) return std::make_pair(state, p);
        state = t;
        ++p;
      }else{
        return std::make_pair(-state, p);
      }
    }
  }

  /**
   * Appends to `labels` every byte that has a transition out of state
   * `index`, given that state's `base`. A non-positive base means the state
   * has no children (0 = unassigned, negative = terminal id).
   */
  void get_labels(int index, int base, std::vector<unsigned char> &labels) const {
    if(base <= 0) return;
    size_t maxlen = std::min(static_cast<int>(array_.size()), base + 256);
    for(size_t i = base; i < maxlen; i++){
      if(array_[i].check == index){
        labels.push_back(static_cast<unsigned char>(i - base));
      }
    }
  }

  /**
   * Finds a base value (> 1) such that the slot for the new byte `c` is
   * free and none of the slots for the existing labels `codes` is in use.
   * Grows the array as needed.
   */
  int find_base(const std::vector<unsigned char> &codes, unsigned char c){
    int base_cand;
    int empty_index = empty_head();
    while(1){
      expand(empty_index);
      base_cand = empty_index - c;
      if(base_cand <= 1){
        empty_index = -array_[empty_index].check;
        continue;
      }
      bool found = true;
      for(size_t i = 0; i < codes.size(); i++){
        expand(base_cand + codes[i]);
        if(array_[base_cand + codes[i]].check > 0){
          found = false;
          break;
        }
      }
      if(found) break;
      empty_index = -array_[empty_index].check;
    }
    return base_cand;
  }

  /**
   * Relocates state `from` (whose base was `from_base`) to slot `to`:
   * copies the base, re-parents the children to `to`, then frees `from`.
   */
  void move_to(int from, int from_base, int to){
    //printf("copy base[%d] = from %d\n", to, from_base);
    array_[to].base = from_base;
    if(from_base > 0){
      std::vector<unsigned char> trans;
      get_labels(from, from_base, trans);
      for(size_t j = 0; j < trans.size(); j++){
        //printf("move from check[%d](%c) = %d\n",
        //from_base + trans[j], trans[j], to);
        set_check(from_base + trans[j], to, true);
      }
    }
    //printf("init from address %d %d %d\n",
    //from, array_[from].base, from_base);
    array_[from].base = 0;
    delete_check(from);
  }

  /**
   * Inserts the remaining suffix `str` (terminator included) below state
   * `base`, relocating the state's children when the target slot is taken.
   * The terminal slot receives `-id`, or `-(entry_num() + 1)` when id < 1.
   */
  void _insert(const char *str, int base, int id){
    int pos = array_[base].base + static_cast<unsigned char>(*str);
    expand(std::max(base,pos));
    if(array_[base].base == 0 || array_[pos].check >= 0){ //conflict
      int oldbase = array_[base].base;
      std::vector<unsigned char> codes;
      if(oldbase > 0) get_labels(base, oldbase, codes);
      int base_cand = find_base(codes, static_cast<unsigned char>(*str));
      //printf("set base base[%d] = %d\n", base, base_cand);
      array_[base].base = base_cand;
      std::vector<int> from,from_base;
      // First claim all new child slots, then move the children, so that a
      // moved child never lands on a slot that a later sibling still needs.
      for(size_t i = 0; i < codes.size(); i++){
        int old_t = oldbase + codes[i];
        from.push_back(old_t);
        from_base.push_back(array_[old_t].base);
        //printf("move check check[%d](%c) = %d\n",
        //base_cand + codes[i], codes[i], base);
        set_check(base_cand + codes[i], base);
      }
      for(size_t i = 0; i < from.size(); i++){
        move_to(from[i], from_base[i], base_cand + codes[i]);
      }
      pos = base_cand + static_cast<unsigned char>(*str);
    }
    //printf("set check check[%d](%c) = %d\n", pos, *str, base);
    set_check(pos, base);
    if(*str != '\0'){
      _insert(str+1, pos, id);
    }else{
      if(id < 1){
        array_[pos].base = -(entry_num() + 1);
      }else{
        array_[pos].base = -id;
      }
      entry_num() += 1;
    }
  }

  /**
   * Frees the child of state `index` reached on byte `*p`, then walks back
   * towards the root freeing every state that is left without children.
   * `str` is the start of the key and stops the walk at the root.
   */
  void _erase(const char *str, int index, const char *p){
    int newbase = array_[index].check;
    // The byte must be read as unsigned: transitions are stored at
    // base + (unsigned char)c, and a plain `char` may be negative.
    int child = array_[index].base + static_cast<unsigned char>(*p);
    delete_check(child);
    // Clear the stale base (a negative id or an old state base) so a later
    // insert that reuses this slot treats it as a fresh state.
    array_[child].base = 0;
    std::vector<unsigned char> labels;
    get_labels(index, array_[index].base, labels);
    if(labels.size() == 0 && str != p)
      _erase(str, newbase, --p);
  }

  /* For DEBUG */
  void print_array() const {
    std::cout << "[";
    for(size_t i = 0; i < array_.size(); i++){
      std::cout << "element " << i << ":" << array_[i].base << ":" << array_[i].check << std::endl;
    }
    std::cout << "]" << std::endl;
  }

  /**
   * Depth-first walk below `current_idx`. `path` holds the bytes consumed
   * so far; every terminal transition appends (key, id) to `result`.
   */
  void _enumerate(int current_idx,
                  std::vector<unsigned char> &path,
                  std::vector<std::pair<std::string, int> > &result) const {
    std::vector<unsigned char> labels;
    get_labels(current_idx, array_[current_idx].base, labels);
    for(size_t k = 0; k < labels.size(); ++k){
      const unsigned char label = labels[k];
      int new_idx = array_[current_idx].base + label;
      if(label == '\0'){
        std::string res;
        res.reserve(path.size());
        for(size_t j = 0; j < path.size(); ++j){
          res += static_cast<char>(path[j]);
        }
        result.push_back(std::make_pair(res, -array_[new_idx].base));
      }else{
        path.push_back(label);
        _enumerate(new_idx, path, result);
        path.pop_back();
      }
    }
  }

public:

  /** Returns the id stored for `str`, or -1 when the key is absent. */
  int exact_match(const char *str) const {
    std::pair<int, const char*> state = fetch(str);
    if(state.first > 0){
      int t = array_[state.first].base + static_cast<unsigned char>(*state.second);
      return -array_[t].base;
    }
    return -1;
  }

  /**
   * Appends every stored (key, id) whose key starts with `str` to `result`.
   * An empty `str` enumerates the whole trie. Nothing is appended when no
   * stored key has `str` as a prefix.
   */
  void enumerate(const char *str, std::vector<std::pair<std::string, int> > &result) const {
    int index = 1;
    if(str[0] != '\0'){
      std::pair<int, const char*> state = fetch(str);
      if(state.first > 0){
        index = state.first;
      }else{
        // fetch stopped before the end of `str`: the prefix is not in the
        // trie, so listing the subtree of the partial match would return
        // keys that do not exist.
        if(*state.second != '\0') return;
        index = -state.first;
      }
    }
    std::vector<unsigned char> path;
    for(const char *p = str; *p != '\0'; p++){
      path.push_back(static_cast<unsigned char>(*p));
    }
    _enumerate(index, path, result);
  }

  /**
   * Reports every stored key that is a non-empty prefix of `str`: the
   * prefix length in bytes goes to `res_len`, its id to `res_id`.
   */
  void common_prefix_search(const char *str,
                            std::vector<int> &res_len,
                            std::vector<int> &res_id) const {
    const char *p = str;
    int state = 1;
    const int n = static_cast<int>(array_.size());
    while(1){
      int t = array_[state].base;
      // A terminal transition (byte 0) out of `state` means the bytes
      // consumed so far form a stored key. The root is skipped.
      if(state != 1 && t > 0 && t < n && array_[t].check == state){
        res_len.push_back(static_cast<int>(p - str));
        res_id.push_back(-array_[t].base);
      }
      unsigned char c = *p;
      const int next = t + c;
      if(next > 0 && next < n && array_[next].check == state){
        if(c == 0) return;
        state = next;
        ++p;
      }else{
        return;
      }
    }
  }

  /**
   * Inserts `str`. With id < 1 the key gets the next sequential id.
   * Returns false (and changes nothing) when the key already exists.
   */
  bool insert(const char *str, int id = -1){
    std::pair<int,const char*> state = fetch(str);
    if(state.first > 0){
      //printf("insert failed %s, Found.", str);
      return false;
    }
    //printf("insert %s", str);
    _insert(state.second, -state.first, id);
    return true;
  }


  /** Removes `str`. Returns false when the key is not present. */
  bool erase(const char *str){
    std::pair<int,const char*> state = fetch(str);
    if(state.first < 0){
      //printf("erase failed %s, Not Found.", str);
      return false;
    }
    //printf("erase %s", str);
    _erase(str, state.first, state.second);

    return true;
  }

  /** Creates an empty trie: header slot, root slot and a free list. */
  DoubleArrayInternal() : array_(2) {
    entry_num() = 0;
    empty_head() = 1;
    array_[1].check = -2;
    expand(8192);
  }

  /** Writes the raw array to `filename`. Returns false on failure. */
  bool save(const char *filename) const {
    std::ofstream ofs(filename, std::ios::binary);
    if(!ofs){
      return false;
    }
    return save(ofs);
  }

  /**
   * Writes the raw array to `os` as host-layout `node` structs (the file is
   * therefore not portable across differing int sizes or byte orders).
   */
  bool save(std::ostream &os) const {
    os.write(reinterpret_cast<const char *>(&array_[0]),
             array_.size()*sizeof(node));
    if(os.fail()) return false;
    return true;
  }

  /** Loads an array written by save(). Returns false on failure. */
  bool load(const char *filename) {
    std::ifstream ifs(filename, std::ios::binary);
    if(!ifs){
      return false;
    }
    return load(ifs);
  }

  /**
   * Replaces the trie with the contents of a seekable stream written by
   * save(). On any failure the current trie is left untouched and false is
   * returned. Streams holding fewer than two nodes (header + root) are
   * rejected because every operation dereferences those two slots.
   */
  bool load(std::istream &is){
    std::streamoff siz_ = is.seekg(0,std::ios::end).tellg();
    if(is.fail() || siz_ < 0) return false;
    const size_t count = static_cast<size_t>(siz_) / sizeof(node);
    if(count < 2) return false;
    is.seekg(0, std::ios::beg);
    if(is.fail()) return false;
    // Read into a scratch buffer so a failed read cannot corrupt array_.
    std::vector<node> loaded(count);
    is.read(reinterpret_cast<char *>(&loaded[0]), loaded.size()*sizeof(node));
    if(is.fail()) return false;
    array_.swap(loaded);
    return true;
  }

private:
  std::vector<node> array_;
};
|
357
|
+
|
358
|
+
#endif /* _DOUBLE_ARRAY_INTERNAL_H_ */
|
@@ -0,0 +1,117 @@
|
|
1
|
+
// Copyright (C) 2015 Masahiko Higashiyama
|
2
|
+
//
|
3
|
+
// Permission is hereby granted, free of charge, to any person obtaining a
|
4
|
+
// copy of this software and associated documentation files (the "Software"),
|
5
|
+
// to deal in the Software without restriction, including without limitation
|
6
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
7
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
8
|
+
// Software is furnished to do so, subject to the following conditions:
|
9
|
+
//
|
10
|
+
// The above copyright notice and this permission notice shall be included in
|
11
|
+
// all copies or substantial portions of the Software.
|
12
|
+
//
|
13
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
// SOFTWARE.
|
20
|
+
|
21
|
+
#ifndef UTF8_UTILITY_HPP
|
22
|
+
#define UTF8_UTILITY_HPP
|
23
|
+
|
24
|
+
#include <cstring>
|
25
|
+
#include <exception>
|
26
|
+
#include <vector>
|
27
|
+
|
28
|
+
// Exception type thrown by the UTF-8 helpers in this header when they meet a
// byte that cannot start a character. It carries no payload of its own.
class UTF8Exception : public std::exception {
public:
  UTF8Exception() : std::exception() {}

  virtual ~UTF8Exception() throw() {}
};
|
33
|
+
|
34
|
+
int utf8charlen(const unsigned char c)
|
35
|
+
{
|
36
|
+
if(c == 0x00) return 0;
|
37
|
+
if(c < 0x80) return 1;
|
38
|
+
if(c < 0xC2) throw UTF8Exception();
|
39
|
+
if(c < 0xE0) return 2;
|
40
|
+
if(c < 0xF0) return 3;
|
41
|
+
if(c < 0xF8) return 4;
|
42
|
+
if(c < 0xFC) return 5;
|
43
|
+
if(c < 0xFE) return 6;
|
44
|
+
return 1;
|
45
|
+
}
|
46
|
+
|
47
|
+
char *utf8substr(const char *s, int len){
|
48
|
+
int n = 0, size = 0;
|
49
|
+
const char *p = s;
|
50
|
+
int l;
|
51
|
+
while((l = utf8charlen(*p)) && n != len){
|
52
|
+
p += l;
|
53
|
+
size += l;
|
54
|
+
n++;
|
55
|
+
}
|
56
|
+
if(l == 0) size++;
|
57
|
+
char *str = new char[size + 1];
|
58
|
+
strncpy(str, s, size);
|
59
|
+
str[size] = '\0';
|
60
|
+
|
61
|
+
return str;
|
62
|
+
}
|
63
|
+
|
64
|
+
int utf8len(const char *s, int len){
|
65
|
+
const char *p = s;
|
66
|
+
int n = 0, nc = 0;
|
67
|
+
while(*p != '\0' && n < len){
|
68
|
+
int k = utf8charlen(*p);
|
69
|
+
n += k;
|
70
|
+
nc++;
|
71
|
+
p += k;
|
72
|
+
}
|
73
|
+
if(n > len) return -1;
|
74
|
+
return nc;
|
75
|
+
}
|
76
|
+
|
77
|
+
const char *utf8nextchar(const char *s){
|
78
|
+
return s + utf8charlen(*s);
|
79
|
+
}
|
80
|
+
|
81
|
+
std::vector<const char *> utf8index(const char *s){
|
82
|
+
std::vector<const char *> v;
|
83
|
+
const char *p = s;
|
84
|
+
while(*p != '\0'){
|
85
|
+
v.push_back(p);
|
86
|
+
p = utf8nextchar(p);
|
87
|
+
}
|
88
|
+
return v;
|
89
|
+
}
|
90
|
+
|
91
|
+
const char *utf8advance(const char *s, unsigned int len){
|
92
|
+
size_t l = 0;
|
93
|
+
const char *p = s;
|
94
|
+
while(*p != '\0' && l < len ){
|
95
|
+
p = utf8nextchar(p);
|
96
|
+
l++;
|
97
|
+
}
|
98
|
+
return p;
|
99
|
+
}
|
100
|
+
|
101
|
+
// // bi-gram extraction example
|
102
|
+
// #include <iostream>
|
103
|
+
// using namespace std;
|
104
|
+
//
|
105
|
+
// int main(int argc, char *argv[])
|
106
|
+
// {
|
107
|
+
// const char *p = "大きなノッポの古時計";
|
108
|
+
// while(*p != '\0'){
|
109
|
+
// char *sub = utf8substr(p, 2);
|
110
|
+
// cout << sub << " ";
|
111
|
+
// delete[] sub;
|
112
|
+
// p = utf8nextchar(p);
|
113
|
+
// }
|
114
|
+
// cout << endl;
|
115
|
+
// return 0;
|
116
|
+
// }
|
117
|
+
#endif /* UTF8_UTILITY_HPP */
|
@@ -0,0 +1,114 @@
|
|
1
|
+
/*
|
2
|
+
* WRITTEN BY Masahiko Higashiyama in 2010.
|
3
|
+
*
|
4
|
+
* THIS CODE IS IN PUBLIC DOMAIN.
|
5
|
+
* THIS SOFTWARE IS COMPLETELY FREE TO COPY, MODIFY AND/OR RE-DISTRIBUTE.
|
6
|
+
*
|
7
|
+
* THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
8
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
9
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
10
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
11
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
12
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
13
|
+
* SOFTWARE.
|
14
|
+
*/
|
15
|
+
|
16
|
+
#include <fstream>
|
17
|
+
#include <string>
|
18
|
+
#include "../include/double_array.hpp"
|
19
|
+
#include "profile.hpp"
|
20
|
+
using namespace std;
|
21
|
+
|
22
|
+
|
23
|
+
// Runs a common-prefix search for `str` on `da` and prints one line per hit,
// or "Not Found" when there is none.
// NOTE(review): the two output vectors presumably hold match lengths and ids,
// as in DoubleArrayInternal::common_prefix_search - confirm against
// double_array.hpp.
void print(const DoubleArray &da, const char *str){
  std::vector<int> first_out;
  std::vector<int> second_out;
  std::cout << "searching " << str << std::endl;
  da.common_prefix_search(str, first_out, second_out);
  if(first_out.empty()){
    std::cout << "Not Found" << std::endl;
  }
  for(size_t k = 0; k != first_out.size(); ++k){
    std::cout << first_out[k] << " " << second_out[k] << std::endl;
  }
}
|
32
|
+
|
33
|
+
// Appends every line of standard input to `v`, then sorts the whole vector
// (including any elements it already held) with a stable sort.
void read(std::vector<std::string> &v){
  for(std::string line; std::getline(std::cin, line); ){
    v.push_back(line);
  }
  std::stable_sort(v.begin(), v.end());
}
|
40
|
+
|
41
|
+
// Inserts every string of `v` into `da` and reports the elapsed time via
// PrintTime, using GetusageSec readings taken before and after the loop.
void build(DoubleArray &da, const std::vector<std::string> &v){
  const double started = GetusageSec();
  for(std::vector<std::string>::const_iterator it = v.begin(); it != v.end(); ++it){
    da.insert(it->c_str());
    //cout << i << " " << v[i] << endl;
  }
  const double finished = GetusageSec();
  PrintTime(started, finished);
}
|
51
|
+
|
52
|
+
|
53
|
+
void main2(int argc, char *argv[], DoubleArray &da){
|
54
|
+
|
55
|
+
if(argc < 1 || argv[1] == NULL){
|
56
|
+
print(da,"bisons");
|
57
|
+
da.erase("bisons");
|
58
|
+
cout << "erase bisons" << endl;
|
59
|
+
da.erase("bison");
|
60
|
+
da.erase("bison");
|
61
|
+
cout << "erase bison" << endl;
|
62
|
+
print(da,"bisons");
|
63
|
+
print(da,"bison");
|
64
|
+
da.insert("bisons");
|
65
|
+
print(da,"bisons");
|
66
|
+
print(da,"bison");
|
67
|
+
da.insert("bison");
|
68
|
+
print(da,"bison");
|
69
|
+
da.erase("ARPANET");
|
70
|
+
cout << "erase ARPANET" << endl;
|
71
|
+
print(da,"ARPA");
|
72
|
+
print(da,"ARPANET");
|
73
|
+
}else{
|
74
|
+
print(da,argv[1]);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
void main3(int argc, char *argv[], DoubleArray &da, vector<string> &v){
|
79
|
+
double t1,t2;
|
80
|
+
t1 = GetusageSec();
|
81
|
+
for(size_t i = 0; i < v.size(); i++){
|
82
|
+
int a;
|
83
|
+
if((a = da.exact_match(v[i].c_str())) == -1){
|
84
|
+
cout << "error " << v[i] << " " << a << endl;
|
85
|
+
}else{
|
86
|
+
//cout << v[i] << " " << a << endl;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
t2 = GetusageSec();
|
90
|
+
PrintTime(t1,t2);
|
91
|
+
}
|
92
|
+
|
93
|
+
// Reads the word list from standard input, builds the double array from it
// (printing the build time), then prints every string returned by
// extract_all_matched for the text "zoo".
int main(int argc, char *argv[]){

  std::vector<std::string> lines;
  read(lines);

  DoubleArray da;
  build(da,lines);

  /*
  std::vector<std::pair<std::string, int> > result;
  da.enumerate("", result);
  for(size_t i = 0; i < result.size(); i++){
    std::cout << result[i].first << " " << result[i].second << std::endl;
  }
  */

  std::vector<std::string> res;
  da.extract_all_matched("zoo", res);
  for(std::vector<std::string>::const_iterator it = res.begin(); it != res.end(); ++it){
    std::cout << *it << std::endl;
  }

  return 0;
}
|