ruby_da 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.gitmodules +6 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +38 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/ext/ruby_da/RubyDaService.java +153 -0
- data/ext/ruby_da/extconf.rb +9 -0
- data/ext/ruby_da/ruby_da.cpp +171 -0
- data/ext/ruby_da/ruby_da.h +6 -0
- data/lib/ruby_da/version.rb +3 -0
- data/lib/ruby_da.rb +12 -0
- data/libda/README.md +1 -0
- data/libda/include/double_array.hpp +138 -0
- data/libda/include/double_array_internal.hpp +358 -0
- data/libda/include/utf8.hpp +117 -0
- data/libda/src/double_array.cc +114 -0
- data/libda/src/double_array_static.cc +228 -0
- data/libda/src/profile.hpp +38 -0
- data/ruby_da.gemspec +36 -0
- metadata +124 -0
@@ -0,0 +1,358 @@
|
|
1
|
+
// Copyright (C) 2015 Masahiko Higashiyama
|
2
|
+
//
|
3
|
+
// Permission is hereby granted, free of charge, to any person obtaining a
|
4
|
+
// copy of this software and associated documentation files (the "Software"),
|
5
|
+
// to deal in the Software without restriction, including without limitation
|
6
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
7
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
8
|
+
// Software is furnished to do so, subject to the following conditions:
|
9
|
+
//
|
10
|
+
// The above copyright notice and this permission notice shall be included in
|
11
|
+
// all copies or substantial portions of the Software.
|
12
|
+
//
|
13
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
// SOFTWARE.
|
20
|
+
#ifndef _DOUBLE_ARRAY_INTERNAL_H_
|
21
|
+
#define _DOUBLE_ARRAY_INTERNAL_H_
|
22
|
+
#include <vector>
|
23
|
+
#include <utility>
|
24
|
+
#include <algorithm>
|
25
|
+
#include <stdexcept>
|
26
|
+
#include <iostream>
|
27
|
+
#include <fstream>
|
28
|
+
|
29
|
+
|
30
|
+
/**
 * Dynamically updatable double-array trie over NUL-terminated byte strings.
 *
 * Everything lives in one vector of {base, check} nodes:
 *  - array_[0] is a header: its `check` holds the index of the first free
 *    slot (see empty_head()) and its `base` holds the running entry counter
 *    (see entry_num()).
 *  - array_[1] is the root state; every lookup starts there.
 *  - A free slot stores the negated index of the next free slot in `check`,
 *    which forms an ascending singly linked free list.
 *  - A used slot stores its parent state in `check`. For an inner state,
 *    `base` is the offset added to the next key byte to locate the child
 *    slot; for the slot reached through the terminating NUL byte, `base` is
 *    the negated entry id.
 *
 * Keys are compared byte-wise (bytes are always treated as unsigned).
 */
class DoubleArrayInternal {
  struct node {
    int base;
    int check;
  };

  /** Head of the free list (stored in the header slot). */
  int& empty_head() {
    return array_[0].check;
  }

  /** Number of successful insertions so far (stored in the header slot). */
  int& entry_num() {
    return array_[0].base;
  }

  /**
   * Marks slot `pos` as owned by state `base`.
   *
   * With `overwrite` set and the slot already in use, only the owner is
   * replaced. Otherwise the slot is unlinked from the free list first.
   *
   * @throws std::runtime_error if `pos` is not on the free list.
   */
  void set_check(int pos, int base, bool overwrite = false){
    if(overwrite && array_[pos].check > 0){
      array_[pos].check = base;
    }else if(pos == empty_head()){
      empty_head() = -array_[pos].check;
      array_[pos].check = base;
    }else{
      // Walk the free list to find the predecessor of pos.
      int i = empty_head();
      const int n = static_cast<int>(array_.size());
      while(i < n){
        if(pos == -array_[i].check) break;
        i = -array_[i].check;
      }
      if(i >= n) throw std::runtime_error("failed set check");
      array_[i].check = array_[pos].check;
      array_[pos].check = base;
    }
  }

  /**
   * Returns slot `pos` to the free list, keeping the list sorted by index.
   *
   * @throws std::runtime_error if no insertion point is found.
   */
  void delete_check(int pos){
    if(pos < empty_head()){
      array_[pos].check = -empty_head();
      empty_head() = pos;
    }else{
      int i = empty_head();
      const int n = static_cast<int>(array_.size());
      while(i < n){
        if(i < pos && pos < -array_[i].check) break;
        i = -array_[i].check;
      }
      if(i >= n) throw std::runtime_error("failed delete check");
      array_[pos].check = array_[i].check;
      array_[i].check = -pos;
    }
  }

  /**
   * Grows the array so that index `pos` is valid; every new slot is chained
   * onto the free list (slot i points at i + 1).
   */
  void expand(size_t pos){
    if(pos < array_.size()) return;
    size_t i = array_.size();
    size_t n = array_.capacity();
    if(n == 0) n = 1; // doubling below would never terminate from zero
    while(pos > n) n <<= 1;
    array_.reserve(n);
    array_.resize(pos+1);
    for(; i <= pos; i++){
      array_[i].check = -static_cast<int>(i+1);
    }
  }

  /**
   * Follows `str` from the root as far as the trie allows.
   *
   * @return (state, p) when the whole key including its terminating NUL is
   *         present: `state` is the state owning the NUL transition and `p`
   *         points at the NUL. Otherwise (-state, p) where `state` is the
   *         last state reached and `p` points at the first unmatched byte.
   */
  std::pair<int,const char *> fetch(const char *str) const {
    const int limit = static_cast<int>(array_.size());
    const char *p = str;
    int state = 1;
    for(;;){
      const unsigned char c = static_cast<unsigned char>(*p);
      const int t = array_[state].base + c;
      // t <= 0 can only name the header slot (or lie before the array); the
      // header's check field is the free-list head and must never be read as
      // a parent link, otherwise an empty trie appears to contain "".
      if(t <= 0 || t >= limit || array_[t].check != state){
        return std::make_pair(-state, p);
      }
      if(c == 0) return std::make_pair(state, p);
      state = t;
      ++p;
    }
  }

  /**
   * Appends to `labels` every byte for which state `index` (whose base is
   * `base`) has a child. A non-positive base means "no children".
   */
  void get_labels(int index, int base, std::vector<unsigned char> &labels) const {
    if(base <= 0) return; // unset or terminal base: nothing to scan
    const int limit = std::min(static_cast<int>(array_.size()), base + 256);
    for(int slot = base; slot < limit; slot++){
      if(array_[slot].check == index){
        labels.push_back(static_cast<unsigned char>(slot - base));
      }
    }
  }

  /**
   * Searches the free list for a base (> 1) such that the slot for the new
   * byte `c` and the slots for all existing labels `codes` are free.
   * Grows the array as needed.
   */
  int find_base(const std::vector<unsigned char> &codes, unsigned char c){
    int base_cand;
    int empty_index = empty_head();
    while(1){
      expand(empty_index);
      base_cand = empty_index - c;
      if(base_cand <= 1){
        // Bases 0 and 1 are reserved (header and root slots).
        empty_index = -array_[empty_index].check;
        continue;
      }
      bool found = true;
      for(size_t i = 0; i < codes.size(); i++){
        expand(base_cand + codes[i]);
        if(array_[base_cand + codes[i]].check > 0){
          found = false;
          break;
        }
      }
      if(found) break;
      empty_index = -array_[empty_index].check;
    }
    return base_cand;
  }

  /**
   * Relocates state `from` (with base `from_base`) to slot `to`: copies the
   * base, re-parents the children, then frees the old slot.
   */
  void move_to(int from, int from_base, int to){
    array_[to].base = from_base;
    if(from_base > 0){
      std::vector<unsigned char> trans;
      get_labels(from, from_base, trans);
      for(size_t j = 0; j < trans.size(); j++){
        set_check(from_base + trans[j], to, true);
      }
    }
    array_[from].base = 0;
    delete_check(from);
  }

  /**
   * Inserts the remaining bytes of a key (including the terminating NUL)
   * below state `base`. If `id` is < 1 the entry gets the next sequential
   * id (entry counter + 1), otherwise `id` itself.
   */
  void _insert(const char *str, int base, int id){
    const unsigned char c = static_cast<unsigned char>(*str);
    int pos = array_[base].base + c;
    expand(std::max(base,pos));
    // A base <= 0 is not a usable offset (0 = unset, negative = left over
    // from a terminal slot). The short-circuit also keeps a negative pos
    // from being used as an index.
    if(array_[base].base <= 0 || array_[pos].check >= 0){ //conflict
      const int oldbase = array_[base].base;
      std::vector<unsigned char> codes;
      if(oldbase > 0) get_labels(base, oldbase, codes);
      const int base_cand = find_base(codes, c);
      array_[base].base = base_cand;
      std::vector<int> from,from_base;
      // First reserve all target slots, then move the children, so a moved
      // child's own children are never mistaken for free slots.
      for(size_t i = 0; i < codes.size(); i++){
        const int old_t = oldbase + codes[i];
        from.push_back(old_t);
        from_base.push_back(array_[old_t].base);
        set_check(base_cand + codes[i], base);
      }
      for(size_t i = 0; i < from.size(); i++){
        move_to(from[i], from_base[i], base_cand + codes[i]);
      }
      pos = base_cand + c;
    }
    set_check(pos, base);
    if(*str != '\0'){
      _insert(str+1, pos, id);
    }else{
      if(id < 1){
        array_[pos].base = -(entry_num() + 1);
      }else{
        array_[pos].base = -id;
      }
      entry_num() += 1;
    }
  }

  /**
   * Removes the transition taken on byte `*p` from state `index`, then
   * walks back towards the start of the key (`str`), freeing every state
   * that is left without children.
   */
  void _erase(const char *str, int index, const char *p){
    for(;;){
      const int parent = array_[index].check;
      // Key bytes must be read as unsigned; a plain (possibly signed) char
      // would address the wrong slot for bytes >= 0x80.
      const int child = array_[index].base + static_cast<unsigned char>(*p);
      delete_check(child);
      array_[child].base = 0; // do not leave a stale base in a free slot
      std::vector<unsigned char> labels;
      get_labels(index, array_[index].base, labels);
      if(!labels.empty() || str == p) return;
      index = parent;
      --p;
    }
  }

  /* For DEBUG */
  void print_array() const {
    std::cout << "[";
    for(size_t i = 0; i < array_.size(); i++){
      std::cout << "element " << i << ":" << array_[i].base << ":" << array_[i].check << std::endl;
    }
    std::cout << "]" << std::endl;
  }

  /**
   * Depth-first walk below `current_idx`. `path` holds the bytes consumed so
   * far; each complete key is appended to `result` together with its id.
   */
  void _enumerate(int current_idx,
                  std::vector<unsigned char> &path,
                  std::vector<std::pair<std::string, int> > &result) const {
    std::vector<unsigned char> labels;
    get_labels(current_idx, array_[current_idx].base, labels);
    for(std::vector<unsigned char>::const_iterator itr = labels.begin(); itr != labels.end(); ++itr){
      const int new_idx = array_[current_idx].base + *itr;
      if(*itr == '\0'){
        const std::string res(path.begin(), path.end());
        result.push_back(std::make_pair(res, -array_[new_idx].base));
      }else{
        path.push_back(*itr);
        _enumerate(new_idx, path, result);
        path.pop_back();
      }
    }
  }

public:

  /**
   * Looks up `str` as a complete key.
   * @return the entry id, or -1 if the key is not stored.
   */
  int exact_match(const char *str) const {
    const std::pair<int, const char*> state = fetch(str);
    if(state.first > 0){
      const int t = array_[state.first].base + static_cast<unsigned char>(*state.second);
      return -array_[t].base;
    }
    return -1;
  }

  /**
   * Appends every stored (key, id) whose key starts with `str` to `result`.
   * An empty `str` enumerates the whole trie. Nothing is appended when the
   * prefix is not present in the trie.
   */
  void enumerate(const char *str, std::vector<std::pair<std::string, int> > &result) const {
    int index = 1;
    if(str[0] != '\0'){
      const std::pair<int, const char*> state = fetch(str);
      if(state.first > 0){
        index = state.first;
      }else{
        // Bytes left unmatched: the prefix itself is not in the trie, so
        // walking the partially matched subtree would yield keys that do not
        // start with str.
        if(*state.second != '\0') return;
        index = -state.first;
      }
    }
    std::vector<unsigned char> path;
    for(const char *p = str; *p != '\0'; p++){
      path.push_back(static_cast<unsigned char>(*p));
    }
    _enumerate(index, path, result);
  }

  /**
   * Finds every stored key that is a prefix of `str`. For each hit the
   * prefix length in bytes is appended to `res_len` and the entry id to
   * `res_id`. The root state is skipped, so an empty key is never reported.
   */
  void common_prefix_search(const char *str,
                            std::vector<int> &res_len,
                            std::vector<int> &res_id) const {
    const int limit = static_cast<int>(array_.size());
    const char *p = str;
    int state = 1;
    for(;;){
      const int t = array_[state].base;
      if(t <= 0) return; // state has no children
      if(state != 1 && t < limit && array_[t].check == state){
        res_len.push_back(static_cast<int>(p - str));
        res_id.push_back(-array_[t].base);
      }
      const unsigned char c = static_cast<unsigned char>(*p);
      const int next = t + c;
      if(next >= limit || array_[next].check != state) return;
      if(c == 0) return;
      state = next;
      ++p;
    }
  }

  /**
   * Stores `str`. With `id` < 1 the entry gets the next sequential id.
   * @return false if the key is already present (nothing is changed).
   */
  bool insert(const char *str, int id = -1){
    const std::pair<int,const char*> state = fetch(str);
    if(state.first > 0){
      return false;
    }
    _insert(state.second, -state.first, id);
    return true;
  }

  /**
   * Removes `str`.
   * @return false if the key is not stored.
   */
  bool erase(const char *str){
    const std::pair<int,const char*> state = fetch(str);
    if(state.first < 0){
      return false;
    }
    _erase(str, state.first, state.second);
    return true;
  }

  /** Creates an empty trie with 8193 preallocated slots. */
  DoubleArrayInternal() : array_(2) {
    entry_num() = 0;
    empty_head() = 1;
    array_[1].check = -2;
    expand(8192);
  }

  /** Writes the raw node array to `filename`. @return false on I/O error. */
  bool save(const char *filename) const {
    std::ofstream ofs(filename, std::ios::binary);
    if(!ofs){
      return false;
    }
    return save(ofs);
  }

  /**
   * Writes the raw node array to `os` (host byte order, no header).
   * @return false if the stream reports failure.
   */
  bool save(std::ostream &os) const {
    os.write(reinterpret_cast<const char *>(&array_[0]),
             array_.size()*sizeof(node));
    if(os.fail()) return false;
    return true;
  }

  /** Replaces the trie with the contents of `filename`. @return false on error. */
  bool load(const char *filename) {
    std::ifstream ifs(filename, std::ios::binary);
    if(!ifs){
      return false;
    }
    return load(ifs);
  }

  /**
   * Replaces the trie with the node array read from `is` (the whole stream,
   * from its beginning). The current contents are kept untouched unless the
   * read succeeds completely.
   *
   * @return false if the stream cannot be sized or read, or if it holds
   *         fewer than the two mandatory slots (header and root).
   */
  bool load(std::istream &is){
    const std::streamoff end = is.seekg(0,std::ios::end).tellg();
    if(is.fail() || end < 0) return false;
    const size_t count = static_cast<size_t>(end) / sizeof(node);
    if(count < 2) return false; // header and root are always required
    is.seekg(0, std::ios::beg);
    if(is.fail()) return false;
    std::vector<node> loaded(count);
    const std::streamsize bytes = static_cast<std::streamsize>(count*sizeof(node));
    is.read(reinterpret_cast<char *>(&loaded[0]), bytes);
    if(is.gcount() != bytes) return false;
    array_.swap(loaded);
    return true;
  }

private:
  std::vector<node> array_;
};
|
357
|
+
|
358
|
+
#endif /* _DOUBLE_ARRAY_INTERNAL_H_ */
|
@@ -0,0 +1,117 @@
|
|
1
|
+
// Copyright (C) 2015 Masahiko Higashiyama
|
2
|
+
//
|
3
|
+
// Permission is hereby granted, free of charge, to any person obtaining a
|
4
|
+
// copy of this software and associated documentation files (the "Software"),
|
5
|
+
// to deal in the Software without restriction, including without limitation
|
6
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
7
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
8
|
+
// Software is furnished to do so, subject to the following conditions:
|
9
|
+
//
|
10
|
+
// The above copyright notice and this permission notice shall be included in
|
11
|
+
// all copies or substantial portions of the Software.
|
12
|
+
//
|
13
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
// SOFTWARE.
|
20
|
+
|
21
|
+
#ifndef UTF8_UTILITY_HPP
|
22
|
+
#define UTF8_UTILITY_HPP
|
23
|
+
|
24
|
+
#include <cstring>
|
25
|
+
#include <exception>
|
26
|
+
#include <vector>
|
27
|
+
|
28
|
+
/// Thrown by utf8charlen() when a byte cannot start a UTF-8 character.
class UTF8Exception : public std::exception {
public:
  UTF8Exception() {}
  virtual ~UTF8Exception() throw() {}
  virtual const char *what() const throw() { return "invalid UTF-8 lead byte"; }
};

// All functions below are defined in this header, so they are declared
// `inline`; without it, including the header from two translation units
// produces multiple-definition link errors.

/**
 * Returns the encoded length announced by lead byte `c`.
 *
 * @return 0 for NUL, 1 for ASCII, 2..6 for multi-byte lead bytes (the
 *         5- and 6-byte forms 0xF8..0xFD are still accepted), and 1 for
 *         0xFE / 0xFF.
 * @throws UTF8Exception for 0x80..0xC1 (continuation bytes and overlong
 *         two-byte leads).
 */
inline int utf8charlen(const unsigned char c)
{
  if(c == 0x00) return 0;
  if(c < 0x80) return 1;
  if(c < 0xC2) throw UTF8Exception();
  if(c < 0xE0) return 2;
  if(c < 0xF0) return 3;
  if(c < 0xF8) return 4;
  if(c < 0xFC) return 5;
  if(c < 0xFE) return 6;
  return 1;
}

/**
 * Counts how many of the `declared` bytes of the character starting at `p`
 * are really there, i.e. stops at the terminating NUL. A result smaller than
 * `declared` means the sequence is truncated.
 */
inline int utf8present(const char *p, int declared){
  int present = 0;
  while(present < declared && p[present] != '\0') ++present;
  return present;
}

/**
 * Returns a pointer to the character following the one at `s`. Never moves
 * past the terminating NUL, even if the sequence at `s` is truncated.
 *
 * @throws UTF8Exception if `*s` is not a valid lead byte.
 */
inline const char *utf8nextchar(const char *s){
  return s + utf8present(s, utf8charlen(*s));
}

/**
 * Copies the first `len` characters of `s` (or all of `s` if it is shorter,
 * or if `len` is negative) into a new NUL-terminated buffer.
 *
 * The buffer is allocated with new[]; the caller must release it with
 * delete[]. Only the characters that are actually copied are classified, so
 * bytes after the requested range cannot raise an exception. A multi-byte
 * sequence cut short by the terminator is copied as far as it goes.
 *
 * @throws UTF8Exception if a copied character starts with an invalid byte.
 */
inline char *utf8substr(const char *s, int len){
  size_t size = 0;
  for(int n = 0; n != len; ++n){
    const int declared = utf8charlen(s[size]);
    if(declared == 0) break; // end of input
    const int present = utf8present(s + size, declared);
    size += present;
    if(present < declared) break; // truncated sequence: stop at the NUL
  }
  char *str = new char[size + 1];
  memcpy(str, s, size);
  str[size] = '\0';

  return str;
}

/**
 * Counts the characters in the first `len` bytes of `s`.
 *
 * @return the character count, or -1 if the last character does not fit
 *         into `len` bytes or is cut short by the terminating NUL.
 * @throws UTF8Exception if a character starts with an invalid byte.
 */
inline int utf8len(const char *s, int len){
  const char *p = s;
  int n = 0, nc = 0;
  while(*p != '\0' && n < len){
    const int k = utf8charlen(*p);
    if(utf8present(p, k) < k) return -1; // sequence runs into the NUL
    n += k;
    nc++;
    p += k;
  }
  if(n > len) return -1;
  return nc;
}

/**
 * Returns a pointer to the first byte of every character in `s`, in order.
 * The pointers refer into `s` itself.
 */
inline std::vector<const char *> utf8index(const char *s){
  std::vector<const char *> v;
  const char *p = s;
  while(*p != '\0'){
    v.push_back(p);
    p = utf8nextchar(p);
  }
  return v;
}

/**
 * Skips up to `len` characters of `s` and returns the resulting position
 * (the terminating NUL if `s` has fewer characters).
 */
inline const char *utf8advance(const char *s, unsigned int len){
  size_t l = 0;
  const char *p = s;
  while(*p != '\0' && l < len ){
    p = utf8nextchar(p);
    l++;
  }
  return p;
}
|
100
|
+
|
101
|
+
// // bi-gram extraction example
|
102
|
+
// #include <iostream>
|
103
|
+
// using namespace std;
|
104
|
+
//
|
105
|
+
// int main(int argc, char *argv[])
|
106
|
+
// {
|
107
|
+
// const char *p = "大きなノッポの古時計";
|
108
|
+
// while(*p != '\0'){
|
109
|
+
// char *sub = utf8substr(p, 2);
|
110
|
+
// cout << sub << " ";
|
111
|
+
// delete[] sub;
|
112
|
+
// p = utf8nextchar(p);
|
113
|
+
// }
|
114
|
+
// cout << endl;
|
115
|
+
// return 0;
|
116
|
+
// }
|
117
|
+
#endif /* UTF8_UTILITY_HPP */
|
@@ -0,0 +1,114 @@
|
|
1
|
+
/*
|
2
|
+
* WRITTEN BY Masahiko Higashiyama in 2010.
|
3
|
+
*
|
4
|
+
* THIS CODE IS IN PUBLIC DOMAIN.
|
5
|
+
* THIS SOFTWARE IS COMPLETELY FREE TO COPY, MODIFY AND/OR RE-DISTRIBUTE.
|
6
|
+
*
|
7
|
+
* THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
8
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
9
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
10
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
11
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
12
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
13
|
+
* SOFTWARE.
|
14
|
+
*/
|
15
|
+
|
16
|
+
#include <fstream>
|
17
|
+
#include <string>
|
18
|
+
#include "../include/double_array.hpp"
|
19
|
+
#include "profile.hpp"
|
20
|
+
using namespace std;
|
21
|
+
|
22
|
+
|
23
|
+
// Runs a common-prefix search for `str` on `da` and writes the outcome to
// stdout: a "searching ..." banner, then either "Not Found" or one line per
// hit holding the paired values of the two result vectors.
// NOTE(review): presumably these are prefix length and entry id, mirroring
// DoubleArrayInternal::common_prefix_search - confirm against DoubleArray.
void print(const DoubleArray &da, const char *str){
  std::vector<int> first_column;
  std::vector<int> second_column;

  std::cout << "searching " << str << std::endl;
  da.common_prefix_search(str, first_column, second_column);

  if(first_column.empty()){
    std::cout << "Not Found" << std::endl;
  }
  const size_t hits = first_column.size();
  for(size_t k = 0; k < hits; ++k){
    std::cout << first_column[k] << " " << second_column[k] << std::endl;
  }
}
|
32
|
+
|
33
|
+
// Appends every line of standard input to `v`, then sorts the whole vector
// (including anything it already contained) in ascending order.
void read(std::vector<std::string> &v){
  for(std::string current; std::getline(std::cin, current); ){
    v.push_back(current);
  }
  std::stable_sort(v.begin(), v.end());
}
|
40
|
+
|
41
|
+
// Inserts every string of `v` into `da` (in vector order, ids left to the
// trie) and reports the elapsed time through PrintTime().
void build(DoubleArray &da, const std::vector<std::string> &v){
  const double started = GetusageSec();
  for(std::vector<std::string>::const_iterator key = v.begin(); key != v.end(); ++key){
    da.insert(key->c_str());
  }
  const double finished = GetusageSec();
  PrintTime(started, finished);
}
|
51
|
+
|
52
|
+
|
53
|
+
void main2(int argc, char *argv[], DoubleArray &da){
|
54
|
+
|
55
|
+
if(argc < 1 || argv[1] == NULL){
|
56
|
+
print(da,"bisons");
|
57
|
+
da.erase("bisons");
|
58
|
+
cout << "erase bisons" << endl;
|
59
|
+
da.erase("bison");
|
60
|
+
da.erase("bison");
|
61
|
+
cout << "erase bison" << endl;
|
62
|
+
print(da,"bisons");
|
63
|
+
print(da,"bison");
|
64
|
+
da.insert("bisons");
|
65
|
+
print(da,"bisons");
|
66
|
+
print(da,"bison");
|
67
|
+
da.insert("bison");
|
68
|
+
print(da,"bison");
|
69
|
+
da.erase("ARPANET");
|
70
|
+
cout << "erase ARPANET" << endl;
|
71
|
+
print(da,"ARPA");
|
72
|
+
print(da,"ARPANET");
|
73
|
+
}else{
|
74
|
+
print(da,argv[1]);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
void main3(int argc, char *argv[], DoubleArray &da, vector<string> &v){
|
79
|
+
double t1,t2;
|
80
|
+
t1 = GetusageSec();
|
81
|
+
for(size_t i = 0; i < v.size(); i++){
|
82
|
+
int a;
|
83
|
+
if((a = da.exact_match(v[i].c_str())) == -1){
|
84
|
+
cout << "error " << v[i] << " " << a << endl;
|
85
|
+
}else{
|
86
|
+
//cout << v[i] << " " << a << endl;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
t2 = GetusageSec();
|
90
|
+
PrintTime(t1,t2);
|
91
|
+
}
|
92
|
+
|
93
|
+
// Entry point of the sample program: reads the dictionary from stdin, builds
// the trie, then prints every string that extract_all_matched() reports for
// the text "zoo". main2() and main3() are not called from here.
int main(int argc, char *argv[]){
  std::vector<std::string> dictionary;
  read(dictionary);

  DoubleArray da;
  build(da, dictionary);

  // A full dump is possible with da.enumerate("", result), printing
  // result[i].first and result[i].second for each pair.

  std::vector<std::string> matches;
  da.extract_all_matched("zoo", matches);
  for(std::vector<std::string>::const_iterator hit = matches.begin(); hit != matches.end(); ++hit){
    std::cout << *hit << std::endl;
  }

  return 0;
}
|