chipper 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +51 -0
- data/ext/extconf.rb +58 -0
- data/ext/libstemmer_c/Makefile +10 -0
- data/ext/libstemmer_c/examples/stemwords.c +209 -0
- data/ext/libstemmer_c/include/libstemmer.h +79 -0
- data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
- data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
- data/ext/libstemmer_c/libstemmer/modules.h +190 -0
- data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
- data/ext/libstemmer_c/mkinc.mak +82 -0
- data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
- data/ext/libstemmer_c/runtime/api.c +66 -0
- data/ext/libstemmer_c/runtime/api.h +26 -0
- data/ext/libstemmer_c/runtime/header.h +58 -0
- data/ext/libstemmer_c/runtime/utilities.c +478 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
- data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
- data/ext/re2/bitstate.cc +378 -0
- data/ext/re2/compile.cc +1138 -0
- data/ext/re2/dfa.cc +2086 -0
- data/ext/re2/filtered_re2.cc +100 -0
- data/ext/re2/filtered_re2.h +99 -0
- data/ext/re2/hash.cc +231 -0
- data/ext/re2/mimics_pcre.cc +185 -0
- data/ext/re2/nfa.cc +709 -0
- data/ext/re2/onepass.cc +614 -0
- data/ext/re2/parse.cc +2202 -0
- data/ext/re2/perl_groups.cc +119 -0
- data/ext/re2/prefilter.cc +671 -0
- data/ext/re2/prefilter.h +105 -0
- data/ext/re2/prefilter_tree.cc +398 -0
- data/ext/re2/prefilter_tree.h +130 -0
- data/ext/re2/prog.cc +341 -0
- data/ext/re2/prog.h +376 -0
- data/ext/re2/re2.cc +1180 -0
- data/ext/re2/re2.h +837 -0
- data/ext/re2/regexp.cc +920 -0
- data/ext/re2/regexp.h +632 -0
- data/ext/re2/rune.cc +258 -0
- data/ext/re2/set.cc +113 -0
- data/ext/re2/set.h +55 -0
- data/ext/re2/simplify.cc +393 -0
- data/ext/re2/stringpiece.cc +87 -0
- data/ext/re2/stringpiece.h +182 -0
- data/ext/re2/tostring.cc +341 -0
- data/ext/re2/unicode_casefold.cc +469 -0
- data/ext/re2/unicode_casefold.h +75 -0
- data/ext/re2/unicode_groups.cc +4851 -0
- data/ext/re2/unicode_groups.h +64 -0
- data/ext/re2/valgrind.cc +24 -0
- data/ext/re2/variadic_function.h +346 -0
- data/ext/re2/walker-inl.h +244 -0
- data/ext/src/chipper.cc +626 -0
- data/ext/src/version.h +1 -0
- data/ext/stemmer.rb +40 -0
- data/ext/util/arena.h +103 -0
- data/ext/util/atomicops.h +79 -0
- data/ext/util/benchmark.h +41 -0
- data/ext/util/flags.h +27 -0
- data/ext/util/logging.h +78 -0
- data/ext/util/mutex.h +190 -0
- data/ext/util/pcre.h +679 -0
- data/ext/util/random.h +29 -0
- data/ext/util/sparse_array.h +451 -0
- data/ext/util/sparse_set.h +177 -0
- data/ext/util/test.h +57 -0
- data/ext/util/thread.h +26 -0
- data/ext/util/utf.h +43 -0
- data/ext/util/util.h +127 -0
- data/ext/util/valgrind.h +4517 -0
- data/test/helper.rb +5 -0
- data/test/test_entities.rb +57 -0
- data/test/test_tokens.rb +118 -0
- metadata +199 -0
data/ext/re2/prog.cc
ADDED
@@ -0,0 +1,341 @@
|
|
1
|
+
// Copyright 2007 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
// Compiled regular expression representation.
|
6
|
+
// Tested by compile_test.cc
|
7
|
+
|
8
|
+
#include "util/util.h"
|
9
|
+
#include "util/sparse_set.h"
|
10
|
+
#include "re2/prog.h"
|
11
|
+
#include "re2/stringpiece.h"
|
12
|
+
|
13
|
+
namespace re2 {
|
14
|
+
|
15
|
+
// Constructors per Inst opcode
|
16
|
+
|
17
|
+
// Turns this (still blank) instruction into a kInstAlt whose two
// successors are |out| and |out1|.
void Prog::Inst::InitAlt(uint32 out, uint32 out1) {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);

  out1_ = out1;
  set_out_opcode(out, kInstAlt);
}
|
22
|
+
|
23
|
+
// Turns this (still blank) instruction into a kInstByteRange covering
// [lo, hi] that continues at |out|.  Only the low 8 bits of |lo| and |hi|
// are kept; |foldcase| is stored as given.
void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);

  set_out_opcode(out, kInstByteRange);
  foldcase_ = foldcase;
  hi_ = hi & 0xFF;
  lo_ = lo & 0xFF;
}
|
30
|
+
|
31
|
+
// Turns this (still blank) instruction into a kInstCapture that records
// capture slot |cap| and continues at |out|.
void Prog::Inst::InitCapture(int cap, uint32 out) {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);

  cap_ = cap;
  set_out_opcode(out, kInstCapture);
}
|
36
|
+
|
37
|
+
// Turns this (still blank) instruction into a kInstEmptyWidth that carries
// the empty-width condition |empty| and continues at |out|.
void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);

  empty_ = empty;
  set_out_opcode(out, kInstEmptyWidth);
}
|
42
|
+
|
43
|
+
// Turns this (still blank) instruction into a kInstMatch tagged with |id|.
// A match has no successor, so only the opcode is set.
void Prog::Inst::InitMatch(int32 id) {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);

  match_id_ = id;
  set_opcode(kInstMatch);
}
|
48
|
+
|
49
|
+
// Turns this (still blank) instruction into a kInstNop that continues at
// |out|.
//
// The successor is now recorded just like in the other Init* functions;
// previously |out| was silently ignored.  For out == 0 on a blank
// instruction (the DCHECK'd precondition) the result is unchanged.
void Prog::Inst::InitNop(uint32 out) {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);
  set_out_opcode(out, kInstNop);
}
|
53
|
+
|
54
|
+
// Turns this (still blank) instruction into a kInstFail.  Only the opcode
// is set.
void Prog::Inst::InitFail() {
  // An instruction may only be initialized once.
  DCHECK_EQ(out_opcode_, 0);

  set_opcode(kInstFail);
}
|
58
|
+
|
59
|
+
// Returns a one-line, human-readable rendering of this instruction.
// The text depends on the opcode; unknown opcodes are printed numerically.
string Prog::Inst::Dump() {
  switch (opcode()) {
    case kInstFail:
      return StringPrintf("fail");

    case kInstNop:
      return StringPrintf("nop -> %d", out());

    case kInstMatch:
      return StringPrintf("match! %d", match_id());

    case kInstEmptyWidth:
      return StringPrintf("emptywidth %#x -> %d",
                          static_cast<int>(empty_), out());

    case kInstCapture:
      return StringPrintf("capture %d -> %d", cap_, out());

    case kInstByteRange:
      return StringPrintf("byte%s [%02x-%02x] -> %d",
                          foldcase_ ? "/i" : "",
                          lo_, hi_, out());

    case kInstAltMatch:
      return StringPrintf("altmatch -> %d | %d", out(), out1_);

    case kInstAlt:
      return StringPrintf("alt -> %d | %d", out(), out1_);

    default:
      return StringPrintf("opcode %d", static_cast<int>(opcode()));
  }
}
|
92
|
+
|
93
|
+
// Builds an empty program: every flag is false, every id/count/size is 0
// and every pointer (including the DFA deleter callback) is NULL.
Prog::Prog()
    // Flags.
    : anchor_start_(false), anchor_end_(false),
      reversed_(false), did_onepass_(false),
      // Entry points and sizes.
      start_(0), start_unanchored_(0),
      size_(0), byte_inst_count_(0), bytemap_range_(0),
      flags_(0), onepass_statesize_(0),
      // Instruction array and DFA state.
      inst_(NULL),
      dfa_first_(NULL), dfa_longest_(NULL),
      dfa_mem_(0), delete_dfa_(NULL),
      // Byte map and one-pass tables.
      unbytemap_(NULL),
      onepass_nodes_(NULL), onepass_start_(NULL) {
}
|
114
|
+
|
115
|
+
// Releases everything the program owns.
Prog::~Prog() {
  // The DFAs are freed through the delete_dfa_ callback; when no callback
  // was registered they are left untouched.
  if (delete_dfa_ != NULL) {
    if (dfa_first_ != NULL) {
      delete_dfa_(dfa_first_);
    }
    if (dfa_longest_ != NULL) {
      delete_dfa_(dfa_longest_);
    }
  }

  // Plain arrays allocated with new[].
  delete[] unbytemap_;
  delete[] inst_;
  delete[] onepass_nodes_;
}
|
126
|
+
|
127
|
+
typedef SparseSet Workq;
|
128
|
+
|
129
|
+
static inline void AddToQueue(Workq* q, int id) {
|
130
|
+
if (id != 0)
|
131
|
+
q->insert(id);
|
132
|
+
}
|
133
|
+
|
134
|
+
// Renders every instruction reachable from the ids already in |q|, one
// "<id>. <dump>" line per instruction, in the order the queue yields them.
// The queue grows while it is being walked: each visited instruction
// enqueues its successor(s).
static string ProgToString(Prog* prog, Workq* q) {
  string listing;

  Workq::iterator it = q->begin();
  while (it != q->end()) {
    const int id = *it;
    Prog::Inst* inst = prog->inst(id);

    StringAppendF(&listing, "%d. %s\n", id, inst->Dump().c_str());

    AddToQueue(q, inst->out());
    switch (inst->opcode()) {
      case kInstAlt:
      case kInstAltMatch:
        // Alternations have a second successor.
        AddToQueue(q, inst->out1());
        break;
      default:
        break;
    }

    ++it;
  }

  return listing;
}
|
147
|
+
|
148
|
+
// Returns a textual listing of the instructions reachable from start_.
string Prog::Dump() {
  string bytemap_text;
  if (false) {  // Debugging
    // Disabled: prepend the byte-class ranges recorded in unbytemap_.
    StringAppendF(&bytemap_text, "byte map:\n");
    int range_begin = 0;
    for (int cls = 0; cls < bytemap_range_; cls++) {
      StringAppendF(&bytemap_text, "\t%d. [%02x-%02x]\n",
                    cls, range_begin, unbytemap_[cls]);
      range_begin = unbytemap_[cls] + 1;
    }
    StringAppendF(&bytemap_text, "\n");
  }

  Workq pending(size_);
  AddToQueue(&pending, start_);
  const string listing = ProgToString(this, &pending);
  return bytemap_text + listing;
}
|
164
|
+
|
165
|
+
// Same as Dump(), but the listing starts from start_unanchored_ and has
// no byte-map preamble.
string Prog::DumpUnanchored() {
  Workq pending(size_);
  AddToQueue(&pending, start_unanchored_);
  const string listing = ProgToString(this, &pending);
  return listing;
}
|
170
|
+
|
171
|
+
static bool IsMatch(Prog*, Prog::Inst*);
|
172
|
+
|
173
|
+
// Peep-hole optimizer.
|
174
|
+
void Prog::Optimize() {
|
175
|
+
Workq q(size_);
|
176
|
+
|
177
|
+
// Eliminate nops. Most are taken out during compilation
|
178
|
+
// but a few are hard to avoid.
|
179
|
+
q.clear();
|
180
|
+
AddToQueue(&q, start_);
|
181
|
+
for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
|
182
|
+
int id = *i;
|
183
|
+
|
184
|
+
Inst* ip = inst(id);
|
185
|
+
int j = ip->out();
|
186
|
+
Inst* jp;
|
187
|
+
while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
|
188
|
+
j = jp->out();
|
189
|
+
}
|
190
|
+
ip->set_out(j);
|
191
|
+
AddToQueue(&q, ip->out());
|
192
|
+
|
193
|
+
if (ip->opcode() == kInstAlt) {
|
194
|
+
j = ip->out1();
|
195
|
+
while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
|
196
|
+
j = jp->out();
|
197
|
+
}
|
198
|
+
ip->out1_ = j;
|
199
|
+
AddToQueue(&q, ip->out1());
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
// Insert kInstAltMatch instructions
|
204
|
+
// Look for
|
205
|
+
// ip: Alt -> j | k
|
206
|
+
// j: ByteRange [00-FF] -> ip
|
207
|
+
// k: Match
|
208
|
+
// or the reverse (the above is the greedy one).
|
209
|
+
// Rewrite Alt to AltMatch.
|
210
|
+
q.clear();
|
211
|
+
AddToQueue(&q, start_);
|
212
|
+
for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
|
213
|
+
int id = *i;
|
214
|
+
Inst* ip = inst(id);
|
215
|
+
AddToQueue(&q, ip->out());
|
216
|
+
if (ip->opcode() == kInstAlt)
|
217
|
+
AddToQueue(&q, ip->out1());
|
218
|
+
|
219
|
+
if (ip->opcode() == kInstAlt) {
|
220
|
+
Inst* j = inst(ip->out());
|
221
|
+
Inst* k = inst(ip->out1());
|
222
|
+
if (j->opcode() == kInstByteRange && j->out() == id &&
|
223
|
+
j->lo() == 0x00 && j->hi() == 0xFF &&
|
224
|
+
IsMatch(this, k)) {
|
225
|
+
ip->set_opcode(kInstAltMatch);
|
226
|
+
continue;
|
227
|
+
}
|
228
|
+
if (IsMatch(this, j) &&
|
229
|
+
k->opcode() == kInstByteRange && k->out() == id &&
|
230
|
+
k->lo() == 0x00 && k->hi() == 0xFF) {
|
231
|
+
ip->set_opcode(kInstAltMatch);
|
232
|
+
}
|
233
|
+
}
|
234
|
+
}
|
235
|
+
}
|
236
|
+
|
237
|
+
// Is ip a guaranteed match at end of text, perhaps after some capturing?
|
238
|
+
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
239
|
+
for (;;) {
|
240
|
+
switch (ip->opcode()) {
|
241
|
+
default:
|
242
|
+
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
243
|
+
return false;
|
244
|
+
|
245
|
+
case kInstAlt:
|
246
|
+
case kInstAltMatch:
|
247
|
+
case kInstByteRange:
|
248
|
+
case kInstFail:
|
249
|
+
case kInstEmptyWidth:
|
250
|
+
return false;
|
251
|
+
|
252
|
+
case kInstCapture:
|
253
|
+
case kInstNop:
|
254
|
+
ip = prog->inst(ip->out());
|
255
|
+
break;
|
256
|
+
|
257
|
+
case kInstMatch:
|
258
|
+
return true;
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
// Computes the set of empty-width conditions (kEmpty* bits) that hold at
// position |p| inside |text|: begin/end of text, begin/end of line, and
// word boundary or non-word boundary.
uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) {
  const bool at_begin = (p == text.begin());
  const bool at_end = (p == text.end());
  int flags = 0;

  // ^ and \A
  if (at_begin) {
    flags |= kEmptyBeginText | kEmptyBeginLine;
  } else if (p[-1] == '\n') {
    flags |= kEmptyBeginLine;
  }

  // $ and \z
  if (at_end) {
    flags |= kEmptyEndText | kEmptyEndLine;
  } else if (p < text.end() && p[0] == '\n') {
    flags |= kEmptyEndLine;
  }

  // \b and \B
  // Only characters that actually exist on each side of p are examined;
  // in empty text there is no word boundary.
  bool boundary = false;
  if (at_begin) {
    if (!at_end)
      boundary = IsWordChar(p[0]);
  } else if (at_end) {
    boundary = IsWordChar(p[-1]);
  } else {
    boundary = IsWordChar(p[-1]) != IsWordChar(p[0]);
  }
  if (boundary)
    flags |= kEmptyWordBoundary;
  if ((flags & kEmptyWordBoundary) == 0)
    flags |= kEmptyNonWordBoundary;

  return flags;
}
|
296
|
+
|
297
|
+
// Records in byterange_ that the byte range [lo, hi] is distinguished by
// the program: a mark is placed on |hi| and, when |lo| is not 0, on the
// byte just below |lo|.  Both bounds must lie in [0, 255].
void Prog::MarkByteRange(int lo, int hi) {
  CHECK_GE(lo, 0);
  CHECK_GE(hi, 0);
  CHECK_LE(lo, 255);
  CHECK_LE(hi, 255);

  byterange_.Set(hi);
  if (lo > 0) {
    byterange_.Set(lo - 1);
  }
}
|
306
|
+
|
307
|
+
void Prog::ComputeByteMap() {
|
308
|
+
// Fill in bytemap with byte classes for prog_.
|
309
|
+
// Ranges of bytes that are treated as indistinguishable
|
310
|
+
// by the regexp program are mapped to a single byte class.
|
311
|
+
// The vector prog_->byterange() marks the end of each
|
312
|
+
// such range.
|
313
|
+
const Bitmap<256>& v = byterange();
|
314
|
+
|
315
|
+
COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize);
|
316
|
+
uint8 n = 0;
|
317
|
+
uint32 bits = 0;
|
318
|
+
for (int i = 0; i < 256; i++) {
|
319
|
+
if ((i&31) == 0)
|
320
|
+
bits = v.Word(i >> 5);
|
321
|
+
bytemap_[i] = n;
|
322
|
+
n += bits & 1;
|
323
|
+
bits >>= 1;
|
324
|
+
}
|
325
|
+
bytemap_range_ = bytemap_[255] + 1;
|
326
|
+
unbytemap_ = new uint8[bytemap_range_];
|
327
|
+
for (int i = 0; i < 256; i++)
|
328
|
+
unbytemap_[bytemap_[i]] = i;
|
329
|
+
|
330
|
+
if (0) { // For debugging: use trivial byte map.
|
331
|
+
for (int i = 0; i < 256; i++) {
|
332
|
+
bytemap_[i] = i;
|
333
|
+
unbytemap_[i] = i;
|
334
|
+
}
|
335
|
+
bytemap_range_ = 256;
|
336
|
+
LOG(INFO) << "Using trivial bytemap.";
|
337
|
+
}
|
338
|
+
}
|
339
|
+
|
340
|
+
} // namespace re2
|
341
|
+
|