chipper 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +51 -0
- data/ext/extconf.rb +58 -0
- data/ext/libstemmer_c/Makefile +10 -0
- data/ext/libstemmer_c/examples/stemwords.c +209 -0
- data/ext/libstemmer_c/include/libstemmer.h +79 -0
- data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
- data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
- data/ext/libstemmer_c/libstemmer/modules.h +190 -0
- data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
- data/ext/libstemmer_c/mkinc.mak +82 -0
- data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
- data/ext/libstemmer_c/runtime/api.c +66 -0
- data/ext/libstemmer_c/runtime/api.h +26 -0
- data/ext/libstemmer_c/runtime/header.h +58 -0
- data/ext/libstemmer_c/runtime/utilities.c +478 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
- data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
- data/ext/re2/bitstate.cc +378 -0
- data/ext/re2/compile.cc +1138 -0
- data/ext/re2/dfa.cc +2086 -0
- data/ext/re2/filtered_re2.cc +100 -0
- data/ext/re2/filtered_re2.h +99 -0
- data/ext/re2/hash.cc +231 -0
- data/ext/re2/mimics_pcre.cc +185 -0
- data/ext/re2/nfa.cc +709 -0
- data/ext/re2/onepass.cc +614 -0
- data/ext/re2/parse.cc +2202 -0
- data/ext/re2/perl_groups.cc +119 -0
- data/ext/re2/prefilter.cc +671 -0
- data/ext/re2/prefilter.h +105 -0
- data/ext/re2/prefilter_tree.cc +398 -0
- data/ext/re2/prefilter_tree.h +130 -0
- data/ext/re2/prog.cc +341 -0
- data/ext/re2/prog.h +376 -0
- data/ext/re2/re2.cc +1180 -0
- data/ext/re2/re2.h +837 -0
- data/ext/re2/regexp.cc +920 -0
- data/ext/re2/regexp.h +632 -0
- data/ext/re2/rune.cc +258 -0
- data/ext/re2/set.cc +113 -0
- data/ext/re2/set.h +55 -0
- data/ext/re2/simplify.cc +393 -0
- data/ext/re2/stringpiece.cc +87 -0
- data/ext/re2/stringpiece.h +182 -0
- data/ext/re2/tostring.cc +341 -0
- data/ext/re2/unicode_casefold.cc +469 -0
- data/ext/re2/unicode_casefold.h +75 -0
- data/ext/re2/unicode_groups.cc +4851 -0
- data/ext/re2/unicode_groups.h +64 -0
- data/ext/re2/valgrind.cc +24 -0
- data/ext/re2/variadic_function.h +346 -0
- data/ext/re2/walker-inl.h +244 -0
- data/ext/src/chipper.cc +626 -0
- data/ext/src/version.h +1 -0
- data/ext/stemmer.rb +40 -0
- data/ext/util/arena.h +103 -0
- data/ext/util/atomicops.h +79 -0
- data/ext/util/benchmark.h +41 -0
- data/ext/util/flags.h +27 -0
- data/ext/util/logging.h +78 -0
- data/ext/util/mutex.h +190 -0
- data/ext/util/pcre.h +679 -0
- data/ext/util/random.h +29 -0
- data/ext/util/sparse_array.h +451 -0
- data/ext/util/sparse_set.h +177 -0
- data/ext/util/test.h +57 -0
- data/ext/util/thread.h +26 -0
- data/ext/util/utf.h +43 -0
- data/ext/util/util.h +127 -0
- data/ext/util/valgrind.h +4517 -0
- data/test/helper.rb +5 -0
- data/test/test_entities.rb +57 -0
- data/test/test_tokens.rb +118 -0
- metadata +199 -0
@@ -0,0 +1,119 @@
|
|
1
|
+
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
|
2
|
+
// make_perl_groups.pl >perl_groups.cc
|
3
|
+
|
4
|
+
#include "re2/unicode_groups.h"
|
5
|
+
|
6
|
+
namespace re2 {
|
7
|
+
|
8
|
+
static URange16 code1[] = { /* \d */
|
9
|
+
{ 0x30, 0x39 },
|
10
|
+
};
|
11
|
+
static URange16 code2[] = { /* \s */
|
12
|
+
{ 0x9, 0xa },
|
13
|
+
{ 0xc, 0xd },
|
14
|
+
{ 0x20, 0x20 },
|
15
|
+
};
|
16
|
+
static URange16 code3[] = { /* \w */
|
17
|
+
{ 0x30, 0x39 },
|
18
|
+
{ 0x41, 0x5a },
|
19
|
+
{ 0x5f, 0x5f },
|
20
|
+
{ 0x61, 0x7a },
|
21
|
+
};
|
22
|
+
UGroup perl_groups[] = {
|
23
|
+
{ "\\d", +1, code1, 1 },
|
24
|
+
{ "\\D", -1, code1, 1 },
|
25
|
+
{ "\\s", +1, code2, 3 },
|
26
|
+
{ "\\S", -1, code2, 3 },
|
27
|
+
{ "\\w", +1, code3, 4 },
|
28
|
+
{ "\\W", -1, code3, 4 },
|
29
|
+
};
|
30
|
+
int num_perl_groups = 6;
|
31
|
+
static URange16 code4[] = { /* [:alnum:] */
|
32
|
+
{ 0x30, 0x39 },
|
33
|
+
{ 0x41, 0x5a },
|
34
|
+
{ 0x61, 0x7a },
|
35
|
+
};
|
36
|
+
static URange16 code5[] = { /* [:alpha:] */
|
37
|
+
{ 0x41, 0x5a },
|
38
|
+
{ 0x61, 0x7a },
|
39
|
+
};
|
40
|
+
static URange16 code6[] = { /* [:ascii:] */
|
41
|
+
{ 0x0, 0x7f },
|
42
|
+
};
|
43
|
+
static URange16 code7[] = { /* [:blank:] */
|
44
|
+
{ 0x9, 0x9 },
|
45
|
+
{ 0x20, 0x20 },
|
46
|
+
};
|
47
|
+
static URange16 code8[] = { /* [:cntrl:] */
|
48
|
+
{ 0x0, 0x1f },
|
49
|
+
{ 0x7f, 0x7f },
|
50
|
+
};
|
51
|
+
static URange16 code9[] = { /* [:digit:] */
|
52
|
+
{ 0x30, 0x39 },
|
53
|
+
};
|
54
|
+
static URange16 code10[] = { /* [:graph:] */
|
55
|
+
{ 0x21, 0x7e },
|
56
|
+
};
|
57
|
+
static URange16 code11[] = { /* [:lower:] */
|
58
|
+
{ 0x61, 0x7a },
|
59
|
+
};
|
60
|
+
static URange16 code12[] = { /* [:print:] */
|
61
|
+
{ 0x20, 0x7e },
|
62
|
+
};
|
63
|
+
static URange16 code13[] = { /* [:punct:] */
|
64
|
+
{ 0x21, 0x2f },
|
65
|
+
{ 0x3a, 0x40 },
|
66
|
+
{ 0x5b, 0x60 },
|
67
|
+
{ 0x7b, 0x7e },
|
68
|
+
};
|
69
|
+
static URange16 code14[] = { /* [:space:] */
|
70
|
+
{ 0x9, 0xd },
|
71
|
+
{ 0x20, 0x20 },
|
72
|
+
};
|
73
|
+
static URange16 code15[] = { /* [:upper:] */
|
74
|
+
{ 0x41, 0x5a },
|
75
|
+
};
|
76
|
+
static URange16 code16[] = { /* [:word:] */
|
77
|
+
{ 0x30, 0x39 },
|
78
|
+
{ 0x41, 0x5a },
|
79
|
+
{ 0x5f, 0x5f },
|
80
|
+
{ 0x61, 0x7a },
|
81
|
+
};
|
82
|
+
static URange16 code17[] = { /* [:xdigit:] */
|
83
|
+
{ 0x30, 0x39 },
|
84
|
+
{ 0x41, 0x46 },
|
85
|
+
{ 0x61, 0x66 },
|
86
|
+
};
|
87
|
+
UGroup posix_groups[] = {
|
88
|
+
{ "[:alnum:]", +1, code4, 3 },
|
89
|
+
{ "[:^alnum:]", -1, code4, 3 },
|
90
|
+
{ "[:alpha:]", +1, code5, 2 },
|
91
|
+
{ "[:^alpha:]", -1, code5, 2 },
|
92
|
+
{ "[:ascii:]", +1, code6, 1 },
|
93
|
+
{ "[:^ascii:]", -1, code6, 1 },
|
94
|
+
{ "[:blank:]", +1, code7, 2 },
|
95
|
+
{ "[:^blank:]", -1, code7, 2 },
|
96
|
+
{ "[:cntrl:]", +1, code8, 2 },
|
97
|
+
{ "[:^cntrl:]", -1, code8, 2 },
|
98
|
+
{ "[:digit:]", +1, code9, 1 },
|
99
|
+
{ "[:^digit:]", -1, code9, 1 },
|
100
|
+
{ "[:graph:]", +1, code10, 1 },
|
101
|
+
{ "[:^graph:]", -1, code10, 1 },
|
102
|
+
{ "[:lower:]", +1, code11, 1 },
|
103
|
+
{ "[:^lower:]", -1, code11, 1 },
|
104
|
+
{ "[:print:]", +1, code12, 1 },
|
105
|
+
{ "[:^print:]", -1, code12, 1 },
|
106
|
+
{ "[:punct:]", +1, code13, 4 },
|
107
|
+
{ "[:^punct:]", -1, code13, 4 },
|
108
|
+
{ "[:space:]", +1, code14, 2 },
|
109
|
+
{ "[:^space:]", -1, code14, 2 },
|
110
|
+
{ "[:upper:]", +1, code15, 1 },
|
111
|
+
{ "[:^upper:]", -1, code15, 1 },
|
112
|
+
{ "[:word:]", +1, code16, 4 },
|
113
|
+
{ "[:^word:]", -1, code16, 4 },
|
114
|
+
{ "[:xdigit:]", +1, code17, 3 },
|
115
|
+
{ "[:^xdigit:]", -1, code17, 3 },
|
116
|
+
};
|
117
|
+
int num_posix_groups = 28;
|
118
|
+
|
119
|
+
} // namespace re2
|