chipper 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. data/README.rdoc +51 -0
  2. data/ext/extconf.rb +58 -0
  3. data/ext/libstemmer_c/Makefile +10 -0
  4. data/ext/libstemmer_c/examples/stemwords.c +209 -0
  5. data/ext/libstemmer_c/include/libstemmer.h +79 -0
  6. data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
  7. data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
  8. data/ext/libstemmer_c/libstemmer/modules.h +190 -0
  9. data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
  10. data/ext/libstemmer_c/mkinc.mak +82 -0
  11. data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
  12. data/ext/libstemmer_c/runtime/api.c +66 -0
  13. data/ext/libstemmer_c/runtime/api.h +26 -0
  14. data/ext/libstemmer_c/runtime/header.h +58 -0
  15. data/ext/libstemmer_c/runtime/utilities.c +478 -0
  16. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  17. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  18. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  19. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  20. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  21. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  22. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  24. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  25. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  26. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
  27. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  28. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  29. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  30. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  31. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  32. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  33. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  34. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  35. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  36. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  37. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  38. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  39. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  40. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  41. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  42. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  43. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  44. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  45. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  46. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  47. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  48. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  49. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  50. data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  51. data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  52. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  53. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  54. data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  55. data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  56. data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
  57. data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  58. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  59. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  60. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  61. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  62. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  63. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  64. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  65. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  66. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  67. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  68. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  69. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  70. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  71. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  72. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  73. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  74. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  75. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  76. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  77. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  78. data/ext/re2/bitstate.cc +378 -0
  79. data/ext/re2/compile.cc +1138 -0
  80. data/ext/re2/dfa.cc +2086 -0
  81. data/ext/re2/filtered_re2.cc +100 -0
  82. data/ext/re2/filtered_re2.h +99 -0
  83. data/ext/re2/hash.cc +231 -0
  84. data/ext/re2/mimics_pcre.cc +185 -0
  85. data/ext/re2/nfa.cc +709 -0
  86. data/ext/re2/onepass.cc +614 -0
  87. data/ext/re2/parse.cc +2202 -0
  88. data/ext/re2/perl_groups.cc +119 -0
  89. data/ext/re2/prefilter.cc +671 -0
  90. data/ext/re2/prefilter.h +105 -0
  91. data/ext/re2/prefilter_tree.cc +398 -0
  92. data/ext/re2/prefilter_tree.h +130 -0
  93. data/ext/re2/prog.cc +341 -0
  94. data/ext/re2/prog.h +376 -0
  95. data/ext/re2/re2.cc +1180 -0
  96. data/ext/re2/re2.h +837 -0
  97. data/ext/re2/regexp.cc +920 -0
  98. data/ext/re2/regexp.h +632 -0
  99. data/ext/re2/rune.cc +258 -0
  100. data/ext/re2/set.cc +113 -0
  101. data/ext/re2/set.h +55 -0
  102. data/ext/re2/simplify.cc +393 -0
  103. data/ext/re2/stringpiece.cc +87 -0
  104. data/ext/re2/stringpiece.h +182 -0
  105. data/ext/re2/tostring.cc +341 -0
  106. data/ext/re2/unicode_casefold.cc +469 -0
  107. data/ext/re2/unicode_casefold.h +75 -0
  108. data/ext/re2/unicode_groups.cc +4851 -0
  109. data/ext/re2/unicode_groups.h +64 -0
  110. data/ext/re2/valgrind.cc +24 -0
  111. data/ext/re2/variadic_function.h +346 -0
  112. data/ext/re2/walker-inl.h +244 -0
  113. data/ext/src/chipper.cc +626 -0
  114. data/ext/src/version.h +1 -0
  115. data/ext/stemmer.rb +40 -0
  116. data/ext/util/arena.h +103 -0
  117. data/ext/util/atomicops.h +79 -0
  118. data/ext/util/benchmark.h +41 -0
  119. data/ext/util/flags.h +27 -0
  120. data/ext/util/logging.h +78 -0
  121. data/ext/util/mutex.h +190 -0
  122. data/ext/util/pcre.h +679 -0
  123. data/ext/util/random.h +29 -0
  124. data/ext/util/sparse_array.h +451 -0
  125. data/ext/util/sparse_set.h +177 -0
  126. data/ext/util/test.h +57 -0
  127. data/ext/util/thread.h +26 -0
  128. data/ext/util/utf.h +43 -0
  129. data/ext/util/util.h +127 -0
  130. data/ext/util/valgrind.h +4517 -0
  131. data/test/helper.rb +5 -0
  132. data/test/test_entities.rb +57 -0
  133. data/test/test_tokens.rb +118 -0
  134. metadata +199 -0
@@ -0,0 +1,64 @@
1
+ // Copyright 2008 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Unicode character groups.
6
+
7
+ // The codes get split into ranges of 16-bit codes
8
+ // and ranges of 32-bit codes. It would be simpler
9
+ // to use only 32-bit ranges, but these tables are large
10
+ // enough to warrant extra care.
11
+ //
12
+ // Using just 32-bit ranges gives 27 kB of data.
13
+ // Adding 16-bit ranges gives 18 kB of data.
14
+ // Adding an extra table of 16-bit singletons would reduce
15
+ // to 16.5 kB of data but make the data harder to use;
16
+ // we don't bother.
17
+
18
+ #ifndef RE2_UNICODE_GROUPS_H__
19
+ #define RE2_UNICODE_GROUPS_H__
20
+
21
+ #include "util/util.h"
22
+
23
+ namespace re2 {
24
+
25
+ struct URange16
26
+ {
27
+ uint16 lo;
28
+ uint16 hi;
29
+ };
30
+
31
+ struct URange32
32
+ {
33
+ uint32 lo;
34
+ uint32 hi;
35
+ };
36
+
37
+ struct UGroup
38
+ {
39
+ const char *name;
40
+ int sign; // +1 for [abc], -1 for [^abc]
41
+ URange16 *r16;
42
+ int nr16;
43
+ URange32 *r32;
44
+ int nr32;
45
+ };
46
+
47
+ // Named by property or script name (e.g., "Nd", "N", "Han").
48
+ // Negated groups are not included.
49
+ extern UGroup unicode_groups[];
50
+ extern int num_unicode_groups;
51
+
52
+ // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
53
+ // Negated groups are included.
54
+ extern UGroup posix_groups[];
55
+ extern int num_posix_groups;
56
+
57
+ // Named by Perl name (e.g., "\\d", "\\D").
58
+ // Negated groups are included.
59
+ extern UGroup perl_groups[];
60
+ extern int num_perl_groups;
61
+
62
+ } // namespace re2
63
+
64
+ #endif // RE2_UNICODE_GROUPS_H__
@@ -0,0 +1,24 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #include "util/util.h"
6
+ #include "util/valgrind.h"
7
+
8
+ namespace re2 {
9
+
10
// Evaluates the RUNNING_ON_VALGRIND macro when it is available
// (presumably supplied by util/valgrind.h -- confirm).  Builds without
// the macro always report false.
static bool checkValgrind() {
#ifndef RUNNING_ON_VALGRIND
  return false;
#else
  return RUNNING_ON_VALGRIND;
#endif
}

// Computed once, during static initialization of this translation unit.
static const int valgrind = checkValgrind();

// Returns the cached answer: non-zero when checkValgrind() reported true.
int RunningOnValgrind() {
  return valgrind;
}
23
+
24
+ } // namespace re2
@@ -0,0 +1,346 @@
1
+ // Copyright 2010 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_VARIADIC_FUNCTION_H_
6
+ #define RE2_VARIADIC_FUNCTION_H_
7
+
8
+ namespace re2 {
9
+
10
+ template <typename Result, typename Param0, typename Param1, typename Arg,
11
+ Result (*Func)(Param0, Param1, const Arg* const [], int count)>
12
+ class VariadicFunction2 {
13
+ public:
14
+ VariadicFunction2() {}
15
+
16
+ Result operator()(Param0 p0, Param1 p1) const {
17
+ return Func(p0, p1, 0, 0);
18
+ }
19
+
20
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0) const {
21
+ const Arg* const args[] = { &a0 };
22
+ return Func(p0, p1, args, 1);
23
+ }
24
+
25
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1) const {
26
+ const Arg* const args[] = { &a0, &a1 };
27
+ return Func(p0, p1, args, 2);
28
+ }
29
+
30
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
31
+ const Arg& a2) const {
32
+ const Arg* const args[] = { &a0, &a1, &a2 };
33
+ return Func(p0, p1, args, 3);
34
+ }
35
+
36
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
37
+ const Arg& a2, const Arg& a3) const {
38
+ const Arg* const args[] = { &a0, &a1, &a2, &a3 };
39
+ return Func(p0, p1, args, 4);
40
+ }
41
+
42
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
43
+ const Arg& a2, const Arg& a3, const Arg& a4) const {
44
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4 };
45
+ return Func(p0, p1, args, 5);
46
+ }
47
+
48
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
49
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5) const {
50
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5 };
51
+ return Func(p0, p1, args, 6);
52
+ }
53
+
54
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
55
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
56
+ const Arg& a6) const {
57
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6 };
58
+ return Func(p0, p1, args, 7);
59
+ }
60
+
61
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
62
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
63
+ const Arg& a6, const Arg& a7) const {
64
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7 };
65
+ return Func(p0, p1, args, 8);
66
+ }
67
+
68
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
69
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
70
+ const Arg& a6, const Arg& a7, const Arg& a8) const {
71
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8 };
72
+ return Func(p0, p1, args, 9);
73
+ }
74
+
75
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
76
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
77
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9) const {
78
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
79
+ &a9 };
80
+ return Func(p0, p1, args, 10);
81
+ }
82
+
83
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
84
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
85
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
86
+ const Arg& a10) const {
87
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
88
+ &a9, &a10 };
89
+ return Func(p0, p1, args, 11);
90
+ }
91
+
92
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
93
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
94
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
95
+ const Arg& a10, const Arg& a11) const {
96
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
97
+ &a9, &a10, &a11 };
98
+ return Func(p0, p1, args, 12);
99
+ }
100
+
101
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
102
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
103
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
104
+ const Arg& a10, const Arg& a11, const Arg& a12) const {
105
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
106
+ &a9, &a10, &a11, &a12 };
107
+ return Func(p0, p1, args, 13);
108
+ }
109
+
110
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
111
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
112
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
113
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13) const {
114
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
115
+ &a9, &a10, &a11, &a12, &a13 };
116
+ return Func(p0, p1, args, 14);
117
+ }
118
+
119
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
120
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
121
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
122
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
123
+ const Arg& a14) const {
124
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
125
+ &a9, &a10, &a11, &a12, &a13, &a14 };
126
+ return Func(p0, p1, args, 15);
127
+ }
128
+
129
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
130
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
131
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
132
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
133
+ const Arg& a14, const Arg& a15) const {
134
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
135
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15 };
136
+ return Func(p0, p1, args, 16);
137
+ }
138
+
139
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
140
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
141
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
142
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
143
+ const Arg& a14, const Arg& a15, const Arg& a16) const {
144
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
145
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16 };
146
+ return Func(p0, p1, args, 17);
147
+ }
148
+
149
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
150
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
151
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
152
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
153
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17) const {
154
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
155
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17 };
156
+ return Func(p0, p1, args, 18);
157
+ }
158
+
159
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
160
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
161
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
162
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
163
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
164
+ const Arg& a18) const {
165
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
166
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18 };
167
+ return Func(p0, p1, args, 19);
168
+ }
169
+
170
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
171
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
172
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
173
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
174
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
175
+ const Arg& a18, const Arg& a19) const {
176
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
177
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19 };
178
+ return Func(p0, p1, args, 20);
179
+ }
180
+
181
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
182
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
183
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
184
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
185
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
186
+ const Arg& a18, const Arg& a19, const Arg& a20) const {
187
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
188
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19,
189
+ &a20 };
190
+ return Func(p0, p1, args, 21);
191
+ }
192
+
193
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
194
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
195
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
196
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
197
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
198
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21) const {
199
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
200
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
201
+ &a21 };
202
+ return Func(p0, p1, args, 22);
203
+ }
204
+
205
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
206
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
207
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
208
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
209
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
210
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
211
+ const Arg& a22) const {
212
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
213
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
214
+ &a21, &a22 };
215
+ return Func(p0, p1, args, 23);
216
+ }
217
+
218
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
219
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
220
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
221
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
222
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
223
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
224
+ const Arg& a22, const Arg& a23) const {
225
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
226
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
227
+ &a21, &a22, &a23 };
228
+ return Func(p0, p1, args, 24);
229
+ }
230
+
231
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
232
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
233
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
234
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
235
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
236
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
237
+ const Arg& a22, const Arg& a23, const Arg& a24) const {
238
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
239
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
240
+ &a21, &a22, &a23, &a24 };
241
+ return Func(p0, p1, args, 25);
242
+ }
243
+
244
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
245
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
246
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
247
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
248
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
249
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
250
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25) const {
251
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
252
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
253
+ &a21, &a22, &a23, &a24, &a25 };
254
+ return Func(p0, p1, args, 26);
255
+ }
256
+
257
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
258
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
259
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
260
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
261
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
262
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
263
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
264
+ const Arg& a26) const {
265
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
266
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
267
+ &a21, &a22, &a23, &a24, &a25, &a26 };
268
+ return Func(p0, p1, args, 27);
269
+ }
270
+
271
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
272
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
273
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
274
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
275
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
276
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
277
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
278
+ const Arg& a26, const Arg& a27) const {
279
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
280
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
281
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27 };
282
+ return Func(p0, p1, args, 28);
283
+ }
284
+
285
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
286
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
287
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
288
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
289
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
290
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
291
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
292
+ const Arg& a26, const Arg& a27, const Arg& a28) const {
293
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
294
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
295
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28 };
296
+ return Func(p0, p1, args, 29);
297
+ }
298
+
299
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
300
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
301
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
302
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
303
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
304
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
305
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
306
+ const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29) const {
307
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
308
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
309
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29 };
310
+ return Func(p0, p1, args, 30);
311
+ }
312
+
313
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
314
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
315
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
316
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
317
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
318
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
319
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
320
+ const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29,
321
+ const Arg& a30) const {
322
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
323
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
324
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30 };
325
+ return Func(p0, p1, args, 31);
326
+ }
327
+
328
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
329
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
330
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
331
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
332
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
333
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
334
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
335
+ const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29,
336
+ const Arg& a30, const Arg& a31) const {
337
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
338
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
339
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30, &a31 };
340
+ return Func(p0, p1, args, 32);
341
+ }
342
+ };
343
+
344
+ } // namespace re2
345
+
346
+ #endif // RE2_VARIADIC_FUNCTION_H_
@@ -0,0 +1,244 @@
1
+ // Copyright 2006 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Helper class for traversing Regexps without recursion.
6
+ // Clients should declare their own subclasses that override
7
+ // the PreVisit and PostVisit methods, which are called before
8
+ // and after visiting the subexpressions.
9
+
10
+ // Not quite the Visitor pattern, because (among other things)
11
+ // the Visitor pattern is recursive.
12
+
13
+ #ifndef RE2_WALKER_INL_H__
14
+ #define RE2_WALKER_INL_H__
15
+
16
+ #include "re2/regexp.h"
17
+
18
+ namespace re2 {
19
+
20
+ template<typename T> struct WalkState;
21
+
22
+ template<typename T> class Regexp::Walker {
23
+ public:
24
+ Walker();
25
+ virtual ~Walker();
26
+
27
+ // Virtual method called before visiting re's children.
28
+ // PreVisit passes ownership of its return value to its caller.
29
+ // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
30
+ // and passed to the child PreVisits and PostVisits as parent_arg.
31
+ // At the top-most Regexp, parent_arg is arg passed to walk.
32
+ // If PreVisit sets *stop to true, the walk does not recurse
33
+ // into the children. Instead it behaves as though the return
34
+ // value from PreVisit is the return value from PostVisit.
35
+ // The default PreVisit returns parent_arg.
36
+ virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
37
+
38
+ // Virtual method called after visiting re's children.
39
+ // The pre_arg is the T that PreVisit returned.
40
+ // The child_args is a vector of the T that the child PostVisits returned.
41
+ // PostVisit takes ownership of pre_arg.
42
+ // PostVisit takes ownership of the Ts
43
+ // in *child_args, but not the vector itself.
44
+ // PostVisit passes ownership of its return value
45
+ // to its caller.
46
+ // The default PostVisit simply returns pre_arg.
47
+ virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
48
+ T* child_args, int nchild_args);
49
+
50
+ // Virtual method called to copy a T,
51
+ // when Walk notices that more than one child is the same re.
52
+ virtual T Copy(T arg);
53
+
54
+ // Virtual method called to do a "quick visit" of the re,
55
+ // but not its children. Only called once the visit budget
56
+ // has been used up and we're trying to abort the walk
57
+ // as quickly as possible. Should return a value that
58
+ // makes sense for the parent PostVisits still to be run.
59
+ // This function is (hopefully) only called by
60
+ // WalkExponential, but must be implemented by all clients,
61
+ // just in case.
62
+ virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
63
+
64
+ // Walks over a regular expression.
65
+ // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
66
+ // Returns the T returned by PostVisit on re.
67
+ T Walk(Regexp* re, T top_arg);
68
+
69
+ // Like Walk, but doesn't use Copy. This can lead to
70
+ // exponential runtimes on cross-linked Regexps like the
71
+ // ones generated by Simplify. To help limit this,
72
+ // at most max_visits nodes will be visited and then
73
+ // the walk will be cut off early.
74
+ // If the walk *is* cut off early, ShortVisit(re)
75
+ // will be called on regexps that cannot be fully
76
+ // visited rather than calling PreVisit/PostVisit.
77
+ T WalkExponential(Regexp* re, T top_arg, int max_visits);
78
+
79
+ // Clears the stack. Should never be necessary, since
80
+ // Walk always enters and exits with an empty stack.
81
+ // Logs DFATAL if stack is not already clear.
82
+ void Reset();
83
+
84
+ // Returns whether walk was cut off.
85
+ bool stopped_early() { return stopped_early_; }
86
+
87
+ private:
88
+ // Walk state for the entire traversal.
89
+ stack<WalkState<T> >* stack_;
90
+ bool stopped_early_;
91
+ int max_visits_;
92
+
93
+ T WalkInternal(Regexp* re, T top_arg, bool use_copy);
94
+
95
+ DISALLOW_EVIL_CONSTRUCTORS(Walker);
96
+ };
97
+
98
+ template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
99
+ T parent_arg,
100
+ bool* stop) {
101
+ return parent_arg;
102
+ }
103
+
104
+ template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
105
+ T parent_arg,
106
+ T pre_arg,
107
+ T* child_args,
108
+ int nchild_args) {
109
+ return pre_arg;
110
+ }
111
+
112
+ template<typename T> T Regexp::Walker<T>::Copy(T arg) {
113
+ return arg;
114
+ }
115
+
116
+ // State about a single level in the traversal.
117
+ template<typename T> struct WalkState {
118
+ WalkState<T>(Regexp* re, T parent)
119
+ : re(re),
120
+ n(-1),
121
+ parent_arg(parent),
122
+ child_args(NULL) { }
123
+
124
+ Regexp* re; // The regexp
125
+ int n; // The index of the next child to process; -1 means need to PreVisit
126
+ T parent_arg; // Accumulated arguments.
127
+ T pre_arg;
128
+ T child_arg; // One-element buffer for child_args.
129
+ T* child_args;
130
+ };
131
+
132
+ template<typename T> Regexp::Walker<T>::Walker() {
133
+ stack_ = new stack<WalkState<T> >;
134
+ stopped_early_ = false;
135
+ }
136
+
137
+ template<typename T> Regexp::Walker<T>::~Walker() {
138
+ Reset();
139
+ delete stack_;
140
+ }
141
+
142
+ // Clears the stack. Should never be necessary, since
143
+ // Walk always enters and exits with an empty stack.
144
+ // Logs DFATAL if stack is not already clear.
145
+ template<typename T> void Regexp::Walker<T>::Reset() {
146
+ if (stack_ && stack_->size() > 0) {
147
+ LOG(DFATAL) << "Stack not empty.";
148
+ while (stack_->size() > 0) {
149
+ delete stack_->top().child_args;
150
+ stack_->pop();
151
+ }
152
+ }
153
+ }
154
+
155
+ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
156
+ bool use_copy) {
157
+ Reset();
158
+
159
+ if (re == NULL) {
160
+ LOG(DFATAL) << "Walk NULL";
161
+ return top_arg;
162
+ }
163
+
164
+ stack_->push(WalkState<T>(re, top_arg));
165
+
166
+ WalkState<T>* s;
167
+ for (;;) {
168
+ T t;
169
+ s = &stack_->top();
170
+ Regexp* re = s->re;
171
+ switch (s->n) {
172
+ case -1: {
173
+ if (--max_visits_ < 0) {
174
+ stopped_early_ = true;
175
+ t = ShortVisit(re, s->parent_arg);
176
+ break;
177
+ }
178
+ bool stop = false;
179
+ s->pre_arg = PreVisit(re, s->parent_arg, &stop);
180
+ if (stop) {
181
+ t = s->pre_arg;
182
+ break;
183
+ }
184
+ s->n = 0;
185
+ s->child_args = NULL;
186
+ if (re->nsub_ == 1)
187
+ s->child_args = &s->child_arg;
188
+ else if (re->nsub_ > 1)
189
+ s->child_args = new T[re->nsub_];
190
+ // Fall through.
191
+ }
192
+ default: {
193
+ if (re->nsub_ > 0) {
194
+ Regexp** sub = re->sub();
195
+ if (s->n < re->nsub_) {
196
+ if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
197
+ s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
198
+ s->n++;
199
+ } else {
200
+ stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
201
+ }
202
+ continue;
203
+ }
204
+ }
205
+
206
+ t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
207
+ if (re->nsub_ > 1)
208
+ delete[] s->child_args;
209
+ break;
210
+ }
211
+ }
212
+
213
+ // We've finished stack_->top().
214
+ // Update next guy down.
215
+ stack_->pop();
216
+ if (stack_->size() == 0)
217
+ return t;
218
+ s = &stack_->top();
219
+ if (s->child_args != NULL)
220
+ s->child_args[s->n] = t;
221
+ else
222
+ s->child_arg = t;
223
+ s->n++;
224
+ }
225
+ }
226
+
227
+ template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
228
+ // Without the exponential walking behavior,
229
+ // this budget should be more than enough for any
230
+ // regexp, and yet not enough to get us in trouble
231
+ // as far as CPU time.
232
+ max_visits_ = 1000000;
233
+ return WalkInternal(re, top_arg, true);
234
+ }
235
+
236
+ template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
237
+ int max_visits) {
238
+ max_visits_ = max_visits;
239
+ return WalkInternal(re, top_arg, false);
240
+ }
241
+
242
+ } // namespace re2
243
+
244
+ #endif // RE2_WALKER_INL_H__