chipper 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. data/README.rdoc +51 -0
  2. data/ext/extconf.rb +58 -0
  3. data/ext/libstemmer_c/Makefile +10 -0
  4. data/ext/libstemmer_c/examples/stemwords.c +209 -0
  5. data/ext/libstemmer_c/include/libstemmer.h +79 -0
  6. data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
  7. data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
  8. data/ext/libstemmer_c/libstemmer/modules.h +190 -0
  9. data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
  10. data/ext/libstemmer_c/mkinc.mak +82 -0
  11. data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
  12. data/ext/libstemmer_c/runtime/api.c +66 -0
  13. data/ext/libstemmer_c/runtime/api.h +26 -0
  14. data/ext/libstemmer_c/runtime/header.h +58 -0
  15. data/ext/libstemmer_c/runtime/utilities.c +478 -0
  16. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  17. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  18. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  19. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  20. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  21. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  22. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  24. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  25. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  26. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
  27. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  28. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  29. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  30. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  31. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  32. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  33. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  34. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  35. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  36. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  37. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  38. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  39. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  40. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  41. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  42. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  43. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  44. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  45. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  46. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  47. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  48. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  49. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  50. data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  51. data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  52. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  53. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  54. data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  55. data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  56. data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
  57. data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  58. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  59. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  60. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  61. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  62. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  63. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  64. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  65. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  66. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  67. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  68. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  69. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  70. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  71. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  72. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  73. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  74. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  75. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  76. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  77. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  78. data/ext/re2/bitstate.cc +378 -0
  79. data/ext/re2/compile.cc +1138 -0
  80. data/ext/re2/dfa.cc +2086 -0
  81. data/ext/re2/filtered_re2.cc +100 -0
  82. data/ext/re2/filtered_re2.h +99 -0
  83. data/ext/re2/hash.cc +231 -0
  84. data/ext/re2/mimics_pcre.cc +185 -0
  85. data/ext/re2/nfa.cc +709 -0
  86. data/ext/re2/onepass.cc +614 -0
  87. data/ext/re2/parse.cc +2202 -0
  88. data/ext/re2/perl_groups.cc +119 -0
  89. data/ext/re2/prefilter.cc +671 -0
  90. data/ext/re2/prefilter.h +105 -0
  91. data/ext/re2/prefilter_tree.cc +398 -0
  92. data/ext/re2/prefilter_tree.h +130 -0
  93. data/ext/re2/prog.cc +341 -0
  94. data/ext/re2/prog.h +376 -0
  95. data/ext/re2/re2.cc +1180 -0
  96. data/ext/re2/re2.h +837 -0
  97. data/ext/re2/regexp.cc +920 -0
  98. data/ext/re2/regexp.h +632 -0
  99. data/ext/re2/rune.cc +258 -0
  100. data/ext/re2/set.cc +113 -0
  101. data/ext/re2/set.h +55 -0
  102. data/ext/re2/simplify.cc +393 -0
  103. data/ext/re2/stringpiece.cc +87 -0
  104. data/ext/re2/stringpiece.h +182 -0
  105. data/ext/re2/tostring.cc +341 -0
  106. data/ext/re2/unicode_casefold.cc +469 -0
  107. data/ext/re2/unicode_casefold.h +75 -0
  108. data/ext/re2/unicode_groups.cc +4851 -0
  109. data/ext/re2/unicode_groups.h +64 -0
  110. data/ext/re2/valgrind.cc +24 -0
  111. data/ext/re2/variadic_function.h +346 -0
  112. data/ext/re2/walker-inl.h +244 -0
  113. data/ext/src/chipper.cc +626 -0
  114. data/ext/src/version.h +1 -0
  115. data/ext/stemmer.rb +40 -0
  116. data/ext/util/arena.h +103 -0
  117. data/ext/util/atomicops.h +79 -0
  118. data/ext/util/benchmark.h +41 -0
  119. data/ext/util/flags.h +27 -0
  120. data/ext/util/logging.h +78 -0
  121. data/ext/util/mutex.h +190 -0
  122. data/ext/util/pcre.h +679 -0
  123. data/ext/util/random.h +29 -0
  124. data/ext/util/sparse_array.h +451 -0
  125. data/ext/util/sparse_set.h +177 -0
  126. data/ext/util/test.h +57 -0
  127. data/ext/util/thread.h +26 -0
  128. data/ext/util/utf.h +43 -0
  129. data/ext/util/util.h +127 -0
  130. data/ext/util/valgrind.h +4517 -0
  131. data/test/helper.rb +5 -0
  132. data/test/test_entities.rb +57 -0
  133. data/test/test_tokens.rb +118 -0
  134. metadata +199 -0
@@ -0,0 +1,64 @@
1
+ // Copyright 2008 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Unicode character groups.
6
+
7
+ // The codes get split into ranges of 16-bit codes
8
+ // and ranges of 32-bit codes. It would be simpler
9
+ // to use only 32-bit ranges, but these tables are large
10
+ // enough to warrant extra care.
11
+ //
12
+ // Using just 32-bit ranges gives 27 kB of data.
13
+ // Adding 16-bit ranges gives 18 kB of data.
14
+ // Adding an extra table of 16-bit singletons would reduce
15
+ // to 16.5 kB of data but make the data harder to use;
16
+ // we don't bother.
17
+
18
+ #ifndef RE2_UNICODE_GROUPS_H__
19
+ #define RE2_UNICODE_GROUPS_H__
20
+
21
+ #include "util/util.h"
22
+
23
+ namespace re2 {
24
+
25
+ struct URange16
26
+ {
27
+ uint16 lo;
28
+ uint16 hi;
29
+ };
30
+
31
+ struct URange32
32
+ {
33
+ uint32 lo;
34
+ uint32 hi;
35
+ };
36
+
37
+ struct UGroup
38
+ {
39
+ const char *name;
40
+ int sign; // +1 for [abc], -1 for [^abc]
41
+ URange16 *r16;
42
+ int nr16;
43
+ URange32 *r32;
44
+ int nr32;
45
+ };
46
+
47
+ // Named by property or script name (e.g., "Nd", "N", "Han").
48
+ // Negated groups are not included.
49
+ extern UGroup unicode_groups[];
50
+ extern int num_unicode_groups;
51
+
52
+ // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
53
+ // Negated groups are included.
54
+ extern UGroup posix_groups[];
55
+ extern int num_posix_groups;
56
+
57
+ // Named by Perl name (e.g., "\\d", "\\D").
58
+ // Negated groups are included.
59
+ extern UGroup perl_groups[];
60
+ extern int num_perl_groups;
61
+
62
+ } // namespace re2
63
+
64
+ #endif // RE2_UNICODE_GROUPS_H__
@@ -0,0 +1,24 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #include "util/util.h"
6
+ #include "util/valgrind.h"
7
+
8
+ namespace re2 {
9
+
10
// Reports whether the process is running under Valgrind.
// When RUNNING_ON_VALGRIND is not defined (the macro is expected to
// come from util/valgrind.h) the answer is always false.
static bool checkValgrind() {
#ifdef RUNNING_ON_VALGRIND
  return RUNNING_ON_VALGRIND;
#else
  return false;
#endif
}

// Returns 1 when running under Valgrind and 0 otherwise.
//
// The answer is computed on every call instead of being read from a
// namespace-scope variable.  Such a variable is dynamically initialized,
// so a call made while another translation unit's static objects are
// still being constructed could observe it as 0 before it had been set.
// Going through checkValgrind() keeps the bool-to-int collapse of the
// previous implementation, so the result is still exactly 0 or 1.
int RunningOnValgrind() {
  return checkValgrind();
}
23
+
24
+ } // namespace re2
@@ -0,0 +1,346 @@
1
+ // Copyright 2010 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_VARIADIC_FUNCTION_H_
6
+ #define RE2_VARIADIC_FUNCTION_H_
7
+
8
+ namespace re2 {
9
+
10
+ template <typename Result, typename Param0, typename Param1, typename Arg,
11
+ Result (*Func)(Param0, Param1, const Arg* const [], int count)>
12
+ class VariadicFunction2 {
13
+ public:
14
+ VariadicFunction2() {}
15
+
16
+ Result operator()(Param0 p0, Param1 p1) const {
17
+ return Func(p0, p1, 0, 0);
18
+ }
19
+
20
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0) const {
21
+ const Arg* const args[] = { &a0 };
22
+ return Func(p0, p1, args, 1);
23
+ }
24
+
25
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1) const {
26
+ const Arg* const args[] = { &a0, &a1 };
27
+ return Func(p0, p1, args, 2);
28
+ }
29
+
30
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
31
+ const Arg& a2) const {
32
+ const Arg* const args[] = { &a0, &a1, &a2 };
33
+ return Func(p0, p1, args, 3);
34
+ }
35
+
36
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
37
+ const Arg& a2, const Arg& a3) const {
38
+ const Arg* const args[] = { &a0, &a1, &a2, &a3 };
39
+ return Func(p0, p1, args, 4);
40
+ }
41
+
42
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
43
+ const Arg& a2, const Arg& a3, const Arg& a4) const {
44
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4 };
45
+ return Func(p0, p1, args, 5);
46
+ }
47
+
48
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
49
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5) const {
50
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5 };
51
+ return Func(p0, p1, args, 6);
52
+ }
53
+
54
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
55
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
56
+ const Arg& a6) const {
57
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6 };
58
+ return Func(p0, p1, args, 7);
59
+ }
60
+
61
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
62
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
63
+ const Arg& a6, const Arg& a7) const {
64
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7 };
65
+ return Func(p0, p1, args, 8);
66
+ }
67
+
68
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
69
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
70
+ const Arg& a6, const Arg& a7, const Arg& a8) const {
71
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8 };
72
+ return Func(p0, p1, args, 9);
73
+ }
74
+
75
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
76
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
77
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9) const {
78
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
79
+ &a9 };
80
+ return Func(p0, p1, args, 10);
81
+ }
82
+
83
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
84
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
85
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
86
+ const Arg& a10) const {
87
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
88
+ &a9, &a10 };
89
+ return Func(p0, p1, args, 11);
90
+ }
91
+
92
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
93
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
94
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
95
+ const Arg& a10, const Arg& a11) const {
96
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
97
+ &a9, &a10, &a11 };
98
+ return Func(p0, p1, args, 12);
99
+ }
100
+
101
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
102
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
103
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
104
+ const Arg& a10, const Arg& a11, const Arg& a12) const {
105
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
106
+ &a9, &a10, &a11, &a12 };
107
+ return Func(p0, p1, args, 13);
108
+ }
109
+
110
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
111
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
112
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
113
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13) const {
114
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
115
+ &a9, &a10, &a11, &a12, &a13 };
116
+ return Func(p0, p1, args, 14);
117
+ }
118
+
119
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
120
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
121
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
122
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
123
+ const Arg& a14) const {
124
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
125
+ &a9, &a10, &a11, &a12, &a13, &a14 };
126
+ return Func(p0, p1, args, 15);
127
+ }
128
+
129
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
130
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
131
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
132
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
133
+ const Arg& a14, const Arg& a15) const {
134
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
135
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15 };
136
+ return Func(p0, p1, args, 16);
137
+ }
138
+
139
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
140
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
141
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
142
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
143
+ const Arg& a14, const Arg& a15, const Arg& a16) const {
144
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
145
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16 };
146
+ return Func(p0, p1, args, 17);
147
+ }
148
+
149
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
150
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
151
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
152
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
153
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17) const {
154
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
155
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17 };
156
+ return Func(p0, p1, args, 18);
157
+ }
158
+
159
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
160
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
161
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
162
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
163
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
164
+ const Arg& a18) const {
165
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
166
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18 };
167
+ return Func(p0, p1, args, 19);
168
+ }
169
+
170
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
171
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
172
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
173
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
174
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
175
+ const Arg& a18, const Arg& a19) const {
176
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
177
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19 };
178
+ return Func(p0, p1, args, 20);
179
+ }
180
+
181
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
182
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
183
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
184
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
185
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
186
+ const Arg& a18, const Arg& a19, const Arg& a20) const {
187
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
188
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19,
189
+ &a20 };
190
+ return Func(p0, p1, args, 21);
191
+ }
192
+
193
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
194
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
195
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
196
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
197
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
198
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21) const {
199
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
200
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
201
+ &a21 };
202
+ return Func(p0, p1, args, 22);
203
+ }
204
+
205
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
206
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
207
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
208
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
209
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
210
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
211
+ const Arg& a22) const {
212
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
213
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
214
+ &a21, &a22 };
215
+ return Func(p0, p1, args, 23);
216
+ }
217
+
218
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
219
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
220
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
221
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
222
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
223
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
224
+ const Arg& a22, const Arg& a23) const {
225
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
226
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
227
+ &a21, &a22, &a23 };
228
+ return Func(p0, p1, args, 24);
229
+ }
230
+
231
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
232
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
233
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
234
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
235
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
236
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
237
+ const Arg& a22, const Arg& a23, const Arg& a24) const {
238
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
239
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
240
+ &a21, &a22, &a23, &a24 };
241
+ return Func(p0, p1, args, 25);
242
+ }
243
+
244
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
245
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
246
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
247
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
248
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
249
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
250
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25) const {
251
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
252
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
253
+ &a21, &a22, &a23, &a24, &a25 };
254
+ return Func(p0, p1, args, 26);
255
+ }
256
+
257
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
258
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
259
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
260
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
261
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
262
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
263
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
264
+ const Arg& a26) const {
265
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
266
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
267
+ &a21, &a22, &a23, &a24, &a25, &a26 };
268
+ return Func(p0, p1, args, 27);
269
+ }
270
+
271
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
272
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
273
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
274
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
275
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
276
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
277
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
278
+ const Arg& a26, const Arg& a27) const {
279
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
280
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
281
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27 };
282
+ return Func(p0, p1, args, 28);
283
+ }
284
+
285
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
286
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
287
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
288
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
289
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
290
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
291
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
292
+ const Arg& a26, const Arg& a27, const Arg& a28) const {
293
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
294
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
295
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28 };
296
+ return Func(p0, p1, args, 29);
297
+ }
298
+
299
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
300
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
301
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
302
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
303
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
304
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
305
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
306
+ const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29) const {
307
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
308
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
309
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29 };
310
+ return Func(p0, p1, args, 30);
311
+ }
312
+
313
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
314
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
315
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
316
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
317
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
318
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
319
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
320
+ const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29,
321
+ const Arg& a30) const {
322
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
323
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
324
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30 };
325
+ return Func(p0, p1, args, 31);
326
+ }
327
+
328
+ Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1,
329
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5,
330
+ const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9,
331
+ const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13,
332
+ const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17,
333
+ const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21,
334
+ const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25,
335
+ const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29,
336
+ const Arg& a30, const Arg& a31) const {
337
+ const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8,
338
+ &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20,
339
+ &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30, &a31 };
340
+ return Func(p0, p1, args, 32);
341
+ }
342
+ };
343
+
344
+ } // namespace re2
345
+
346
+ #endif // RE2_VARIADIC_FUNCTION_H_
@@ -0,0 +1,244 @@
1
+ // Copyright 2006 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Helper class for traversing Regexps without recursion.
6
+ // Clients should declare their own subclasses that override
7
+ // the PreVisit and PostVisit methods, which are called before
8
+ // and after visiting the subexpressions.
9
+
10
+ // Not quite the Visitor pattern, because (among other things)
11
+ // the Visitor pattern is recursive.
12
+
13
+ #ifndef RE2_WALKER_INL_H__
14
+ #define RE2_WALKER_INL_H__
15
+
16
+ #include "re2/regexp.h"
17
+
18
+ namespace re2 {
19
+
20
+ template<typename T> struct WalkState;
21
+
22
+ template<typename T> class Regexp::Walker {
23
+ public:
24
+ Walker();
25
+ virtual ~Walker();
26
+
27
+ // Virtual method called before visiting re's children.
28
+ // PreVisit passes ownership of its return value to its caller.
29
+ // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
30
+ // and passed to the child PreVisits and PostVisits as parent_arg.
31
+ // At the top-most Regexp, parent_arg is arg passed to walk.
32
+ // If PreVisit sets *stop to true, the walk does not recurse
33
+ // into the children. Instead it behaves as though the return
34
+ // value from PreVisit is the return value from PostVisit.
35
+ // The default PreVisit returns parent_arg.
36
+ virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
37
+
38
+ // Virtual method called after visiting re's children.
39
+ // The pre_arg is the T that PreVisit returned.
40
+ // The child_args is a vector of the T that the child PostVisits returned.
41
+ // PostVisit takes ownership of pre_arg.
42
+ // PostVisit takes ownership of the Ts
43
+ // in *child_args, but not the vector itself.
44
+ // PostVisit passes ownership of its return value
45
+ // to its caller.
46
+ // The default PostVisit simply returns pre_arg.
47
+ virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
48
+ T* child_args, int nchild_args);
49
+
50
+ // Virtual method called to copy a T,
51
+ // when Walk notices that more than one child is the same re.
52
+ virtual T Copy(T arg);
53
+
54
+ // Virtual method called to do a "quick visit" of the re,
55
+ // but not its children. Only called once the visit budget
56
+ // has been used up and we're trying to abort the walk
57
+ // as quickly as possible. Should return a value that
58
+ // makes sense for the parent PostVisits still to be run.
59
+ // This function is (hopefully) only called by
60
+ // WalkExponential, but must be implemented by all clients,
61
+ // just in case.
62
+ virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
63
+
64
+ // Walks over a regular expression.
65
+ // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
66
+ // Returns the T returned by PostVisit on re.
67
+ T Walk(Regexp* re, T top_arg);
68
+
69
+ // Like Walk, but doesn't use Copy. This can lead to
70
+ // exponential runtimes on cross-linked Regexps like the
71
+ // ones generated by Simplify. To help limit this,
72
+ // at most max_visits nodes will be visited and then
73
+ // the walk will be cut off early.
74
+ // If the walk *is* cut off early, ShortVisit(re)
75
+ // will be called on regexps that cannot be fully
76
+ // visited rather than calling PreVisit/PostVisit.
77
+ T WalkExponential(Regexp* re, T top_arg, int max_visits);
78
+
79
+ // Clears the stack. Should never be necessary, since
80
+ // Walk always enters and exits with an empty stack.
81
+ // Logs DFATAL if stack is not already clear.
82
+ void Reset();
83
+
84
+ // Returns whether walk was cut off.
85
+ bool stopped_early() { return stopped_early_; }
86
+
87
+ private:
88
+ // Walk state for the entire traversal.
89
+ stack<WalkState<T> >* stack_;
90
+ bool stopped_early_;
91
+ int max_visits_;
92
+
93
+ T WalkInternal(Regexp* re, T top_arg, bool use_copy);
94
+
95
+ DISALLOW_EVIL_CONSTRUCTORS(Walker);
96
+ };
97
+
98
+ template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
99
+ T parent_arg,
100
+ bool* stop) {
101
+ return parent_arg;
102
+ }
103
+
104
+ template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
105
+ T parent_arg,
106
+ T pre_arg,
107
+ T* child_args,
108
+ int nchild_args) {
109
+ return pre_arg;
110
+ }
111
+
112
+ template<typename T> T Regexp::Walker<T>::Copy(T arg) {
113
+ return arg;
114
+ }
115
+
116
+ // State about a single level in the traversal.
117
+ template<typename T> struct WalkState {
118
+ WalkState<T>(Regexp* re, T parent)
119
+ : re(re),
120
+ n(-1),
121
+ parent_arg(parent),
122
+ child_args(NULL) { }
123
+
124
+ Regexp* re; // The regexp
125
+ int n; // The index of the next child to process; -1 means need to PreVisit
126
+ T parent_arg; // Accumulated arguments.
127
+ T pre_arg;
128
+ T child_arg; // One-element buffer for child_args.
129
+ T* child_args;
130
+ };
131
+
132
+ template<typename T> Regexp::Walker<T>::Walker() {
133
+ stack_ = new stack<WalkState<T> >;
134
+ stopped_early_ = false;
135
+ }
136
+
137
+ template<typename T> Regexp::Walker<T>::~Walker() {
138
+ Reset();
139
+ delete stack_;
140
+ }
141
+
142
+ // Clears the stack. Should never be necessary, since
143
+ // Walk always enters and exits with an empty stack.
144
+ // Logs DFATAL if stack is not already clear.
145
+ template<typename T> void Regexp::Walker<T>::Reset() {
146
+ if (stack_ && stack_->size() > 0) {
147
+ LOG(DFATAL) << "Stack not empty.";
148
+ while (stack_->size() > 0) {
149
+ delete stack_->top().child_args;
150
+ stack_->pop();
151
+ }
152
+ }
153
+ }
154
+
155
+ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
156
+ bool use_copy) {
157
+ Reset();
158
+
159
+ if (re == NULL) {
160
+ LOG(DFATAL) << "Walk NULL";
161
+ return top_arg;
162
+ }
163
+
164
+ stack_->push(WalkState<T>(re, top_arg));
165
+
166
+ WalkState<T>* s;
167
+ for (;;) {
168
+ T t;
169
+ s = &stack_->top();
170
+ Regexp* re = s->re;
171
+ switch (s->n) {
172
+ case -1: {
173
+ if (--max_visits_ < 0) {
174
+ stopped_early_ = true;
175
+ t = ShortVisit(re, s->parent_arg);
176
+ break;
177
+ }
178
+ bool stop = false;
179
+ s->pre_arg = PreVisit(re, s->parent_arg, &stop);
180
+ if (stop) {
181
+ t = s->pre_arg;
182
+ break;
183
+ }
184
+ s->n = 0;
185
+ s->child_args = NULL;
186
+ if (re->nsub_ == 1)
187
+ s->child_args = &s->child_arg;
188
+ else if (re->nsub_ > 1)
189
+ s->child_args = new T[re->nsub_];
190
+ // Fall through.
191
+ }
192
+ default: {
193
+ if (re->nsub_ > 0) {
194
+ Regexp** sub = re->sub();
195
+ if (s->n < re->nsub_) {
196
+ if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
197
+ s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
198
+ s->n++;
199
+ } else {
200
+ stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
201
+ }
202
+ continue;
203
+ }
204
+ }
205
+
206
+ t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
207
+ if (re->nsub_ > 1)
208
+ delete[] s->child_args;
209
+ break;
210
+ }
211
+ }
212
+
213
+ // We've finished stack_->top().
214
+ // Update next guy down.
215
+ stack_->pop();
216
+ if (stack_->size() == 0)
217
+ return t;
218
+ s = &stack_->top();
219
+ if (s->child_args != NULL)
220
+ s->child_args[s->n] = t;
221
+ else
222
+ s->child_arg = t;
223
+ s->n++;
224
+ }
225
+ }
226
+
227
+ template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
228
+ // Without the exponential walking behavior,
229
+ // this budget should be more than enough for any
230
+ // regexp, and yet not enough to get us in trouble
231
+ // as far as CPU time.
232
+ max_visits_ = 1000000;
233
+ return WalkInternal(re, top_arg, true);
234
+ }
235
+
236
+ template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
237
+ int max_visits) {
238
+ max_visits_ = max_visits;
239
+ return WalkInternal(re, top_arg, false);
240
+ }
241
+
242
+ } // namespace re2
243
+
244
+ #endif // RE2_WALKER_INL_H__