Mxx_ru 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +26 -26
- data/NEWS +107 -107
- data/README +21 -21
- data/Rakefile +56 -59
- data/examples/exe_and_lib/prj.rb +11 -11
- data/examples/exe_and_lib/say.rb +7 -7
- data/examples/exe_dll_lib/inout.rb +18 -18
- data/examples/exe_dll_lib/prj.rb +11 -11
- data/examples/exe_dll_lib/say.rb +9 -9
- data/examples/exe_dll_lib_2/build.rb +10 -10
- data/examples/exe_dll_lib_2/inout/prj.rb +16 -16
- data/examples/exe_dll_lib_2/main/prj.rb +9 -9
- data/examples/exe_dll_lib_2/say/prj.rb +8 -8
- data/examples/simple_exe/prj.rb +7 -7
- data/lib/mxx_ru/abstract_target.rb +335 -335
- data/lib/mxx_ru/binary_library.rb +106 -106
- data/lib/mxx_ru/binary_target.rb +173 -173
- data/lib/mxx_ru/binary_unittest.rb +143 -143
- data/lib/mxx_ru/compat.rb +33 -33
- data/lib/mxx_ru/cpp.rb +38 -38
- data/lib/mxx_ru/cpp/analyzer.rb +237 -237
- data/lib/mxx_ru/cpp/composite.rb +118 -118
- data/lib/mxx_ru/cpp/detect_toolset.rb +122 -122
- data/lib/mxx_ru/cpp/mode.rb +90 -90
- data/lib/mxx_ru/cpp/obj_placement.rb +330 -330
- data/lib/mxx_ru/cpp/obj_placements/custom_subdir.rb +155 -155
- data/lib/mxx_ru/cpp/qt.rb +366 -366
- data/lib/mxx_ru/cpp/rucodegen.rb +157 -157
- data/lib/mxx_ru/cpp/source_file.rb +79 -79
- data/lib/mxx_ru/cpp/target.rb +1523 -1523
- data/lib/mxx_ru/cpp/toolset.rb +1087 -1087
- data/lib/mxx_ru/cpp/toolsets/bcc_win32_5.rb +53 -53
- data/lib/mxx_ru/cpp/toolsets/bcc_win32_family.rb +460 -460
- data/lib/mxx_ru/cpp/toolsets/c89_etk_nsk.rb +59 -59
- data/lib/mxx_ru/cpp/toolsets/c89_nsk.rb +59 -59
- data/lib/mxx_ru/cpp/toolsets/c89_nsk_family.rb +277 -277
- data/lib/mxx_ru/cpp/toolsets/gcc_cygwin.rb +58 -58
- data/lib/mxx_ru/cpp/toolsets/gcc_family.rb +420 -424
- data/lib/mxx_ru/cpp/toolsets/gcc_linux.rb +64 -64
- data/lib/mxx_ru/cpp/toolsets/gcc_mingw.rb +150 -150
- data/lib/mxx_ru/cpp/toolsets/gcc_sparc_solaris.rb +91 -91
- data/lib/mxx_ru/cpp/toolsets/gcc_unix_family.rb +103 -84
- data/lib/mxx_ru/cpp/toolsets/vc7.rb +62 -62
- data/lib/mxx_ru/cpp/toolsets/vc8.rb +455 -455
- data/lib/mxx_ru/cpp/toolsets/vc_family.rb +448 -448
- data/lib/mxx_ru/ex.rb +165 -165
- data/lib/mxx_ru/makestyle_generator.rb +146 -146
- data/lib/mxx_ru/textfile_unittest.rb +303 -303
- data/lib/mxx_ru/util.rb +340 -340
- data/tests/c/pcre/chartables.c +183 -183
- data/tests/c/pcre/config.h +99 -99
- data/tests/c/pcre/dftables.c +167 -167
- data/tests/c/pcre/get.c +349 -349
- data/tests/c/pcre/internal.h +677 -677
- data/tests/c/pcre/maketables.c +140 -140
- data/tests/c/pcre/pcre.c +8304 -8304
- data/tests/c/pcre/pcre.h +193 -193
- data/tests/c/pcre/pcre.rb +14 -14
- data/tests/c/pcre/pcredemo.c +316 -316
- data/tests/c/pcre/pcregrep.c +642 -642
- data/tests/c/pcre/pcreposix.c +305 -305
- data/tests/c/pcre/pcreposix.h +88 -88
- data/tests/c/pcre/pcretest.c +1483 -1483
- data/tests/c/pcre/perltest +211 -211
- data/tests/c/pcre/printint.c +360 -360
- data/tests/c/pcre/study.c +472 -472
- data/tests/cpp/cpp_sources_glob/build.rb +7 -7
- data/tests/cpp/cpp_sources_glob/some/module/prj.rb +10 -10
- data/tests/cpp/cpp_sources_glob/tc_cpp_sources_glob.rb +18 -18
- data/tests/cpp/mswin_res_dll/build.rb +14 -14
- data/tests/cpp/mswin_res_dll/dll.cpp +17 -17
- data/tests/cpp/mswin_res_dll/dll.rb +29 -29
- data/tests/cpp/mswin_res_dll/dll.rc +48 -48
- data/tests/cpp/mswin_res_dll/h/dll.hpp +8 -8
- data/tests/cpp/mswin_res_dll/h/res.h +3 -3
- data/tests/cpp/mswin_res_dll/main.cpp +13 -13
- data/tests/cpp/mswin_res_dll/main.rb +19 -19
- data/tests/cpp/mswin_res_dll/tc_mswin_res_dll.rb +18 -18
- data/tests/cpp/mswin_res_exe/build.rb +23 -23
- data/tests/cpp/mswin_res_exe/h/res.h +3 -3
- data/tests/cpp/mswin_res_exe/main.cpp +17 -17
- data/tests/cpp/mswin_res_exe/main.rc +48 -48
- data/tests/cpp/mswin_res_exe/tc_mswin_res_exe.rb +18 -18
- data/tests/cpp/rucodegen.embedded/host_config.cpp +32 -32
- data/tests/cpp/rucodegen.embedded/impl/conn_params.cpp +7 -7
- data/tests/cpp/rucodegen.embedded/impl/conn_params.rb +14 -14
- data/tests/cpp/rucodegen.embedded/impl/h/conn_params.hpp +10 -10
- data/tests/cpp/rucodegen.embedded/prj.rb +16 -16
- data/tests/cpp/rucodegen.embedded/tc_rucodegen.rb +18 -18
- data/tests/cpp/rucodegen/host_config.cpp +20 -20
- data/tests/cpp/rucodegen/host_config.rb +14 -14
- data/tests/cpp/rucodegen/impl/conn_params.cpp +7 -7
- data/tests/cpp/rucodegen/impl/conn_params.rb +14 -14
- data/tests/cpp/rucodegen/impl/h/conn_params.hpp +10 -10
- data/tests/cpp/rucodegen/prj.rb +16 -16
- data/tests/cpp/rucodegen/tc_rucodegen.rb +18 -18
- data/tests/cpp/textfile_unittest/build.rb +8 -8
- data/tests/cpp/textfile_unittest/etalons/out_1.txt +1 -1
- data/tests/cpp/textfile_unittest/etalons/out_128.txt +128 -128
- data/tests/cpp/textfile_unittest/main.cpp +89 -89
- data/tests/cpp/textfile_unittest/prj.rb +8 -8
- data/tests/cpp/textfile_unittest/prj.ut.rb +18 -18
- data/tests/cpp/textfile_unittest/tc_textfile_unittest.rb +18 -18
- data/tests/cpp/toolset_name.rb +6 -6
- data/tests/cpp/vc_cleanup/prj_dll_no_implib.rb +10 -10
- data/tests/cpp/vc_cleanup/prj_dll_no_implib_simple_target_root.rb +11 -11
- data/tests/cpp/vc_cleanup/prj_dll_with_implib.rb +11 -11
- data/tests/cpp/vc_cleanup/prj_dll_with_implib_simple_target_root.rb +14 -14
- data/tests/cpp/vc_cleanup/prj_exe_no_implib.rb +10 -10
- data/tests/cpp/vc_cleanup/prj_exe_no_implib_simple_target_root.rb +11 -11
- data/tests/cpp/vc_cleanup/prj_lib.rb +10 -10
- data/tests/cpp/vc_cleanup/prj_lib_with_simple_target_root.rb +11 -11
- data/tests/cpp/vc_cleanup/tc_vc_cleanup.rb +23 -23
- data/tests/mxx_ru/binary_library/tc_binary_library.rb +57 -57
- data/tests/mxx_ru/binary_library/tc_binary_target_lib_methods.rb +114 -114
- data/tests/mxx_ru/change_default_value/ignoring_by_build_root/build.rb +8 -8
- data/tests/mxx_ru/change_default_value/ignoring_by_build_root/child_1.rb +5 -5
- data/tests/mxx_ru/change_default_value/ignoring_by_child_1/build.rb +7 -7
- data/tests/mxx_ru/change_default_value/ignoring_by_child_1/child_1.rb +5 -5
- data/tests/mxx_ru/change_default_value/ignoring_by_child_1/child_2.rb +5 -5
- data/tests/mxx_ru/change_default_value/ok/build.rb +8 -8
- data/tests/mxx_ru/change_default_value/ok/child_1.rb +8 -8
- data/tests/mxx_ru/lib_path/build.rb +8 -8
- data/tests/mxx_ru/lib_path/bye.rb +8 -8
- data/tests/mxx_ru/lib_path/hi.rb +8 -8
- data/tests/mxx_ru/lib_path/main.rb +12 -12
- data/tests/mxx_ru/lib_path/tc_lib_path.rb +18 -18
- data/tests/mxx_ru/obj_placements/tc_custom_subdir.rb +58 -58
- data/tests/mxx_ru/opt_lib_ext/build.rb +7 -7
- data/tests/mxx_ru/opt_lib_ext/hi.rb +7 -7
- data/tests/mxx_ru/opt_lib_ext/tc_opt_lib_ext.rb +18 -18
- data/tests/mxx_ru/opt_lib_ext/test-no-ext.rb +9 -9
- data/tests/mxx_ru/opt_lib_ext/test-with-ext.rb +13 -13
- data/tests/mxx_ru/plural_form_methods/tc.rb +72 -72
- data/tests/mxx_ru/qt_gen/tc_uic_result_subdir.rb +76 -76
- data/tests/mxx_ru/target_ext/prj_dll.rb +8 -8
- data/tests/mxx_ru/target_ext/prj_exe.rb +8 -8
- data/tests/mxx_ru/target_ext/prj_lib.rb +8 -8
- data/tests/mxx_ru/target_ext/tc_target_ext.rb +24 -24
- data/tests/mxx_ru/tc_makestyle_generator.rb +117 -117
- data/tests/mxx_ru/vc8/tc_actual_manifest.rb +230 -230
- data/tests/mxx_ru/vc8/tc_append_mt_commands.rb +104 -104
- data/tests/mxx_ru/vc8/tc_default_manifest.rb +17 -17
- data/tests/mxx_ru/vc8/tc_define_manifest.rb +173 -173
- data/tests/mxx_ru/vc8/tc_drop_default_manifest.rb +16 -16
- data/tests/mxx_ru/vc8/tc_invalid_params.rb +81 -81
- data/tests/mxx_ru/vc8/ts_vc8.rb +10 -10
- data/tests/qt/aclock/aclock.cpp +148 -148
- data/tests/qt/aclock/aclock.h +45 -45
- data/tests/qt/aclock/main.cpp +28 -28
- data/tests/qt/aclock/prj.rb +21 -21
- data/tests/qt/iconview/main.cpp +76 -76
- data/tests/qt/iconview/prj.rb +21 -21
- data/tests/qt/toplevel/main.cpp +9 -9
- data/tests/qt/toplevel/options.ui +587 -587
- data/tests/qt/toplevel/prj.rb +22 -22
- data/tests/test_with_compilation.rb +110 -110
- data/tests/unix/lib_linking_mode/a_shared.rb +7 -7
- data/tests/unix/lib_linking_mode/a_static.rb +7 -7
- data/tests/unix/lib_linking_mode/etalon/shared.txt +2 -2
- data/tests/unix/lib_linking_mode/etalon/static.txt +2 -2
- data/tests/unix/lib_linking_mode/main_conflict.rb +12 -12
- data/tests/unix/lib_linking_mode/main_conflict_2.rb +11 -11
- data/tests/unix/lib_linking_mode/main_shared.rb +9 -9
- data/tests/unix/lib_linking_mode/main_shared.ut.rb +11 -11
- data/tests/unix/lib_linking_mode/main_shared_2.rb +10 -10
- data/tests/unix/lib_linking_mode/main_shared_2.ut.rb +11 -11
- data/tests/unix/lib_linking_mode/main_static.rb +9 -9
- data/tests/unix/lib_linking_mode/main_static.ut.rb +11 -11
- data/tests/unix/lib_linking_mode/main_static_2.rb +10 -10
- data/tests/unix/lib_linking_mode/main_static_2.ut.rb +11 -11
- data/tests/unix/lib_linking_mode/tc_conflicted_build.rb +18 -18
- data/tests/unix/lib_linking_mode/tc_normal_build.rb +18 -18
- data/tests/unix/lib_order/a.cpp +4 -0
- data/tests/unix/lib_order/a.hpp +1 -0
- data/tests/unix/lib_order/a.rb +8 -0
- data/tests/unix/lib_order/b.cpp +10 -0
- data/tests/unix/lib_order/b.hpp +1 -0
- data/tests/unix/lib_order/b.rb +8 -0
- data/tests/unix/lib_order/build.rb +9 -0
- data/tests/unix/lib_order/c.cpp +12 -0
- data/tests/unix/lib_order/c.hpp +2 -0
- data/tests/unix/lib_order/c.rb +8 -0
- data/tests/unix/lib_order/d.cpp +7 -0
- data/tests/unix/lib_order/d.hpp +1 -0
- data/tests/unix/lib_order/d.rb +8 -0
- data/tests/unix/lib_order/main.cpp +7 -0
- data/tests/unix/lib_order/main.rb +14 -0
- data/tests/unix/lib_order/tc_normal_build.rb +16 -0
- metadata +342 -319
data/tests/c/pcre/study.c
CHANGED
@@ -1,472 +1,472 @@
|
|
1
|
-
/*************************************************
|
2
|
-
* Perl-Compatible Regular Expressions *
|
3
|
-
*************************************************/
|
4
|
-
|
5
|
-
/*
|
6
|
-
This is a library of functions to support regular expressions whose syntax
|
7
|
-
and semantics are as close as possible to those of the Perl 5 language. See
|
8
|
-
the file Tech.Notes for some information on the internals.
|
9
|
-
|
10
|
-
Written by: Philip Hazel <ph10@cam.ac.uk>
|
11
|
-
|
12
|
-
Copyright (c) 1997-2003 University of Cambridge
|
13
|
-
|
14
|
-
-----------------------------------------------------------------------------
|
15
|
-
Permission is granted to anyone to use this software for any purpose on any
|
16
|
-
computer system, and to redistribute it freely, subject to the following
|
17
|
-
restrictions:
|
18
|
-
|
19
|
-
1. This software is distributed in the hope that it will be useful,
|
20
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
21
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
22
|
-
|
23
|
-
2. The origin of this software must not be misrepresented, either by
|
24
|
-
explicit claim or by omission.
|
25
|
-
|
26
|
-
3. Altered versions must be plainly marked as such, and must not be
|
27
|
-
misrepresented as being the original software.
|
28
|
-
|
29
|
-
4. If PCRE is embedded in any software that is released under the GNU
|
30
|
-
General Purpose Licence (GPL), then the terms of that licence shall
|
31
|
-
supersede any condition above with which it is incompatible.
|
32
|
-
-----------------------------------------------------------------------------
|
33
|
-
*/
|
34
|
-
|
35
|
-
|
36
|
-
/* Include the internals header, which itself includes Standard C headers plus
|
37
|
-
the external pcre header. */
|
38
|
-
|
39
|
-
#include "internal.h"
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
/*************************************************
|
44
|
-
* Set a bit and maybe its alternate case *
|
45
|
-
*************************************************/
|
46
|
-
|
47
|
-
/* Given a character, set its bit in the table, and also the bit for the other
|
48
|
-
version of a letter if we are caseless.
|
49
|
-
|
50
|
-
Arguments:
|
51
|
-
start_bits points to the bit map
|
52
|
-
c is the character
|
53
|
-
caseless the caseless flag
|
54
|
-
cd the block with char table pointers
|
55
|
-
|
56
|
-
Returns: nothing
|
57
|
-
*/
|
58
|
-
|
59
|
-
static void
|
60
|
-
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
|
61
|
-
{
|
62
|
-
start_bits[c/8] |= (1 << (c&7));
|
63
|
-
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
64
|
-
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
65
|
-
}
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
/*************************************************
|
70
|
-
* Create bitmap of starting chars *
|
71
|
-
*************************************************/
|
72
|
-
|
73
|
-
/* This function scans a compiled unanchored expression and attempts to build a
|
74
|
-
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
75
|
-
goes by, we may be able to get more clever at doing this.
|
76
|
-
|
77
|
-
Arguments:
|
78
|
-
code points to an expression
|
79
|
-
start_bits points to a 32-byte table, initialized to 0
|
80
|
-
caseless the current state of the caseless flag
|
81
|
-
utf8 TRUE if in UTF-8 mode
|
82
|
-
cd the block with char table pointers
|
83
|
-
|
84
|
-
Returns: TRUE if table built, FALSE otherwise
|
85
|
-
*/
|
86
|
-
|
87
|
-
static BOOL
|
88
|
-
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
89
|
-
BOOL utf8, compile_data *cd)
|
90
|
-
{
|
91
|
-
register int c;
|
92
|
-
|
93
|
-
/* This next statement and the later reference to dummy are here in order to
|
94
|
-
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
95
|
-
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
96
|
-
disable optimization (in this module it actually makes a big difference, and
|
97
|
-
the pcre module can use all the optimization it can get). */
|
98
|
-
|
99
|
-
volatile int dummy;
|
100
|
-
|
101
|
-
do
|
102
|
-
{
|
103
|
-
const uschar *tcode = code + 1 + LINK_SIZE;
|
104
|
-
BOOL try_next = TRUE;
|
105
|
-
|
106
|
-
while (try_next)
|
107
|
-
{
|
108
|
-
/* If a branch starts with a bracket or a positive lookahead assertion,
|
109
|
-
recurse to set bits from within them. That's all for this branch. */
|
110
|
-
|
111
|
-
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
112
|
-
{
|
113
|
-
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
114
|
-
return FALSE;
|
115
|
-
try_next = FALSE;
|
116
|
-
}
|
117
|
-
|
118
|
-
else switch(*tcode)
|
119
|
-
{
|
120
|
-
default:
|
121
|
-
return FALSE;
|
122
|
-
|
123
|
-
/* Skip over callout */
|
124
|
-
|
125
|
-
case OP_CALLOUT:
|
126
|
-
tcode += 2;
|
127
|
-
break;
|
128
|
-
|
129
|
-
/* Skip over extended extraction bracket number */
|
130
|
-
|
131
|
-
case OP_BRANUMBER:
|
132
|
-
tcode += 3;
|
133
|
-
break;
|
134
|
-
|
135
|
-
/* Skip over lookbehind and negative lookahead assertions */
|
136
|
-
|
137
|
-
case OP_ASSERT_NOT:
|
138
|
-
case OP_ASSERTBACK:
|
139
|
-
case OP_ASSERTBACK_NOT:
|
140
|
-
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
141
|
-
tcode += 1+LINK_SIZE;
|
142
|
-
break;
|
143
|
-
|
144
|
-
/* Skip over an option setting, changing the caseless flag */
|
145
|
-
|
146
|
-
case OP_OPT:
|
147
|
-
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
148
|
-
tcode += 2;
|
149
|
-
break;
|
150
|
-
|
151
|
-
/* BRAZERO does the bracket, but carries on. */
|
152
|
-
|
153
|
-
case OP_BRAZERO:
|
154
|
-
case OP_BRAMINZERO:
|
155
|
-
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
156
|
-
return FALSE;
|
157
|
-
dummy = 1;
|
158
|
-
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
159
|
-
tcode += 1+LINK_SIZE;
|
160
|
-
break;
|
161
|
-
|
162
|
-
/* Single-char * or ? sets the bit and tries the next item */
|
163
|
-
|
164
|
-
case OP_STAR:
|
165
|
-
case OP_MINSTAR:
|
166
|
-
case OP_QUERY:
|
167
|
-
case OP_MINQUERY:
|
168
|
-
set_bit(start_bits, tcode[1], caseless, cd);
|
169
|
-
tcode += 2;
|
170
|
-
#ifdef SUPPORT_UTF8
|
171
|
-
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
172
|
-
#endif
|
173
|
-
break;
|
174
|
-
|
175
|
-
/* Single-char upto sets the bit and tries the next */
|
176
|
-
|
177
|
-
case OP_UPTO:
|
178
|
-
case OP_MINUPTO:
|
179
|
-
set_bit(start_bits, tcode[3], caseless, cd);
|
180
|
-
tcode += 4;
|
181
|
-
#ifdef SUPPORT_UTF8
|
182
|
-
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
183
|
-
#endif
|
184
|
-
break;
|
185
|
-
|
186
|
-
/* At least one single char sets the bit and stops */
|
187
|
-
|
188
|
-
case OP_EXACT: /* Fall through */
|
189
|
-
tcode++;
|
190
|
-
|
191
|
-
case OP_CHARS: /* Fall through */
|
192
|
-
tcode++;
|
193
|
-
|
194
|
-
case OP_PLUS:
|
195
|
-
case OP_MINPLUS:
|
196
|
-
set_bit(start_bits, tcode[1], caseless, cd);
|
197
|
-
try_next = FALSE;
|
198
|
-
break;
|
199
|
-
|
200
|
-
/* Single character type sets the bits and stops */
|
201
|
-
|
202
|
-
case OP_NOT_DIGIT:
|
203
|
-
for (c = 0; c < 32; c++)
|
204
|
-
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
205
|
-
try_next = FALSE;
|
206
|
-
break;
|
207
|
-
|
208
|
-
case OP_DIGIT:
|
209
|
-
for (c = 0; c < 32; c++)
|
210
|
-
start_bits[c] |= cd->cbits[c+cbit_digit];
|
211
|
-
try_next = FALSE;
|
212
|
-
break;
|
213
|
-
|
214
|
-
case OP_NOT_WHITESPACE:
|
215
|
-
for (c = 0; c < 32; c++)
|
216
|
-
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
217
|
-
try_next = FALSE;
|
218
|
-
break;
|
219
|
-
|
220
|
-
case OP_WHITESPACE:
|
221
|
-
for (c = 0; c < 32; c++)
|
222
|
-
start_bits[c] |= cd->cbits[c+cbit_space];
|
223
|
-
try_next = FALSE;
|
224
|
-
break;
|
225
|
-
|
226
|
-
case OP_NOT_WORDCHAR:
|
227
|
-
for (c = 0; c < 32; c++)
|
228
|
-
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
229
|
-
try_next = FALSE;
|
230
|
-
break;
|
231
|
-
|
232
|
-
case OP_WORDCHAR:
|
233
|
-
for (c = 0; c < 32; c++)
|
234
|
-
start_bits[c] |= cd->cbits[c+cbit_word];
|
235
|
-
try_next = FALSE;
|
236
|
-
break;
|
237
|
-
|
238
|
-
/* One or more character type fudges the pointer and restarts, knowing
|
239
|
-
it will hit a single character type and stop there. */
|
240
|
-
|
241
|
-
case OP_TYPEPLUS:
|
242
|
-
case OP_TYPEMINPLUS:
|
243
|
-
tcode++;
|
244
|
-
break;
|
245
|
-
|
246
|
-
case OP_TYPEEXACT:
|
247
|
-
tcode += 3;
|
248
|
-
break;
|
249
|
-
|
250
|
-
/* Zero or more repeats of character types set the bits and then
|
251
|
-
try again. */
|
252
|
-
|
253
|
-
case OP_TYPEUPTO:
|
254
|
-
case OP_TYPEMINUPTO:
|
255
|
-
tcode += 2; /* Fall through */
|
256
|
-
|
257
|
-
case OP_TYPESTAR:
|
258
|
-
case OP_TYPEMINSTAR:
|
259
|
-
case OP_TYPEQUERY:
|
260
|
-
case OP_TYPEMINQUERY:
|
261
|
-
switch(tcode[1])
|
262
|
-
{
|
263
|
-
case OP_ANY:
|
264
|
-
return FALSE;
|
265
|
-
|
266
|
-
case OP_NOT_DIGIT:
|
267
|
-
for (c = 0; c < 32; c++)
|
268
|
-
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
269
|
-
break;
|
270
|
-
|
271
|
-
case OP_DIGIT:
|
272
|
-
for (c = 0; c < 32; c++)
|
273
|
-
start_bits[c] |= cd->cbits[c+cbit_digit];
|
274
|
-
break;
|
275
|
-
|
276
|
-
case OP_NOT_WHITESPACE:
|
277
|
-
for (c = 0; c < 32; c++)
|
278
|
-
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
279
|
-
break;
|
280
|
-
|
281
|
-
case OP_WHITESPACE:
|
282
|
-
for (c = 0; c < 32; c++)
|
283
|
-
start_bits[c] |= cd->cbits[c+cbit_space];
|
284
|
-
break;
|
285
|
-
|
286
|
-
case OP_NOT_WORDCHAR:
|
287
|
-
for (c = 0; c < 32; c++)
|
288
|
-
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
289
|
-
break;
|
290
|
-
|
291
|
-
case OP_WORDCHAR:
|
292
|
-
for (c = 0; c < 32; c++)
|
293
|
-
start_bits[c] |= cd->cbits[c+cbit_word];
|
294
|
-
break;
|
295
|
-
}
|
296
|
-
|
297
|
-
tcode += 2;
|
298
|
-
break;
|
299
|
-
|
300
|
-
/* Character class where all the information is in a bit map: set the
|
301
|
-
bits and either carry on or not, according to the repeat count. If it was
|
302
|
-
a negative class, and we are operating with UTF-8 characters, any byte
|
303
|
-
with a value >= 0xc4 is a potentially valid starter because it starts a
|
304
|
-
character with a value > 255. */
|
305
|
-
|
306
|
-
case OP_NCLASS:
|
307
|
-
if (utf8)
|
308
|
-
{
|
309
|
-
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
310
|
-
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
311
|
-
}
|
312
|
-
/* Fall through */
|
313
|
-
|
314
|
-
case OP_CLASS:
|
315
|
-
{
|
316
|
-
tcode++;
|
317
|
-
|
318
|
-
/* In UTF-8 mode, the bits in a bit map correspond to character
|
319
|
-
values, not to byte values. However, the bit map we are constructing is
|
320
|
-
for byte values. So we have to do a conversion for characters whose
|
321
|
-
value is > 127. In fact, there are only two possible starting bytes for
|
322
|
-
characters in the range 128 - 255. */
|
323
|
-
|
324
|
-
if (utf8)
|
325
|
-
{
|
326
|
-
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
327
|
-
for (c = 128; c < 256; c++)
|
328
|
-
{
|
329
|
-
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
330
|
-
{
|
331
|
-
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
332
|
-
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
333
|
-
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
334
|
-
}
|
335
|
-
}
|
336
|
-
}
|
337
|
-
|
338
|
-
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
339
|
-
|
340
|
-
else
|
341
|
-
{
|
342
|
-
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
343
|
-
}
|
344
|
-
|
345
|
-
/* Advance past the bit map, and act on what follows */
|
346
|
-
|
347
|
-
tcode += 32;
|
348
|
-
switch (*tcode)
|
349
|
-
{
|
350
|
-
case OP_CRSTAR:
|
351
|
-
case OP_CRMINSTAR:
|
352
|
-
case OP_CRQUERY:
|
353
|
-
case OP_CRMINQUERY:
|
354
|
-
tcode++;
|
355
|
-
break;
|
356
|
-
|
357
|
-
case OP_CRRANGE:
|
358
|
-
case OP_CRMINRANGE:
|
359
|
-
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
360
|
-
else try_next = FALSE;
|
361
|
-
break;
|
362
|
-
|
363
|
-
default:
|
364
|
-
try_next = FALSE;
|
365
|
-
break;
|
366
|
-
}
|
367
|
-
}
|
368
|
-
break; /* End of bitmap class handling */
|
369
|
-
|
370
|
-
} /* End of switch */
|
371
|
-
} /* End of try_next loop */
|
372
|
-
|
373
|
-
code += GET(code, 1); /* Advance to next branch */
|
374
|
-
}
|
375
|
-
while (*code == OP_ALT);
|
376
|
-
return TRUE;
|
377
|
-
}
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
/*************************************************
|
382
|
-
* Study a compiled expression *
|
383
|
-
*************************************************/
|
384
|
-
|
385
|
-
/* This function is handed a compiled expression that it must study to produce
|
386
|
-
information that will speed up the matching. It returns a pcre_extra block
|
387
|
-
which then gets handed back to pcre_exec().
|
388
|
-
|
389
|
-
Arguments:
|
390
|
-
re points to the compiled expression
|
391
|
-
options contains option bits
|
392
|
-
errorptr points to where to place error messages;
|
393
|
-
set NULL unless error
|
394
|
-
|
395
|
-
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
396
|
-
appropriate flag set;
|
397
|
-
NULL on error or if no optimization possible
|
398
|
-
*/
|
399
|
-
|
400
|
-
EXPORT pcre_extra *
|
401
|
-
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
402
|
-
{
|
403
|
-
uschar start_bits[32];
|
404
|
-
pcre_extra *extra;
|
405
|
-
pcre_study_data *study;
|
406
|
-
const real_pcre *re = (const real_pcre *)external_re;
|
407
|
-
uschar *code = (uschar *)re + sizeof(real_pcre) +
|
408
|
-
(re->name_count * re->name_entry_size);
|
409
|
-
compile_data compile_block;
|
410
|
-
|
411
|
-
*errorptr = NULL;
|
412
|
-
|
413
|
-
if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
414
|
-
{
|
415
|
-
*errorptr = "argument is not a compiled regular expression";
|
416
|
-
return NULL;
|
417
|
-
}
|
418
|
-
|
419
|
-
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
420
|
-
{
|
421
|
-
*errorptr = "unknown or incorrect option bit(s) set";
|
422
|
-
return NULL;
|
423
|
-
}
|
424
|
-
|
425
|
-
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
426
|
-
a multiline pattern that matches only at "line starts", no further processing
|
427
|
-
at present. */
|
428
|
-
|
429
|
-
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
430
|
-
return NULL;
|
431
|
-
|
432
|
-
/* Set the character tables in the block which is passed around */
|
433
|
-
|
434
|
-
compile_block.lcc = re->tables + lcc_offset;
|
435
|
-
compile_block.fcc = re->tables + fcc_offset;
|
436
|
-
compile_block.cbits = re->tables + cbits_offset;
|
437
|
-
compile_block.ctypes = re->tables + ctypes_offset;
|
438
|
-
|
439
|
-
/* See if we can find a fixed set of initial characters for the pattern. */
|
440
|
-
|
441
|
-
memset(start_bits, 0, 32 * sizeof(uschar));
|
442
|
-
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
443
|
-
(re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
|
444
|
-
|
445
|
-
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
446
|
-
the latter, which is pointed to by the former, which may also get additional
|
447
|
-
data set later by the calling program. At the moment, the size of
|
448
|
-
pcre_study_data is fixed. We nevertheless save it in a field for returning via
|
449
|
-
the pcre_fullinfo() function so that if it becomes variable in the future, we
|
450
|
-
don't have to change that code. */
|
451
|
-
|
452
|
-
extra = (pcre_extra *)(pcre_malloc)
|
453
|
-
(sizeof(pcre_extra) + sizeof(pcre_study_data));
|
454
|
-
|
455
|
-
if (extra == NULL)
|
456
|
-
{
|
457
|
-
*errorptr = "failed to get memory";
|
458
|
-
return NULL;
|
459
|
-
}
|
460
|
-
|
461
|
-
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
|
462
|
-
extra->flags = PCRE_EXTRA_STUDY_DATA;
|
463
|
-
extra->study_data = study;
|
464
|
-
|
465
|
-
study->size = sizeof(pcre_study_data);
|
466
|
-
study->options = PCRE_STUDY_MAPPED;
|
467
|
-
memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
468
|
-
|
469
|
-
return extra;
|
470
|
-
}
|
471
|
-
|
472
|
-
/* End of study.c */
|
1
|
+
/*************************************************
|
2
|
+
* Perl-Compatible Regular Expressions *
|
3
|
+
*************************************************/
|
4
|
+
|
5
|
+
/*
|
6
|
+
This is a library of functions to support regular expressions whose syntax
|
7
|
+
and semantics are as close as possible to those of the Perl 5 language. See
|
8
|
+
the file Tech.Notes for some information on the internals.
|
9
|
+
|
10
|
+
Written by: Philip Hazel <ph10@cam.ac.uk>
|
11
|
+
|
12
|
+
Copyright (c) 1997-2003 University of Cambridge
|
13
|
+
|
14
|
+
-----------------------------------------------------------------------------
|
15
|
+
Permission is granted to anyone to use this software for any purpose on any
|
16
|
+
computer system, and to redistribute it freely, subject to the following
|
17
|
+
restrictions:
|
18
|
+
|
19
|
+
1. This software is distributed in the hope that it will be useful,
|
20
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
21
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
22
|
+
|
23
|
+
2. The origin of this software must not be misrepresented, either by
|
24
|
+
explicit claim or by omission.
|
25
|
+
|
26
|
+
3. Altered versions must be plainly marked as such, and must not be
|
27
|
+
misrepresented as being the original software.
|
28
|
+
|
29
|
+
4. If PCRE is embedded in any software that is released under the GNU
|
30
|
+
General Purpose Licence (GPL), then the terms of that licence shall
|
31
|
+
supersede any condition above with which it is incompatible.
|
32
|
+
-----------------------------------------------------------------------------
|
33
|
+
*/
|
34
|
+
|
35
|
+
|
36
|
+
/* Include the internals header, which itself includes Standard C headers plus
|
37
|
+
the external pcre header. */
|
38
|
+
|
39
|
+
#include "internal.h"
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
/*************************************************
|
44
|
+
* Set a bit and maybe its alternate case *
|
45
|
+
*************************************************/
|
46
|
+
|
47
|
+
/* Given a character, set its bit in the table, and also the bit for the other
|
48
|
+
version of a letter if we are caseless.
|
49
|
+
|
50
|
+
Arguments:
|
51
|
+
start_bits points to the bit map
|
52
|
+
c is the character
|
53
|
+
caseless the caseless flag
|
54
|
+
cd the block with char table pointers
|
55
|
+
|
56
|
+
Returns: nothing
|
57
|
+
*/
|
58
|
+
|
59
|
+
static void
|
60
|
+
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
|
61
|
+
{
|
62
|
+
start_bits[c/8] |= (1 << (c&7));
|
63
|
+
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
64
|
+
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
65
|
+
}
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
/*************************************************
|
70
|
+
* Create bitmap of starting chars *
|
71
|
+
*************************************************/
|
72
|
+
|
73
|
+
/* This function scans a compiled unanchored expression and attempts to build a
|
74
|
+
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
75
|
+
goes by, we may be able to get more clever at doing this.
|
76
|
+
|
77
|
+
Arguments:
|
78
|
+
code points to an expression
|
79
|
+
start_bits points to a 32-byte table, initialized to 0
|
80
|
+
caseless the current state of the caseless flag
|
81
|
+
utf8 TRUE if in UTF-8 mode
|
82
|
+
cd the block with char table pointers
|
83
|
+
|
84
|
+
Returns: TRUE if table built, FALSE otherwise
|
85
|
+
*/
|
86
|
+
|
87
|
+
static BOOL
|
88
|
+
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
89
|
+
BOOL utf8, compile_data *cd)
|
90
|
+
{
|
91
|
+
register int c;
|
92
|
+
|
93
|
+
/* This next statement and the later reference to dummy are here in order to
|
94
|
+
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
95
|
+
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
96
|
+
disable optimization (in this module it actually makes a big difference, and
|
97
|
+
the pcre module can use all the optimization it can get). */
|
98
|
+
|
99
|
+
volatile int dummy;
|
100
|
+
|
101
|
+
do
|
102
|
+
{
|
103
|
+
const uschar *tcode = code + 1 + LINK_SIZE;
|
104
|
+
BOOL try_next = TRUE;
|
105
|
+
|
106
|
+
while (try_next)
|
107
|
+
{
|
108
|
+
/* If a branch starts with a bracket or a positive lookahead assertion,
|
109
|
+
recurse to set bits from within them. That's all for this branch. */
|
110
|
+
|
111
|
+
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
112
|
+
{
|
113
|
+
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
114
|
+
return FALSE;
|
115
|
+
try_next = FALSE;
|
116
|
+
}
|
117
|
+
|
118
|
+
else switch(*tcode)
|
119
|
+
{
|
120
|
+
default:
|
121
|
+
return FALSE;
|
122
|
+
|
123
|
+
/* Skip over callout */
|
124
|
+
|
125
|
+
case OP_CALLOUT:
|
126
|
+
tcode += 2;
|
127
|
+
break;
|
128
|
+
|
129
|
+
/* Skip over extended extraction bracket number */
|
130
|
+
|
131
|
+
case OP_BRANUMBER:
|
132
|
+
tcode += 3;
|
133
|
+
break;
|
134
|
+
|
135
|
+
/* Skip over lookbehind and negative lookahead assertions */
|
136
|
+
|
137
|
+
case OP_ASSERT_NOT:
|
138
|
+
case OP_ASSERTBACK:
|
139
|
+
case OP_ASSERTBACK_NOT:
|
140
|
+
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
141
|
+
tcode += 1+LINK_SIZE;
|
142
|
+
break;
|
143
|
+
|
144
|
+
/* Skip over an option setting, changing the caseless flag */
|
145
|
+
|
146
|
+
case OP_OPT:
|
147
|
+
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
148
|
+
tcode += 2;
|
149
|
+
break;
|
150
|
+
|
151
|
+
/* BRAZERO does the bracket, but carries on. */
|
152
|
+
|
153
|
+
case OP_BRAZERO:
|
154
|
+
case OP_BRAMINZERO:
|
155
|
+
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
156
|
+
return FALSE;
|
157
|
+
dummy = 1;
|
158
|
+
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
159
|
+
tcode += 1+LINK_SIZE;
|
160
|
+
break;
|
161
|
+
|
162
|
+
/* Single-char * or ? sets the bit and tries the next item */
|
163
|
+
|
164
|
+
case OP_STAR:
|
165
|
+
case OP_MINSTAR:
|
166
|
+
case OP_QUERY:
|
167
|
+
case OP_MINQUERY:
|
168
|
+
set_bit(start_bits, tcode[1], caseless, cd);
|
169
|
+
tcode += 2;
|
170
|
+
#ifdef SUPPORT_UTF8
|
171
|
+
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
172
|
+
#endif
|
173
|
+
break;
|
174
|
+
|
175
|
+
/* Single-char upto sets the bit and tries the next */
|
176
|
+
|
177
|
+
case OP_UPTO:
|
178
|
+
case OP_MINUPTO:
|
179
|
+
set_bit(start_bits, tcode[3], caseless, cd);
|
180
|
+
tcode += 4;
|
181
|
+
#ifdef SUPPORT_UTF8
|
182
|
+
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
183
|
+
#endif
|
184
|
+
break;
|
185
|
+
|
186
|
+
/* At least one single char sets the bit and stops */
|
187
|
+
|
188
|
+
case OP_EXACT: /* Fall through */
|
189
|
+
tcode++;
|
190
|
+
|
191
|
+
case OP_CHARS: /* Fall through */
|
192
|
+
tcode++;
|
193
|
+
|
194
|
+
case OP_PLUS:
|
195
|
+
case OP_MINPLUS:
|
196
|
+
set_bit(start_bits, tcode[1], caseless, cd);
|
197
|
+
try_next = FALSE;
|
198
|
+
break;
|
199
|
+
|
200
|
+
/* Single character type sets the bits and stops */
|
201
|
+
|
202
|
+
case OP_NOT_DIGIT:
|
203
|
+
for (c = 0; c < 32; c++)
|
204
|
+
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
205
|
+
try_next = FALSE;
|
206
|
+
break;
|
207
|
+
|
208
|
+
case OP_DIGIT:
|
209
|
+
for (c = 0; c < 32; c++)
|
210
|
+
start_bits[c] |= cd->cbits[c+cbit_digit];
|
211
|
+
try_next = FALSE;
|
212
|
+
break;
|
213
|
+
|
214
|
+
case OP_NOT_WHITESPACE:
|
215
|
+
for (c = 0; c < 32; c++)
|
216
|
+
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
217
|
+
try_next = FALSE;
|
218
|
+
break;
|
219
|
+
|
220
|
+
case OP_WHITESPACE:
|
221
|
+
for (c = 0; c < 32; c++)
|
222
|
+
start_bits[c] |= cd->cbits[c+cbit_space];
|
223
|
+
try_next = FALSE;
|
224
|
+
break;
|
225
|
+
|
226
|
+
case OP_NOT_WORDCHAR:
|
227
|
+
for (c = 0; c < 32; c++)
|
228
|
+
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
229
|
+
try_next = FALSE;
|
230
|
+
break;
|
231
|
+
|
232
|
+
case OP_WORDCHAR:
|
233
|
+
for (c = 0; c < 32; c++)
|
234
|
+
start_bits[c] |= cd->cbits[c+cbit_word];
|
235
|
+
try_next = FALSE;
|
236
|
+
break;
|
237
|
+
|
238
|
+
/* One or more character type fudges the pointer and restarts, knowing
|
239
|
+
it will hit a single character type and stop there. */
|
240
|
+
|
241
|
+
case OP_TYPEPLUS:
|
242
|
+
case OP_TYPEMINPLUS:
|
243
|
+
tcode++;
|
244
|
+
break;
|
245
|
+
|
246
|
+
case OP_TYPEEXACT:
|
247
|
+
tcode += 3;
|
248
|
+
break;
|
249
|
+
|
250
|
+
/* Zero or more repeats of character types set the bits and then
|
251
|
+
try again. */
|
252
|
+
|
253
|
+
case OP_TYPEUPTO:
|
254
|
+
case OP_TYPEMINUPTO:
|
255
|
+
tcode += 2; /* Fall through */
|
256
|
+
|
257
|
+
case OP_TYPESTAR:
|
258
|
+
case OP_TYPEMINSTAR:
|
259
|
+
case OP_TYPEQUERY:
|
260
|
+
case OP_TYPEMINQUERY:
|
261
|
+
switch(tcode[1])
|
262
|
+
{
|
263
|
+
case OP_ANY:
|
264
|
+
return FALSE;
|
265
|
+
|
266
|
+
case OP_NOT_DIGIT:
|
267
|
+
for (c = 0; c < 32; c++)
|
268
|
+
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
269
|
+
break;
|
270
|
+
|
271
|
+
case OP_DIGIT:
|
272
|
+
for (c = 0; c < 32; c++)
|
273
|
+
start_bits[c] |= cd->cbits[c+cbit_digit];
|
274
|
+
break;
|
275
|
+
|
276
|
+
case OP_NOT_WHITESPACE:
|
277
|
+
for (c = 0; c < 32; c++)
|
278
|
+
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
279
|
+
break;
|
280
|
+
|
281
|
+
case OP_WHITESPACE:
|
282
|
+
for (c = 0; c < 32; c++)
|
283
|
+
start_bits[c] |= cd->cbits[c+cbit_space];
|
284
|
+
break;
|
285
|
+
|
286
|
+
case OP_NOT_WORDCHAR:
|
287
|
+
for (c = 0; c < 32; c++)
|
288
|
+
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
289
|
+
break;
|
290
|
+
|
291
|
+
case OP_WORDCHAR:
|
292
|
+
for (c = 0; c < 32; c++)
|
293
|
+
start_bits[c] |= cd->cbits[c+cbit_word];
|
294
|
+
break;
|
295
|
+
}
|
296
|
+
|
297
|
+
tcode += 2;
|
298
|
+
break;
|
299
|
+
|
300
|
+
/* Character class where all the information is in a bit map: set the
|
301
|
+
bits and either carry on or not, according to the repeat count. If it was
|
302
|
+
a negative class, and we are operating with UTF-8 characters, any byte
|
303
|
+
with a value >= 0xc4 is a potentially valid starter because it starts a
|
304
|
+
character with a value > 255. */
|
305
|
+
|
306
|
+
case OP_NCLASS:
|
307
|
+
if (utf8)
|
308
|
+
{
|
309
|
+
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
310
|
+
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
311
|
+
}
|
312
|
+
/* Fall through */
|
313
|
+
|
314
|
+
case OP_CLASS:
|
315
|
+
{
|
316
|
+
tcode++;
|
317
|
+
|
318
|
+
/* In UTF-8 mode, the bits in a bit map correspond to character
|
319
|
+
values, not to byte values. However, the bit map we are constructing is
|
320
|
+
for byte values. So we have to do a conversion for characters whose
|
321
|
+
value is > 127. In fact, there are only two possible starting bytes for
|
322
|
+
characters in the range 128 - 255. */
|
323
|
+
|
324
|
+
if (utf8)
|
325
|
+
{
|
326
|
+
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
327
|
+
for (c = 128; c < 256; c++)
|
328
|
+
{
|
329
|
+
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
330
|
+
{
|
331
|
+
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
332
|
+
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
333
|
+
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
334
|
+
}
|
335
|
+
}
|
336
|
+
}
|
337
|
+
|
338
|
+
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
339
|
+
|
340
|
+
else
|
341
|
+
{
|
342
|
+
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
343
|
+
}
|
344
|
+
|
345
|
+
/* Advance past the bit map, and act on what follows */
|
346
|
+
|
347
|
+
tcode += 32;
|
348
|
+
switch (*tcode)
|
349
|
+
{
|
350
|
+
case OP_CRSTAR:
|
351
|
+
case OP_CRMINSTAR:
|
352
|
+
case OP_CRQUERY:
|
353
|
+
case OP_CRMINQUERY:
|
354
|
+
tcode++;
|
355
|
+
break;
|
356
|
+
|
357
|
+
case OP_CRRANGE:
|
358
|
+
case OP_CRMINRANGE:
|
359
|
+
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
360
|
+
else try_next = FALSE;
|
361
|
+
break;
|
362
|
+
|
363
|
+
default:
|
364
|
+
try_next = FALSE;
|
365
|
+
break;
|
366
|
+
}
|
367
|
+
}
|
368
|
+
break; /* End of bitmap class handling */
|
369
|
+
|
370
|
+
} /* End of switch */
|
371
|
+
} /* End of try_next loop */
|
372
|
+
|
373
|
+
code += GET(code, 1); /* Advance to next branch */
|
374
|
+
}
|
375
|
+
while (*code == OP_ALT);
|
376
|
+
return TRUE;
|
377
|
+
}
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
/*************************************************
|
382
|
+
* Study a compiled expression *
|
383
|
+
*************************************************/
|
384
|
+
|
385
|
+
/* This function is handed a compiled expression that it must study to produce
|
386
|
+
information that will speed up the matching. It returns a pcre_extra block
|
387
|
+
which then gets handed back to pcre_exec().
|
388
|
+
|
389
|
+
Arguments:
|
390
|
+
re points to the compiled expression
|
391
|
+
options contains option bits
|
392
|
+
errorptr points to where to place error messages;
|
393
|
+
set NULL unless error
|
394
|
+
|
395
|
+
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
396
|
+
appropriate flag set;
|
397
|
+
NULL on error or if no optimization possible
|
398
|
+
*/
|
399
|
+
|
400
|
+
EXPORT pcre_extra *
|
401
|
+
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
402
|
+
{
|
403
|
+
uschar start_bits[32];
|
404
|
+
pcre_extra *extra;
|
405
|
+
pcre_study_data *study;
|
406
|
+
const real_pcre *re = (const real_pcre *)external_re;
|
407
|
+
uschar *code = (uschar *)re + sizeof(real_pcre) +
|
408
|
+
(re->name_count * re->name_entry_size);
|
409
|
+
compile_data compile_block;
|
410
|
+
|
411
|
+
*errorptr = NULL;
|
412
|
+
|
413
|
+
if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
414
|
+
{
|
415
|
+
*errorptr = "argument is not a compiled regular expression";
|
416
|
+
return NULL;
|
417
|
+
}
|
418
|
+
|
419
|
+
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
420
|
+
{
|
421
|
+
*errorptr = "unknown or incorrect option bit(s) set";
|
422
|
+
return NULL;
|
423
|
+
}
|
424
|
+
|
425
|
+
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
426
|
+
a multiline pattern that matches only at "line starts", no further processing
|
427
|
+
at present. */
|
428
|
+
|
429
|
+
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
430
|
+
return NULL;
|
431
|
+
|
432
|
+
/* Set the character tables in the block which is passed around */
|
433
|
+
|
434
|
+
compile_block.lcc = re->tables + lcc_offset;
|
435
|
+
compile_block.fcc = re->tables + fcc_offset;
|
436
|
+
compile_block.cbits = re->tables + cbits_offset;
|
437
|
+
compile_block.ctypes = re->tables + ctypes_offset;
|
438
|
+
|
439
|
+
/* See if we can find a fixed set of initial characters for the pattern. */
|
440
|
+
|
441
|
+
memset(start_bits, 0, 32 * sizeof(uschar));
|
442
|
+
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
443
|
+
(re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
|
444
|
+
|
445
|
+
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
446
|
+
the latter, which is pointed to by the former, which may also get additional
|
447
|
+
data set later by the calling program. At the moment, the size of
|
448
|
+
pcre_study_data is fixed. We nevertheless save it in a field for returning via
|
449
|
+
the pcre_fullinfo() function so that if it becomes variable in the future, we
|
450
|
+
don't have to change that code. */
|
451
|
+
|
452
|
+
extra = (pcre_extra *)(pcre_malloc)
|
453
|
+
(sizeof(pcre_extra) + sizeof(pcre_study_data));
|
454
|
+
|
455
|
+
if (extra == NULL)
|
456
|
+
{
|
457
|
+
*errorptr = "failed to get memory";
|
458
|
+
return NULL;
|
459
|
+
}
|
460
|
+
|
461
|
+
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
|
462
|
+
extra->flags = PCRE_EXTRA_STUDY_DATA;
|
463
|
+
extra->study_data = study;
|
464
|
+
|
465
|
+
study->size = sizeof(pcre_study_data);
|
466
|
+
study->options = PCRE_STUDY_MAPPED;
|
467
|
+
memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
468
|
+
|
469
|
+
return extra;
|
470
|
+
}
|
471
|
+
|
472
|
+
/* End of study.c */
|