Mxx_ru 1.4.6 → 1.4.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,211 +0,0 @@
1
- #! /usr/bin/perl
2
-
3
- # Program for testing regular expressions with perl to check that PCRE handles
4
- # them the same. This is the version that supports /8 for UTF-8 testing. As it
5
- # stands, it requires at least Perl 5.8 for UTF-8 support. For Perl 5.6, it
6
- # can be used as is for non-UTF-8 testing, but you have to uncomment the
7
- # "use utf8" lines in order to do UTF-8 stuff (and you mustn't uncomment them
8
- # for non-UTF-8 use).
9
-
10
-
11
- # Function for turning a string into a string of printing chars. There are
12
- # currently problems with UTF-8 strings; this fudges round them.
13
-
14
- sub pchars {
15
- my($t) = "";
16
-
17
- if ($utf8)
18
- {
19
- # use utf8; <=============== For UTF-8 in Perl 5.6
20
- @p = unpack('U*', $_[0]);
21
- foreach $c (@p)
22
- {
23
- if ($c >= 32 && $c < 127) { $t .= chr $c; }
24
- else { $t .= sprintf("\\x{%02x}", $c); }
25
- }
26
- }
27
-
28
- else
29
- {
30
- foreach $c (split(//, $_[0]))
31
- {
32
- if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
33
- else { $t .= sprintf("\\x%02x", ord $c); }
34
- }
35
- }
36
-
37
- $t;
38
- }
39
-
40
-
41
-
42
- # Read lines from named file or stdin and write to named file or stdout; lines
43
- # consist of a regular expression, in delimiters and optionally followed by
44
- # options, followed by a set of test data, terminated by an empty line.
45
-
46
- # Sort out the input and output files
47
-
48
- if (@ARGV > 0)
49
- {
50
- open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
51
- $infile = "INFILE";
52
- }
53
- else { $infile = "STDIN"; }
54
-
55
- if (@ARGV > 1)
56
- {
57
- open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
58
- $outfile = "OUTFILE";
59
- }
60
- else { $outfile = "STDOUT"; }
61
-
62
- printf($outfile "Perl $] Regular Expressions\n\n");
63
-
64
- # Main loop
65
-
66
- NEXT_RE:
67
- for (;;)
68
- {
69
- printf " re> " if $infile eq "STDIN";
70
- last if ! ($_ = <$infile>);
71
- printf $outfile "$_" if $infile ne "STDIN";
72
- next if ($_ eq "");
73
-
74
- $pattern = $_;
75
-
76
- while ($pattern !~ /^\s*(.).*\1/s)
77
- {
78
- printf " > " if $infile eq "STDIN";
79
- last if ! ($_ = <$infile>);
80
- printf $outfile "$_" if $infile ne "STDIN";
81
- $pattern .= $_;
82
- }
83
-
84
- chomp($pattern);
85
- $pattern =~ s/\s+$//;
86
-
87
- # The private /+ modifier means "print $' afterwards".
88
-
89
- $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
90
-
91
- # The private /8 modifier means "operate in UTF-8". Currently, Perl
92
- # has bugs that we try to work around using this flag.
93
-
94
- $utf8 = ($pattern =~ s/8(?=[a-z]*$)//);
95
-
96
- # Check that the pattern is valid
97
-
98
- if ($utf8)
99
- {
100
- # use utf8; <=============== For UTF-8 in Perl 5.6
101
- eval "\$_ =~ ${pattern}";
102
- }
103
- else
104
- {
105
- eval "\$_ =~ ${pattern}";
106
- }
107
-
108
- if ($@)
109
- {
110
- printf $outfile "Error: $@";
111
- next NEXT_RE;
112
- }
113
-
114
- # If the /g modifier is present, we want to put a loop round the matching;
115
- # otherwise just a single "if".
116
-
117
- $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
118
-
119
- # If the pattern is actually the null string, Perl uses the most recently
120
- # executed (and successfully compiled) regex instead. This is a
121
- # nasty trap for the unwary! The PCRE test suite does contain null strings
122
- # in places - if they are allowed through here all sorts of weird and
123
- # unexpected effects happen. To avoid this, we replace such patterns with
124
- # a non-null pattern that has the same effect.
125
-
126
- $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
127
-
128
- # Read data lines and test them
129
-
130
- for (;;)
131
- {
132
- printf "data> " if $infile eq "STDIN";
133
- last NEXT_RE if ! ($_ = <$infile>);
134
- chomp;
135
- printf $outfile "$_\n" if $infile ne "STDIN";
136
-
137
- s/\s+$//;
138
- s/^\s+//;
139
-
140
- last if ($_ eq "");
141
-
142
- $x = eval "\"$_\""; # To get escapes processed
143
-
144
- # Empty array for holding results, then do the matching.
145
-
146
- @subs = ();
147
-
148
- $pushes = "push \@subs,\$&;" .
149
- "push \@subs,\$1;" .
150
- "push \@subs,\$2;" .
151
- "push \@subs,\$3;" .
152
- "push \@subs,\$4;" .
153
- "push \@subs,\$5;" .
154
- "push \@subs,\$6;" .
155
- "push \@subs,\$7;" .
156
- "push \@subs,\$8;" .
157
- "push \@subs,\$9;" .
158
- "push \@subs,\$10;" .
159
- "push \@subs,\$11;" .
160
- "push \@subs,\$12;" .
161
- "push \@subs,\$13;" .
162
- "push \@subs,\$14;" .
163
- "push \@subs,\$15;" .
164
- "push \@subs,\$16;" .
165
- "push \@subs,\$'; }";
166
-
167
- if ($utf8)
168
- {
169
- # use utf8; <=============== For UTF-8 in Perl 5.6
170
- eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
171
- }
172
- else
173
- {
174
- eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
175
- }
176
-
177
- if ($@)
178
- {
179
- printf $outfile "Error: $@\n";
180
- next NEXT_RE;
181
- }
182
- elsif (scalar(@subs) == 0)
183
- {
184
- printf $outfile "No match\n";
185
- }
186
- else
187
- {
188
- while (scalar(@subs) != 0)
189
- {
190
- printf $outfile (" 0: %s\n", &pchars($subs[0]));
191
- printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
192
- $last_printed = 0;
193
- for ($i = 1; $i <= 16; $i++)
194
- {
195
- if (defined $subs[$i])
196
- {
197
- while ($last_printed++ < $i-1)
198
- { printf $outfile ("%2d: <unset>\n", $last_printed); }
199
- printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
200
- $last_printed = $i;
201
- }
202
- }
203
- splice(@subs, 0, 18);
204
- }
205
- }
206
- }
207
- }
208
-
209
- printf $outfile "\n";
210
-
211
- # End
@@ -1,360 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /*
6
- This is a library of functions to support regular expressions whose syntax
7
- and semantics are as close as possible to those of the Perl 5 language. See
8
- the file Tech.Notes for some information on the internals.
9
-
10
- Written by: Philip Hazel <ph10@cam.ac.uk>
11
-
12
- Copyright (c) 1997-2003 University of Cambridge
13
-
14
- -----------------------------------------------------------------------------
15
- Permission is granted to anyone to use this software for any purpose on any
16
- computer system, and to redistribute it freely, subject to the following
17
- restrictions:
18
-
19
- 1. This software is distributed in the hope that it will be useful,
20
- but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22
-
23
- 2. The origin of this software must not be misrepresented, either by
24
- explicit claim or by omission.
25
-
26
- 3. Altered versions must be plainly marked as such, and must not be
27
- misrepresented as being the original software.
28
-
29
- 4. If PCRE is embedded in any software that is released under the GNU
30
- General Purpose Licence (GPL), then the terms of that licence shall
31
- supersede any condition above with which it is incompatible.
32
- -----------------------------------------------------------------------------
33
- */
34
-
35
-
36
- /* This module contains a debugging function for printing out the internal form
37
- of a compiled regular expression. It is kept in a separate file so that it can
38
- be #included both in the pcretest program, and in the library itself when
39
- compiled with the debugging switch. */
40
-
41
-
42
- static const char *OP_names[] = { OP_NAME_LIST };
43
-
44
-
45
- /*************************************************
46
- * Print single- or multi-byte character *
47
- *************************************************/
48
-
49
- /* These tables are actually copies of ones in pcre.c. If we compile the
50
- library with debugging, they are included twice, but that isn't really a
51
- problem - compiling with debugging is pretty rare and these are very small. */
52
-
53
- static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
54
-
55
- static const uschar utf8_t4[] = {
56
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
59
- 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
60
-
61
- static int
62
- print_char(FILE *f, uschar *ptr, BOOL utf8)
63
- {
64
- int c = *ptr;
65
-
66
- if (!utf8 || (c & 0xc0) != 0xc0)
67
- {
68
- if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
69
- return 0;
70
- }
71
- else
72
- {
73
- int i;
74
- int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
75
- int s = 6*a;
76
- c = (c & utf8_t3[a]) << s;
77
- for (i = 1; i <= a; i++)
78
- {
79
- s -= 6;
80
- c |= (ptr[i] & 0x3f) << s;
81
- }
82
- if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
83
- return a;
84
- }
85
- }
86
-
87
-
88
-
89
-
90
- /*************************************************
91
- * Print compiled regex *
92
- *************************************************/
93
-
94
- static void
95
- print_internals(pcre *external_re, FILE *f)
96
- {
97
- real_pcre *re = (real_pcre *)external_re;
98
- uschar *codestart =
99
- (uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size;
100
- uschar *code = codestart;
101
- BOOL utf8 = (re->options & PCRE_UTF8) != 0;
102
-
103
- for(;;)
104
- {
105
- uschar *ccode;
106
- int c;
107
- int extra = 0;
108
-
109
- fprintf(f, "%3d ", code - codestart);
110
-
111
- if (*code >= OP_BRA)
112
- {
113
- if (*code - OP_BRA > EXTRACT_BASIC_MAX)
114
- fprintf(f, "%3d Bra extra\n", GET(code, 1));
115
- else
116
- fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
117
- code += OP_lengths[OP_BRA];
118
- continue;
119
- }
120
-
121
- switch(*code)
122
- {
123
- case OP_END:
124
- fprintf(f, " %s\n", OP_names[*code]);
125
- fprintf(f, "------------------------------------------------------------------\n");
126
- return;
127
-
128
- case OP_OPT:
129
- fprintf(f, " %.2x %s", code[1], OP_names[*code]);
130
- break;
131
-
132
- case OP_CHARS:
133
- {
134
- int charlength = code[1];
135
- ccode = code + 2;
136
- extra = charlength;
137
- fprintf(f, "%3d ", charlength);
138
- while (charlength > 0)
139
- {
140
- int extrabytes = print_char(f, ccode, utf8);
141
- ccode += 1 + extrabytes;
142
- charlength -= 1 + extrabytes;
143
- }
144
- }
145
- break;
146
-
147
- case OP_KETRMAX:
148
- case OP_KETRMIN:
149
- case OP_ALT:
150
- case OP_KET:
151
- case OP_ASSERT:
152
- case OP_ASSERT_NOT:
153
- case OP_ASSERTBACK:
154
- case OP_ASSERTBACK_NOT:
155
- case OP_ONCE:
156
- case OP_COND:
157
- case OP_REVERSE:
158
- fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
159
- break;
160
-
161
- case OP_BRANUMBER:
162
- printf("%3d %s", GET2(code, 1), OP_names[*code]);
163
- break;
164
-
165
- case OP_CREF:
166
- if (GET2(code, 1) == CREF_RECURSE)
167
- fprintf(f, " Cond recurse");
168
- else
169
- fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
170
- break;
171
-
172
- case OP_STAR:
173
- case OP_MINSTAR:
174
- case OP_PLUS:
175
- case OP_MINPLUS:
176
- case OP_QUERY:
177
- case OP_MINQUERY:
178
- case OP_TYPESTAR:
179
- case OP_TYPEMINSTAR:
180
- case OP_TYPEPLUS:
181
- case OP_TYPEMINPLUS:
182
- case OP_TYPEQUERY:
183
- case OP_TYPEMINQUERY:
184
- fprintf(f, " ");
185
- if (*code >= OP_TYPESTAR) fprintf(f, "%s", OP_names[code[1]]);
186
- else extra = print_char(f, code+1, utf8);
187
- fprintf(f, "%s", OP_names[*code]);
188
- break;
189
-
190
- case OP_EXACT:
191
- case OP_UPTO:
192
- case OP_MINUPTO:
193
- fprintf(f, " ");
194
- extra = print_char(f, code+3, utf8);
195
- fprintf(f, "{");
196
- if (*code != OP_EXACT) fprintf(f, ",");
197
- fprintf(f, "%d}", GET2(code,1));
198
- if (*code == OP_MINUPTO) fprintf(f, "?");
199
- break;
200
-
201
- case OP_TYPEEXACT:
202
- case OP_TYPEUPTO:
203
- case OP_TYPEMINUPTO:
204
- fprintf(f, " %s{", OP_names[code[3]]);
205
- if (*code != OP_TYPEEXACT) fprintf(f, "0,");
206
- fprintf(f, "%d}", GET2(code,1));
207
- if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
208
- break;
209
-
210
- case OP_NOT:
211
- if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
212
- else fprintf(f, " [^\\x%02x]", c);
213
- break;
214
-
215
- case OP_NOTSTAR:
216
- case OP_NOTMINSTAR:
217
- case OP_NOTPLUS:
218
- case OP_NOTMINPLUS:
219
- case OP_NOTQUERY:
220
- case OP_NOTMINQUERY:
221
- if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
222
- else fprintf(f, " [^\\x%02x]", c);
223
- fprintf(f, "%s", OP_names[*code]);
224
- break;
225
-
226
- case OP_NOTEXACT:
227
- case OP_NOTUPTO:
228
- case OP_NOTMINUPTO:
229
- if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
230
- else fprintf(f, " [^\\x%02x]{", c);
231
- if (*code != OP_NOTEXACT) fprintf(f, ",");
232
- fprintf(f, "%d}", GET2(code,1));
233
- if (*code == OP_NOTMINUPTO) fprintf(f, "?");
234
- break;
235
-
236
- case OP_RECURSE:
237
- fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
238
- break;
239
-
240
- case OP_REF:
241
- fprintf(f, " \\%d", GET2(code,1));
242
- ccode = code + OP_lengths[*code];
243
- goto CLASS_REF_REPEAT;
244
-
245
- case OP_CALLOUT:
246
- fprintf(f, " %s %d", OP_names[*code], code[1]);
247
- break;
248
-
249
- /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
250
- having this code always here, and it makes it less messy without all those
251
- #ifdefs. */
252
-
253
- case OP_CLASS:
254
- case OP_NCLASS:
255
- case OP_XCLASS:
256
- {
257
- int i, min, max;
258
- BOOL printmap;
259
-
260
- fprintf(f, " [");
261
-
262
- if (*code == OP_XCLASS)
263
- {
264
- extra = GET(code, 1);
265
- ccode = code + LINK_SIZE + 1;
266
- printmap = (*ccode & XCL_MAP) != 0;
267
- if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
268
- }
269
- else
270
- {
271
- printmap = TRUE;
272
- ccode = code + 1;
273
- }
274
-
275
- /* Print a bit map */
276
-
277
- if (printmap)
278
- {
279
- for (i = 0; i < 256; i++)
280
- {
281
- if ((ccode[i/8] & (1 << (i&7))) != 0)
282
- {
283
- int j;
284
- for (j = i+1; j < 256; j++)
285
- if ((ccode[j/8] & (1 << (j&7))) == 0) break;
286
- if (i == '-' || i == ']') fprintf(f, "\\");
287
- if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
288
- if (--j > i)
289
- {
290
- fprintf(f, "-");
291
- if (j == '-' || j == ']') fprintf(f, "\\");
292
- if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
293
- }
294
- i = j;
295
- }
296
- }
297
- ccode += 32;
298
- }
299
-
300
- /* For an XCLASS there is always some additional data */
301
-
302
- if (*code == OP_XCLASS)
303
- {
304
- int ch;
305
- while ((ch = *ccode++) != XCL_END)
306
- {
307
- ccode += 1 + print_char(f, ccode, TRUE);
308
- if (ch == XCL_RANGE)
309
- {
310
- fprintf(f, "-");
311
- ccode += 1 + print_char(f, ccode, TRUE);
312
- }
313
- }
314
- }
315
-
316
- /* Indicate a non-UTF8 class which was created by negation */
317
-
318
- fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
319
-
320
- /* Handle repeats after a class or a back reference */
321
-
322
- CLASS_REF_REPEAT:
323
- switch(*ccode)
324
- {
325
- case OP_CRSTAR:
326
- case OP_CRMINSTAR:
327
- case OP_CRPLUS:
328
- case OP_CRMINPLUS:
329
- case OP_CRQUERY:
330
- case OP_CRMINQUERY:
331
- fprintf(f, "%s", OP_names[*ccode]);
332
- extra = OP_lengths[*ccode];
333
- break;
334
-
335
- case OP_CRRANGE:
336
- case OP_CRMINRANGE:
337
- min = GET2(ccode,1);
338
- max = GET2(ccode,3);
339
- if (max == 0) fprintf(f, "{%d,}", min);
340
- else fprintf(f, "{%d,%d}", min, max);
341
- if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
342
- extra = OP_lengths[*ccode];
343
- break;
344
- }
345
- }
346
- break;
347
-
348
- /* Anything else is just an item with no data*/
349
-
350
- default:
351
- fprintf(f, " %s", OP_names[*code]);
352
- break;
353
- }
354
-
355
- code += OP_lengths[*code] + extra;
356
- fprintf(f, "\n");
357
- }
358
- }
359
-
360
- /* End of printint.c */