Mxx_ru 1.4.6 → 1.4.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,305 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /*
6
- This is a library of functions to support regular expressions whose syntax
7
- and semantics are as close as possible to those of the Perl 5 language. See
8
- the file Tech.Notes for some information on the internals.
9
-
10
- This module is a wrapper that provides a POSIX API to the underlying PCRE
11
- functions.
12
-
13
- Written by: Philip Hazel <ph10@cam.ac.uk>
14
-
15
- Copyright (c) 1997-2003 University of Cambridge
16
-
17
- -----------------------------------------------------------------------------
18
- Permission is granted to anyone to use this software for any purpose on any
19
- computer system, and to redistribute it freely, subject to the following
20
- restrictions:
21
-
22
- 1. This software is distributed in the hope that it will be useful,
23
- but WITHOUT ANY WARRANTY; without even the implied warranty of
24
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25
-
26
- 2. The origin of this software must not be misrepresented, either by
27
- explicit claim or by omission.
28
-
29
- 3. Altered versions must be plainly marked as such, and must not be
30
- misrepresented as being the original software.
31
-
32
- 4. If PCRE is embedded in any software that is released under the GNU
33
- General Purpose Licence (GPL), then the terms of that licence shall
34
- supersede any condition above with which it is incompatible.
35
- -----------------------------------------------------------------------------
36
- */
37
-
38
- #include "internal.h"
39
- #include "pcreposix.h"
40
- #include "stdlib.h"
41
-
42
-
43
-
44
- /* Corresponding tables of PCRE error messages and POSIX error codes. */
45
-
46
- static const char *const estring[] = {
47
- ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
48
- ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49
- ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50
- ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
51
- ERR41, ERR42, ERR43, ERR44 };
52
-
53
- static const int eint[] = {
54
- REG_EESCAPE, /* "\\ at end of pattern" */
55
- REG_EESCAPE, /* "\\c at end of pattern" */
56
- REG_EESCAPE, /* "unrecognized character follows \\" */
57
- REG_BADBR, /* "numbers out of order in {} quantifier" */
58
- REG_BADBR, /* "number too big in {} quantifier" */
59
- REG_EBRACK, /* "missing terminating ] for character class" */
60
- REG_ECTYPE, /* "invalid escape sequence in character class" */
61
- REG_ERANGE, /* "range out of order in character class" */
62
- REG_BADRPT, /* "nothing to repeat" */
63
- REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
64
- REG_ASSERT, /* "internal error: unexpected repeat" */
65
- REG_BADPAT, /* "unrecognized character after (?" */
66
- REG_BADPAT, /* "POSIX named classes are supported only within a class" */
67
- REG_EPAREN, /* "missing )" */
68
- REG_ESUBREG, /* "reference to non-existent subpattern" */
69
- REG_INVARG, /* "erroffset passed as NULL" */
70
- REG_INVARG, /* "unknown option bit(s) set" */
71
- REG_EPAREN, /* "missing ) after comment" */
72
- REG_ESIZE, /* "parentheses nested too deeply" */
73
- REG_ESIZE, /* "regular expression too large" */
74
- REG_ESPACE, /* "failed to get memory" */
75
- REG_EPAREN, /* "unmatched brackets" */
76
- REG_ASSERT, /* "internal error: code overflow" */
77
- REG_BADPAT, /* "unrecognized character after (?<" */
78
- REG_BADPAT, /* "lookbehind assertion is not fixed length" */
79
- REG_BADPAT, /* "malformed number after (?(" */
80
- REG_BADPAT, /* "conditional group containe more than two branches" */
81
- REG_BADPAT, /* "assertion expected after (?(" */
82
- REG_BADPAT, /* "(?R or (?digits must be followed by )" */
83
- REG_ECTYPE, /* "unknown POSIX class name" */
84
- REG_BADPAT, /* "POSIX collating elements are not supported" */
85
- REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
86
- REG_BADPAT, /* "spare error" */
87
- REG_BADPAT, /* "character value in \x{...} sequence is too large" */
88
- REG_BADPAT, /* "invalid condition (?(0)" */
89
- REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
90
- REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
91
- REG_BADPAT, /* "number after (?C is > 255" */
92
- REG_BADPAT, /* "closing ) for (?C expected" */
93
- REG_BADPAT, /* "recursive call could loop indefinitely" */
94
- REG_BADPAT, /* "unrecognized character after (?P" */
95
- REG_BADPAT, /* "syntax error after (?P" */
96
- REG_BADPAT, /* "two named groups have the same name" */
97
- REG_BADPAT /* "invalid UTF-8 string" */
98
- };
99
-
100
/* Message texts for the POSIX error codes, indexed directly by the code
value. Entry 0 is only a placeholder so that the first real code lands in
slot 1. */

static const char *const pstring[] = {
  "",                                /*  0: dummy entry     */
  "internal error",                  /*  1: REG_ASSERT      */
  "invalid repeat counts in {}",     /*  2: REG_BADBR       */
  "pattern error",                   /*  3: REG_BADPAT      */
  "? * + invalid",                   /*  4: REG_BADRPT      */
  "unbalanced {}",                   /*  5: REG_EBRACE      */
  "unbalanced []",                   /*  6: REG_EBRACK      */
  "collation error - not relevant",  /*  7: REG_ECOLLATE    */
  "bad class",                       /*  8: REG_ECTYPE      */
  "bad escape sequence",             /*  9: REG_EESCAPE     */
  "empty expression",                /* 10: REG_EMPTY       */
  "unbalanced ()",                   /* 11: REG_EPAREN      */
  "bad range inside []",             /* 12: REG_ERANGE      */
  "expression too big",              /* 13: REG_ESIZE       */
  "failed to get memory",            /* 14: REG_ESPACE      */
  "bad back reference",              /* 15: REG_ESUBREG     */
  "bad argument",                    /* 16: REG_INVARG      */
  "match failed"                     /* 17: REG_NOMATCH     */
};
122
-
123
-
124
-
125
-
126
- /*************************************************
127
- * Translate PCRE text code to int *
128
- *************************************************/
129
-
130
- /* PCRE compile-time errors are given as strings defined as macros. We can just
131
- look them up in a table to turn them into POSIX-style error codes. */
132
-
133
- static int
134
- pcre_posix_error_code(const char *s)
135
- {
136
- size_t i;
137
- for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
138
- if (strcmp(s, estring[i]) == 0) return eint[i];
139
- return REG_ASSERT;
140
- }
141
-
142
-
143
-
144
- /*************************************************
145
- * Translate error code to string *
146
- *************************************************/
147
-
148
- EXPORT size_t
149
- regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
150
- {
151
- const char *message, *addmessage;
152
- size_t length, addlength;
153
-
154
- message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
155
- "unknown error code" : pstring[errcode];
156
- length = strlen(message) + 1;
157
-
158
- addmessage = " at offset ";
159
- addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
160
- strlen(addmessage) + 6 : 0;
161
-
162
- if (errbuf_size > 0)
163
- {
164
- if (addlength > 0 && errbuf_size >= length + addlength)
165
- sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
166
- else
167
- {
168
- strncpy(errbuf, message, errbuf_size - 1);
169
- errbuf[errbuf_size-1] = 0;
170
- }
171
- }
172
-
173
- return length + addlength;
174
- }
175
-
176
-
177
-
178
-
179
- /*************************************************
180
- * Free store held by a regex *
181
- *************************************************/
182
-
183
- EXPORT void
184
- regfree(regex_t *preg)
185
- {
186
- (pcre_free)(preg->re_pcre);
187
- }
188
-
189
-
190
-
191
-
192
- /*************************************************
193
- * Compile a regular expression *
194
- *************************************************/
195
-
196
- /*
197
- Arguments:
198
- preg points to a structure for recording the compiled expression
199
- pattern the pattern to compile
200
- cflags compilation flags
201
-
202
- Returns: 0 on success
203
- various non-zero codes on failure
204
- */
205
-
206
- EXPORT int
207
- regcomp(regex_t *preg, const char *pattern, int cflags)
208
- {
209
- const char *errorptr;
210
- int erroffset;
211
- int options = 0;
212
-
213
- if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
214
- if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
215
-
216
- preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
217
- preg->re_erroffset = erroffset;
218
-
219
- if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
220
-
221
- preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
222
- return 0;
223
- }
224
-
225
-
226
-
227
-
228
- /*************************************************
229
- * Match a regular expression *
230
- *************************************************/
231
-
232
- /* Unfortunately, PCRE requires 3 ints of working space for each captured
233
- substring, so we have to get and release working store instead of just using
234
- the POSIX structures as was done in earlier releases when PCRE needed only 2
235
- ints. However, if the number of possible capturing brackets is small, use a
236
- block of store on the stack, to reduce the use of malloc/free. The threshold is
237
- in a macro that can be changed at configure time. */
238
-
239
- EXPORT int
240
- regexec(const regex_t *preg, const char *string, size_t nmatch,
241
- regmatch_t pmatch[], int eflags)
242
- {
243
- int rc;
244
- int options = 0;
245
- int *ovector = NULL;
246
- int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
247
- BOOL allocated_ovector = FALSE;
248
-
249
- if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
250
- if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
251
-
252
- ((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
253
-
254
- if (nmatch > 0)
255
- {
256
- if (nmatch <= POSIX_MALLOC_THRESHOLD)
257
- {
258
- ovector = &(small_ovector[0]);
259
- }
260
- else
261
- {
262
- ovector = (int *)malloc(sizeof(int) * nmatch * 3);
263
- if (ovector == NULL) return REG_ESPACE;
264
- allocated_ovector = TRUE;
265
- }
266
- }
267
-
268
- rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
269
- 0, options, ovector, nmatch * 3);
270
-
271
- if (rc == 0) rc = nmatch; /* All captured slots were filled in */
272
-
273
- if (rc >= 0)
274
- {
275
- size_t i;
276
- for (i = 0; i < (size_t)rc; i++)
277
- {
278
- pmatch[i].rm_so = ovector[i*2];
279
- pmatch[i].rm_eo = ovector[i*2+1];
280
- }
281
- if (allocated_ovector) free(ovector);
282
- for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
283
- return 0;
284
- }
285
-
286
- else
287
- {
288
- if (allocated_ovector) free(ovector);
289
- switch(rc)
290
- {
291
- case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
292
- case PCRE_ERROR_NULL: return REG_INVARG;
293
- case PCRE_ERROR_BADOPTION: return REG_INVARG;
294
- case PCRE_ERROR_BADMAGIC: return REG_INVARG;
295
- case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
296
- case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
297
- case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
298
- case PCRE_ERROR_BADUTF8: return REG_INVARG;
299
- case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
300
- default: return REG_ASSERT;
301
- }
302
- }
303
- }
304
-
305
- /* End of pcreposix.c */
@@ -1,88 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /* Copyright (c) 1997-2003 University of Cambridge */
6
-
7
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H

/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope. */

/* Have to include stdlib.h in order to ensure that size_t is defined. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options defined by POSIX. */

#define REG_ICASE     0x01
#define REG_NEWLINE   0x02
#define REG_NOTBOL    0x04
#define REG_NOTEOL    0x08

/* These are not used by PCRE, but by defining them we make it easier
to slot PCRE into existing programs that make POSIX calls. Both are zero, so
setting them in a flags word has no effect. */

#define REG_EXTENDED  0
#define REG_NOSUB     0

/* Error values. Not all these are relevant or used by the wrapper. The
values are written out because the wrapper uses them as indexes into its
table of message texts; they must not be renumbered. */

enum {
  REG_ASSERT   = 1,   /* internal error ? */
  REG_BADBR    = 2,   /* invalid repeat counts in {} */
  REG_BADPAT   = 3,   /* pattern error */
  REG_BADRPT   = 4,   /* ? * + invalid */
  REG_EBRACE   = 5,   /* unbalanced {} */
  REG_EBRACK   = 6,   /* unbalanced [] */
  REG_ECOLLATE = 7,   /* collation error - not relevant */
  REG_ECTYPE   = 8,   /* bad class */
  REG_EESCAPE  = 9,   /* bad escape sequence */
  REG_EMPTY    = 10,  /* empty expression */
  REG_EPAREN   = 11,  /* unbalanced () */
  REG_ERANGE   = 12,  /* bad range inside [] */
  REG_ESIZE    = 13,  /* expression too big */
  REG_ESPACE   = 14,  /* failed to get memory */
  REG_ESUBREG  = 15,  /* bad back reference */
  REG_INVARG   = 16,  /* bad argument */
  REG_NOMATCH  = 17   /* match failed */
};


/* The structure representing a compiled regular expression. */

typedef struct {
  void *re_pcre;         /* the compiled pattern, or NULL if compiling failed */
  size_t re_nsub;        /* set by regcomp() after a successful compile */
  size_t re_erroffset;   /* error offset from the compile; reset to
                            (size_t)(-1) by regexec() */
} regex_t;

/* The structure in which a captured offset is returned. */

typedef int regoff_t;

typedef struct {
  regoff_t rm_so;        /* start offset of the substring, -1 if unset */
  regoff_t rm_eo;        /* end offset of the substring, -1 if unset */
} regmatch_t;

/* The functions */

extern int regcomp(regex_t *preg, const char *pattern, int cflags);
extern int regexec(const regex_t *preg, const char *string, size_t nmatch,
  regmatch_t *pmatch, int eflags);
extern size_t regerror(int errcode, const regex_t *preg, char *errbuf,
  size_t errbuf_size);
extern void regfree(regex_t *preg);

#ifdef __cplusplus
}   /* extern "C" */
#endif

#endif /* End of pcreposix.h */
@@ -1,1483 +0,0 @@
1
- /*************************************************
2
- * PCRE testing program *
3
- *************************************************/
4
-
5
- /* This program was hacked up as a tester for PCRE. I really should have
6
- written it more tidily in the first place. Will I ever learn? It has grown and
7
- been extended and consequently is now rather untidy in places. */
8
-
9
- #include <ctype.h>
10
- #include <stdio.h>
11
- #include <string.h>
12
- #include <stdlib.h>
13
- #include <time.h>
14
- #include <locale.h>
15
-
16
- /* We need the internal info for displaying the results of pcre_study(). Also
17
- for getting the opcodes for showing compiled code. */
18
-
19
- #define PCRE_SPY /* For Win32 build, import data, not export */
20
- #include "internal.h"
21
-
22
- /* It is possible to compile this test program without including support for
23
- testing the POSIX interface, though this is not available via the standard
24
- Makefile. */
25
-
26
- #if !defined NOPOSIX
27
- #include "pcreposix.h"
28
- #endif
29
-
30
- #ifndef CLOCKS_PER_SEC
31
- #ifdef CLK_TCK
32
- #define CLOCKS_PER_SEC CLK_TCK
33
- #else
34
- #define CLOCKS_PER_SEC 100
35
- #endif
36
- #endif
37
-
38
- #define LOOPREPEAT 50000
39
-
40
- #define BUFFER_SIZE 30000
41
- #define DBUFFER_SIZE BUFFER_SIZE
42
-
43
-
44
/* File-scope state shared by main() and the helper functions. */

static FILE *outfile;            /* stream that receives the test output */
static int log_store = 0;        /* store logging flag, set per pattern */
static int callout_count;        /* callouts counted so far by callout() */
static int callout_extra;        /* non-zero: callout() prints capture details */
static int callout_fail_count;   /* count at which callout() starts returning 1 */
static int callout_fail_id;      /* callout number to which the count applies */
static int first_callout;        /* non-zero until callout() has run once */
static int show_malloc;          /* non-zero: the malloc wrappers log each call */
static int use_utf8;             /* non-zero: pchars() decodes UTF-8 */
static size_t gotten_store;      /* size of the latest new_malloc() request */


/* Largest character value that fits in a UTF-8 sequence of 1, 2, ... 6
bytes. */

static const int utf8_table1[] = {
  0x0000007f,
  0x000007ff,
  0x0000ffff,
  0x001fffff,
  0x03ffffff,
  0x7fffffff
};

/* Marker bits ORed into the first byte, indexed by the number of additional
bytes. */

static const int utf8_table2[] = {
  0,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc
};

/* Mask for the data bits of the first byte, indexed by the number of
additional bytes. */

static const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};
64
-
65
-
66
-
67
- /*************************************************
68
- * Print compiled regex *
69
- *************************************************/
70
-
71
- /* The code for doing this is held in a separate file that is also included in
72
- pcre.c when it is compiled with the debug switch. It defines a function called
73
- print_internals(), which uses a table of opcode lengths defined by the macro
74
- OP_LENGTHS, whose name must be OP_lengths. */
75
-
76
- static uschar OP_lengths[] = { OP_LENGTHS };
77
-
78
- #include "printint.c"
79
-
80
-
81
-
82
- /*************************************************
83
- * Read number from string *
84
- *************************************************/
85
-
86
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A number
too large for an int is clamped to the largest int value instead of
overflowing, which would be undefined behaviour for a signed type; the
remaining digits are still consumed.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (the first character that was not
                consumed)

Returns:      the value as a non-negative int; 0 if there were no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Largest int, worked out without <limits.h>, which this file does not
  include. */
  const int int_max = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = (int)(*str++ - '0');
    if (result > (int_max - digit) / 10)
      result = int_max;                 /* would overflow: saturate */
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return(result);
}
106
-
107
-
108
-
109
- /*************************************************
110
- * Convert character value to UTF-8 *
111
- *************************************************/
112
-
113
- /* This function takes an integer value in the range 0 - 0x7fffffff
114
- and encodes it as a UTF-8 character in 0 to 6 bytes.
115
-
116
- Arguments:
117
- cvalue the character value
118
- buffer pointer to buffer for result - at least 6 bytes long
119
-
120
- Returns: number of characters placed in the buffer
121
- -1 if input character is negative
122
- 0 if input character is positive but too big (only when
123
- int is longer than 32 bits)
124
- */
125
-
126
- static int
127
- ord2utf8(int cvalue, unsigned char *buffer)
128
- {
129
- register int i, j;
130
- for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131
- if (cvalue <= utf8_table1[i]) break;
132
- if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133
- if (cvalue < 0) return -1;
134
-
135
- buffer += i;
136
- for (j = i; j > 0; j--)
137
- {
138
- *buffer-- = 0x80 | (cvalue & 0x3f);
139
- cvalue >>= 6;
140
- }
141
- *buffer = utf8_table2[i] | cvalue;
142
- return i + 1;
143
- }
144
-
145
-
146
- /*************************************************
147
- * Convert UTF-8 string to value *
148
- *************************************************/
149
-
150
- /* This function takes one or more bytes that represents a UTF-8 character,
151
- and returns the value of the character.
152
-
153
- Argument:
154
- buffer a pointer to the byte vector
155
- vptr a pointer to an int to receive the value
156
-
157
- Returns: > 0 => the number of bytes consumed
158
- -6 to 0 => malformed UTF-8 character at offset = (-return)
159
- */
160
-
161
- static int
162
- utf82ord(unsigned char *buffer, int *vptr)
163
- {
164
- int c = *buffer++;
165
- int d = c;
166
- int i, j, s;
167
-
168
- for (i = -1; i < 6; i++) /* i is number of additional bytes */
169
- {
170
- if ((d & 0x80) == 0) break;
171
- d <<= 1;
172
- }
173
-
174
- if (i == -1) { *vptr = c; return 1; } /* ascii character */
175
- if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
176
-
177
- /* i now has a value in the range 1-5 */
178
-
179
- s = 6*i;
180
- d = (c & utf8_table3[i]) << s;
181
-
182
- for (j = 0; j < i; j++)
183
- {
184
- c = *buffer++;
185
- if ((c & 0xc0) != 0x80) return -(j+1);
186
- s -= 6;
187
- d |= (c & 0x3f) << s;
188
- }
189
-
190
- /* Check that encoding was the correct unique one */
191
-
192
- for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193
- if (d <= utf8_table1[j]) break;
194
- if (j != i) return -(i+1);
195
-
196
- /* Valid value */
197
-
198
- *vptr = d;
199
- return i+1;
200
- }
201
-
202
-
203
-
204
- /*************************************************
205
- * Print character string *
206
- *************************************************/
207
-
208
- /* Character string printing function. Must handle UTF-8 strings in utf8
209
- mode. Yields number of characters printed. If handed a NULL file, just counts
210
- chars without printing. */
211
-
212
- static int pchars(unsigned char *p, int length, FILE *f)
213
- {
214
- int c;
215
- int yield = 0;
216
-
217
- while (length-- > 0)
218
- {
219
- if (use_utf8)
220
- {
221
- int rc = utf82ord(p, &c);
222
-
223
- if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
224
- {
225
- length -= rc - 1;
226
- p += rc;
227
- if (c < 256 && isprint(c))
228
- {
229
- if (f != NULL) fprintf(f, "%c", c);
230
- yield++;
231
- }
232
- else
233
- {
234
- int n;
235
- if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236
- yield += n;
237
- }
238
- continue;
239
- }
240
- }
241
-
242
- /* Not UTF-8, or malformed UTF-8 */
243
-
244
- if (isprint(c = *(p++)))
245
- {
246
- if (f != NULL) fprintf(f, "%c", c);
247
- yield++;
248
- }
249
- else
250
- {
251
- if (f != NULL) fprintf(f, "\\x%02x", c);
252
- yield += 4;
253
- }
254
- }
255
-
256
- return yield;
257
- }
258
-
259
-
260
-
261
- /*************************************************
262
- * Callout function *
263
- *************************************************/
264
-
265
- /* Called from PCRE as a result of the (?C) item. We print out where we are in
266
- the match. Yield zero unless more callouts than the fail count, or the callout
267
- data is not zero. */
268
-
269
- static int callout(pcre_callout_block *cb)
270
- {
271
- FILE *f = (first_callout | callout_extra)? outfile : NULL;
272
- int i, pre_start, post_start;
273
-
274
- if (callout_extra)
275
- {
276
- fprintf(f, "Callout %d: last capture = %d\n",
277
- cb->callout_number, cb->capture_last);
278
-
279
- for (i = 0; i < cb->capture_top * 2; i += 2)
280
- {
281
- if (cb->offset_vector[i] < 0)
282
- fprintf(f, "%2d: <unset>\n", i/2);
283
- else
284
- {
285
- fprintf(f, "%2d: ", i/2);
286
- (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287
- cb->offset_vector[i+1] - cb->offset_vector[i], f);
288
- fprintf(f, "\n");
289
- }
290
- }
291
- }
292
-
293
- /* Re-print the subject in canonical form, the first time or if giving full
294
- datails. On subsequent calls in the same match, we use pchars just to find the
295
- printed lengths of the substrings. */
296
-
297
- if (f != NULL) fprintf(f, "--->");
298
-
299
- pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300
- post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301
- cb->current_position - cb->start_match, f);
302
-
303
- (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304
- cb->subject_length - cb->current_position, f);
305
-
306
- if (f != NULL) fprintf(f, "\n");
307
-
308
- /* Always print appropriate indicators, with callout number if not already
309
- shown */
310
-
311
- if (callout_extra) fprintf(outfile, " ");
312
- else fprintf(outfile, "%3d ", cb->callout_number);
313
-
314
- for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315
- fprintf(outfile, "^");
316
-
317
- if (post_start > 0)
318
- {
319
- for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320
- fprintf(outfile, "^");
321
- }
322
-
323
- fprintf(outfile, "\n");
324
- first_callout = 0;
325
-
326
- if (cb->callout_data != NULL)
327
- {
328
- int callout_data = *((int *)(cb->callout_data));
329
- if (callout_data != 0)
330
- {
331
- fprintf(outfile, "Callout data = %d\n", callout_data);
332
- return callout_data;
333
- }
334
- }
335
-
336
- return (cb->callout_number != callout_fail_id)? 0 :
337
- (++callout_count >= callout_fail_count)? 1 : 0;
338
- }
339
-
340
-
341
- /*************************************************
342
- * Local malloc functions *
343
- *************************************************/
344
-
345
- /* Alternative malloc function, to test functionality and show the size of the
346
- compiled re. */
347
-
348
- static void *new_malloc(size_t size)
349
- {
350
- void *block = malloc(size);
351
- gotten_store = size;
352
- if (show_malloc)
353
- fprintf(outfile, "malloc %3d %p\n", size, block);
354
- return block;
355
- }
356
-
357
- static void new_free(void *block)
358
- {
359
- if (show_malloc)
360
- fprintf(outfile, "free %p\n", block);
361
- free(block);
362
- }
363
-
364
-
365
- /* For recursion malloc/free, to test stacking calls */
366
-
367
- static void *stack_malloc(size_t size)
368
- {
369
- void *block = malloc(size);
370
- if (show_malloc)
371
- fprintf(outfile, "stack_malloc %3d %p\n", size, block);
372
- return block;
373
- }
374
-
375
- static void stack_free(void *block)
376
- {
377
- if (show_malloc)
378
- fprintf(outfile, "stack_free %p\n", block);
379
- free(block);
380
- }
381
-
382
-
383
- /*************************************************
384
- * Call pcre_fullinfo() *
385
- *************************************************/
386
-
387
- /* Get one piece of information from the pcre_fullinfo() function */
388
-
389
- static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
390
- {
391
- int rc;
392
- if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
393
- fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
394
- }
395
-
396
-
397
-
398
- /*************************************************
399
- * Main Program *
400
- *************************************************/
401
-
402
- /* Read lines from named file or stdin and write to named file or stdout; lines
403
- consist of a regular expression, in delimiters and optionally followed by
404
- options, followed by a set of test data, terminated by an empty line. */
405
-
406
- int main(int argc, char **argv)
407
- {
408
- FILE *infile = stdin;
409
- int options = 0;
410
- int study_options = 0;
411
- int op = 1;
412
- int timeit = 0;
413
- int showinfo = 0;
414
- int showstore = 0;
415
- int size_offsets = 45;
416
- int size_offsets_max;
417
- int *offsets;
418
- #if !defined NOPOSIX
419
- int posix = 0;
420
- #endif
421
- int debug = 0;
422
- int done = 0;
423
-
424
- unsigned char *buffer;
425
- unsigned char *dbuffer;
426
-
427
- /* Get buffers from malloc() so that Electric Fence will check their misuse
428
- when I am debugging. */
429
-
430
- buffer = (unsigned char *)malloc(BUFFER_SIZE);
431
- dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
432
-
433
- /* Static so that new_malloc can use it. */
434
-
435
- outfile = stdout;
436
-
437
- /* Scan options */
438
-
439
- while (argc > 1 && argv[op][0] == '-')
440
- {
441
- unsigned char *endptr;
442
-
443
- if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
444
- showstore = 1;
445
- else if (strcmp(argv[op], "-t") == 0) timeit = 1;
446
- else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
447
- else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
448
- else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
449
- ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
450
- *endptr == 0))
451
- {
452
- op++;
453
- argc--;
454
- }
455
- #if !defined NOPOSIX
456
- else if (strcmp(argv[op], "-p") == 0) posix = 1;
457
- #endif
458
- else if (strcmp(argv[op], "-C") == 0)
459
- {
460
- int rc;
461
- printf("PCRE version %s\n", pcre_version());
462
- printf("Compiled with\n");
463
- (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
464
- printf(" %sUTF-8 support\n", rc? "" : "No ");
465
- (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
466
- printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
467
- (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
468
- printf(" Internal link size = %d\n", rc);
469
- (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
470
- printf(" POSIX malloc threshold = %d\n", rc);
471
- (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
472
- printf(" Default match limit = %d\n", rc);
473
- (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
474
- printf(" Match recursion uses %s\n", rc? "stack" : "heap");
475
- exit(0);
476
- }
477
- else
478
- {
479
- printf("** Unknown or malformed option %s\n", argv[op]);
480
- printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
481
- printf(" -C show PCRE compile-time options and exit\n");
482
- printf(" -d debug: show compiled code; implies -i\n"
483
- " -i show information about compiled pattern\n"
484
- " -o <n> set size of offsets vector to <n>\n");
485
- #if !defined NOPOSIX
486
- printf(" -p use POSIX interface\n");
487
- #endif
488
- printf(" -s output store information\n"
489
- " -t time compilation and execution\n");
490
- return 1;
491
- }
492
- op++;
493
- argc--;
494
- }
495
-
496
- /* Get the store for the offsets vector, and remember what it was */
497
-
498
- size_offsets_max = size_offsets;
499
- offsets = (int *)malloc(size_offsets_max * sizeof(int));
500
- if (offsets == NULL)
501
- {
502
- printf("** Failed to get %d bytes of memory for offsets vector\n",
503
- size_offsets_max * sizeof(int));
504
- return 1;
505
- }
506
-
507
- /* Sort out the input and output files */
508
-
509
- if (argc > 1)
510
- {
511
- infile = fopen(argv[op], "r");
512
- if (infile == NULL)
513
- {
514
- printf("** Failed to open %s\n", argv[op]);
515
- return 1;
516
- }
517
- }
518
-
519
- if (argc > 2)
520
- {
521
- outfile = fopen(argv[op+1], "w");
522
- if (outfile == NULL)
523
- {
524
- printf("** Failed to open %s\n", argv[op+1]);
525
- return 1;
526
- }
527
- }
528
-
529
- /* Set alternative malloc function */
530
-
531
- pcre_malloc = new_malloc;
532
- pcre_free = new_free;
533
- pcre_stack_malloc = stack_malloc;
534
- pcre_stack_free = stack_free;
535
-
536
- /* Heading line, then prompt for first regex if stdin */
537
-
538
- fprintf(outfile, "PCRE version %s\n\n", pcre_version());
539
-
540
- /* Main loop */
541
-
542
- while (!done)
543
- {
544
- pcre *re = NULL;
545
- pcre_extra *extra = NULL;
546
-
547
- #if !defined NOPOSIX /* There are still compilers that require no indent */
548
- regex_t preg;
549
- int do_posix = 0;
550
- #endif
551
-
552
- const char *error;
553
- unsigned char *p, *pp, *ppp;
554
- const unsigned char *tables = NULL;
555
- int do_study = 0;
556
- int do_debug = debug;
557
- int do_G = 0;
558
- int do_g = 0;
559
- int do_showinfo = showinfo;
560
- int do_showrest = 0;
561
- int erroroffset, len, delimiter;
562
-
563
- use_utf8 = 0;
564
-
565
- if (infile == stdin) printf(" re> ");
566
- if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
567
- if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
568
- fflush(outfile);
569
-
570
- p = buffer;
571
- while (isspace(*p)) p++;
572
- if (*p == 0) continue;
573
-
574
- /* Get the delimiter and seek the end of the pattern; if it isn't
575
- complete, read more. */
576
-
577
- delimiter = *p++;
578
-
579
- if (isalnum(delimiter) || delimiter == '\\')
580
- {
581
- fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
582
- goto SKIP_DATA;
583
- }
584
-
585
- pp = p;
586
-
587
- for(;;)
588
- {
589
- while (*pp != 0)
590
- {
591
- if (*pp == '\\' && pp[1] != 0) pp++;
592
- else if (*pp == delimiter) break;
593
- pp++;
594
- }
595
- if (*pp != 0) break;
596
-
597
- len = BUFFER_SIZE - (pp - buffer);
598
- if (len < 256)
599
- {
600
- fprintf(outfile, "** Expression too long - missing delimiter?\n");
601
- goto SKIP_DATA;
602
- }
603
-
604
- if (infile == stdin) printf(" > ");
605
- if (fgets((char *)pp, len, infile) == NULL)
606
- {
607
- fprintf(outfile, "** Unexpected EOF\n");
608
- done = 1;
609
- goto CONTINUE;
610
- }
611
- if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
612
- }
613
-
614
- /* If the first character after the delimiter is backslash, make
615
- the pattern end with backslash. This is purely to provide a way
616
- of testing for the error message when a pattern ends with backslash. */
617
-
618
- if (pp[1] == '\\') *pp++ = '\\';
619
-
620
- /* Terminate the pattern at the delimiter */
621
-
622
- *pp++ = 0;
623
-
624
- /* Look for options after final delimiter */
625
-
626
- options = 0;
627
- study_options = 0;
628
- log_store = showstore; /* default from command line */
629
-
630
- while (*pp != 0)
631
- {
632
- switch (*pp++)
633
- {
634
- case 'g': do_g = 1; break;
635
- case 'i': options |= PCRE_CASELESS; break;
636
- case 'm': options |= PCRE_MULTILINE; break;
637
- case 's': options |= PCRE_DOTALL; break;
638
- case 'x': options |= PCRE_EXTENDED; break;
639
-
640
- case '+': do_showrest = 1; break;
641
- case 'A': options |= PCRE_ANCHORED; break;
642
- case 'D': do_debug = do_showinfo = 1; break;
643
- case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
644
- case 'G': do_G = 1; break;
645
- case 'I': do_showinfo = 1; break;
646
- case 'M': log_store = 1; break;
647
- case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
648
-
649
- #if !defined NOPOSIX
650
- case 'P': do_posix = 1; break;
651
- #endif
652
-
653
- case 'S': do_study = 1; break;
654
- case 'U': options |= PCRE_UNGREEDY; break;
655
- case 'X': options |= PCRE_EXTRA; break;
656
- case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
657
- case '?': options |= PCRE_NO_UTF8_CHECK; break;
658
-
659
- case 'L':
660
- ppp = pp;
661
- while (*ppp != '\n' && *ppp != ' ') ppp++;
662
- *ppp = 0;
663
- if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
664
- {
665
- fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
666
- goto SKIP_DATA;
667
- }
668
- tables = pcre_maketables();
669
- pp = ppp;
670
- break;
671
-
672
- case '\n': case ' ': break;
673
- default:
674
- fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
675
- goto SKIP_DATA;
676
- }
677
- }
678
-
679
- /* Handle compiling via the POSIX interface, which doesn't support the
680
- timing, showing, or debugging options, nor the ability to pass over
681
- local character tables. */
682
-
683
- #if !defined NOPOSIX
684
- if (posix || do_posix)
685
- {
686
- int rc;
687
- int cflags = 0;
688
- if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
689
- if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
690
- rc = regcomp(&preg, (char *)p, cflags);
691
-
692
- /* Compilation failed; go back for another re, skipping to blank line
693
- if non-interactive. */
694
-
695
- if (rc != 0)
696
- {
697
- (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
698
- fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
699
- goto SKIP_DATA;
700
- }
701
- }
702
-
703
- /* Handle compiling via the native interface */
704
-
705
- else
706
- #endif /* !defined NOPOSIX */
707
-
708
- {
709
- if (timeit)
710
- {
711
- register int i;
712
- clock_t time_taken;
713
- clock_t start_time = clock();
714
- for (i = 0; i < LOOPREPEAT; i++)
715
- {
716
- re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
717
- if (re != NULL) free(re);
718
- }
719
- time_taken = clock() - start_time;
720
- fprintf(outfile, "Compile time %.3f milliseconds\n",
721
- (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
722
- (double)CLOCKS_PER_SEC);
723
- }
724
-
725
- re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
726
-
727
- /* Compilation failed; go back for another re, skipping to blank line
728
- if non-interactive. */
729
-
730
- if (re == NULL)
731
- {
732
- fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
733
- SKIP_DATA:
734
- if (infile != stdin)
735
- {
736
- for (;;)
737
- {
738
- if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
739
- {
740
- done = 1;
741
- goto CONTINUE;
742
- }
743
- len = (int)strlen((char *)buffer);
744
- while (len > 0 && isspace(buffer[len-1])) len--;
745
- if (len == 0) break;
746
- }
747
- fprintf(outfile, "\n");
748
- }
749
- goto CONTINUE;
750
- }
751
-
752
- /* Compilation succeeded; print data if required. There are now two
753
- info-returning functions. The old one has a limited interface and
754
- returns only limited data. Check that it agrees with the newer one. */
755
-
756
- if (log_store)
757
- fprintf(outfile, "Memory allocation (code space): %d\n",
758
- (int)(gotten_store -
759
- sizeof(real_pcre) -
760
- ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
761
-
762
- if (do_showinfo)
763
- {
764
- unsigned long int get_options;
765
- int old_first_char, old_options, old_count;
766
- int count, backrefmax, first_char, need_char;
767
- int nameentrysize, namecount;
768
- const uschar *nametable;
769
- size_t size;
770
-
771
- if (do_debug)
772
- {
773
- fprintf(outfile, "------------------------------------------------------------------\n");
774
- print_internals(re, outfile);
775
- }
776
-
777
- new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
778
- new_info(re, NULL, PCRE_INFO_SIZE, &size);
779
- new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
780
- new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
781
- new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
782
- new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
783
- new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
784
- new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
785
- new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
786
-
787
- old_count = pcre_info(re, &old_options, &old_first_char);
788
- if (count < 0) fprintf(outfile,
789
- "Error %d from pcre_info()\n", count);
790
- else
791
- {
792
- if (old_count != count) fprintf(outfile,
793
- "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
794
- old_count);
795
-
796
- if (old_first_char != first_char) fprintf(outfile,
797
- "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
798
- first_char, old_first_char);
799
-
800
- if (old_options != (int)get_options) fprintf(outfile,
801
- "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
802
- get_options, old_options);
803
- }
804
-
805
- if (size != gotten_store) fprintf(outfile,
806
- "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
807
- size, gotten_store);
808
-
809
- fprintf(outfile, "Capturing subpattern count = %d\n", count);
810
- if (backrefmax > 0)
811
- fprintf(outfile, "Max back reference = %d\n", backrefmax);
812
-
813
- if (namecount > 0)
814
- {
815
- fprintf(outfile, "Named capturing subpatterns:\n");
816
- while (namecount-- > 0)
817
- {
818
- fprintf(outfile, " %s %*s%3d\n", nametable + 2,
819
- nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
820
- GET2(nametable, 0));
821
- nametable += nameentrysize;
822
- }
823
- }
824
-
825
- if (get_options == 0) fprintf(outfile, "No options\n");
826
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
827
- ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
828
- ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
829
- ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
830
- ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
831
- ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
832
- ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
833
- ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
834
- ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
835
- ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
836
- ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
837
-
838
- if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
839
- fprintf(outfile, "Case state changes\n");
840
-
841
- if (first_char == -1)
842
- {
843
- fprintf(outfile, "First char at start or follows \\n\n");
844
- }
845
- else if (first_char < 0)
846
- {
847
- fprintf(outfile, "No first char\n");
848
- }
849
- else
850
- {
851
- int ch = first_char & 255;
852
- const char *caseless = ((first_char & REQ_CASELESS) == 0)?
853
- "" : " (caseless)";
854
- if (isprint(ch))
855
- fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
856
- else
857
- fprintf(outfile, "First char = %d%s\n", ch, caseless);
858
- }
859
-
860
- if (need_char < 0)
861
- {
862
- fprintf(outfile, "No need char\n");
863
- }
864
- else
865
- {
866
- int ch = need_char & 255;
867
- const char *caseless = ((need_char & REQ_CASELESS) == 0)?
868
- "" : " (caseless)";
869
- if (isprint(ch))
870
- fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
871
- else
872
- fprintf(outfile, "Need char = %d%s\n", ch, caseless);
873
- }
874
- }
875
-
876
- /* If /S was present, study the regexp to generate additional info to
877
- help with the matching. */
878
-
879
- if (do_study)
880
- {
881
- if (timeit)
882
- {
883
- register int i;
884
- clock_t time_taken;
885
- clock_t start_time = clock();
886
- for (i = 0; i < LOOPREPEAT; i++)
887
- extra = pcre_study(re, study_options, &error);
888
- time_taken = clock() - start_time;
889
- if (extra != NULL) free(extra);
890
- fprintf(outfile, " Study time %.3f milliseconds\n",
891
- (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
892
- (double)CLOCKS_PER_SEC);
893
- }
894
-
895
- extra = pcre_study(re, study_options, &error);
896
- if (error != NULL)
897
- fprintf(outfile, "Failed to study: %s\n", error);
898
- else if (extra == NULL)
899
- fprintf(outfile, "Study returned NULL\n");
900
-
901
- /* Don't output study size; at present it is in any case a fixed
902
- value, but it varies, depending on the computer architecture, and
903
- so messes up the test suite. */
904
-
905
- else if (do_showinfo)
906
- {
907
- size_t size;
908
- uschar *start_bits = NULL;
909
- new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
910
- new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
911
- /* fprintf(outfile, "Study size = %d\n", size); */
912
- if (start_bits == NULL)
913
- fprintf(outfile, "No starting character set\n");
914
- else
915
- {
916
- int i;
917
- int c = 24;
918
- fprintf(outfile, "Starting character set: ");
919
- for (i = 0; i < 256; i++)
920
- {
921
- if ((start_bits[i/8] & (1<<(i%8))) != 0)
922
- {
923
- if (c > 75)
924
- {
925
- fprintf(outfile, "\n ");
926
- c = 2;
927
- }
928
- if (isprint(i) && i != ' ')
929
- {
930
- fprintf(outfile, "%c ", i);
931
- c += 2;
932
- }
933
- else
934
- {
935
- fprintf(outfile, "\\x%02x ", i);
936
- c += 5;
937
- }
938
- }
939
- }
940
- fprintf(outfile, "\n");
941
- }
942
- }
943
- }
944
- }
945
-
946
- /* Read data lines and test them */
947
-
948
- for (;;)
949
- {
950
- unsigned char *q;
951
- unsigned char *bptr = dbuffer;
952
- int *use_offsets = offsets;
953
- int use_size_offsets = size_offsets;
954
- int callout_data = 0;
955
- int callout_data_set = 0;
956
- int count, c;
957
- int copystrings = 0;
958
- int find_match_limit = 0;
959
- int getstrings = 0;
960
- int getlist = 0;
961
- int gmatched = 0;
962
- int start_offset = 0;
963
- int g_notempty = 0;
964
-
965
- options = 0;
966
-
967
- pcre_callout = callout;
968
- first_callout = 1;
969
- callout_extra = 0;
970
- callout_count = 0;
971
- callout_fail_count = 999999;
972
- callout_fail_id = -1;
973
- show_malloc = 0;
974
-
975
- if (infile == stdin) printf("data> ");
976
- if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
977
- {
978
- done = 1;
979
- goto CONTINUE;
980
- }
981
- if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
982
-
983
- len = (int)strlen((char *)buffer);
984
- while (len > 0 && isspace(buffer[len-1])) len--;
985
- buffer[len] = 0;
986
- if (len == 0) break;
987
-
988
- p = buffer;
989
- while (isspace(*p)) p++;
990
-
991
- q = dbuffer;
992
- while ((c = *p++) != 0)
993
- {
994
- int i = 0;
995
- int n = 0;
996
-
997
- if (c == '\\') switch ((c = *p++))
998
- {
999
- case 'a': c = 7; break;
1000
- case 'b': c = '\b'; break;
1001
- case 'e': c = 27; break;
1002
- case 'f': c = '\f'; break;
1003
- case 'n': c = '\n'; break;
1004
- case 'r': c = '\r'; break;
1005
- case 't': c = '\t'; break;
1006
- case 'v': c = '\v'; break;
1007
-
1008
- case '0': case '1': case '2': case '3':
1009
- case '4': case '5': case '6': case '7':
1010
- c -= '0';
1011
- while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1012
- c = c * 8 + *p++ - '0';
1013
- break;
1014
-
1015
- case 'x':
1016
-
1017
- /* Handle \x{..} specially - new Perl thing for utf8 */
1018
-
1019
- if (*p == '{')
1020
- {
1021
- unsigned char *pt = p;
1022
- c = 0;
1023
- while (isxdigit(*(++pt)))
1024
- c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1025
- if (*pt == '}')
1026
- {
1027
- unsigned char buff8[8];
1028
- int ii, utn;
1029
- utn = ord2utf8(c, buff8);
1030
- for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1031
- c = buff8[ii]; /* Last byte */
1032
- p = pt + 1;
1033
- break;
1034
- }
1035
- /* Not correct form; fall through */
1036
- }
1037
-
1038
- /* Ordinary \x */
1039
-
1040
- c = 0;
1041
- while (i++ < 2 && isxdigit(*p))
1042
- {
1043
- c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1044
- p++;
1045
- }
1046
- break;
1047
-
1048
- case 0: /* Allows for an empty line */
1049
- p--;
1050
- continue;
1051
-
1052
- case 'A': /* Option setting */
1053
- options |= PCRE_ANCHORED;
1054
- continue;
1055
-
1056
- case 'B':
1057
- options |= PCRE_NOTBOL;
1058
- continue;
1059
-
1060
- case 'C':
1061
- if (isdigit(*p)) /* Set copy string */
1062
- {
1063
- while(isdigit(*p)) n = n * 10 + *p++ - '0';
1064
- copystrings |= 1 << n;
1065
- }
1066
- else if (isalnum(*p))
1067
- {
1068
- uschar name[256];
1069
- uschar *npp = name;
1070
- while (isalnum(*p)) *npp++ = *p++;
1071
- *npp = 0;
1072
- n = pcre_get_stringnumber(re, (char *)name);
1073
- if (n < 0)
1074
- fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1075
- else copystrings |= 1 << n;
1076
- }
1077
- else if (*p == '+')
1078
- {
1079
- callout_extra = 1;
1080
- p++;
1081
- }
1082
- else if (*p == '-')
1083
- {
1084
- pcre_callout = NULL;
1085
- p++;
1086
- }
1087
- else if (*p == '!')
1088
- {
1089
- callout_fail_id = 0;
1090
- p++;
1091
- while(isdigit(*p))
1092
- callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1093
- callout_fail_count = 0;
1094
- if (*p == '!')
1095
- {
1096
- p++;
1097
- while(isdigit(*p))
1098
- callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1099
- }
1100
- }
1101
- else if (*p == '*')
1102
- {
1103
- int sign = 1;
1104
- callout_data = 0;
1105
- if (*(++p) == '-') { sign = -1; p++; }
1106
- while(isdigit(*p))
1107
- callout_data = callout_data * 10 + *p++ - '0';
1108
- callout_data *= sign;
1109
- callout_data_set = 1;
1110
- }
1111
- continue;
1112
-
1113
- case 'G':
1114
- if (isdigit(*p))
1115
- {
1116
- while(isdigit(*p)) n = n * 10 + *p++ - '0';
1117
- getstrings |= 1 << n;
1118
- }
1119
- else if (isalnum(*p))
1120
- {
1121
- uschar name[256];
1122
- uschar *npp = name;
1123
- while (isalnum(*p)) *npp++ = *p++;
1124
- *npp = 0;
1125
- n = pcre_get_stringnumber(re, (char *)name);
1126
- if (n < 0)
1127
- fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1128
- else getstrings |= 1 << n;
1129
- }
1130
- continue;
1131
-
1132
- case 'L':
1133
- getlist = 1;
1134
- continue;
1135
-
1136
- case 'M':
1137
- find_match_limit = 1;
1138
- continue;
1139
-
1140
- case 'N':
1141
- options |= PCRE_NOTEMPTY;
1142
- continue;
1143
-
1144
- case 'O':
1145
- while(isdigit(*p)) n = n * 10 + *p++ - '0';
1146
- if (n > size_offsets_max)
1147
- {
1148
- size_offsets_max = n;
1149
- free(offsets);
1150
- use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1151
- if (offsets == NULL)
1152
- {
1153
- printf("** Failed to get %d bytes of memory for offsets vector\n",
1154
- size_offsets_max * sizeof(int));
1155
- return 1;
1156
- }
1157
- }
1158
- use_size_offsets = n;
1159
- if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1160
- continue;
1161
-
1162
- case 'S':
1163
- show_malloc = 1;
1164
- continue;
1165
-
1166
- case 'Z':
1167
- options |= PCRE_NOTEOL;
1168
- continue;
1169
-
1170
- case '?':
1171
- options |= PCRE_NO_UTF8_CHECK;
1172
- continue;
1173
- }
1174
- *q++ = c;
1175
- }
1176
- *q = 0;
1177
- len = q - dbuffer;
1178
-
1179
- /* Handle matching via the POSIX interface, which does not
1180
- support timing or playing with the match limit or callout data. */
1181
-
1182
- #if !defined NOPOSIX
1183
- if (posix || do_posix)
1184
- {
1185
- int rc;
1186
- int eflags = 0;
1187
- regmatch_t *pmatch = NULL;
1188
- if (use_size_offsets > 0)
1189
- pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1190
- if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1191
- if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1192
-
1193
- rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1194
-
1195
- if (rc != 0)
1196
- {
1197
- (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1198
- fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1199
- }
1200
- else
1201
- {
1202
- size_t i;
1203
- for (i = 0; i < (size_t)use_size_offsets; i++)
1204
- {
1205
- if (pmatch[i].rm_so >= 0)
1206
- {
1207
- fprintf(outfile, "%2d: ", (int)i);
1208
- (void)pchars(dbuffer + pmatch[i].rm_so,
1209
- pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1210
- fprintf(outfile, "\n");
1211
- if (i == 0 && do_showrest)
1212
- {
1213
- fprintf(outfile, " 0+ ");
1214
- (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1215
- outfile);
1216
- fprintf(outfile, "\n");
1217
- }
1218
- }
1219
- }
1220
- }
1221
- free(pmatch);
1222
- }
1223
-
1224
- /* Handle matching via the native interface - repeats for /g and /G */
1225
-
1226
- else
1227
- #endif /* !defined NOPOSIX */
1228
-
1229
- for (;; gmatched++) /* Loop for /g or /G */
1230
- {
1231
- if (timeit)
1232
- {
1233
- register int i;
1234
- clock_t time_taken;
1235
- clock_t start_time = clock();
1236
- for (i = 0; i < LOOPREPEAT; i++)
1237
- count = pcre_exec(re, extra, (char *)bptr, len,
1238
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
1239
- time_taken = clock() - start_time;
1240
- fprintf(outfile, "Execute time %.3f milliseconds\n",
1241
- (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1242
- (double)CLOCKS_PER_SEC);
1243
- }
1244
-
1245
- /* If find_match_limit is set, we want to do repeated matches with
1246
- varying limits in order to find the minimum value. */
1247
-
1248
- if (find_match_limit)
1249
- {
1250
- int min = 0;
1251
- int mid = 64;
1252
- int max = -1;
1253
-
1254
- if (extra == NULL)
1255
- {
1256
- extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1257
- extra->flags = 0;
1258
- }
1259
- extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1260
-
1261
- for (;;)
1262
- {
1263
- extra->match_limit = mid;
1264
- count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1265
- options | g_notempty, use_offsets, use_size_offsets);
1266
- if (count == PCRE_ERROR_MATCHLIMIT)
1267
- {
1268
- /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1269
- min = mid;
1270
- mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1271
- }
1272
- else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1273
- {
1274
- if (mid == min + 1)
1275
- {
1276
- fprintf(outfile, "Minimum match limit = %d\n", mid);
1277
- break;
1278
- }
1279
- /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1280
- max = mid;
1281
- mid = (min + mid)/2;
1282
- }
1283
- else break; /* Some other error */
1284
- }
1285
-
1286
- extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1287
- }
1288
-
1289
- /* If callout_data is set, use the interface with additional data */
1290
-
1291
- else if (callout_data_set)
1292
- {
1293
- if (extra == NULL)
1294
- {
1295
- extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1296
- extra->flags = 0;
1297
- }
1298
- extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1299
- extra->callout_data = &callout_data;
1300
- count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1301
- options | g_notempty, use_offsets, use_size_offsets);
1302
- extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1303
- }
1304
-
1305
- /* The normal case is just to do the match once, with the default
1306
- value of match_limit. */
1307
-
1308
- else count = pcre_exec(re, extra, (char *)bptr, len,
1309
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
1310
-
1311
- if (count == 0)
1312
- {
1313
- fprintf(outfile, "Matched, but too many substrings\n");
1314
- count = use_size_offsets/3;
1315
- }
1316
-
1317
- /* Matched */
1318
-
1319
- if (count >= 0)
1320
- {
1321
- int i;
1322
- for (i = 0; i < count * 2; i += 2)
1323
- {
1324
- if (use_offsets[i] < 0)
1325
- fprintf(outfile, "%2d: <unset>\n", i/2);
1326
- else
1327
- {
1328
- fprintf(outfile, "%2d: ", i/2);
1329
- (void)pchars(bptr + use_offsets[i],
1330
- use_offsets[i+1] - use_offsets[i], outfile);
1331
- fprintf(outfile, "\n");
1332
- if (i == 0)
1333
- {
1334
- if (do_showrest)
1335
- {
1336
- fprintf(outfile, " 0+ ");
1337
- (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1338
- outfile);
1339
- fprintf(outfile, "\n");
1340
- }
1341
- }
1342
- }
1343
- }
1344
-
1345
- for (i = 0; i < 32; i++)
1346
- {
1347
- if ((copystrings & (1 << i)) != 0)
1348
- {
1349
- char copybuffer[16];
1350
- int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1351
- i, copybuffer, sizeof(copybuffer));
1352
- if (rc < 0)
1353
- fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1354
- else
1355
- fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1356
- }
1357
- }
1358
-
1359
- for (i = 0; i < 32; i++)
1360
- {
1361
- if ((getstrings & (1 << i)) != 0)
1362
- {
1363
- const char *substring;
1364
- int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1365
- i, &substring);
1366
- if (rc < 0)
1367
- fprintf(outfile, "get substring %d failed %d\n", i, rc);
1368
- else
1369
- {
1370
- fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1371
- /* free((void *)substring); */
1372
- pcre_free_substring(substring);
1373
- }
1374
- }
1375
- }
1376
-
1377
- if (getlist)
1378
- {
1379
- const char **stringlist;
1380
- int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1381
- &stringlist);
1382
- if (rc < 0)
1383
- fprintf(outfile, "get substring list failed %d\n", rc);
1384
- else
1385
- {
1386
- for (i = 0; i < count; i++)
1387
- fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1388
- if (stringlist[i] != NULL)
1389
- fprintf(outfile, "string list not terminated by NULL\n");
1390
- /* free((void *)stringlist); */
1391
- pcre_free_substring_list(stringlist);
1392
- }
1393
- }
1394
- }
1395
-
1396
- /* Failed to match. If this is a /g or /G loop and we previously set
1397
- g_notempty after a null match, this is not necessarily the end.
1398
- We want to advance the start offset, and continue. In the case of UTF-8
1399
- matching, the advance must be one character, not one byte. Fudge the
1400
- offset values to achieve this. We won't be at the end of the string -
1401
- that was checked before setting g_notempty. */
1402
-
1403
- else
1404
- {
1405
- if (g_notempty != 0)
1406
- {
1407
- int onechar = 1;
1408
- use_offsets[0] = start_offset;
1409
- if (use_utf8)
1410
- {
1411
- while (start_offset + onechar < len)
1412
- {
1413
- int tb = bptr[start_offset+onechar];
1414
- if (tb <= 127) break;
1415
- tb &= 0xc0;
1416
- if (tb != 0 && tb != 0xc0) onechar++;
1417
- }
1418
- }
1419
- use_offsets[1] = start_offset + onechar;
1420
- }
1421
- else
1422
- {
1423
- if (count == PCRE_ERROR_NOMATCH)
1424
- {
1425
- if (gmatched == 0) fprintf(outfile, "No match\n");
1426
- }
1427
- else fprintf(outfile, "Error %d\n", count);
1428
- break; /* Out of the /g loop */
1429
- }
1430
- }
1431
-
1432
- /* If not /g or /G we are done */
1433
-
1434
- if (!do_g && !do_G) break;
1435
-
1436
- /* If we have matched an empty string, first check to see if we are at
1437
- the end of the subject. If so, the /g loop is over. Otherwise, mimic
1438
- what Perl's /g option does. This turns out to be rather cunning. First
1439
- we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1440
- same point. If this fails (picked up above) we advance to the next
1441
- character. */
1442
-
1443
- g_notempty = 0;
1444
- if (use_offsets[0] == use_offsets[1])
1445
- {
1446
- if (use_offsets[0] == len) break;
1447
- g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1448
- }
1449
-
1450
- /* For /g, update the start offset, leaving the rest alone */
1451
-
1452
- if (do_g) start_offset = use_offsets[1];
1453
-
1454
- /* For /G, update the pointer and length */
1455
-
1456
- else
1457
- {
1458
- bptr += use_offsets[1];
1459
- len -= use_offsets[1];
1460
- }
1461
- } /* End of loop for /g and /G */
1462
- } /* End of loop for data lines */
1463
-
1464
- CONTINUE:
1465
-
1466
- #if !defined NOPOSIX
1467
- if (posix || do_posix) regfree(&preg);
1468
- #endif
1469
-
1470
- if (re != NULL) free(re);
1471
- if (extra != NULL) free(extra);
1472
- if (tables != NULL)
1473
- {
1474
- free((void *)tables);
1475
- setlocale(LC_CTYPE, "C");
1476
- }
1477
- }
1478
-
1479
- if (infile == stdin) fprintf(outfile, "\n");
1480
- return 0;
1481
- }
1482
-
1483
- /* End */