webruby 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/webruby/config.rb +4 -9
- data/lib/webruby/rake/files.rake +2 -2
- data/modules/emscripten/AUTHORS +9 -1
- data/modules/emscripten/CONTRIBUTING.markdown +5 -0
- data/modules/emscripten/ChangeLog +435 -0
- data/modules/emscripten/cmake/Modules/FindOpenAL.cmake +26 -0
- data/modules/emscripten/cmake/Platform/Emscripten.cmake +9 -2
- data/modules/emscripten/em++ +0 -2
- data/modules/emscripten/emcc +92 -32
- data/modules/emscripten/emlink.py +16 -13
- data/modules/emscripten/emmake +1 -1
- data/modules/emscripten/emrun +918 -0
- data/modules/emscripten/emrun.bat +2 -0
- data/modules/emscripten/emscripten.py +545 -20
- data/modules/emscripten/src/analyzer.js +6 -1
- data/modules/emscripten/src/compiler.js +25 -16
- data/modules/emscripten/src/emrun_postjs.js +20 -0
- data/modules/emscripten/{tests → src}/hello_world.js +0 -0
- data/modules/emscripten/src/intertyper.js +45 -16
- data/modules/emscripten/src/jsifier.js +78 -48
- data/modules/emscripten/src/library.js +381 -96
- data/modules/emscripten/src/library_browser.js +50 -53
- data/modules/emscripten/src/library_egl.js +66 -24
- data/modules/emscripten/src/library_fs.js +122 -90
- data/modules/emscripten/src/library_gl.js +739 -353
- data/modules/emscripten/src/library_glfw.js +9 -3
- data/modules/emscripten/src/library_glut.js +10 -5
- data/modules/emscripten/src/library_idbfs.js +14 -14
- data/modules/emscripten/src/library_memfs.js +65 -41
- data/modules/emscripten/src/library_nodefs.js +61 -9
- data/modules/emscripten/src/library_openal.js +4 -4
- data/modules/emscripten/src/library_path.js +9 -13
- data/modules/emscripten/src/library_sdl.js +301 -64
- data/modules/emscripten/src/library_sockfs.js +7 -5
- data/modules/emscripten/src/modules.js +62 -22
- data/modules/emscripten/src/parseTools.js +135 -102
- data/modules/emscripten/src/postamble.js +3 -4
- data/modules/emscripten/src/preamble.js +49 -29
- data/modules/emscripten/src/proxyClient.js +1 -1
- data/modules/emscripten/src/proxyWorker.js +10 -10
- data/modules/emscripten/src/relooper/Relooper.cpp +15 -4
- data/modules/emscripten/src/runtime.js +32 -8
- data/modules/emscripten/src/settings.js +25 -8
- data/modules/emscripten/src/shell.html +6 -3
- data/modules/emscripten/src/shell.js +13 -11
- data/modules/emscripten/src/simd.js +602 -432
- data/modules/emscripten/src/struct_info.json +22 -2
- data/modules/emscripten/src/utility.js +32 -17
- data/modules/emscripten/system/include/SDL/SDL_events.h +1 -0
- data/modules/emscripten/system/include/compat/ctype.h +17 -0
- data/modules/emscripten/system/include/compat/wchar.h +23 -0
- data/modules/emscripten/system/include/compat/wctype.h +23 -0
- data/modules/emscripten/system/include/emscripten/emmintrin.h +87 -0
- data/modules/emscripten/system/include/emscripten/emscripten.h +30 -4
- data/modules/emscripten/system/include/emscripten/vector.h +29 -1
- data/modules/emscripten/system/include/emscripten/xmmintrin.h +131 -0
- data/modules/emscripten/system/include/libcxx/CREDITS.TXT +9 -1
- data/modules/emscripten/system/include/libcxx/__bit_reference +8 -8
- data/modules/emscripten/system/include/libcxx/__config +95 -17
- data/modules/emscripten/system/include/libcxx/__debug +25 -4
- data/modules/emscripten/system/include/libcxx/__functional_03 +7 -7
- data/modules/emscripten/system/include/libcxx/__functional_base +169 -9
- data/modules/emscripten/system/include/libcxx/__functional_base_03 +1 -1
- data/modules/emscripten/system/include/libcxx/__hash_table +25 -25
- data/modules/emscripten/system/include/libcxx/__locale +21 -19
- data/modules/emscripten/system/include/libcxx/__mutex_base +2 -33
- data/modules/emscripten/system/include/libcxx/__split_buffer +9 -9
- data/modules/emscripten/system/include/libcxx/__std_stream +14 -0
- data/modules/emscripten/system/include/libcxx/__tree +35 -26
- data/modules/emscripten/system/include/libcxx/__tuple +15 -15
- data/modules/emscripten/system/include/libcxx/__tuple_03 +2 -2
- data/modules/emscripten/system/include/libcxx/__undef_min_max +8 -0
- data/modules/emscripten/system/include/libcxx/algorithm +121 -110
- data/modules/emscripten/system/include/libcxx/array +15 -15
- data/modules/emscripten/system/include/libcxx/bitset +4 -4
- data/modules/emscripten/system/include/libcxx/chrono +51 -17
- data/modules/emscripten/system/include/libcxx/cmath +25 -23
- data/modules/emscripten/system/include/libcxx/codecvt +21 -18
- data/modules/emscripten/system/include/libcxx/complex +48 -7
- data/modules/emscripten/system/include/libcxx/cstddef +1 -1
- data/modules/emscripten/system/include/libcxx/cstdio +8 -1
- data/modules/emscripten/system/include/libcxx/cstdlib +1 -1
- data/modules/emscripten/system/include/libcxx/cwchar +1 -1
- data/modules/emscripten/system/include/libcxx/deque +26 -12
- data/modules/emscripten/system/include/libcxx/dynarray +311 -0
- data/modules/emscripten/system/include/libcxx/exception +4 -4
- data/modules/emscripten/system/include/libcxx/ext/__hash +3 -3
- data/modules/emscripten/system/include/libcxx/ext/hash_map +19 -15
- data/modules/emscripten/system/include/libcxx/ext/hash_set +7 -3
- data/modules/emscripten/system/include/libcxx/forward_list +33 -7
- data/modules/emscripten/system/include/libcxx/fstream +4 -4
- data/modules/emscripten/system/include/libcxx/functional +200 -170
- data/modules/emscripten/system/include/libcxx/future +83 -39
- data/modules/emscripten/system/include/libcxx/initializer_list +24 -11
- data/modules/emscripten/system/include/libcxx/iomanip +147 -0
- data/modules/emscripten/system/include/libcxx/ios +24 -16
- data/modules/emscripten/system/include/libcxx/iosfwd +19 -19
- data/modules/emscripten/system/include/libcxx/istream +13 -8
- data/modules/emscripten/system/include/libcxx/iterator +108 -417
- data/modules/emscripten/system/include/libcxx/limits +8 -4
- data/modules/emscripten/system/include/libcxx/list +28 -8
- data/modules/emscripten/system/include/libcxx/locale +153 -390
- data/modules/emscripten/system/include/libcxx/map +280 -100
- data/modules/emscripten/system/include/libcxx/memory +49 -97
- data/modules/emscripten/system/include/libcxx/mutex +2 -2
- data/modules/emscripten/system/include/libcxx/new +43 -14
- data/modules/emscripten/system/include/libcxx/numeric +2 -2
- data/modules/emscripten/system/include/libcxx/optional +697 -0
- data/modules/emscripten/system/include/libcxx/ostream +17 -8
- data/modules/emscripten/system/include/libcxx/queue +5 -5
- data/modules/emscripten/system/include/libcxx/random +53 -51
- data/modules/emscripten/system/include/libcxx/ratio +11 -11
- data/modules/emscripten/system/include/libcxx/readme.txt +1 -1
- data/modules/emscripten/system/include/libcxx/regex +23 -20
- data/modules/emscripten/system/include/libcxx/scoped_allocator +1 -1
- data/modules/emscripten/system/include/libcxx/set +166 -2
- data/modules/emscripten/system/include/libcxx/shared_mutex +419 -0
- data/modules/emscripten/system/include/libcxx/sstream +4 -4
- data/modules/emscripten/system/include/libcxx/stack +3 -3
- data/modules/emscripten/system/include/libcxx/streambuf +5 -5
- data/modules/emscripten/system/include/libcxx/string +372 -324
- data/modules/emscripten/system/include/libcxx/support/ibm/limits.h +99 -0
- data/modules/emscripten/system/include/libcxx/support/ibm/support.h +54 -0
- data/modules/emscripten/system/include/libcxx/support/ibm/xlocale.h +326 -0
- data/modules/emscripten/system/include/libcxx/support/win32/limits_win32.h +6 -6
- data/modules/emscripten/system/include/libcxx/support/win32/locale_win32.h +15 -15
- data/modules/emscripten/system/include/libcxx/support/win32/math_win32.h +2 -0
- data/modules/emscripten/system/include/libcxx/support/win32/support.h +6 -1
- data/modules/emscripten/system/include/libcxx/system_error +14 -8
- data/modules/emscripten/system/include/libcxx/thread +7 -8
- data/modules/emscripten/system/include/libcxx/tuple +29 -88
- data/modules/emscripten/system/include/libcxx/type_traits +253 -209
- data/modules/emscripten/system/include/libcxx/typeindex +3 -3
- data/modules/emscripten/system/include/libcxx/unordered_map +162 -101
- data/modules/emscripten/system/include/libcxx/unordered_set +79 -2
- data/modules/emscripten/system/include/libcxx/utility +20 -20
- data/modules/emscripten/system/include/libcxx/valarray +23 -23
- data/modules/emscripten/system/include/libcxx/vector +114 -91
- data/modules/emscripten/system/lib/libc/musl/src/regex/regcomp.c +3352 -0
- data/modules/emscripten/system/lib/libc/musl/src/regex/regerror.c +35 -0
- data/modules/emscripten/system/lib/libc/musl/src/regex/regexec.c +1011 -0
- data/modules/emscripten/system/lib/libc/musl/src/regex/tre-mem.c +158 -0
- data/modules/emscripten/system/lib/libc/musl/src/regex/tre.h +231 -0
- data/modules/emscripten/system/lib/libcextra.symbols +7 -0
- data/modules/emscripten/system/lib/libcxx/CREDITS.TXT +9 -1
- data/modules/emscripten/system/lib/libcxx/algorithm.cpp +1 -0
- data/modules/emscripten/system/lib/libcxx/debug.cpp +66 -42
- data/modules/emscripten/system/lib/libcxx/exception.cpp +88 -16
- data/modules/emscripten/system/lib/libcxx/future.cpp +6 -0
- data/modules/emscripten/system/lib/libcxx/ios.cpp +7 -2
- data/modules/emscripten/system/lib/libcxx/iostream.cpp +8 -8
- data/modules/emscripten/system/lib/libcxx/locale.cpp +38 -11
- data/modules/emscripten/system/lib/libcxx/mutex.cpp +3 -0
- data/modules/emscripten/system/lib/libcxx/new.cpp +44 -10
- data/modules/emscripten/system/lib/libcxx/optional.cpp +25 -0
- data/modules/emscripten/system/lib/libcxx/random.cpp +26 -0
- data/modules/emscripten/system/lib/libcxx/readme.txt +1 -1
- data/modules/emscripten/system/lib/libcxx/shared_mutex.cpp +101 -0
- data/modules/emscripten/system/lib/libcxx/stdexcept.cpp +11 -7
- data/modules/emscripten/system/lib/libcxx/string.cpp +3 -1
- data/modules/emscripten/system/lib/libcxx/strstream.cpp +7 -7
- data/modules/emscripten/system/lib/libcxx/support/win32/locale_win32.cpp +12 -13
- data/modules/emscripten/system/lib/libcxx/support/win32/support.cpp +33 -36
- data/modules/emscripten/system/lib/libcxx/symbols +187 -168
- data/modules/emscripten/system/lib/libcxx/system_error.cpp +1 -0
- data/modules/emscripten/system/lib/libcxx/thread.cpp +7 -3
- data/modules/emscripten/system/lib/libcxx/typeinfo.cpp +9 -6
- data/modules/emscripten/system/lib/libcxx/valarray.cpp +2 -0
- data/modules/emscripten/third_party/lzma.js/doit.bat +4 -0
- data/modules/emscripten/third_party/lzma.js/doit.sh +9 -2
- data/modules/emscripten/tools/cache.py +5 -7
- data/modules/emscripten/tools/cache.pyc +0 -0
- data/modules/emscripten/tools/eliminator/asm-eliminator-test-output.js +7 -0
- data/modules/emscripten/tools/eliminator/asm-eliminator-test.js +9 -1
- data/modules/emscripten/tools/eliminator/eliminator-test-output.js +3 -0
- data/modules/emscripten/tools/eliminator/eliminator-test.js +9 -1
- data/modules/emscripten/tools/file_packager.py +93 -50
- data/modules/emscripten/tools/js-optimizer.js +98 -48
- data/modules/emscripten/tools/js_optimizer.py +4 -4
- data/modules/emscripten/tools/js_optimizer.pyc +0 -0
- data/modules/emscripten/tools/jsrun.py +1 -1
- data/modules/emscripten/tools/jsrun.pyc +0 -0
- data/modules/emscripten/tools/response_file.py +6 -0
- data/modules/emscripten/tools/response_file.pyc +0 -0
- data/modules/emscripten/tools/settings_template_readonly.py +2 -0
- data/modules/emscripten/tools/shared.py +88 -34
- data/modules/emscripten/tools/shared.pyc +0 -0
- data/modules/emscripten/tools/split.py +21 -13
- data/modules/mruby/build_config.rb +7 -1
- data/modules/mruby/doc/compile/README.md +5 -9
- data/modules/mruby/include/mrbconf.h +5 -2
- data/modules/mruby/include/mruby/array.h +1 -0
- data/modules/mruby/include/mruby/compile.h +2 -4
- data/modules/mruby/include/mruby/dump.h +7 -16
- data/modules/mruby/include/mruby/hash.h +1 -1
- data/modules/mruby/include/mruby/irep.h +14 -2
- data/modules/mruby/include/mruby/khash.h +8 -7
- data/modules/mruby/include/mruby/string.h +1 -0
- data/modules/mruby/include/mruby/value.h +5 -2
- data/modules/mruby/include/mruby.h +12 -13
- data/modules/mruby/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c +16 -6
- data/modules/mruby/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c +18 -30
- data/modules/mruby/mrbgems/mruby-fiber/src/fiber.c +21 -0
- data/modules/mruby/mrbgems/mruby-math/src/math.c +1 -1
- data/modules/mruby/mrbgems/mruby-random/src/random.c +144 -47
- data/modules/mruby/mrbgems/mruby-random/test/random.rb +44 -0
- data/modules/mruby/mrbgems/mruby-struct/src/struct.c +5 -5
- data/modules/mruby/mrblib/numeric.rb +99 -33
- data/modules/mruby/src/array.c +11 -4
- data/modules/mruby/src/backtrace.c +2 -2
- data/modules/mruby/src/class.c +49 -30
- data/modules/mruby/src/codegen.c +131 -79
- data/modules/mruby/src/debug.c +1 -1
- data/modules/mruby/src/dump.c +213 -163
- data/modules/mruby/src/error.c +17 -17
- data/modules/mruby/src/error.h +1 -1
- data/modules/mruby/src/etc.c +10 -0
- data/modules/mruby/src/gc.c +35 -17
- data/modules/mruby/src/hash.c +5 -5
- data/modules/mruby/src/kernel.c +36 -14
- data/modules/mruby/src/load.c +238 -296
- data/modules/mruby/src/numeric.c +18 -98
- data/modules/mruby/src/object.c +3 -5
- data/modules/mruby/src/parse.y +63 -56
- data/modules/mruby/src/proc.c +8 -5
- data/modules/mruby/src/re.h +0 -1
- data/modules/mruby/src/state.c +65 -27
- data/modules/mruby/src/string.c +3 -31
- data/modules/mruby/src/symbol.c +3 -3
- data/modules/mruby/src/variable.c +12 -5
- data/modules/mruby/src/vm.c +90 -72
- data/modules/mruby/tasks/mruby_build.rake +10 -1
- data/modules/mruby/tasks/toolchains/gcc.rake +12 -2
- data/modules/mruby/tasks/toolchains/{vs2012.rake → visualcpp.rake} +1 -1
- data/modules/mruby/test/driver.c +3 -3
- data/modules/mruby/test/t/array.rb +5 -5
- data/modules/mruby/test/t/class.rb +14 -1
- data/modules/mruby/test/t/kernel.rb +4 -0
- data/modules/mruby/test/t/module.rb +4 -4
- data/modules/mruby/test/t/nameerror.rb +1 -1
- data/modules/mruby/tools/mrbc/mrbc.c +23 -17
- data/modules/mruby/travis_config.rb +10 -1
- metadata +28 -5
- data/modules/mruby/tasks/toolchains/vs2010.rake +0 -3
@@ -0,0 +1,3352 @@
|
|
1
|
+
/*
|
2
|
+
regcomp.c - TRE POSIX compatible regex compilation functions.
|
3
|
+
|
4
|
+
Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
|
5
|
+
All rights reserved.
|
6
|
+
|
7
|
+
Redistribution and use in source and binary forms, with or without
|
8
|
+
modification, are permitted provided that the following conditions
|
9
|
+
are met:
|
10
|
+
|
11
|
+
1. Redistributions of source code must retain the above copyright
|
12
|
+
notice, this list of conditions and the following disclaimer.
|
13
|
+
|
14
|
+
2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
notice, this list of conditions and the following disclaimer in the
|
16
|
+
documentation and/or other materials provided with the distribution.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
|
19
|
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
*/
|
31
|
+
|
32
|
+
#include <string.h>
|
33
|
+
#include <errno.h>
|
34
|
+
#include <stdlib.h>
|
35
|
+
#include <regex.h>
|
36
|
+
#include <limits.h>
|
37
|
+
#include <stdint.h>
|
38
|
+
|
39
|
+
#include "tre.h"
|
40
|
+
|
41
|
+
#include <assert.h>
|
42
|
+
|
43
|
+
/***********************************************************************
|
44
|
+
from tre-compile.h
|
45
|
+
***********************************************************************/
|
46
|
+
|
47
|
+
typedef struct {
|
48
|
+
int position;
|
49
|
+
int code_min;
|
50
|
+
int code_max;
|
51
|
+
int *tags;
|
52
|
+
int assertions;
|
53
|
+
tre_ctype_t class;
|
54
|
+
tre_ctype_t *neg_classes;
|
55
|
+
int backref;
|
56
|
+
} tre_pos_and_tags_t;
|
57
|
+
|
58
|
+
|
59
|
+
/***********************************************************************
|
60
|
+
from tre-ast.c and tre-ast.h
|
61
|
+
***********************************************************************/
|
62
|
+
|
63
|
+
/* The different AST node types. */
|
64
|
+
typedef enum {
|
65
|
+
LITERAL,
|
66
|
+
CATENATION,
|
67
|
+
ITERATION,
|
68
|
+
UNION
|
69
|
+
} tre_ast_type_t;
|
70
|
+
|
71
|
+
/* Special subtypes of TRE_LITERAL. */
|
72
|
+
#define EMPTY -1 /* Empty leaf (denotes empty string). */
|
73
|
+
#define ASSERTION -2 /* Assertion leaf. */
|
74
|
+
#define TAG -3 /* Tag leaf. */
|
75
|
+
#define BACKREF -4 /* Back reference leaf. */
|
76
|
+
|
77
|
+
#define IS_SPECIAL(x) ((x)->code_min < 0)
|
78
|
+
#define IS_EMPTY(x) ((x)->code_min == EMPTY)
|
79
|
+
#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
|
80
|
+
#define IS_TAG(x) ((x)->code_min == TAG)
|
81
|
+
#define IS_BACKREF(x) ((x)->code_min == BACKREF)
|
82
|
+
|
83
|
+
|
84
|
+
/* A generic AST node. All AST nodes consist of this node on the top
|
85
|
+
level with `obj' pointing to the actual content. */
|
86
|
+
typedef struct {
|
87
|
+
tre_ast_type_t type; /* Type of the node. */
|
88
|
+
void *obj; /* Pointer to actual node. */
|
89
|
+
int nullable;
|
90
|
+
int submatch_id;
|
91
|
+
int num_submatches;
|
92
|
+
int num_tags;
|
93
|
+
tre_pos_and_tags_t *firstpos;
|
94
|
+
tre_pos_and_tags_t *lastpos;
|
95
|
+
} tre_ast_node_t;
|
96
|
+
|
97
|
+
|
98
|
+
/* A "literal" node. These are created for assertions, back references,
|
99
|
+
tags, matching parameter settings, and all expressions that match one
|
100
|
+
character. */
|
101
|
+
typedef struct {
|
102
|
+
long code_min;
|
103
|
+
long code_max;
|
104
|
+
int position;
|
105
|
+
tre_ctype_t class;
|
106
|
+
tre_ctype_t *neg_classes;
|
107
|
+
} tre_literal_t;
|
108
|
+
|
109
|
+
/* A "catenation" node. These are created when two regexps are concatenated.
|
110
|
+
If there are more than one subexpressions in sequence, the `left' part
|
111
|
+
holds all but the last, and `right' part holds the last subexpression
|
112
|
+
(catenation is left associative). */
|
113
|
+
typedef struct {
|
114
|
+
tre_ast_node_t *left;
|
115
|
+
tre_ast_node_t *right;
|
116
|
+
} tre_catenation_t;
|
117
|
+
|
118
|
+
/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
|
119
|
+
operators. */
|
120
|
+
typedef struct {
|
121
|
+
/* Subexpression to match. */
|
122
|
+
tre_ast_node_t *arg;
|
123
|
+
/* Minimum number of consecutive matches. */
|
124
|
+
int min;
|
125
|
+
/* Maximum number of consecutive matches. */
|
126
|
+
int max;
|
127
|
+
/* If 0, match as many characters as possible, if 1 match as few as
|
128
|
+
possible. Note that this does not always mean the same thing as
|
129
|
+
matching as many/few repetitions as possible. */
|
130
|
+
unsigned int minimal:1;
|
131
|
+
} tre_iteration_t;
|
132
|
+
|
133
|
+
/* An "union" node. These are created for the "|" operator. */
|
134
|
+
typedef struct {
|
135
|
+
tre_ast_node_t *left;
|
136
|
+
tre_ast_node_t *right;
|
137
|
+
} tre_union_t;
|
138
|
+
|
139
|
+
static tre_ast_node_t *
|
140
|
+
tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
|
141
|
+
|
142
|
+
static tre_ast_node_t *
|
143
|
+
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
|
144
|
+
|
145
|
+
static tre_ast_node_t *
|
146
|
+
tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
|
147
|
+
int minimal);
|
148
|
+
|
149
|
+
static tre_ast_node_t *
|
150
|
+
tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
|
151
|
+
|
152
|
+
static tre_ast_node_t *
|
153
|
+
tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
|
154
|
+
tre_ast_node_t *right);
|
155
|
+
|
156
|
+
|
157
|
+
static tre_ast_node_t *
|
158
|
+
tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
|
159
|
+
{
|
160
|
+
tre_ast_node_t *node;
|
161
|
+
|
162
|
+
node = tre_mem_calloc(mem, sizeof(*node));
|
163
|
+
if (!node)
|
164
|
+
return NULL;
|
165
|
+
node->obj = tre_mem_calloc(mem, size);
|
166
|
+
if (!node->obj)
|
167
|
+
return NULL;
|
168
|
+
node->type = type;
|
169
|
+
node->nullable = -1;
|
170
|
+
node->submatch_id = -1;
|
171
|
+
|
172
|
+
return node;
|
173
|
+
}
|
174
|
+
|
175
|
+
static tre_ast_node_t *
|
176
|
+
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
|
177
|
+
{
|
178
|
+
tre_ast_node_t *node;
|
179
|
+
tre_literal_t *lit;
|
180
|
+
|
181
|
+
node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
|
182
|
+
if (!node)
|
183
|
+
return NULL;
|
184
|
+
lit = node->obj;
|
185
|
+
lit->code_min = code_min;
|
186
|
+
lit->code_max = code_max;
|
187
|
+
lit->position = position;
|
188
|
+
|
189
|
+
return node;
|
190
|
+
}
|
191
|
+
|
192
|
+
static tre_ast_node_t *
|
193
|
+
tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
|
194
|
+
int minimal)
|
195
|
+
{
|
196
|
+
tre_ast_node_t *node;
|
197
|
+
tre_iteration_t *iter;
|
198
|
+
|
199
|
+
node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
|
200
|
+
if (!node)
|
201
|
+
return NULL;
|
202
|
+
iter = node->obj;
|
203
|
+
iter->arg = arg;
|
204
|
+
iter->min = min;
|
205
|
+
iter->max = max;
|
206
|
+
iter->minimal = minimal;
|
207
|
+
node->num_submatches = arg->num_submatches;
|
208
|
+
|
209
|
+
return node;
|
210
|
+
}
|
211
|
+
|
212
|
+
static tre_ast_node_t *
|
213
|
+
tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
|
214
|
+
{
|
215
|
+
tre_ast_node_t *node;
|
216
|
+
|
217
|
+
node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
|
218
|
+
if (node == NULL)
|
219
|
+
return NULL;
|
220
|
+
((tre_union_t *)node->obj)->left = left;
|
221
|
+
((tre_union_t *)node->obj)->right = right;
|
222
|
+
node->num_submatches = left->num_submatches + right->num_submatches;
|
223
|
+
|
224
|
+
return node;
|
225
|
+
}
|
226
|
+
|
227
|
+
static tre_ast_node_t *
|
228
|
+
tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
|
229
|
+
tre_ast_node_t *right)
|
230
|
+
{
|
231
|
+
tre_ast_node_t *node;
|
232
|
+
|
233
|
+
node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
|
234
|
+
if (node == NULL)
|
235
|
+
return NULL;
|
236
|
+
((tre_catenation_t *)node->obj)->left = left;
|
237
|
+
((tre_catenation_t *)node->obj)->right = right;
|
238
|
+
node->num_submatches = left->num_submatches + right->num_submatches;
|
239
|
+
|
240
|
+
return node;
|
241
|
+
}
|
242
|
+
|
243
|
+
|
244
|
+
/***********************************************************************
|
245
|
+
from tre-stack.c and tre-stack.h
|
246
|
+
***********************************************************************/
|
247
|
+
|
248
|
+
typedef struct tre_stack_rec tre_stack_t;
|
249
|
+
|
250
|
+
/* Creates a new stack object. `size' is initial size in bytes, `max_size'
|
251
|
+
is maximum size, and `increment' specifies how much more space will be
|
252
|
+
allocated with realloc() if all space gets used up. Returns the stack
|
253
|
+
object or NULL if out of memory. */
|
254
|
+
static tre_stack_t *
|
255
|
+
tre_stack_new(int size, int max_size, int increment);
|
256
|
+
|
257
|
+
/* Frees the stack object. */
|
258
|
+
static void
|
259
|
+
tre_stack_destroy(tre_stack_t *s);
|
260
|
+
|
261
|
+
/* Returns the current number of objects in the stack. */
|
262
|
+
static int
|
263
|
+
tre_stack_num_objects(tre_stack_t *s);
|
264
|
+
|
265
|
+
/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
|
266
|
+
`value' on top of stack `s'. Returns REG_ESPACE if out of memory.
|
267
|
+
This tries to realloc() more space before failing if maximum size
|
268
|
+
has not yet been reached. Returns REG_OK if successful. */
|
269
|
+
#define declare_pushf(typetag, type) \
|
270
|
+
static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
|
271
|
+
|
272
|
+
declare_pushf(voidptr, void *);
|
273
|
+
declare_pushf(int, int);
|
274
|
+
|
275
|
+
/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
|
276
|
+
element off of stack `s' and returns it. The stack must not be
|
277
|
+
empty. */
|
278
|
+
#define declare_popf(typetag, type) \
|
279
|
+
static type tre_stack_pop_ ## typetag(tre_stack_t *s)
|
280
|
+
|
281
|
+
declare_popf(voidptr, void *);
|
282
|
+
declare_popf(int, int);
|
283
|
+
|
284
|
+
/* Just to save some typing. */
|
285
|
+
#define STACK_PUSH(s, typetag, value) \
|
286
|
+
do \
|
287
|
+
{ \
|
288
|
+
status = tre_stack_push_ ## typetag(s, value); \
|
289
|
+
} \
|
290
|
+
while (/*CONSTCOND*/0)
|
291
|
+
|
292
|
+
#define STACK_PUSHX(s, typetag, value) \
|
293
|
+
{ \
|
294
|
+
status = tre_stack_push_ ## typetag(s, value); \
|
295
|
+
if (status != REG_OK) \
|
296
|
+
break; \
|
297
|
+
}
|
298
|
+
|
299
|
+
#define STACK_PUSHR(s, typetag, value) \
|
300
|
+
{ \
|
301
|
+
reg_errcode_t _status; \
|
302
|
+
_status = tre_stack_push_ ## typetag(s, value); \
|
303
|
+
if (_status != REG_OK) \
|
304
|
+
return _status; \
|
305
|
+
}
|
306
|
+
|
307
|
+
union tre_stack_item {
|
308
|
+
void *voidptr_value;
|
309
|
+
int int_value;
|
310
|
+
};
|
311
|
+
|
312
|
+
struct tre_stack_rec {
|
313
|
+
int size;
|
314
|
+
int max_size;
|
315
|
+
int increment;
|
316
|
+
int ptr;
|
317
|
+
union tre_stack_item *stack;
|
318
|
+
};
|
319
|
+
|
320
|
+
|
321
|
+
static tre_stack_t *
|
322
|
+
tre_stack_new(int size, int max_size, int increment)
|
323
|
+
{
|
324
|
+
tre_stack_t *s;
|
325
|
+
|
326
|
+
s = xmalloc(sizeof(*s));
|
327
|
+
if (s != NULL)
|
328
|
+
{
|
329
|
+
s->stack = xmalloc(sizeof(*s->stack) * size);
|
330
|
+
if (s->stack == NULL)
|
331
|
+
{
|
332
|
+
xfree(s);
|
333
|
+
return NULL;
|
334
|
+
}
|
335
|
+
s->size = size;
|
336
|
+
s->max_size = max_size;
|
337
|
+
s->increment = increment;
|
338
|
+
s->ptr = 0;
|
339
|
+
}
|
340
|
+
return s;
|
341
|
+
}
|
342
|
+
|
343
|
+
static void
|
344
|
+
tre_stack_destroy(tre_stack_t *s)
|
345
|
+
{
|
346
|
+
xfree(s->stack);
|
347
|
+
xfree(s);
|
348
|
+
}
|
349
|
+
|
350
|
+
static int
|
351
|
+
tre_stack_num_objects(tre_stack_t *s)
|
352
|
+
{
|
353
|
+
return s->ptr;
|
354
|
+
}
|
355
|
+
|
356
|
+
static reg_errcode_t
|
357
|
+
tre_stack_push(tre_stack_t *s, union tre_stack_item value)
|
358
|
+
{
|
359
|
+
if (s->ptr < s->size)
|
360
|
+
{
|
361
|
+
s->stack[s->ptr] = value;
|
362
|
+
s->ptr++;
|
363
|
+
}
|
364
|
+
else
|
365
|
+
{
|
366
|
+
if (s->size >= s->max_size)
|
367
|
+
{
|
368
|
+
return REG_ESPACE;
|
369
|
+
}
|
370
|
+
else
|
371
|
+
{
|
372
|
+
union tre_stack_item *new_buffer;
|
373
|
+
int new_size;
|
374
|
+
new_size = s->size + s->increment;
|
375
|
+
if (new_size > s->max_size)
|
376
|
+
new_size = s->max_size;
|
377
|
+
new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
|
378
|
+
if (new_buffer == NULL)
|
379
|
+
{
|
380
|
+
return REG_ESPACE;
|
381
|
+
}
|
382
|
+
assert(new_size > s->size);
|
383
|
+
s->size = new_size;
|
384
|
+
s->stack = new_buffer;
|
385
|
+
tre_stack_push(s, value);
|
386
|
+
}
|
387
|
+
}
|
388
|
+
return REG_OK;
|
389
|
+
}
|
390
|
+
|
391
|
+
#define define_pushf(typetag, type) \
|
392
|
+
declare_pushf(typetag, type) { \
|
393
|
+
union tre_stack_item item; \
|
394
|
+
item.typetag ## _value = value; \
|
395
|
+
return tre_stack_push(s, item); \
|
396
|
+
}
|
397
|
+
|
398
|
+
define_pushf(int, int)
|
399
|
+
define_pushf(voidptr, void *)
|
400
|
+
|
401
|
+
#define define_popf(typetag, type) \
|
402
|
+
declare_popf(typetag, type) { \
|
403
|
+
return s->stack[--s->ptr].typetag ## _value; \
|
404
|
+
}
|
405
|
+
|
406
|
+
define_popf(int, int)
|
407
|
+
define_popf(voidptr, void *)
|
408
|
+
|
409
|
+
|
410
|
+
/***********************************************************************
|
411
|
+
from tre-parse.c and tre-parse.h
|
412
|
+
***********************************************************************/
|
413
|
+
|
414
|
+
/* Parse context. */
|
415
|
+
typedef struct {
|
416
|
+
/* Memory allocator. The AST is allocated using this. */
|
417
|
+
tre_mem_t mem;
|
418
|
+
/* Stack used for keeping track of regexp syntax. */
|
419
|
+
tre_stack_t *stack;
|
420
|
+
/* The parse result. */
|
421
|
+
tre_ast_node_t *result;
|
422
|
+
/* The regexp to parse and its length. */
|
423
|
+
const char *re;
|
424
|
+
/* The first character of the entire regexp. */
|
425
|
+
const char *re_start;
|
426
|
+
/* Current submatch ID. */
|
427
|
+
int submatch_id;
|
428
|
+
/* Current position (number of literal). */
|
429
|
+
int position;
|
430
|
+
/* The highest back reference or -1 if none seen so far. */
|
431
|
+
int max_backref;
|
432
|
+
/* This flag is set if the regexp uses approximate matching. */
|
433
|
+
int have_approx;
|
434
|
+
/* Compilation flags. */
|
435
|
+
int cflags;
|
436
|
+
/* If this flag is set the top-level submatch is not captured. */
|
437
|
+
int nofirstsub;
|
438
|
+
} tre_parse_ctx_t;
|
439
|
+
|
440
|
+
/* Parses a wide character regexp pattern into a syntax tree. This parser
|
441
|
+
handles both syntaxes (BRE and ERE), including the TRE extensions. */
|
442
|
+
static reg_errcode_t
|
443
|
+
tre_parse(tre_parse_ctx_t *ctx);
|
444
|
+
|
445
|
+
|
446
|
+
/*
|
447
|
+
This parser is just a simple recursive descent parser for POSIX.2
|
448
|
+
regexps. The parser supports both the obsolete default syntax and
|
449
|
+
the "extended" syntax, and some nonstandard extensions.
|
450
|
+
*/
|
451
|
+
|
452
|
+
/* Characters with special meanings in regexp syntax. */
|
453
|
+
#define CHAR_PIPE '|'
|
454
|
+
#define CHAR_LPAREN '('
|
455
|
+
#define CHAR_RPAREN ')'
|
456
|
+
#define CHAR_LBRACE '{'
|
457
|
+
#define CHAR_RBRACE '}'
|
458
|
+
#define CHAR_LBRACKET '['
|
459
|
+
#define CHAR_RBRACKET ']'
|
460
|
+
#define CHAR_MINUS '-'
|
461
|
+
#define CHAR_STAR '*'
|
462
|
+
#define CHAR_QUESTIONMARK '?'
|
463
|
+
#define CHAR_PLUS '+'
|
464
|
+
#define CHAR_PERIOD '.'
|
465
|
+
#define CHAR_COLON ':'
|
466
|
+
#define CHAR_EQUAL '='
|
467
|
+
#define CHAR_COMMA ','
|
468
|
+
#define CHAR_CARET '^'
|
469
|
+
#define CHAR_DOLLAR '$'
|
470
|
+
#define CHAR_BACKSLASH '\\'
|
471
|
+
#define CHAR_HASH '#'
|
472
|
+
#define CHAR_TILDE '~'
|
473
|
+
|
474
|
+
|
475
|
+
/* Some macros for expanding \w, \s, etc. */
|
476
|
+
static const struct tre_macro_struct {
|
477
|
+
const char c;
|
478
|
+
const char *expansion;
|
479
|
+
} tre_macros[] =
|
480
|
+
{ {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
|
481
|
+
{'f', "\f"}, {'a', "\a"}, {'e', "\033"},
|
482
|
+
{'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
|
483
|
+
{'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
|
484
|
+
{ 0, NULL }
|
485
|
+
};
|
486
|
+
|
487
|
+
|
488
|
+
/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
|
489
|
+
must have at least `len' items. Sets buf[0] to zero if the there
|
490
|
+
is no match in `tre_macros'. */
|
491
|
+
static const char *
|
492
|
+
tre_expand_macro(const char *regex)
|
493
|
+
{
|
494
|
+
int i;
|
495
|
+
|
496
|
+
if (!*regex)
|
497
|
+
return 0;
|
498
|
+
|
499
|
+
for (i = 0; tre_macros[i].expansion && tre_macros[i].c != *regex; i++);
|
500
|
+
return tre_macros[i].expansion;
|
501
|
+
}
|
502
|
+
|
503
|
+
static reg_errcode_t
|
504
|
+
tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
|
505
|
+
tre_ast_node_t ***items)
|
506
|
+
{
|
507
|
+
reg_errcode_t status;
|
508
|
+
tre_ast_node_t **array = *items;
|
509
|
+
/* Allocate more space if necessary. */
|
510
|
+
if (*i >= *max_i)
|
511
|
+
{
|
512
|
+
tre_ast_node_t **new_items;
|
513
|
+
/* If the array is already 1024 items large, give up -- there's
|
514
|
+
probably an error in the regexp (e.g. not a '\0' terminated
|
515
|
+
string and missing ']') */
|
516
|
+
if (*max_i > 1024)
|
517
|
+
return REG_ESPACE;
|
518
|
+
*max_i *= 2;
|
519
|
+
new_items = xrealloc(array, sizeof(*items) * *max_i);
|
520
|
+
if (new_items == NULL)
|
521
|
+
return REG_ESPACE;
|
522
|
+
*items = array = new_items;
|
523
|
+
}
|
524
|
+
array[*i] = tre_ast_new_literal(mem, min, max, -1);
|
525
|
+
status = array[*i] == NULL ? REG_ESPACE : REG_OK;
|
526
|
+
(*i)++;
|
527
|
+
return status;
|
528
|
+
}
|
529
|
+
|
530
|
+
|
531
|
+
static int
|
532
|
+
tre_compare_items(const void *a, const void *b)
|
533
|
+
{
|
534
|
+
const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
|
535
|
+
const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
|
536
|
+
tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
|
537
|
+
int a_min = l_a->code_min, b_min = l_b->code_min;
|
538
|
+
|
539
|
+
if (a_min < b_min)
|
540
|
+
return -1;
|
541
|
+
else if (a_min > b_min)
|
542
|
+
return 1;
|
543
|
+
else
|
544
|
+
return 0;
|
545
|
+
}
|
546
|
+
|
547
|
+
/* Maximum number of character classes that can occur in a negated bracket
|
548
|
+
expression. */
|
549
|
+
#define MAX_NEG_CLASSES 64
|
550
|
+
|
551
|
+
/* Maximum length of character class names. */
|
552
|
+
#define MAX_CLASS_NAME
|
553
|
+
|
554
|
+
static reg_errcode_t
|
555
|
+
tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
|
556
|
+
tre_ctype_t neg_classes[], int *num_neg_classes,
|
557
|
+
tre_ast_node_t ***items, int *num_items,
|
558
|
+
int *items_size)
|
559
|
+
{
|
560
|
+
const char *re = ctx->re;
|
561
|
+
reg_errcode_t status = REG_OK;
|
562
|
+
tre_ctype_t class = (tre_ctype_t)0;
|
563
|
+
int i = *num_items;
|
564
|
+
int max_i = *items_size;
|
565
|
+
int skip;
|
566
|
+
|
567
|
+
/* Build an array of the items in the bracket expression. */
|
568
|
+
while (status == REG_OK)
|
569
|
+
{
|
570
|
+
skip = 0;
|
571
|
+
if (!*re)
|
572
|
+
{
|
573
|
+
status = REG_EBRACK;
|
574
|
+
}
|
575
|
+
else if (*re == CHAR_RBRACKET && re > ctx->re)
|
576
|
+
{
|
577
|
+
re++;
|
578
|
+
break;
|
579
|
+
}
|
580
|
+
else
|
581
|
+
{
|
582
|
+
tre_cint_t min = 0, max = 0;
|
583
|
+
wchar_t wc;
|
584
|
+
int clen = mbtowc(&wc, re, -1);
|
585
|
+
|
586
|
+
if (clen<0) clen=1, wc=WEOF;
|
587
|
+
|
588
|
+
class = (tre_ctype_t)0;
|
589
|
+
if (*(re + clen) == CHAR_MINUS && *(re + clen + 1) != CHAR_RBRACKET)
|
590
|
+
{
|
591
|
+
min = wc;
|
592
|
+
re += clen+1;
|
593
|
+
clen = mbtowc(&wc, re, -1);
|
594
|
+
if (clen<0) clen=1, wc=WEOF;
|
595
|
+
max = wc;
|
596
|
+
re += clen;
|
597
|
+
/* XXX - Should use collation order instead of encoding values
|
598
|
+
in character ranges. */
|
599
|
+
if (min > max)
|
600
|
+
status = REG_ERANGE;
|
601
|
+
}
|
602
|
+
else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
|
603
|
+
status = REG_ECOLLATE;
|
604
|
+
else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
|
605
|
+
status = REG_ECOLLATE;
|
606
|
+
else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
|
607
|
+
{
|
608
|
+
char tmp_str[64];
|
609
|
+
const char *endptr = re + 2;
|
610
|
+
int len;
|
611
|
+
while (*endptr && *endptr != CHAR_COLON)
|
612
|
+
endptr++;
|
613
|
+
if (*endptr)
|
614
|
+
{
|
615
|
+
len = MIN(endptr - re - 2, 63);
|
616
|
+
strncpy(tmp_str, re + 2, len);
|
617
|
+
tmp_str[len] = '\0';
|
618
|
+
class = tre_ctype(tmp_str);
|
619
|
+
if (!class)
|
620
|
+
status = REG_ECTYPE;
|
621
|
+
re = endptr + 2;
|
622
|
+
}
|
623
|
+
else
|
624
|
+
status = REG_ECTYPE;
|
625
|
+
min = 0;
|
626
|
+
max = TRE_CHAR_MAX;
|
627
|
+
}
|
628
|
+
else
|
629
|
+
{
|
630
|
+
if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
|
631
|
+
&& ctx->re != re)
|
632
|
+
/* Two ranges are not allowed to share and endpoint. */
|
633
|
+
status = REG_ERANGE;
|
634
|
+
min = max = wc;
|
635
|
+
re += clen;
|
636
|
+
}
|
637
|
+
|
638
|
+
if (status != REG_OK)
|
639
|
+
break;
|
640
|
+
|
641
|
+
if (class && negate)
|
642
|
+
if (*num_neg_classes >= MAX_NEG_CLASSES)
|
643
|
+
status = REG_ESPACE;
|
644
|
+
else
|
645
|
+
neg_classes[(*num_neg_classes)++] = class;
|
646
|
+
else if (!skip)
|
647
|
+
{
|
648
|
+
status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
|
649
|
+
if (status != REG_OK)
|
650
|
+
break;
|
651
|
+
((tre_literal_t*)((*items)[i-1])->obj)->class = class;
|
652
|
+
}
|
653
|
+
|
654
|
+
/* Add opposite-case counterpoints if REG_ICASE is present.
|
655
|
+
This is broken if there are more than two "same" characters. */
|
656
|
+
if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
|
657
|
+
{
|
658
|
+
tre_cint_t cmin, ccurr;
|
659
|
+
|
660
|
+
while (min <= max)
|
661
|
+
{
|
662
|
+
if (tre_islower(min))
|
663
|
+
{
|
664
|
+
cmin = ccurr = tre_toupper(min++);
|
665
|
+
while (tre_islower(min) && tre_toupper(min) == ccurr + 1
|
666
|
+
&& min <= max)
|
667
|
+
ccurr = tre_toupper(min++);
|
668
|
+
status = tre_new_item(ctx->mem, cmin, ccurr,
|
669
|
+
&i, &max_i, items);
|
670
|
+
}
|
671
|
+
else if (tre_isupper(min))
|
672
|
+
{
|
673
|
+
cmin = ccurr = tre_tolower(min++);
|
674
|
+
while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
|
675
|
+
&& min <= max)
|
676
|
+
ccurr = tre_tolower(min++);
|
677
|
+
status = tre_new_item(ctx->mem, cmin, ccurr,
|
678
|
+
&i, &max_i, items);
|
679
|
+
}
|
680
|
+
else min++;
|
681
|
+
if (status != REG_OK)
|
682
|
+
break;
|
683
|
+
}
|
684
|
+
if (status != REG_OK)
|
685
|
+
break;
|
686
|
+
}
|
687
|
+
}
|
688
|
+
}
|
689
|
+
*num_items = i;
|
690
|
+
*items_size = max_i;
|
691
|
+
ctx->re = re;
|
692
|
+
return status;
|
693
|
+
}
|
694
|
+
|
695
|
+
static reg_errcode_t
|
696
|
+
tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
|
697
|
+
{
|
698
|
+
tre_ast_node_t *node = NULL;
|
699
|
+
int negate = 0;
|
700
|
+
reg_errcode_t status = REG_OK;
|
701
|
+
tre_ast_node_t **items, *u, *n;
|
702
|
+
int i = 0, j, max_i = 32, curr_max, curr_min;
|
703
|
+
tre_ctype_t neg_classes[MAX_NEG_CLASSES];
|
704
|
+
int num_neg_classes = 0;
|
705
|
+
|
706
|
+
/* Start off with an array of `max_i' elements. */
|
707
|
+
items = xmalloc(sizeof(*items) * max_i);
|
708
|
+
if (items == NULL)
|
709
|
+
return REG_ESPACE;
|
710
|
+
|
711
|
+
if (*ctx->re == CHAR_CARET)
|
712
|
+
{
|
713
|
+
negate = 1;
|
714
|
+
ctx->re++;
|
715
|
+
}
|
716
|
+
|
717
|
+
status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
|
718
|
+
&items, &i, &max_i);
|
719
|
+
|
720
|
+
if (status != REG_OK)
|
721
|
+
goto parse_bracket_done;
|
722
|
+
|
723
|
+
/* Sort the array if we need to negate it. */
|
724
|
+
if (negate)
|
725
|
+
qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
|
726
|
+
|
727
|
+
curr_max = curr_min = 0;
|
728
|
+
/* Build a union of the items in the array, negated if necessary. */
|
729
|
+
for (j = 0; j < i && status == REG_OK; j++)
|
730
|
+
{
|
731
|
+
int min, max;
|
732
|
+
tre_literal_t *l = items[j]->obj;
|
733
|
+
min = l->code_min;
|
734
|
+
max = l->code_max;
|
735
|
+
|
736
|
+
if (negate)
|
737
|
+
{
|
738
|
+
if (min < curr_max)
|
739
|
+
{
|
740
|
+
/* Overlap. */
|
741
|
+
curr_max = MAX(max + 1, curr_max);
|
742
|
+
l = NULL;
|
743
|
+
}
|
744
|
+
else
|
745
|
+
{
|
746
|
+
/* No overlap. */
|
747
|
+
curr_max = min - 1;
|
748
|
+
if (curr_max >= curr_min)
|
749
|
+
{
|
750
|
+
l->code_min = curr_min;
|
751
|
+
l->code_max = curr_max;
|
752
|
+
}
|
753
|
+
else
|
754
|
+
{
|
755
|
+
l = NULL;
|
756
|
+
}
|
757
|
+
curr_min = curr_max = max + 1;
|
758
|
+
}
|
759
|
+
}
|
760
|
+
|
761
|
+
if (l != NULL)
|
762
|
+
{
|
763
|
+
int k;
|
764
|
+
l->position = ctx->position;
|
765
|
+
if (num_neg_classes > 0)
|
766
|
+
{
|
767
|
+
l->neg_classes = tre_mem_alloc(ctx->mem,
|
768
|
+
(sizeof(l->neg_classes)
|
769
|
+
* (num_neg_classes + 1)));
|
770
|
+
if (l->neg_classes == NULL)
|
771
|
+
{
|
772
|
+
status = REG_ESPACE;
|
773
|
+
break;
|
774
|
+
}
|
775
|
+
for (k = 0; k < num_neg_classes; k++)
|
776
|
+
l->neg_classes[k] = neg_classes[k];
|
777
|
+
l->neg_classes[k] = (tre_ctype_t)0;
|
778
|
+
}
|
779
|
+
else
|
780
|
+
l->neg_classes = NULL;
|
781
|
+
if (node == NULL)
|
782
|
+
node = items[j];
|
783
|
+
else
|
784
|
+
{
|
785
|
+
u = tre_ast_new_union(ctx->mem, node, items[j]);
|
786
|
+
if (u == NULL)
|
787
|
+
status = REG_ESPACE;
|
788
|
+
node = u;
|
789
|
+
}
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
if (status != REG_OK)
|
794
|
+
goto parse_bracket_done;
|
795
|
+
|
796
|
+
if (negate)
|
797
|
+
{
|
798
|
+
int k;
|
799
|
+
n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
|
800
|
+
if (n == NULL)
|
801
|
+
status = REG_ESPACE;
|
802
|
+
else
|
803
|
+
{
|
804
|
+
tre_literal_t *l = n->obj;
|
805
|
+
if (num_neg_classes > 0)
|
806
|
+
{
|
807
|
+
l->neg_classes = tre_mem_alloc(ctx->mem,
|
808
|
+
(sizeof(l->neg_classes)
|
809
|
+
* (num_neg_classes + 1)));
|
810
|
+
if (l->neg_classes == NULL)
|
811
|
+
{
|
812
|
+
status = REG_ESPACE;
|
813
|
+
goto parse_bracket_done;
|
814
|
+
}
|
815
|
+
for (k = 0; k < num_neg_classes; k++)
|
816
|
+
l->neg_classes[k] = neg_classes[k];
|
817
|
+
l->neg_classes[k] = (tre_ctype_t)0;
|
818
|
+
}
|
819
|
+
else
|
820
|
+
l->neg_classes = NULL;
|
821
|
+
if (node == NULL)
|
822
|
+
node = n;
|
823
|
+
else
|
824
|
+
{
|
825
|
+
u = tre_ast_new_union(ctx->mem, node, n);
|
826
|
+
if (u == NULL)
|
827
|
+
status = REG_ESPACE;
|
828
|
+
node = u;
|
829
|
+
}
|
830
|
+
}
|
831
|
+
}
|
832
|
+
|
833
|
+
if (status != REG_OK)
|
834
|
+
goto parse_bracket_done;
|
835
|
+
|
836
|
+
#ifdef TRE_DEBUG
|
837
|
+
tre_ast_print(node);
|
838
|
+
#endif /* TRE_DEBUG */
|
839
|
+
|
840
|
+
parse_bracket_done:
|
841
|
+
xfree(items);
|
842
|
+
ctx->position++;
|
843
|
+
*result = node;
|
844
|
+
return status;
|
845
|
+
}
|
846
|
+
|
847
|
+
|
848
|
+
/* Parses a positive decimal integer. Returns -1 if the string does not
|
849
|
+
contain a valid number. */
|
850
|
+
static int
|
851
|
+
tre_parse_int(const char **regex)
|
852
|
+
{
|
853
|
+
int num = -1;
|
854
|
+
const char *r = *regex;
|
855
|
+
while (*r-'0'<10U)
|
856
|
+
{
|
857
|
+
if (num < 0)
|
858
|
+
num = 0;
|
859
|
+
num = num * 10 + *r - '0';
|
860
|
+
r++;
|
861
|
+
}
|
862
|
+
*regex = r;
|
863
|
+
return num;
|
864
|
+
}
|
865
|
+
|
866
|
+
|
867
|
+
static reg_errcode_t
|
868
|
+
tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
|
869
|
+
{
|
870
|
+
int min, max;
|
871
|
+
const char *r = ctx->re;
|
872
|
+
int minimal = 0;
|
873
|
+
|
874
|
+
/* Parse number (minimum repetition count). */
|
875
|
+
min = -1;
|
876
|
+
if (*r >= '0' && *r <= '9') {
|
877
|
+
min = tre_parse_int(&r);
|
878
|
+
}
|
879
|
+
|
880
|
+
/* Parse comma and second number (maximum repetition count). */
|
881
|
+
max = min;
|
882
|
+
if (*r == CHAR_COMMA)
|
883
|
+
{
|
884
|
+
r++;
|
885
|
+
max = tre_parse_int(&r);
|
886
|
+
}
|
887
|
+
|
888
|
+
/* Check that the repeat counts are sane. */
|
889
|
+
if ((max >= 0 && min > max) || max > RE_DUP_MAX)
|
890
|
+
return REG_BADBR;
|
891
|
+
|
892
|
+
/* Missing }. */
|
893
|
+
if (!*r)
|
894
|
+
return REG_EBRACE;
|
895
|
+
|
896
|
+
/* Empty contents of {}. */
|
897
|
+
if (r == ctx->re)
|
898
|
+
return REG_BADBR;
|
899
|
+
|
900
|
+
/* Parse the ending '}' or '\}'.*/
|
901
|
+
if (ctx->cflags & REG_EXTENDED)
|
902
|
+
{
|
903
|
+
if (*r != CHAR_RBRACE)
|
904
|
+
return REG_BADBR;
|
905
|
+
r++;
|
906
|
+
}
|
907
|
+
else
|
908
|
+
{
|
909
|
+
if (*r != CHAR_BACKSLASH || *(r + 1) != CHAR_RBRACE)
|
910
|
+
return REG_BADBR;
|
911
|
+
r += 2;
|
912
|
+
}
|
913
|
+
|
914
|
+
/* Create the AST node(s). */
|
915
|
+
if (min == 0 && max == 0)
|
916
|
+
{
|
917
|
+
*result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
|
918
|
+
if (*result == NULL)
|
919
|
+
return REG_ESPACE;
|
920
|
+
}
|
921
|
+
else
|
922
|
+
{
|
923
|
+
if (min < 0 && max < 0)
|
924
|
+
/* Only approximate parameters set, no repetitions. */
|
925
|
+
min = max = 1;
|
926
|
+
|
927
|
+
*result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
|
928
|
+
if (!*result)
|
929
|
+
return REG_ESPACE;
|
930
|
+
}
|
931
|
+
|
932
|
+
ctx->re = r;
|
933
|
+
return REG_OK;
|
934
|
+
}
|
935
|
+
|
936
|
+
typedef enum {
|
937
|
+
PARSE_RE = 0,
|
938
|
+
PARSE_ATOM,
|
939
|
+
PARSE_MARK_FOR_SUBMATCH,
|
940
|
+
PARSE_BRANCH,
|
941
|
+
PARSE_PIECE,
|
942
|
+
PARSE_CATENATION,
|
943
|
+
PARSE_POST_CATENATION,
|
944
|
+
PARSE_UNION,
|
945
|
+
PARSE_POST_UNION,
|
946
|
+
PARSE_POSTFIX,
|
947
|
+
PARSE_RESTORE_CFLAGS
|
948
|
+
} tre_parse_re_stack_symbol_t;
|
949
|
+
|
950
|
+
|
951
|
+
static reg_errcode_t
|
952
|
+
tre_parse(tre_parse_ctx_t *ctx)
|
953
|
+
{
|
954
|
+
tre_ast_node_t *result = NULL;
|
955
|
+
tre_parse_re_stack_symbol_t symbol;
|
956
|
+
reg_errcode_t status = REG_OK;
|
957
|
+
tre_stack_t *stack = ctx->stack;
|
958
|
+
int bottom = tre_stack_num_objects(stack);
|
959
|
+
int depth = 0;
|
960
|
+
wchar_t wc;
|
961
|
+
int clen;
|
962
|
+
|
963
|
+
if (!ctx->nofirstsub)
|
964
|
+
{
|
965
|
+
STACK_PUSH(stack, int, ctx->submatch_id);
|
966
|
+
STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
|
967
|
+
ctx->submatch_id++;
|
968
|
+
}
|
969
|
+
STACK_PUSH(stack, int, PARSE_RE);
|
970
|
+
ctx->re_start = ctx->re;
|
971
|
+
|
972
|
+
|
973
|
+
/* The following is basically just a recursive descent parser. I use
|
974
|
+
an explicit stack instead of recursive functions mostly because of
|
975
|
+
two reasons: compatibility with systems which have an overflowable
|
976
|
+
call stack, and efficiency (both in lines of code and speed). */
|
977
|
+
while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
|
978
|
+
{
|
979
|
+
if (status != REG_OK)
|
980
|
+
break;
|
981
|
+
symbol = tre_stack_pop_int(stack);
|
982
|
+
switch (symbol)
|
983
|
+
{
|
984
|
+
case PARSE_RE:
|
985
|
+
/* Parse a full regexp. A regexp is one or more branches,
|
986
|
+
separated by the union operator `|'. */
|
987
|
+
if (ctx->cflags & REG_EXTENDED)
|
988
|
+
STACK_PUSHX(stack, int, PARSE_UNION);
|
989
|
+
STACK_PUSHX(stack, int, PARSE_BRANCH);
|
990
|
+
break;
|
991
|
+
|
992
|
+
case PARSE_BRANCH:
|
993
|
+
/* Parse a branch. A branch is one or more pieces, concatenated.
|
994
|
+
A piece is an atom possibly followed by a postfix operator. */
|
995
|
+
STACK_PUSHX(stack, int, PARSE_CATENATION);
|
996
|
+
STACK_PUSHX(stack, int, PARSE_PIECE);
|
997
|
+
break;
|
998
|
+
|
999
|
+
case PARSE_PIECE:
|
1000
|
+
/* Parse a piece. A piece is an atom possibly followed by one
|
1001
|
+
or more postfix operators. */
|
1002
|
+
STACK_PUSHX(stack, int, PARSE_POSTFIX);
|
1003
|
+
STACK_PUSHX(stack, int, PARSE_ATOM);
|
1004
|
+
break;
|
1005
|
+
|
1006
|
+
case PARSE_CATENATION:
|
1007
|
+
/* If the expression has not ended, parse another piece. */
|
1008
|
+
{
|
1009
|
+
tre_char_t c;
|
1010
|
+
if (!*ctx->re)
|
1011
|
+
break;
|
1012
|
+
c = *ctx->re;
|
1013
|
+
if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
|
1014
|
+
break;
|
1015
|
+
if ((ctx->cflags & REG_EXTENDED
|
1016
|
+
&& c == CHAR_RPAREN && depth > 0)
|
1017
|
+
|| (!(ctx->cflags & REG_EXTENDED)
|
1018
|
+
&& (c == CHAR_BACKSLASH
|
1019
|
+
&& *(ctx->re + 1) == CHAR_RPAREN)))
|
1020
|
+
{
|
1021
|
+
if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
|
1022
|
+
status = REG_EPAREN;
|
1023
|
+
depth--;
|
1024
|
+
if (!(ctx->cflags & REG_EXTENDED))
|
1025
|
+
ctx->re += 2;
|
1026
|
+
break;
|
1027
|
+
}
|
1028
|
+
|
1029
|
+
{
|
1030
|
+
/* Default case, left associative concatenation. */
|
1031
|
+
STACK_PUSHX(stack, int, PARSE_CATENATION);
|
1032
|
+
STACK_PUSHX(stack, voidptr, result);
|
1033
|
+
STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
|
1034
|
+
STACK_PUSHX(stack, int, PARSE_PIECE);
|
1035
|
+
}
|
1036
|
+
break;
|
1037
|
+
}
|
1038
|
+
|
1039
|
+
case PARSE_POST_CATENATION:
|
1040
|
+
{
|
1041
|
+
tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
|
1042
|
+
tre_ast_node_t *tmp_node;
|
1043
|
+
tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
|
1044
|
+
if (!tmp_node)
|
1045
|
+
return REG_ESPACE;
|
1046
|
+
result = tmp_node;
|
1047
|
+
break;
|
1048
|
+
}
|
1049
|
+
|
1050
|
+
case PARSE_UNION:
|
1051
|
+
switch (*ctx->re)
|
1052
|
+
{
|
1053
|
+
case CHAR_PIPE:
|
1054
|
+
STACK_PUSHX(stack, int, PARSE_UNION);
|
1055
|
+
STACK_PUSHX(stack, voidptr, result);
|
1056
|
+
STACK_PUSHX(stack, int, PARSE_POST_UNION);
|
1057
|
+
STACK_PUSHX(stack, int, PARSE_BRANCH);
|
1058
|
+
ctx->re++;
|
1059
|
+
break;
|
1060
|
+
|
1061
|
+
case CHAR_RPAREN:
|
1062
|
+
ctx->re++;
|
1063
|
+
break;
|
1064
|
+
|
1065
|
+
default:
|
1066
|
+
break;
|
1067
|
+
}
|
1068
|
+
break;
|
1069
|
+
|
1070
|
+
case PARSE_POST_UNION:
|
1071
|
+
{
|
1072
|
+
tre_ast_node_t *tmp_node;
|
1073
|
+
tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
|
1074
|
+
tmp_node = tre_ast_new_union(ctx->mem, tree, result);
|
1075
|
+
if (!tmp_node)
|
1076
|
+
return REG_ESPACE;
|
1077
|
+
result = tmp_node;
|
1078
|
+
break;
|
1079
|
+
}
|
1080
|
+
|
1081
|
+
case PARSE_POSTFIX:
|
1082
|
+
/* Parse postfix operators. */
|
1083
|
+
switch (*ctx->re)
|
1084
|
+
{
|
1085
|
+
case CHAR_PLUS:
|
1086
|
+
case CHAR_QUESTIONMARK:
|
1087
|
+
if (!(ctx->cflags & REG_EXTENDED))
|
1088
|
+
break;
|
1089
|
+
/*FALLTHROUGH*/
|
1090
|
+
case CHAR_STAR:
|
1091
|
+
{
|
1092
|
+
tre_ast_node_t *tmp_node;
|
1093
|
+
int minimal = 0;
|
1094
|
+
int rep_min = 0;
|
1095
|
+
int rep_max = -1;
|
1096
|
+
|
1097
|
+
if (*ctx->re == CHAR_PLUS)
|
1098
|
+
rep_min = 1;
|
1099
|
+
if (*ctx->re == CHAR_QUESTIONMARK)
|
1100
|
+
rep_max = 1;
|
1101
|
+
|
1102
|
+
ctx->re++;
|
1103
|
+
tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
|
1104
|
+
minimal);
|
1105
|
+
if (tmp_node == NULL)
|
1106
|
+
return REG_ESPACE;
|
1107
|
+
result = tmp_node;
|
1108
|
+
STACK_PUSHX(stack, int, PARSE_POSTFIX);
|
1109
|
+
}
|
1110
|
+
break;
|
1111
|
+
|
1112
|
+
case CHAR_BACKSLASH:
|
1113
|
+
/* "\{" is special without REG_EXTENDED */
|
1114
|
+
if (!(ctx->cflags & REG_EXTENDED)
|
1115
|
+
&& *(ctx->re + 1) == CHAR_LBRACE)
|
1116
|
+
{
|
1117
|
+
ctx->re++;
|
1118
|
+
goto parse_brace;
|
1119
|
+
}
|
1120
|
+
else
|
1121
|
+
break;
|
1122
|
+
|
1123
|
+
case CHAR_LBRACE:
|
1124
|
+
/* "{" is literal without REG_EXTENDED */
|
1125
|
+
if (!(ctx->cflags & REG_EXTENDED))
|
1126
|
+
break;
|
1127
|
+
|
1128
|
+
parse_brace:
|
1129
|
+
ctx->re++;
|
1130
|
+
|
1131
|
+
status = tre_parse_bound(ctx, &result);
|
1132
|
+
if (status != REG_OK)
|
1133
|
+
return status;
|
1134
|
+
STACK_PUSHX(stack, int, PARSE_POSTFIX);
|
1135
|
+
break;
|
1136
|
+
}
|
1137
|
+
break;
|
1138
|
+
|
1139
|
+
case PARSE_ATOM:
|
1140
|
+
/* Parse an atom. An atom is a regular expression enclosed in `()',
|
1141
|
+
an empty set of `()', a bracket expression, `.', `^', `$',
|
1142
|
+
a `\' followed by a character, or a single character. */
|
1143
|
+
|
1144
|
+
switch (*ctx->re)
|
1145
|
+
{
|
1146
|
+
case CHAR_LPAREN: /* parenthesized subexpression */
|
1147
|
+
|
1148
|
+
if (ctx->cflags & REG_EXTENDED)
|
1149
|
+
{
|
1150
|
+
lparen:
|
1151
|
+
depth++;
|
1152
|
+
{
|
1153
|
+
ctx->re++;
|
1154
|
+
/* First parse a whole RE, then mark the resulting tree
|
1155
|
+
for submatching. */
|
1156
|
+
STACK_PUSHX(stack, int, ctx->submatch_id);
|
1157
|
+
STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
|
1158
|
+
STACK_PUSHX(stack, int, PARSE_RE);
|
1159
|
+
ctx->submatch_id++;
|
1160
|
+
}
|
1161
|
+
}
|
1162
|
+
else
|
1163
|
+
goto parse_literal;
|
1164
|
+
break;
|
1165
|
+
|
1166
|
+
case CHAR_LBRACKET: /* bracket expression */
|
1167
|
+
ctx->re++;
|
1168
|
+
status = tre_parse_bracket(ctx, &result);
|
1169
|
+
if (status != REG_OK)
|
1170
|
+
return status;
|
1171
|
+
break;
|
1172
|
+
|
1173
|
+
case CHAR_BACKSLASH:
|
1174
|
+
/* If this is "\(" or "\)" chew off the backslash and
|
1175
|
+
try again. */
|
1176
|
+
if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_LPAREN)
|
1177
|
+
{
|
1178
|
+
ctx->re++;
|
1179
|
+
goto lparen;
|
1180
|
+
}
|
1181
|
+
if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_RPAREN)
|
1182
|
+
{
|
1183
|
+
goto empty_atom;
|
1184
|
+
}
|
1185
|
+
|
1186
|
+
/* If a macro is used, parse the expanded macro recursively. */
|
1187
|
+
{
|
1188
|
+
const char *buf = tre_expand_macro(ctx->re + 1);
|
1189
|
+
if (buf)
|
1190
|
+
{
|
1191
|
+
tre_parse_ctx_t subctx;
|
1192
|
+
memcpy(&subctx, ctx, sizeof(subctx));
|
1193
|
+
subctx.re = buf;
|
1194
|
+
subctx.nofirstsub = 1;
|
1195
|
+
status = tre_parse(&subctx);
|
1196
|
+
if (status != REG_OK)
|
1197
|
+
return status;
|
1198
|
+
ctx->re += 2;
|
1199
|
+
ctx->position = subctx.position;
|
1200
|
+
result = subctx.result;
|
1201
|
+
break;
|
1202
|
+
}
|
1203
|
+
}
|
1204
|
+
|
1205
|
+
if (!ctx->re[1])
|
1206
|
+
/* Trailing backslash. */
|
1207
|
+
return REG_EESCAPE;
|
1208
|
+
|
1209
|
+
ctx->re++;
|
1210
|
+
switch (*ctx->re)
|
1211
|
+
{
|
1212
|
+
case 'b':
|
1213
|
+
result = tre_ast_new_literal(ctx->mem, ASSERTION,
|
1214
|
+
ASSERT_AT_WB, -1);
|
1215
|
+
ctx->re++;
|
1216
|
+
break;
|
1217
|
+
case 'B':
|
1218
|
+
result = tre_ast_new_literal(ctx->mem, ASSERTION,
|
1219
|
+
ASSERT_AT_WB_NEG, -1);
|
1220
|
+
ctx->re++;
|
1221
|
+
break;
|
1222
|
+
case '<':
|
1223
|
+
result = tre_ast_new_literal(ctx->mem, ASSERTION,
|
1224
|
+
ASSERT_AT_BOW, -1);
|
1225
|
+
ctx->re++;
|
1226
|
+
break;
|
1227
|
+
case '>':
|
1228
|
+
result = tre_ast_new_literal(ctx->mem, ASSERTION,
|
1229
|
+
ASSERT_AT_EOW, -1);
|
1230
|
+
ctx->re++;
|
1231
|
+
break;
|
1232
|
+
case 'x':
|
1233
|
+
ctx->re++;
|
1234
|
+
if (ctx->re[0] != CHAR_LBRACE)
|
1235
|
+
{
|
1236
|
+
/* 8 bit hex char. */
|
1237
|
+
char tmp[3] = {0, 0, 0};
|
1238
|
+
long val;
|
1239
|
+
|
1240
|
+
if (tre_isxdigit(ctx->re[0]))
|
1241
|
+
{
|
1242
|
+
tmp[0] = (char)ctx->re[0];
|
1243
|
+
ctx->re++;
|
1244
|
+
}
|
1245
|
+
if (tre_isxdigit(ctx->re[0]))
|
1246
|
+
{
|
1247
|
+
tmp[1] = (char)ctx->re[0];
|
1248
|
+
ctx->re++;
|
1249
|
+
}
|
1250
|
+
val = strtol(tmp, NULL, 16);
|
1251
|
+
result = tre_ast_new_literal(ctx->mem, (int)val,
|
1252
|
+
(int)val, ctx->position);
|
1253
|
+
ctx->position++;
|
1254
|
+
break;
|
1255
|
+
}
|
1256
|
+
else if (*ctx->re)
|
1257
|
+
{
|
1258
|
+
/* Wide char. */
|
1259
|
+
char tmp[32];
|
1260
|
+
long val;
|
1261
|
+
int i = 0;
|
1262
|
+
ctx->re++;
|
1263
|
+
while (*ctx->re && i < sizeof tmp)
|
1264
|
+
{
|
1265
|
+
if (ctx->re[0] == CHAR_RBRACE)
|
1266
|
+
break;
|
1267
|
+
if (tre_isxdigit(ctx->re[0]))
|
1268
|
+
{
|
1269
|
+
tmp[i] = (char)ctx->re[0];
|
1270
|
+
i++;
|
1271
|
+
ctx->re++;
|
1272
|
+
continue;
|
1273
|
+
}
|
1274
|
+
return REG_EBRACE;
|
1275
|
+
}
|
1276
|
+
ctx->re++;
|
1277
|
+
tmp[i] = 0;
|
1278
|
+
val = strtol(tmp, NULL, 16);
|
1279
|
+
result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
|
1280
|
+
ctx->position);
|
1281
|
+
ctx->position++;
|
1282
|
+
break;
|
1283
|
+
}
|
1284
|
+
/*FALLTHROUGH*/
|
1285
|
+
|
1286
|
+
default:
|
1287
|
+
if (tre_isdigit(*ctx->re))
|
1288
|
+
{
|
1289
|
+
/* Back reference. */
|
1290
|
+
int val = *ctx->re - '0';
|
1291
|
+
result = tre_ast_new_literal(ctx->mem, BACKREF, val,
|
1292
|
+
ctx->position);
|
1293
|
+
if (result == NULL)
|
1294
|
+
return REG_ESPACE;
|
1295
|
+
ctx->position++;
|
1296
|
+
ctx->max_backref = MAX(val, ctx->max_backref);
|
1297
|
+
ctx->re++;
|
1298
|
+
}
|
1299
|
+
else
|
1300
|
+
{
|
1301
|
+
/* Escaped character. */
|
1302
|
+
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
|
1303
|
+
ctx->position);
|
1304
|
+
ctx->position++;
|
1305
|
+
ctx->re++;
|
1306
|
+
}
|
1307
|
+
break;
|
1308
|
+
}
|
1309
|
+
if (result == NULL)
|
1310
|
+
return REG_ESPACE;
|
1311
|
+
break;
|
1312
|
+
|
1313
|
+
case CHAR_PERIOD: /* the any-symbol */
|
1314
|
+
if (ctx->cflags & REG_NEWLINE)
|
1315
|
+
{
|
1316
|
+
tre_ast_node_t *tmp1;
|
1317
|
+
tre_ast_node_t *tmp2;
|
1318
|
+
tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n' - 1,
|
1319
|
+
ctx->position);
|
1320
|
+
if (!tmp1)
|
1321
|
+
return REG_ESPACE;
|
1322
|
+
tmp2 = tre_ast_new_literal(ctx->mem, '\n' + 1, TRE_CHAR_MAX,
|
1323
|
+
ctx->position + 1);
|
1324
|
+
if (!tmp2)
|
1325
|
+
return REG_ESPACE;
|
1326
|
+
result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
|
1327
|
+
if (!result)
|
1328
|
+
return REG_ESPACE;
|
1329
|
+
ctx->position += 2;
|
1330
|
+
}
|
1331
|
+
else
|
1332
|
+
{
|
1333
|
+
result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
|
1334
|
+
ctx->position);
|
1335
|
+
if (!result)
|
1336
|
+
return REG_ESPACE;
|
1337
|
+
ctx->position++;
|
1338
|
+
}
|
1339
|
+
ctx->re++;
|
1340
|
+
break;
|
1341
|
+
|
1342
|
+
case CHAR_CARET: /* beginning of line assertion */
|
1343
|
+
/* '^' has a special meaning everywhere in EREs, and at
|
1344
|
+
beginning of BRE. */
|
1345
|
+
if (ctx->cflags & REG_EXTENDED
|
1346
|
+
|| ctx->re == ctx->re_start)
|
1347
|
+
{
|
1348
|
+
if (!(ctx->cflags & REG_EXTENDED))
|
1349
|
+
STACK_PUSHX(stack, int, PARSE_CATENATION);
|
1350
|
+
result = tre_ast_new_literal(ctx->mem, ASSERTION,
|
1351
|
+
ASSERT_AT_BOL, -1);
|
1352
|
+
if (result == NULL)
|
1353
|
+
return REG_ESPACE;
|
1354
|
+
ctx->re++;
|
1355
|
+
}
|
1356
|
+
else
|
1357
|
+
goto parse_literal;
|
1358
|
+
break;
|
1359
|
+
|
1360
|
+
case CHAR_DOLLAR: /* end of line assertion. */
|
1361
|
+
/* '$' is special everywhere in EREs, and in the end of the
|
1362
|
+
string in BREs. */
|
1363
|
+
if (ctx->cflags & REG_EXTENDED
|
1364
|
+
|| !*(ctx->re + 1))
|
1365
|
+
{
|
1366
|
+
result = tre_ast_new_literal(ctx->mem, ASSERTION,
|
1367
|
+
ASSERT_AT_EOL, -1);
|
1368
|
+
if (result == NULL)
|
1369
|
+
return REG_ESPACE;
|
1370
|
+
ctx->re++;
|
1371
|
+
}
|
1372
|
+
else
|
1373
|
+
goto parse_literal;
|
1374
|
+
break;
|
1375
|
+
|
1376
|
+
case CHAR_RPAREN:
|
1377
|
+
if (!depth)
|
1378
|
+
goto parse_literal;
|
1379
|
+
case CHAR_STAR:
|
1380
|
+
case CHAR_PIPE:
|
1381
|
+
case CHAR_LBRACE:
|
1382
|
+
case CHAR_PLUS:
|
1383
|
+
case CHAR_QUESTIONMARK:
|
1384
|
+
if (!(ctx->cflags & REG_EXTENDED))
|
1385
|
+
goto parse_literal;
|
1386
|
+
|
1387
|
+
case 0:
|
1388
|
+
empty_atom:
|
1389
|
+
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
|
1390
|
+
if (!result)
|
1391
|
+
return REG_ESPACE;
|
1392
|
+
break;
|
1393
|
+
|
1394
|
+
default:
|
1395
|
+
parse_literal:
|
1396
|
+
|
1397
|
+
clen = mbtowc(&wc, ctx->re, -1);
|
1398
|
+
if (clen<0) clen=1, wc=WEOF;
|
1399
|
+
|
1400
|
+
/* Note that we can't use an tre_isalpha() test here, since there
|
1401
|
+
may be characters which are alphabetic but neither upper or
|
1402
|
+
lower case. */
|
1403
|
+
if (ctx->cflags & REG_ICASE
|
1404
|
+
&& (tre_isupper(wc) || tre_islower(wc)))
|
1405
|
+
{
|
1406
|
+
tre_ast_node_t *tmp1;
|
1407
|
+
tre_ast_node_t *tmp2;
|
1408
|
+
|
1409
|
+
/* XXX - Can there be more than one opposite-case
|
1410
|
+
counterpoints for some character in some locale? Or
|
1411
|
+
more than two characters which all should be regarded
|
1412
|
+
the same character if case is ignored? If yes, there
|
1413
|
+
does not seem to be a portable way to detect it. I guess
|
1414
|
+
that at least for multi-character collating elements there
|
1415
|
+
could be several opposite-case counterpoints, but they
|
1416
|
+
cannot be supported portably anyway. */
|
1417
|
+
tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc),
|
1418
|
+
tre_toupper(wc),
|
1419
|
+
ctx->position);
|
1420
|
+
if (!tmp1)
|
1421
|
+
return REG_ESPACE;
|
1422
|
+
tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc),
|
1423
|
+
tre_tolower(wc),
|
1424
|
+
ctx->position);
|
1425
|
+
if (!tmp2)
|
1426
|
+
return REG_ESPACE;
|
1427
|
+
result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
|
1428
|
+
if (!result)
|
1429
|
+
return REG_ESPACE;
|
1430
|
+
}
|
1431
|
+
else
|
1432
|
+
{
|
1433
|
+
result = tre_ast_new_literal(ctx->mem, wc, wc,
|
1434
|
+
ctx->position);
|
1435
|
+
if (!result)
|
1436
|
+
return REG_ESPACE;
|
1437
|
+
}
|
1438
|
+
ctx->position++;
|
1439
|
+
ctx->re += clen;
|
1440
|
+
break;
|
1441
|
+
}
|
1442
|
+
break;
|
1443
|
+
|
1444
|
+
case PARSE_MARK_FOR_SUBMATCH:
|
1445
|
+
{
|
1446
|
+
int submatch_id = tre_stack_pop_int(stack);
|
1447
|
+
|
1448
|
+
if (result->submatch_id >= 0)
|
1449
|
+
{
|
1450
|
+
tre_ast_node_t *n, *tmp_node;
|
1451
|
+
n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
|
1452
|
+
if (n == NULL)
|
1453
|
+
return REG_ESPACE;
|
1454
|
+
tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
|
1455
|
+
if (tmp_node == NULL)
|
1456
|
+
return REG_ESPACE;
|
1457
|
+
tmp_node->num_submatches = result->num_submatches;
|
1458
|
+
result = tmp_node;
|
1459
|
+
}
|
1460
|
+
result->submatch_id = submatch_id;
|
1461
|
+
result->num_submatches++;
|
1462
|
+
break;
|
1463
|
+
}
|
1464
|
+
|
1465
|
+
case PARSE_RESTORE_CFLAGS:
|
1466
|
+
ctx->cflags = tre_stack_pop_int(stack);
|
1467
|
+
break;
|
1468
|
+
|
1469
|
+
default:
|
1470
|
+
assert(0);
|
1471
|
+
break;
|
1472
|
+
}
|
1473
|
+
}
|
1474
|
+
|
1475
|
+
/* Check for missing closing parentheses. */
|
1476
|
+
if (depth > 0)
|
1477
|
+
return REG_EPAREN;
|
1478
|
+
|
1479
|
+
if (status == REG_OK)
|
1480
|
+
ctx->result = result;
|
1481
|
+
|
1482
|
+
return status;
|
1483
|
+
}
|
1484
|
+
|
1485
|
+
|
1486
|
+
|
1487
|
+
/***********************************************************************
|
1488
|
+
from tre-compile.c
|
1489
|
+
***********************************************************************/
|
1490
|
+
|
1491
|
+
|
1492
|
+
/*
|
1493
|
+
TODO:
|
1494
|
+
- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
|
1495
|
+
function calls.
|
1496
|
+
*/
|
1497
|
+
|
1498
|
+
/*
|
1499
|
+
Algorithms to setup tags so that submatch addressing can be done.
|
1500
|
+
*/
|
1501
|
+
|
1502
|
+
|
1503
|
+
/* Inserts a catenation node to the root of the tree given in `node'.
|
1504
|
+
As the left child a new tag with number `tag_id' to `node' is added,
|
1505
|
+
and the right child is the old root. */
|
1506
|
+
static reg_errcode_t
|
1507
|
+
tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
|
1508
|
+
{
|
1509
|
+
tre_catenation_t *c;
|
1510
|
+
|
1511
|
+
c = tre_mem_alloc(mem, sizeof(*c));
|
1512
|
+
if (c == NULL)
|
1513
|
+
return REG_ESPACE;
|
1514
|
+
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
|
1515
|
+
if (c->left == NULL)
|
1516
|
+
return REG_ESPACE;
|
1517
|
+
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
|
1518
|
+
if (c->right == NULL)
|
1519
|
+
return REG_ESPACE;
|
1520
|
+
|
1521
|
+
c->right->obj = node->obj;
|
1522
|
+
c->right->type = node->type;
|
1523
|
+
c->right->nullable = -1;
|
1524
|
+
c->right->submatch_id = -1;
|
1525
|
+
c->right->firstpos = NULL;
|
1526
|
+
c->right->lastpos = NULL;
|
1527
|
+
c->right->num_tags = 0;
|
1528
|
+
node->obj = c;
|
1529
|
+
node->type = CATENATION;
|
1530
|
+
return REG_OK;
|
1531
|
+
}
|
1532
|
+
|
1533
|
+
/* Inserts a catenation node to the root of the tree given in `node'.
|
1534
|
+
As the right child a new tag with number `tag_id' to `node' is added,
|
1535
|
+
and the left child is the old root. */
|
1536
|
+
static reg_errcode_t
|
1537
|
+
tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
|
1538
|
+
{
|
1539
|
+
tre_catenation_t *c;
|
1540
|
+
|
1541
|
+
c = tre_mem_alloc(mem, sizeof(*c));
|
1542
|
+
if (c == NULL)
|
1543
|
+
return REG_ESPACE;
|
1544
|
+
c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
|
1545
|
+
if (c->right == NULL)
|
1546
|
+
return REG_ESPACE;
|
1547
|
+
c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
|
1548
|
+
if (c->left == NULL)
|
1549
|
+
return REG_ESPACE;
|
1550
|
+
|
1551
|
+
c->left->obj = node->obj;
|
1552
|
+
c->left->type = node->type;
|
1553
|
+
c->left->nullable = -1;
|
1554
|
+
c->left->submatch_id = -1;
|
1555
|
+
c->left->firstpos = NULL;
|
1556
|
+
c->left->lastpos = NULL;
|
1557
|
+
c->left->num_tags = 0;
|
1558
|
+
node->obj = c;
|
1559
|
+
node->type = CATENATION;
|
1560
|
+
return REG_OK;
|
1561
|
+
}
|
1562
|
+
|
1563
|
+
typedef enum {
|
1564
|
+
ADDTAGS_RECURSE,
|
1565
|
+
ADDTAGS_AFTER_ITERATION,
|
1566
|
+
ADDTAGS_AFTER_UNION_LEFT,
|
1567
|
+
ADDTAGS_AFTER_UNION_RIGHT,
|
1568
|
+
ADDTAGS_AFTER_CAT_LEFT,
|
1569
|
+
ADDTAGS_AFTER_CAT_RIGHT,
|
1570
|
+
ADDTAGS_SET_SUBMATCH_END
|
1571
|
+
} tre_addtags_symbol_t;
|
1572
|
+
|
1573
|
+
|
1574
|
+
typedef struct {
|
1575
|
+
int tag;
|
1576
|
+
int next_tag;
|
1577
|
+
} tre_tag_states_t;
|
1578
|
+
|
1579
|
+
|
1580
|
+
/* Go through `regset' and set submatch data for submatches that are
|
1581
|
+
using this tag. */
|
1582
|
+
static void
|
1583
|
+
tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
|
1584
|
+
{
|
1585
|
+
int i;
|
1586
|
+
|
1587
|
+
for (i = 0; regset[i] >= 0; i++)
|
1588
|
+
{
|
1589
|
+
int id = regset[i] / 2;
|
1590
|
+
int start = !(regset[i] % 2);
|
1591
|
+
if (start)
|
1592
|
+
tnfa->submatch_data[id].so_tag = tag;
|
1593
|
+
else
|
1594
|
+
tnfa->submatch_data[id].eo_tag = tag;
|
1595
|
+
}
|
1596
|
+
regset[0] = -1;
|
1597
|
+
}
|
1598
|
+
|
1599
|
+
|
1600
|
+
/* Adds tags to appropriate locations in the parse tree in `tree', so that
|
1601
|
+
subexpressions marked for submatch addressing can be traced. */
|
1602
|
+
static reg_errcode_t
|
1603
|
+
tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
|
1604
|
+
tre_tnfa_t *tnfa)
|
1605
|
+
{
|
1606
|
+
reg_errcode_t status = REG_OK;
|
1607
|
+
tre_addtags_symbol_t symbol;
|
1608
|
+
tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
|
1609
|
+
int bottom = tre_stack_num_objects(stack);
|
1610
|
+
/* True for first pass (counting number of needed tags) */
|
1611
|
+
int first_pass = (mem == NULL || tnfa == NULL);
|
1612
|
+
int *regset, *orig_regset;
|
1613
|
+
int num_tags = 0; /* Total number of tags. */
|
1614
|
+
int num_minimals = 0; /* Number of special minimal tags. */
|
1615
|
+
int tag = 0; /* The tag that is to be added next. */
|
1616
|
+
int next_tag = 1; /* Next tag to use after this one. */
|
1617
|
+
int *parents; /* Stack of submatches the current submatch is
|
1618
|
+
contained in. */
|
1619
|
+
int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
|
1620
|
+
tre_tag_states_t *saved_states;
|
1621
|
+
|
1622
|
+
tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
|
1623
|
+
if (!first_pass)
|
1624
|
+
{
|
1625
|
+
tnfa->end_tag = 0;
|
1626
|
+
tnfa->minimal_tags[0] = -1;
|
1627
|
+
}
|
1628
|
+
|
1629
|
+
regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
|
1630
|
+
if (regset == NULL)
|
1631
|
+
return REG_ESPACE;
|
1632
|
+
regset[0] = -1;
|
1633
|
+
orig_regset = regset;
|
1634
|
+
|
1635
|
+
parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
|
1636
|
+
if (parents == NULL)
|
1637
|
+
{
|
1638
|
+
xfree(regset);
|
1639
|
+
return REG_ESPACE;
|
1640
|
+
}
|
1641
|
+
parents[0] = -1;
|
1642
|
+
|
1643
|
+
saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
|
1644
|
+
if (saved_states == NULL)
|
1645
|
+
{
|
1646
|
+
xfree(regset);
|
1647
|
+
xfree(parents);
|
1648
|
+
return REG_ESPACE;
|
1649
|
+
}
|
1650
|
+
else
|
1651
|
+
{
|
1652
|
+
unsigned int i;
|
1653
|
+
for (i = 0; i <= tnfa->num_submatches; i++)
|
1654
|
+
saved_states[i].tag = -1;
|
1655
|
+
}
|
1656
|
+
|
1657
|
+
STACK_PUSH(stack, voidptr, node);
|
1658
|
+
STACK_PUSH(stack, int, ADDTAGS_RECURSE);
|
1659
|
+
|
1660
|
+
while (tre_stack_num_objects(stack) > bottom)
|
1661
|
+
{
|
1662
|
+
if (status != REG_OK)
|
1663
|
+
break;
|
1664
|
+
|
1665
|
+
symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
|
1666
|
+
switch (symbol)
|
1667
|
+
{
|
1668
|
+
|
1669
|
+
case ADDTAGS_SET_SUBMATCH_END:
|
1670
|
+
{
|
1671
|
+
int id = tre_stack_pop_int(stack);
|
1672
|
+
int i;
|
1673
|
+
|
1674
|
+
/* Add end of this submatch to regset. */
|
1675
|
+
for (i = 0; regset[i] >= 0; i++);
|
1676
|
+
regset[i] = id * 2 + 1;
|
1677
|
+
regset[i + 1] = -1;
|
1678
|
+
|
1679
|
+
/* Pop this submatch from the parents stack. */
|
1680
|
+
for (i = 0; parents[i] >= 0; i++);
|
1681
|
+
parents[i - 1] = -1;
|
1682
|
+
break;
|
1683
|
+
}
|
1684
|
+
|
1685
|
+
case ADDTAGS_RECURSE:
|
1686
|
+
node = tre_stack_pop_voidptr(stack);
|
1687
|
+
|
1688
|
+
if (node->submatch_id >= 0)
|
1689
|
+
{
|
1690
|
+
int id = node->submatch_id;
|
1691
|
+
int i;
|
1692
|
+
|
1693
|
+
|
1694
|
+
/* Add start of this submatch to regset. */
|
1695
|
+
for (i = 0; regset[i] >= 0; i++);
|
1696
|
+
regset[i] = id * 2;
|
1697
|
+
regset[i + 1] = -1;
|
1698
|
+
|
1699
|
+
if (!first_pass)
|
1700
|
+
{
|
1701
|
+
for (i = 0; parents[i] >= 0; i++);
|
1702
|
+
tnfa->submatch_data[id].parents = NULL;
|
1703
|
+
if (i > 0)
|
1704
|
+
{
|
1705
|
+
int *p = xmalloc(sizeof(*p) * (i + 1));
|
1706
|
+
if (p == NULL)
|
1707
|
+
{
|
1708
|
+
status = REG_ESPACE;
|
1709
|
+
break;
|
1710
|
+
}
|
1711
|
+
assert(tnfa->submatch_data[id].parents == NULL);
|
1712
|
+
tnfa->submatch_data[id].parents = p;
|
1713
|
+
for (i = 0; parents[i] >= 0; i++)
|
1714
|
+
p[i] = parents[i];
|
1715
|
+
p[i] = -1;
|
1716
|
+
}
|
1717
|
+
}
|
1718
|
+
|
1719
|
+
/* Add end of this submatch to regset after processing this
|
1720
|
+
node. */
|
1721
|
+
STACK_PUSHX(stack, int, node->submatch_id);
|
1722
|
+
STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
|
1723
|
+
}
|
1724
|
+
|
1725
|
+
switch (node->type)
|
1726
|
+
{
|
1727
|
+
case LITERAL:
|
1728
|
+
{
|
1729
|
+
tre_literal_t *lit = node->obj;
|
1730
|
+
|
1731
|
+
if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
|
1732
|
+
{
|
1733
|
+
int i;
|
1734
|
+
if (regset[0] >= 0)
|
1735
|
+
{
|
1736
|
+
/* Regset is not empty, so add a tag before the
|
1737
|
+
literal or backref. */
|
1738
|
+
if (!first_pass)
|
1739
|
+
{
|
1740
|
+
status = tre_add_tag_left(mem, node, tag);
|
1741
|
+
tnfa->tag_directions[tag] = direction;
|
1742
|
+
if (minimal_tag >= 0)
|
1743
|
+
{
|
1744
|
+
for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
|
1745
|
+
tnfa->minimal_tags[i] = tag;
|
1746
|
+
tnfa->minimal_tags[i + 1] = minimal_tag;
|
1747
|
+
tnfa->minimal_tags[i + 2] = -1;
|
1748
|
+
minimal_tag = -1;
|
1749
|
+
num_minimals++;
|
1750
|
+
}
|
1751
|
+
tre_purge_regset(regset, tnfa, tag);
|
1752
|
+
}
|
1753
|
+
else
|
1754
|
+
{
|
1755
|
+
node->num_tags = 1;
|
1756
|
+
}
|
1757
|
+
|
1758
|
+
regset[0] = -1;
|
1759
|
+
tag = next_tag;
|
1760
|
+
num_tags++;
|
1761
|
+
next_tag++;
|
1762
|
+
}
|
1763
|
+
}
|
1764
|
+
else
|
1765
|
+
{
|
1766
|
+
assert(!IS_TAG(lit));
|
1767
|
+
}
|
1768
|
+
break;
|
1769
|
+
}
|
1770
|
+
case CATENATION:
|
1771
|
+
{
|
1772
|
+
tre_catenation_t *cat = node->obj;
|
1773
|
+
tre_ast_node_t *left = cat->left;
|
1774
|
+
tre_ast_node_t *right = cat->right;
|
1775
|
+
int reserved_tag = -1;
|
1776
|
+
|
1777
|
+
|
1778
|
+
/* After processing right child. */
|
1779
|
+
STACK_PUSHX(stack, voidptr, node);
|
1780
|
+
STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
|
1781
|
+
|
1782
|
+
/* Process right child. */
|
1783
|
+
STACK_PUSHX(stack, voidptr, right);
|
1784
|
+
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
|
1785
|
+
|
1786
|
+
/* After processing left child. */
|
1787
|
+
STACK_PUSHX(stack, int, next_tag + left->num_tags);
|
1788
|
+
if (left->num_tags > 0 && right->num_tags > 0)
|
1789
|
+
{
|
1790
|
+
/* Reserve the next tag to the right child. */
|
1791
|
+
reserved_tag = next_tag;
|
1792
|
+
next_tag++;
|
1793
|
+
}
|
1794
|
+
STACK_PUSHX(stack, int, reserved_tag);
|
1795
|
+
STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
|
1796
|
+
|
1797
|
+
/* Process left child. */
|
1798
|
+
STACK_PUSHX(stack, voidptr, left);
|
1799
|
+
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
|
1800
|
+
|
1801
|
+
}
|
1802
|
+
break;
|
1803
|
+
case ITERATION:
|
1804
|
+
{
|
1805
|
+
tre_iteration_t *iter = node->obj;
|
1806
|
+
|
1807
|
+
if (first_pass)
|
1808
|
+
{
|
1809
|
+
STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
|
1810
|
+
}
|
1811
|
+
else
|
1812
|
+
{
|
1813
|
+
STACK_PUSHX(stack, int, tag);
|
1814
|
+
STACK_PUSHX(stack, int, iter->minimal);
|
1815
|
+
}
|
1816
|
+
STACK_PUSHX(stack, voidptr, node);
|
1817
|
+
STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
|
1818
|
+
|
1819
|
+
STACK_PUSHX(stack, voidptr, iter->arg);
|
1820
|
+
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
|
1821
|
+
|
1822
|
+
/* Regset is not empty, so add a tag here. */
|
1823
|
+
if (regset[0] >= 0 || iter->minimal)
|
1824
|
+
{
|
1825
|
+
if (!first_pass)
|
1826
|
+
{
|
1827
|
+
int i;
|
1828
|
+
status = tre_add_tag_left(mem, node, tag);
|
1829
|
+
if (iter->minimal)
|
1830
|
+
tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
|
1831
|
+
else
|
1832
|
+
tnfa->tag_directions[tag] = direction;
|
1833
|
+
if (minimal_tag >= 0)
|
1834
|
+
{
|
1835
|
+
for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
|
1836
|
+
tnfa->minimal_tags[i] = tag;
|
1837
|
+
tnfa->minimal_tags[i + 1] = minimal_tag;
|
1838
|
+
tnfa->minimal_tags[i + 2] = -1;
|
1839
|
+
minimal_tag = -1;
|
1840
|
+
num_minimals++;
|
1841
|
+
}
|
1842
|
+
tre_purge_regset(regset, tnfa, tag);
|
1843
|
+
}
|
1844
|
+
|
1845
|
+
regset[0] = -1;
|
1846
|
+
tag = next_tag;
|
1847
|
+
num_tags++;
|
1848
|
+
next_tag++;
|
1849
|
+
}
|
1850
|
+
direction = TRE_TAG_MINIMIZE;
|
1851
|
+
}
|
1852
|
+
break;
|
1853
|
+
case UNION:
|
1854
|
+
{
|
1855
|
+
tre_union_t *uni = node->obj;
|
1856
|
+
tre_ast_node_t *left = uni->left;
|
1857
|
+
tre_ast_node_t *right = uni->right;
|
1858
|
+
int left_tag;
|
1859
|
+
int right_tag;
|
1860
|
+
|
1861
|
+
if (regset[0] >= 0)
|
1862
|
+
{
|
1863
|
+
left_tag = next_tag;
|
1864
|
+
right_tag = next_tag + 1;
|
1865
|
+
}
|
1866
|
+
else
|
1867
|
+
{
|
1868
|
+
left_tag = tag;
|
1869
|
+
right_tag = next_tag;
|
1870
|
+
}
|
1871
|
+
|
1872
|
+
/* After processing right child. */
|
1873
|
+
STACK_PUSHX(stack, int, right_tag);
|
1874
|
+
STACK_PUSHX(stack, int, left_tag);
|
1875
|
+
STACK_PUSHX(stack, voidptr, regset);
|
1876
|
+
STACK_PUSHX(stack, int, regset[0] >= 0);
|
1877
|
+
STACK_PUSHX(stack, voidptr, node);
|
1878
|
+
STACK_PUSHX(stack, voidptr, right);
|
1879
|
+
STACK_PUSHX(stack, voidptr, left);
|
1880
|
+
STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
|
1881
|
+
|
1882
|
+
/* Process right child. */
|
1883
|
+
STACK_PUSHX(stack, voidptr, right);
|
1884
|
+
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
|
1885
|
+
|
1886
|
+
/* After processing left child. */
|
1887
|
+
STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
|
1888
|
+
|
1889
|
+
/* Process left child. */
|
1890
|
+
STACK_PUSHX(stack, voidptr, left);
|
1891
|
+
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
|
1892
|
+
|
1893
|
+
/* Regset is not empty, so add a tag here. */
|
1894
|
+
if (regset[0] >= 0)
|
1895
|
+
{
|
1896
|
+
if (!first_pass)
|
1897
|
+
{
|
1898
|
+
int i;
|
1899
|
+
status = tre_add_tag_left(mem, node, tag);
|
1900
|
+
tnfa->tag_directions[tag] = direction;
|
1901
|
+
if (minimal_tag >= 0)
|
1902
|
+
{
|
1903
|
+
for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
|
1904
|
+
tnfa->minimal_tags[i] = tag;
|
1905
|
+
tnfa->minimal_tags[i + 1] = minimal_tag;
|
1906
|
+
tnfa->minimal_tags[i + 2] = -1;
|
1907
|
+
minimal_tag = -1;
|
1908
|
+
num_minimals++;
|
1909
|
+
}
|
1910
|
+
tre_purge_regset(regset, tnfa, tag);
|
1911
|
+
}
|
1912
|
+
|
1913
|
+
regset[0] = -1;
|
1914
|
+
tag = next_tag;
|
1915
|
+
num_tags++;
|
1916
|
+
next_tag++;
|
1917
|
+
}
|
1918
|
+
|
1919
|
+
if (node->num_submatches > 0)
|
1920
|
+
{
|
1921
|
+
/* The next two tags are reserved for markers. */
|
1922
|
+
next_tag++;
|
1923
|
+
tag = next_tag;
|
1924
|
+
next_tag++;
|
1925
|
+
}
|
1926
|
+
|
1927
|
+
break;
|
1928
|
+
}
|
1929
|
+
}
|
1930
|
+
|
1931
|
+
if (node->submatch_id >= 0)
|
1932
|
+
{
|
1933
|
+
int i;
|
1934
|
+
/* Push this submatch on the parents stack. */
|
1935
|
+
for (i = 0; parents[i] >= 0; i++);
|
1936
|
+
parents[i] = node->submatch_id;
|
1937
|
+
parents[i + 1] = -1;
|
1938
|
+
}
|
1939
|
+
|
1940
|
+
break; /* end case: ADDTAGS_RECURSE */
|
1941
|
+
|
1942
|
+
case ADDTAGS_AFTER_ITERATION:
|
1943
|
+
{
|
1944
|
+
int minimal = 0;
|
1945
|
+
int enter_tag;
|
1946
|
+
node = tre_stack_pop_voidptr(stack);
|
1947
|
+
if (first_pass)
|
1948
|
+
{
|
1949
|
+
node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
|
1950
|
+
+ tre_stack_pop_int(stack);
|
1951
|
+
minimal_tag = -1;
|
1952
|
+
}
|
1953
|
+
else
|
1954
|
+
{
|
1955
|
+
minimal = tre_stack_pop_int(stack);
|
1956
|
+
enter_tag = tre_stack_pop_int(stack);
|
1957
|
+
if (minimal)
|
1958
|
+
minimal_tag = enter_tag;
|
1959
|
+
}
|
1960
|
+
|
1961
|
+
if (!first_pass)
|
1962
|
+
{
|
1963
|
+
if (minimal)
|
1964
|
+
direction = TRE_TAG_MINIMIZE;
|
1965
|
+
else
|
1966
|
+
direction = TRE_TAG_MAXIMIZE;
|
1967
|
+
}
|
1968
|
+
break;
|
1969
|
+
}
|
1970
|
+
|
1971
|
+
case ADDTAGS_AFTER_CAT_LEFT:
|
1972
|
+
{
|
1973
|
+
int new_tag = tre_stack_pop_int(stack);
|
1974
|
+
next_tag = tre_stack_pop_int(stack);
|
1975
|
+
if (new_tag >= 0)
|
1976
|
+
{
|
1977
|
+
tag = new_tag;
|
1978
|
+
}
|
1979
|
+
break;
|
1980
|
+
}
|
1981
|
+
|
1982
|
+
case ADDTAGS_AFTER_CAT_RIGHT:
|
1983
|
+
node = tre_stack_pop_voidptr(stack);
|
1984
|
+
if (first_pass)
|
1985
|
+
node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
|
1986
|
+
+ ((tre_catenation_t *)node->obj)->right->num_tags;
|
1987
|
+
break;
|
1988
|
+
|
1989
|
+
case ADDTAGS_AFTER_UNION_LEFT:
|
1990
|
+
/* Lift the bottom of the `regset' array so that when processing
|
1991
|
+
the right operand the items currently in the array are
|
1992
|
+
invisible. The original bottom was saved at ADDTAGS_UNION and
|
1993
|
+
will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
|
1994
|
+
while (*regset >= 0)
|
1995
|
+
regset++;
|
1996
|
+
break;
|
1997
|
+
|
1998
|
+
case ADDTAGS_AFTER_UNION_RIGHT:
|
1999
|
+
{
|
2000
|
+
int added_tags, tag_left, tag_right;
|
2001
|
+
tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
|
2002
|
+
tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
|
2003
|
+
node = tre_stack_pop_voidptr(stack);
|
2004
|
+
added_tags = tre_stack_pop_int(stack);
|
2005
|
+
if (first_pass)
|
2006
|
+
{
|
2007
|
+
node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
|
2008
|
+
+ ((tre_union_t *)node->obj)->right->num_tags + added_tags
|
2009
|
+
+ ((node->num_submatches > 0) ? 2 : 0);
|
2010
|
+
}
|
2011
|
+
regset = tre_stack_pop_voidptr(stack);
|
2012
|
+
tag_left = tre_stack_pop_int(stack);
|
2013
|
+
tag_right = tre_stack_pop_int(stack);
|
2014
|
+
|
2015
|
+
/* Add tags after both children, the left child gets a smaller
|
2016
|
+
tag than the right child. This guarantees that we prefer
|
2017
|
+
the left child over the right child. */
|
2018
|
+
/* XXX - This is not always necessary (if the children have
|
2019
|
+
tags which must be seen for every match of that child). */
|
2020
|
+
/* XXX - Check if this is the only place where tre_add_tag_right
|
2021
|
+
is used. If so, use tre_add_tag_left (putting the tag before
|
2022
|
+
the child as opposed after the child) and throw away
|
2023
|
+
tre_add_tag_right. */
|
2024
|
+
if (node->num_submatches > 0)
|
2025
|
+
{
|
2026
|
+
if (!first_pass)
|
2027
|
+
{
|
2028
|
+
status = tre_add_tag_right(mem, left, tag_left);
|
2029
|
+
tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
|
2030
|
+
status = tre_add_tag_right(mem, right, tag_right);
|
2031
|
+
tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
|
2032
|
+
}
|
2033
|
+
num_tags += 2;
|
2034
|
+
}
|
2035
|
+
direction = TRE_TAG_MAXIMIZE;
|
2036
|
+
break;
|
2037
|
+
}
|
2038
|
+
|
2039
|
+
default:
|
2040
|
+
assert(0);
|
2041
|
+
break;
|
2042
|
+
|
2043
|
+
} /* end switch(symbol) */
|
2044
|
+
} /* end while(tre_stack_num_objects(stack) > bottom) */
|
2045
|
+
|
2046
|
+
if (!first_pass)
|
2047
|
+
tre_purge_regset(regset, tnfa, tag);
|
2048
|
+
|
2049
|
+
if (!first_pass && minimal_tag >= 0)
|
2050
|
+
{
|
2051
|
+
int i;
|
2052
|
+
for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
|
2053
|
+
tnfa->minimal_tags[i] = tag;
|
2054
|
+
tnfa->minimal_tags[i + 1] = minimal_tag;
|
2055
|
+
tnfa->minimal_tags[i + 2] = -1;
|
2056
|
+
minimal_tag = -1;
|
2057
|
+
num_minimals++;
|
2058
|
+
}
|
2059
|
+
|
2060
|
+
assert(tree->num_tags == num_tags);
|
2061
|
+
tnfa->end_tag = num_tags;
|
2062
|
+
tnfa->num_tags = num_tags;
|
2063
|
+
tnfa->num_minimals = num_minimals;
|
2064
|
+
xfree(orig_regset);
|
2065
|
+
xfree(parents);
|
2066
|
+
xfree(saved_states);
|
2067
|
+
return status;
|
2068
|
+
}
|
2069
|
+
|
2070
|
+
|
2071
|
+
|
2072
|
+
/*
|
2073
|
+
AST to TNFA compilation routines.
|
2074
|
+
*/
|
2075
|
+
|
2076
|
+
typedef enum {
|
2077
|
+
COPY_RECURSE,
|
2078
|
+
COPY_SET_RESULT_PTR
|
2079
|
+
} tre_copyast_symbol_t;
|
2080
|
+
|
2081
|
+
/* Flags for tre_copy_ast(). */
|
2082
|
+
#define COPY_REMOVE_TAGS 1
|
2083
|
+
#define COPY_MAXIMIZE_FIRST_TAG 2
|
2084
|
+
|
2085
|
+
static reg_errcode_t
|
2086
|
+
tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
|
2087
|
+
int flags, int *pos_add, tre_tag_direction_t *tag_directions,
|
2088
|
+
tre_ast_node_t **copy, int *max_pos)
|
2089
|
+
{
|
2090
|
+
reg_errcode_t status = REG_OK;
|
2091
|
+
int bottom = tre_stack_num_objects(stack);
|
2092
|
+
int num_copied = 0;
|
2093
|
+
int first_tag = 1;
|
2094
|
+
tre_ast_node_t **result = copy;
|
2095
|
+
tre_copyast_symbol_t symbol;
|
2096
|
+
|
2097
|
+
STACK_PUSH(stack, voidptr, ast);
|
2098
|
+
STACK_PUSH(stack, int, COPY_RECURSE);
|
2099
|
+
|
2100
|
+
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
|
2101
|
+
{
|
2102
|
+
tre_ast_node_t *node;
|
2103
|
+
if (status != REG_OK)
|
2104
|
+
break;
|
2105
|
+
|
2106
|
+
symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
|
2107
|
+
switch (symbol)
|
2108
|
+
{
|
2109
|
+
case COPY_SET_RESULT_PTR:
|
2110
|
+
result = tre_stack_pop_voidptr(stack);
|
2111
|
+
break;
|
2112
|
+
case COPY_RECURSE:
|
2113
|
+
node = tre_stack_pop_voidptr(stack);
|
2114
|
+
switch (node->type)
|
2115
|
+
{
|
2116
|
+
case LITERAL:
|
2117
|
+
{
|
2118
|
+
tre_literal_t *lit = node->obj;
|
2119
|
+
int pos = lit->position;
|
2120
|
+
int min = lit->code_min;
|
2121
|
+
int max = lit->code_max;
|
2122
|
+
if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
|
2123
|
+
{
|
2124
|
+
/* XXX - e.g. [ab] has only one position but two
|
2125
|
+
nodes, so we are creating holes in the state space
|
2126
|
+
here. Not fatal, just wastes memory. */
|
2127
|
+
pos += *pos_add;
|
2128
|
+
num_copied++;
|
2129
|
+
}
|
2130
|
+
else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
|
2131
|
+
{
|
2132
|
+
/* Change this tag to empty. */
|
2133
|
+
min = EMPTY;
|
2134
|
+
max = pos = -1;
|
2135
|
+
}
|
2136
|
+
else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
|
2137
|
+
&& first_tag)
|
2138
|
+
{
|
2139
|
+
/* Maximize the first tag. */
|
2140
|
+
tag_directions[max] = TRE_TAG_MAXIMIZE;
|
2141
|
+
first_tag = 0;
|
2142
|
+
}
|
2143
|
+
*result = tre_ast_new_literal(mem, min, max, pos);
|
2144
|
+
if (*result == NULL)
|
2145
|
+
status = REG_ESPACE;
|
2146
|
+
|
2147
|
+
if (pos > *max_pos)
|
2148
|
+
*max_pos = pos;
|
2149
|
+
break;
|
2150
|
+
}
|
2151
|
+
case UNION:
|
2152
|
+
{
|
2153
|
+
tre_union_t *uni = node->obj;
|
2154
|
+
tre_union_t *tmp;
|
2155
|
+
*result = tre_ast_new_union(mem, uni->left, uni->right);
|
2156
|
+
if (*result == NULL)
|
2157
|
+
{
|
2158
|
+
status = REG_ESPACE;
|
2159
|
+
break;
|
2160
|
+
}
|
2161
|
+
tmp = (*result)->obj;
|
2162
|
+
result = &tmp->left;
|
2163
|
+
STACK_PUSHX(stack, voidptr, uni->right);
|
2164
|
+
STACK_PUSHX(stack, int, COPY_RECURSE);
|
2165
|
+
STACK_PUSHX(stack, voidptr, &tmp->right);
|
2166
|
+
STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
|
2167
|
+
STACK_PUSHX(stack, voidptr, uni->left);
|
2168
|
+
STACK_PUSHX(stack, int, COPY_RECURSE);
|
2169
|
+
break;
|
2170
|
+
}
|
2171
|
+
case CATENATION:
|
2172
|
+
{
|
2173
|
+
tre_catenation_t *cat = node->obj;
|
2174
|
+
tre_catenation_t *tmp;
|
2175
|
+
*result = tre_ast_new_catenation(mem, cat->left, cat->right);
|
2176
|
+
if (*result == NULL)
|
2177
|
+
{
|
2178
|
+
status = REG_ESPACE;
|
2179
|
+
break;
|
2180
|
+
}
|
2181
|
+
tmp = (*result)->obj;
|
2182
|
+
tmp->left = NULL;
|
2183
|
+
tmp->right = NULL;
|
2184
|
+
result = &tmp->left;
|
2185
|
+
|
2186
|
+
STACK_PUSHX(stack, voidptr, cat->right);
|
2187
|
+
STACK_PUSHX(stack, int, COPY_RECURSE);
|
2188
|
+
STACK_PUSHX(stack, voidptr, &tmp->right);
|
2189
|
+
STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
|
2190
|
+
STACK_PUSHX(stack, voidptr, cat->left);
|
2191
|
+
STACK_PUSHX(stack, int, COPY_RECURSE);
|
2192
|
+
break;
|
2193
|
+
}
|
2194
|
+
case ITERATION:
|
2195
|
+
{
|
2196
|
+
tre_iteration_t *iter = node->obj;
|
2197
|
+
STACK_PUSHX(stack, voidptr, iter->arg);
|
2198
|
+
STACK_PUSHX(stack, int, COPY_RECURSE);
|
2199
|
+
*result = tre_ast_new_iter(mem, iter->arg, iter->min,
|
2200
|
+
iter->max, iter->minimal);
|
2201
|
+
if (*result == NULL)
|
2202
|
+
{
|
2203
|
+
status = REG_ESPACE;
|
2204
|
+
break;
|
2205
|
+
}
|
2206
|
+
iter = (*result)->obj;
|
2207
|
+
result = &iter->arg;
|
2208
|
+
break;
|
2209
|
+
}
|
2210
|
+
default:
|
2211
|
+
assert(0);
|
2212
|
+
break;
|
2213
|
+
}
|
2214
|
+
break;
|
2215
|
+
}
|
2216
|
+
}
|
2217
|
+
*pos_add += num_copied;
|
2218
|
+
return status;
|
2219
|
+
}
|
2220
|
+
|
2221
|
+
typedef enum {
|
2222
|
+
EXPAND_RECURSE,
|
2223
|
+
EXPAND_AFTER_ITER
|
2224
|
+
} tre_expand_ast_symbol_t;
|
2225
|
+
|
2226
|
+
/* Expands each iteration node that has a finite nonzero minimum or maximum
|
2227
|
+
iteration count to a catenated sequence of copies of the node. */
|
2228
|
+
static reg_errcode_t
|
2229
|
+
tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
|
2230
|
+
int *position, tre_tag_direction_t *tag_directions)
|
2231
|
+
{
|
2232
|
+
reg_errcode_t status = REG_OK;
|
2233
|
+
int bottom = tre_stack_num_objects(stack);
|
2234
|
+
int pos_add = 0;
|
2235
|
+
int pos_add_total = 0;
|
2236
|
+
int max_pos = 0;
|
2237
|
+
int iter_depth = 0;
|
2238
|
+
|
2239
|
+
STACK_PUSHR(stack, voidptr, ast);
|
2240
|
+
STACK_PUSHR(stack, int, EXPAND_RECURSE);
|
2241
|
+
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
|
2242
|
+
{
|
2243
|
+
tre_ast_node_t *node;
|
2244
|
+
tre_expand_ast_symbol_t symbol;
|
2245
|
+
|
2246
|
+
if (status != REG_OK)
|
2247
|
+
break;
|
2248
|
+
|
2249
|
+
symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
|
2250
|
+
node = tre_stack_pop_voidptr(stack);
|
2251
|
+
switch (symbol)
|
2252
|
+
{
|
2253
|
+
case EXPAND_RECURSE:
|
2254
|
+
switch (node->type)
|
2255
|
+
{
|
2256
|
+
case LITERAL:
|
2257
|
+
{
|
2258
|
+
tre_literal_t *lit= node->obj;
|
2259
|
+
if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
|
2260
|
+
{
|
2261
|
+
lit->position += pos_add;
|
2262
|
+
if (lit->position > max_pos)
|
2263
|
+
max_pos = lit->position;
|
2264
|
+
}
|
2265
|
+
break;
|
2266
|
+
}
|
2267
|
+
case UNION:
|
2268
|
+
{
|
2269
|
+
tre_union_t *uni = node->obj;
|
2270
|
+
STACK_PUSHX(stack, voidptr, uni->right);
|
2271
|
+
STACK_PUSHX(stack, int, EXPAND_RECURSE);
|
2272
|
+
STACK_PUSHX(stack, voidptr, uni->left);
|
2273
|
+
STACK_PUSHX(stack, int, EXPAND_RECURSE);
|
2274
|
+
break;
|
2275
|
+
}
|
2276
|
+
case CATENATION:
|
2277
|
+
{
|
2278
|
+
tre_catenation_t *cat = node->obj;
|
2279
|
+
STACK_PUSHX(stack, voidptr, cat->right);
|
2280
|
+
STACK_PUSHX(stack, int, EXPAND_RECURSE);
|
2281
|
+
STACK_PUSHX(stack, voidptr, cat->left);
|
2282
|
+
STACK_PUSHX(stack, int, EXPAND_RECURSE);
|
2283
|
+
break;
|
2284
|
+
}
|
2285
|
+
case ITERATION:
|
2286
|
+
{
|
2287
|
+
tre_iteration_t *iter = node->obj;
|
2288
|
+
STACK_PUSHX(stack, int, pos_add);
|
2289
|
+
STACK_PUSHX(stack, voidptr, node);
|
2290
|
+
STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
|
2291
|
+
STACK_PUSHX(stack, voidptr, iter->arg);
|
2292
|
+
STACK_PUSHX(stack, int, EXPAND_RECURSE);
|
2293
|
+
/* If we are going to expand this node at EXPAND_AFTER_ITER
|
2294
|
+
then don't increase the `pos' fields of the nodes now, it
|
2295
|
+
will get done when expanding. */
|
2296
|
+
if (iter->min > 1 || iter->max > 1)
|
2297
|
+
pos_add = 0;
|
2298
|
+
iter_depth++;
|
2299
|
+
break;
|
2300
|
+
}
|
2301
|
+
default:
|
2302
|
+
assert(0);
|
2303
|
+
break;
|
2304
|
+
}
|
2305
|
+
break;
|
2306
|
+
case EXPAND_AFTER_ITER:
|
2307
|
+
{
|
2308
|
+
tre_iteration_t *iter = node->obj;
|
2309
|
+
int pos_add_last;
|
2310
|
+
pos_add = tre_stack_pop_int(stack);
|
2311
|
+
pos_add_last = pos_add;
|
2312
|
+
if (iter->min > 1 || iter->max > 1)
|
2313
|
+
{
|
2314
|
+
tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
|
2315
|
+
int j;
|
2316
|
+
int pos_add_save = pos_add;
|
2317
|
+
|
2318
|
+
/* Create a catenated sequence of copies of the node. */
|
2319
|
+
for (j = 0; j < iter->min; j++)
|
2320
|
+
{
|
2321
|
+
tre_ast_node_t *copy;
|
2322
|
+
/* Remove tags from all but the last copy. */
|
2323
|
+
int flags = ((j + 1 < iter->min)
|
2324
|
+
? COPY_REMOVE_TAGS
|
2325
|
+
: COPY_MAXIMIZE_FIRST_TAG);
|
2326
|
+
pos_add_save = pos_add;
|
2327
|
+
status = tre_copy_ast(mem, stack, iter->arg, flags,
|
2328
|
+
&pos_add, tag_directions, ©,
|
2329
|
+
&max_pos);
|
2330
|
+
if (status != REG_OK)
|
2331
|
+
return status;
|
2332
|
+
if (seq1 != NULL)
|
2333
|
+
seq1 = tre_ast_new_catenation(mem, seq1, copy);
|
2334
|
+
else
|
2335
|
+
seq1 = copy;
|
2336
|
+
if (seq1 == NULL)
|
2337
|
+
return REG_ESPACE;
|
2338
|
+
}
|
2339
|
+
|
2340
|
+
if (iter->max == -1)
|
2341
|
+
{
|
2342
|
+
/* No upper limit. */
|
2343
|
+
pos_add_save = pos_add;
|
2344
|
+
status = tre_copy_ast(mem, stack, iter->arg, 0,
|
2345
|
+
&pos_add, NULL, &seq2, &max_pos);
|
2346
|
+
if (status != REG_OK)
|
2347
|
+
return status;
|
2348
|
+
seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
|
2349
|
+
if (seq2 == NULL)
|
2350
|
+
return REG_ESPACE;
|
2351
|
+
}
|
2352
|
+
else
|
2353
|
+
{
|
2354
|
+
for (j = iter->min; j < iter->max; j++)
|
2355
|
+
{
|
2356
|
+
tre_ast_node_t *tmp, *copy;
|
2357
|
+
pos_add_save = pos_add;
|
2358
|
+
status = tre_copy_ast(mem, stack, iter->arg, 0,
|
2359
|
+
&pos_add, NULL, ©, &max_pos);
|
2360
|
+
if (status != REG_OK)
|
2361
|
+
return status;
|
2362
|
+
if (seq2 != NULL)
|
2363
|
+
seq2 = tre_ast_new_catenation(mem, copy, seq2);
|
2364
|
+
else
|
2365
|
+
seq2 = copy;
|
2366
|
+
if (seq2 == NULL)
|
2367
|
+
return REG_ESPACE;
|
2368
|
+
tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
|
2369
|
+
if (tmp == NULL)
|
2370
|
+
return REG_ESPACE;
|
2371
|
+
seq2 = tre_ast_new_union(mem, tmp, seq2);
|
2372
|
+
if (seq2 == NULL)
|
2373
|
+
return REG_ESPACE;
|
2374
|
+
}
|
2375
|
+
}
|
2376
|
+
|
2377
|
+
pos_add = pos_add_save;
|
2378
|
+
if (seq1 == NULL)
|
2379
|
+
seq1 = seq2;
|
2380
|
+
else if (seq2 != NULL)
|
2381
|
+
seq1 = tre_ast_new_catenation(mem, seq1, seq2);
|
2382
|
+
if (seq1 == NULL)
|
2383
|
+
return REG_ESPACE;
|
2384
|
+
node->obj = seq1->obj;
|
2385
|
+
node->type = seq1->type;
|
2386
|
+
}
|
2387
|
+
|
2388
|
+
iter_depth--;
|
2389
|
+
pos_add_total += pos_add - pos_add_last;
|
2390
|
+
if (iter_depth == 0)
|
2391
|
+
pos_add = pos_add_total;
|
2392
|
+
|
2393
|
+
break;
|
2394
|
+
}
|
2395
|
+
default:
|
2396
|
+
assert(0);
|
2397
|
+
break;
|
2398
|
+
}
|
2399
|
+
}
|
2400
|
+
|
2401
|
+
*position += pos_add_total;
|
2402
|
+
|
2403
|
+
/* `max_pos' should never be larger than `*position' if the above
|
2404
|
+
code works, but just an extra safeguard let's make sure
|
2405
|
+
`*position' is set large enough so enough memory will be
|
2406
|
+
allocated for the transition table. */
|
2407
|
+
if (max_pos > *position)
|
2408
|
+
*position = max_pos;
|
2409
|
+
|
2410
|
+
return status;
|
2411
|
+
}
|
2412
|
+
|
2413
|
+
static tre_pos_and_tags_t *
|
2414
|
+
tre_set_empty(tre_mem_t mem)
|
2415
|
+
{
|
2416
|
+
tre_pos_and_tags_t *new_set;
|
2417
|
+
|
2418
|
+
new_set = tre_mem_calloc(mem, sizeof(*new_set));
|
2419
|
+
if (new_set == NULL)
|
2420
|
+
return NULL;
|
2421
|
+
|
2422
|
+
new_set[0].position = -1;
|
2423
|
+
new_set[0].code_min = -1;
|
2424
|
+
new_set[0].code_max = -1;
|
2425
|
+
|
2426
|
+
return new_set;
|
2427
|
+
}
|
2428
|
+
|
2429
|
+
static tre_pos_and_tags_t *
|
2430
|
+
tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
|
2431
|
+
tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
|
2432
|
+
{
|
2433
|
+
tre_pos_and_tags_t *new_set;
|
2434
|
+
|
2435
|
+
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
|
2436
|
+
if (new_set == NULL)
|
2437
|
+
return NULL;
|
2438
|
+
|
2439
|
+
new_set[0].position = position;
|
2440
|
+
new_set[0].code_min = code_min;
|
2441
|
+
new_set[0].code_max = code_max;
|
2442
|
+
new_set[0].class = class;
|
2443
|
+
new_set[0].neg_classes = neg_classes;
|
2444
|
+
new_set[0].backref = backref;
|
2445
|
+
new_set[1].position = -1;
|
2446
|
+
new_set[1].code_min = -1;
|
2447
|
+
new_set[1].code_max = -1;
|
2448
|
+
|
2449
|
+
return new_set;
|
2450
|
+
}
|
2451
|
+
|
2452
|
+
static tre_pos_and_tags_t *
|
2453
|
+
tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
|
2454
|
+
int *tags, int assertions)
|
2455
|
+
{
|
2456
|
+
int s1, s2, i, j;
|
2457
|
+
tre_pos_and_tags_t *new_set;
|
2458
|
+
int *new_tags;
|
2459
|
+
int num_tags;
|
2460
|
+
|
2461
|
+
for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
|
2462
|
+
for (s1 = 0; set1[s1].position >= 0; s1++);
|
2463
|
+
for (s2 = 0; set2[s2].position >= 0; s2++);
|
2464
|
+
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
|
2465
|
+
if (!new_set )
|
2466
|
+
return NULL;
|
2467
|
+
|
2468
|
+
for (s1 = 0; set1[s1].position >= 0; s1++)
|
2469
|
+
{
|
2470
|
+
new_set[s1].position = set1[s1].position;
|
2471
|
+
new_set[s1].code_min = set1[s1].code_min;
|
2472
|
+
new_set[s1].code_max = set1[s1].code_max;
|
2473
|
+
new_set[s1].assertions = set1[s1].assertions | assertions;
|
2474
|
+
new_set[s1].class = set1[s1].class;
|
2475
|
+
new_set[s1].neg_classes = set1[s1].neg_classes;
|
2476
|
+
new_set[s1].backref = set1[s1].backref;
|
2477
|
+
if (set1[s1].tags == NULL && tags == NULL)
|
2478
|
+
new_set[s1].tags = NULL;
|
2479
|
+
else
|
2480
|
+
{
|
2481
|
+
for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
|
2482
|
+
new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
|
2483
|
+
* (i + num_tags + 1)));
|
2484
|
+
if (new_tags == NULL)
|
2485
|
+
return NULL;
|
2486
|
+
for (j = 0; j < i; j++)
|
2487
|
+
new_tags[j] = set1[s1].tags[j];
|
2488
|
+
for (i = 0; i < num_tags; i++)
|
2489
|
+
new_tags[j + i] = tags[i];
|
2490
|
+
new_tags[j + i] = -1;
|
2491
|
+
new_set[s1].tags = new_tags;
|
2492
|
+
}
|
2493
|
+
}
|
2494
|
+
|
2495
|
+
for (s2 = 0; set2[s2].position >= 0; s2++)
|
2496
|
+
{
|
2497
|
+
new_set[s1 + s2].position = set2[s2].position;
|
2498
|
+
new_set[s1 + s2].code_min = set2[s2].code_min;
|
2499
|
+
new_set[s1 + s2].code_max = set2[s2].code_max;
|
2500
|
+
/* XXX - why not | assertions here as well? */
|
2501
|
+
new_set[s1 + s2].assertions = set2[s2].assertions;
|
2502
|
+
new_set[s1 + s2].class = set2[s2].class;
|
2503
|
+
new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
|
2504
|
+
new_set[s1 + s2].backref = set2[s2].backref;
|
2505
|
+
if (set2[s2].tags == NULL)
|
2506
|
+
new_set[s1 + s2].tags = NULL;
|
2507
|
+
else
|
2508
|
+
{
|
2509
|
+
for (i = 0; set2[s2].tags[i] >= 0; i++);
|
2510
|
+
new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
|
2511
|
+
if (new_tags == NULL)
|
2512
|
+
return NULL;
|
2513
|
+
for (j = 0; j < i; j++)
|
2514
|
+
new_tags[j] = set2[s2].tags[j];
|
2515
|
+
new_tags[j] = -1;
|
2516
|
+
new_set[s1 + s2].tags = new_tags;
|
2517
|
+
}
|
2518
|
+
}
|
2519
|
+
new_set[s1 + s2].position = -1;
|
2520
|
+
return new_set;
|
2521
|
+
}
|
2522
|
+
|
2523
|
+
/* Finds the empty path through `node' which is the one that should be
|
2524
|
+
taken according to POSIX.2 rules, and adds the tags on that path to
|
2525
|
+
`tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
|
2526
|
+
set to the number of tags seen on the path. */
|
2527
|
+
static reg_errcode_t
|
2528
|
+
tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
|
2529
|
+
int *assertions, int *num_tags_seen)
|
2530
|
+
{
|
2531
|
+
tre_literal_t *lit;
|
2532
|
+
tre_union_t *uni;
|
2533
|
+
tre_catenation_t *cat;
|
2534
|
+
tre_iteration_t *iter;
|
2535
|
+
int i;
|
2536
|
+
int bottom = tre_stack_num_objects(stack);
|
2537
|
+
reg_errcode_t status = REG_OK;
|
2538
|
+
if (num_tags_seen)
|
2539
|
+
*num_tags_seen = 0;
|
2540
|
+
|
2541
|
+
status = tre_stack_push_voidptr(stack, node);
|
2542
|
+
|
2543
|
+
/* Walk through the tree recursively. */
|
2544
|
+
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
|
2545
|
+
{
|
2546
|
+
node = tre_stack_pop_voidptr(stack);
|
2547
|
+
|
2548
|
+
switch (node->type)
|
2549
|
+
{
|
2550
|
+
case LITERAL:
|
2551
|
+
lit = (tre_literal_t *)node->obj;
|
2552
|
+
switch (lit->code_min)
|
2553
|
+
{
|
2554
|
+
case TAG:
|
2555
|
+
if (lit->code_max >= 0)
|
2556
|
+
{
|
2557
|
+
if (tags != NULL)
|
2558
|
+
{
|
2559
|
+
/* Add the tag to `tags'. */
|
2560
|
+
for (i = 0; tags[i] >= 0; i++)
|
2561
|
+
if (tags[i] == lit->code_max)
|
2562
|
+
break;
|
2563
|
+
if (tags[i] < 0)
|
2564
|
+
{
|
2565
|
+
tags[i] = lit->code_max;
|
2566
|
+
tags[i + 1] = -1;
|
2567
|
+
}
|
2568
|
+
}
|
2569
|
+
if (num_tags_seen)
|
2570
|
+
(*num_tags_seen)++;
|
2571
|
+
}
|
2572
|
+
break;
|
2573
|
+
case ASSERTION:
|
2574
|
+
assert(lit->code_max >= 1
|
2575
|
+
|| lit->code_max <= ASSERT_LAST);
|
2576
|
+
if (assertions != NULL)
|
2577
|
+
*assertions |= lit->code_max;
|
2578
|
+
break;
|
2579
|
+
case EMPTY:
|
2580
|
+
break;
|
2581
|
+
default:
|
2582
|
+
assert(0);
|
2583
|
+
break;
|
2584
|
+
}
|
2585
|
+
break;
|
2586
|
+
|
2587
|
+
case UNION:
|
2588
|
+
/* Subexpressions starting earlier take priority over ones
|
2589
|
+
starting later, so we prefer the left subexpression over the
|
2590
|
+
right subexpression. */
|
2591
|
+
uni = (tre_union_t *)node->obj;
|
2592
|
+
if (uni->left->nullable)
|
2593
|
+
STACK_PUSHX(stack, voidptr, uni->left)
|
2594
|
+
else if (uni->right->nullable)
|
2595
|
+
STACK_PUSHX(stack, voidptr, uni->right)
|
2596
|
+
else
|
2597
|
+
assert(0);
|
2598
|
+
break;
|
2599
|
+
|
2600
|
+
case CATENATION:
|
2601
|
+
/* The path must go through both children. */
|
2602
|
+
cat = (tre_catenation_t *)node->obj;
|
2603
|
+
assert(cat->left->nullable);
|
2604
|
+
assert(cat->right->nullable);
|
2605
|
+
STACK_PUSHX(stack, voidptr, cat->left);
|
2606
|
+
STACK_PUSHX(stack, voidptr, cat->right);
|
2607
|
+
break;
|
2608
|
+
|
2609
|
+
case ITERATION:
|
2610
|
+
/* A match with an empty string is preferred over no match at
|
2611
|
+
all, so we go through the argument if possible. */
|
2612
|
+
iter = (tre_iteration_t *)node->obj;
|
2613
|
+
if (iter->arg->nullable)
|
2614
|
+
STACK_PUSHX(stack, voidptr, iter->arg);
|
2615
|
+
break;
|
2616
|
+
|
2617
|
+
default:
|
2618
|
+
assert(0);
|
2619
|
+
break;
|
2620
|
+
}
|
2621
|
+
}
|
2622
|
+
|
2623
|
+
return status;
|
2624
|
+
}
|
2625
|
+
|
2626
|
+
|
2627
|
+
typedef enum {
|
2628
|
+
NFL_RECURSE,
|
2629
|
+
NFL_POST_UNION,
|
2630
|
+
NFL_POST_CATENATION,
|
2631
|
+
NFL_POST_ITERATION
|
2632
|
+
} tre_nfl_stack_symbol_t;
|
2633
|
+
|
2634
|
+
|
2635
|
+
/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
|
2636
|
+
the nodes of the AST `tree'. */
|
2637
|
+
static reg_errcode_t
|
2638
|
+
tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
|
2639
|
+
{
|
2640
|
+
int bottom = tre_stack_num_objects(stack);
|
2641
|
+
|
2642
|
+
STACK_PUSHR(stack, voidptr, tree);
|
2643
|
+
STACK_PUSHR(stack, int, NFL_RECURSE);
|
2644
|
+
|
2645
|
+
while (tre_stack_num_objects(stack) > bottom)
|
2646
|
+
{
|
2647
|
+
tre_nfl_stack_symbol_t symbol;
|
2648
|
+
tre_ast_node_t *node;
|
2649
|
+
|
2650
|
+
symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
|
2651
|
+
node = tre_stack_pop_voidptr(stack);
|
2652
|
+
switch (symbol)
|
2653
|
+
{
|
2654
|
+
case NFL_RECURSE:
|
2655
|
+
switch (node->type)
|
2656
|
+
{
|
2657
|
+
case LITERAL:
|
2658
|
+
{
|
2659
|
+
tre_literal_t *lit = (tre_literal_t *)node->obj;
|
2660
|
+
if (IS_BACKREF(lit))
|
2661
|
+
{
|
2662
|
+
/* Back references: nullable = false, firstpos = {i},
|
2663
|
+
lastpos = {i}. */
|
2664
|
+
node->nullable = 0;
|
2665
|
+
node->firstpos = tre_set_one(mem, lit->position, 0,
|
2666
|
+
TRE_CHAR_MAX, 0, NULL, -1);
|
2667
|
+
if (!node->firstpos)
|
2668
|
+
return REG_ESPACE;
|
2669
|
+
node->lastpos = tre_set_one(mem, lit->position, 0,
|
2670
|
+
TRE_CHAR_MAX, 0, NULL,
|
2671
|
+
(int)lit->code_max);
|
2672
|
+
if (!node->lastpos)
|
2673
|
+
return REG_ESPACE;
|
2674
|
+
}
|
2675
|
+
else if (lit->code_min < 0)
|
2676
|
+
{
|
2677
|
+
/* Tags, empty strings, params, and zero width assertions:
|
2678
|
+
nullable = true, firstpos = {}, and lastpos = {}. */
|
2679
|
+
node->nullable = 1;
|
2680
|
+
node->firstpos = tre_set_empty(mem);
|
2681
|
+
if (!node->firstpos)
|
2682
|
+
return REG_ESPACE;
|
2683
|
+
node->lastpos = tre_set_empty(mem);
|
2684
|
+
if (!node->lastpos)
|
2685
|
+
return REG_ESPACE;
|
2686
|
+
}
|
2687
|
+
else
|
2688
|
+
{
|
2689
|
+
/* Literal at position i: nullable = false, firstpos = {i},
|
2690
|
+
lastpos = {i}. */
|
2691
|
+
node->nullable = 0;
|
2692
|
+
node->firstpos =
|
2693
|
+
tre_set_one(mem, lit->position, (int)lit->code_min,
|
2694
|
+
(int)lit->code_max, 0, NULL, -1);
|
2695
|
+
if (!node->firstpos)
|
2696
|
+
return REG_ESPACE;
|
2697
|
+
node->lastpos = tre_set_one(mem, lit->position,
|
2698
|
+
(int)lit->code_min,
|
2699
|
+
(int)lit->code_max,
|
2700
|
+
lit->class, lit->neg_classes,
|
2701
|
+
-1);
|
2702
|
+
if (!node->lastpos)
|
2703
|
+
return REG_ESPACE;
|
2704
|
+
}
|
2705
|
+
break;
|
2706
|
+
}
|
2707
|
+
|
2708
|
+
case UNION:
|
2709
|
+
/* Compute the attributes for the two subtrees, and after that
|
2710
|
+
for this node. */
|
2711
|
+
STACK_PUSHR(stack, voidptr, node);
|
2712
|
+
STACK_PUSHR(stack, int, NFL_POST_UNION);
|
2713
|
+
STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
|
2714
|
+
STACK_PUSHR(stack, int, NFL_RECURSE);
|
2715
|
+
STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
|
2716
|
+
STACK_PUSHR(stack, int, NFL_RECURSE);
|
2717
|
+
break;
|
2718
|
+
|
2719
|
+
case CATENATION:
|
2720
|
+
/* Compute the attributes for the two subtrees, and after that
|
2721
|
+
for this node. */
|
2722
|
+
STACK_PUSHR(stack, voidptr, node);
|
2723
|
+
STACK_PUSHR(stack, int, NFL_POST_CATENATION);
|
2724
|
+
STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
|
2725
|
+
STACK_PUSHR(stack, int, NFL_RECURSE);
|
2726
|
+
STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
|
2727
|
+
STACK_PUSHR(stack, int, NFL_RECURSE);
|
2728
|
+
break;
|
2729
|
+
|
2730
|
+
case ITERATION:
|
2731
|
+
/* Compute the attributes for the subtree, and after that for
|
2732
|
+
this node. */
|
2733
|
+
STACK_PUSHR(stack, voidptr, node);
|
2734
|
+
STACK_PUSHR(stack, int, NFL_POST_ITERATION);
|
2735
|
+
STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
|
2736
|
+
STACK_PUSHR(stack, int, NFL_RECURSE);
|
2737
|
+
break;
|
2738
|
+
}
|
2739
|
+
break; /* end case: NFL_RECURSE */
|
2740
|
+
|
2741
|
+
case NFL_POST_UNION:
|
2742
|
+
{
|
2743
|
+
tre_union_t *uni = (tre_union_t *)node->obj;
|
2744
|
+
node->nullable = uni->left->nullable || uni->right->nullable;
|
2745
|
+
node->firstpos = tre_set_union(mem, uni->left->firstpos,
|
2746
|
+
uni->right->firstpos, NULL, 0);
|
2747
|
+
if (!node->firstpos)
|
2748
|
+
return REG_ESPACE;
|
2749
|
+
node->lastpos = tre_set_union(mem, uni->left->lastpos,
|
2750
|
+
uni->right->lastpos, NULL, 0);
|
2751
|
+
if (!node->lastpos)
|
2752
|
+
return REG_ESPACE;
|
2753
|
+
break;
|
2754
|
+
}
|
2755
|
+
|
2756
|
+
case NFL_POST_ITERATION:
|
2757
|
+
{
|
2758
|
+
tre_iteration_t *iter = (tre_iteration_t *)node->obj;
|
2759
|
+
|
2760
|
+
if (iter->min == 0 || iter->arg->nullable)
|
2761
|
+
node->nullable = 1;
|
2762
|
+
else
|
2763
|
+
node->nullable = 0;
|
2764
|
+
node->firstpos = iter->arg->firstpos;
|
2765
|
+
node->lastpos = iter->arg->lastpos;
|
2766
|
+
break;
|
2767
|
+
}
|
2768
|
+
|
2769
|
+
case NFL_POST_CATENATION:
|
2770
|
+
{
|
2771
|
+
int num_tags, *tags, assertions;
|
2772
|
+
reg_errcode_t status;
|
2773
|
+
tre_catenation_t *cat = node->obj;
|
2774
|
+
node->nullable = cat->left->nullable && cat->right->nullable;
|
2775
|
+
|
2776
|
+
/* Compute firstpos. */
|
2777
|
+
if (cat->left->nullable)
|
2778
|
+
{
|
2779
|
+
/* The left side matches the empty string. Make a first pass
|
2780
|
+
with tre_match_empty() to get the number of tags and
|
2781
|
+
parameters. */
|
2782
|
+
status = tre_match_empty(stack, cat->left,
|
2783
|
+
NULL, NULL, &num_tags);
|
2784
|
+
if (status != REG_OK)
|
2785
|
+
return status;
|
2786
|
+
/* Allocate arrays for the tags and parameters. */
|
2787
|
+
tags = xmalloc(sizeof(*tags) * (num_tags + 1));
|
2788
|
+
if (!tags)
|
2789
|
+
return REG_ESPACE;
|
2790
|
+
tags[0] = -1;
|
2791
|
+
assertions = 0;
|
2792
|
+
/* Second pass with tre_mach_empty() to get the list of
|
2793
|
+
tags and parameters. */
|
2794
|
+
status = tre_match_empty(stack, cat->left, tags,
|
2795
|
+
&assertions, NULL);
|
2796
|
+
if (status != REG_OK)
|
2797
|
+
{
|
2798
|
+
xfree(tags);
|
2799
|
+
return status;
|
2800
|
+
}
|
2801
|
+
node->firstpos =
|
2802
|
+
tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
|
2803
|
+
tags, assertions);
|
2804
|
+
xfree(tags);
|
2805
|
+
if (!node->firstpos)
|
2806
|
+
return REG_ESPACE;
|
2807
|
+
}
|
2808
|
+
else
|
2809
|
+
{
|
2810
|
+
node->firstpos = cat->left->firstpos;
|
2811
|
+
}
|
2812
|
+
|
2813
|
+
/* Compute lastpos. */
|
2814
|
+
if (cat->right->nullable)
|
2815
|
+
{
|
2816
|
+
/* The right side matches the empty string. Make a first pass
|
2817
|
+
with tre_match_empty() to get the number of tags and
|
2818
|
+
parameters. */
|
2819
|
+
status = tre_match_empty(stack, cat->right,
|
2820
|
+
NULL, NULL, &num_tags);
|
2821
|
+
if (status != REG_OK)
|
2822
|
+
return status;
|
2823
|
+
/* Allocate arrays for the tags and parameters. */
|
2824
|
+
tags = xmalloc(sizeof(int) * (num_tags + 1));
|
2825
|
+
if (!tags)
|
2826
|
+
return REG_ESPACE;
|
2827
|
+
tags[0] = -1;
|
2828
|
+
assertions = 0;
|
2829
|
+
/* Second pass with tre_mach_empty() to get the list of
|
2830
|
+
tags and parameters. */
|
2831
|
+
status = tre_match_empty(stack, cat->right, tags,
|
2832
|
+
&assertions, NULL);
|
2833
|
+
if (status != REG_OK)
|
2834
|
+
{
|
2835
|
+
xfree(tags);
|
2836
|
+
return status;
|
2837
|
+
}
|
2838
|
+
node->lastpos =
|
2839
|
+
tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
|
2840
|
+
tags, assertions);
|
2841
|
+
xfree(tags);
|
2842
|
+
if (!node->lastpos)
|
2843
|
+
return REG_ESPACE;
|
2844
|
+
}
|
2845
|
+
else
|
2846
|
+
{
|
2847
|
+
node->lastpos = cat->right->lastpos;
|
2848
|
+
}
|
2849
|
+
break;
|
2850
|
+
}
|
2851
|
+
|
2852
|
+
default:
|
2853
|
+
assert(0);
|
2854
|
+
break;
|
2855
|
+
}
|
2856
|
+
}
|
2857
|
+
|
2858
|
+
return REG_OK;
|
2859
|
+
}
|
2860
|
+
|
2861
|
+
|
2862
|
+
/* Adds a transition from each position in `p1' to each position in `p2'. */
|
2863
|
+
static reg_errcode_t
|
2864
|
+
tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
|
2865
|
+
tre_tnfa_transition_t *transitions,
|
2866
|
+
int *counts, int *offs)
|
2867
|
+
{
|
2868
|
+
tre_pos_and_tags_t *orig_p2 = p2;
|
2869
|
+
tre_tnfa_transition_t *trans;
|
2870
|
+
int i, j, k, l, dup, prev_p2_pos;
|
2871
|
+
|
2872
|
+
if (transitions != NULL)
|
2873
|
+
while (p1->position >= 0)
|
2874
|
+
{
|
2875
|
+
p2 = orig_p2;
|
2876
|
+
prev_p2_pos = -1;
|
2877
|
+
while (p2->position >= 0)
|
2878
|
+
{
|
2879
|
+
/* Optimization: if this position was already handled, skip it. */
|
2880
|
+
if (p2->position == prev_p2_pos)
|
2881
|
+
{
|
2882
|
+
p2++;
|
2883
|
+
continue;
|
2884
|
+
}
|
2885
|
+
prev_p2_pos = p2->position;
|
2886
|
+
/* Set `trans' to point to the next unused transition from
|
2887
|
+
position `p1->position'. */
|
2888
|
+
trans = transitions + offs[p1->position];
|
2889
|
+
while (trans->state != NULL)
|
2890
|
+
{
|
2891
|
+
#if 0
|
2892
|
+
/* If we find a previous transition from `p1->position' to
|
2893
|
+
`p2->position', it is overwritten. This can happen only
|
2894
|
+
if there are nested loops in the regexp, like in "((a)*)*".
|
2895
|
+
In POSIX.2 repetition using the outer loop is always
|
2896
|
+
preferred over using the inner loop. Therefore the
|
2897
|
+
transition for the inner loop is useless and can be thrown
|
2898
|
+
away. */
|
2899
|
+
/* XXX - The same position is used for all nodes in a bracket
|
2900
|
+
expression, so this optimization cannot be used (it will
|
2901
|
+
break bracket expressions) unless I figure out a way to
|
2902
|
+
detect it here. */
|
2903
|
+
if (trans->state_id == p2->position)
|
2904
|
+
{
|
2905
|
+
break;
|
2906
|
+
}
|
2907
|
+
#endif
|
2908
|
+
trans++;
|
2909
|
+
}
|
2910
|
+
|
2911
|
+
if (trans->state == NULL)
|
2912
|
+
(trans + 1)->state = NULL;
|
2913
|
+
/* Use the character ranges, assertions, etc. from `p1' for
|
2914
|
+
the transition from `p1' to `p2'. */
|
2915
|
+
trans->code_min = p1->code_min;
|
2916
|
+
trans->code_max = p1->code_max;
|
2917
|
+
trans->state = transitions + offs[p2->position];
|
2918
|
+
trans->state_id = p2->position;
|
2919
|
+
trans->assertions = p1->assertions | p2->assertions
|
2920
|
+
| (p1->class ? ASSERT_CHAR_CLASS : 0)
|
2921
|
+
| (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
|
2922
|
+
if (p1->backref >= 0)
|
2923
|
+
{
|
2924
|
+
assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
|
2925
|
+
assert(p2->backref < 0);
|
2926
|
+
trans->u.backref = p1->backref;
|
2927
|
+
trans->assertions |= ASSERT_BACKREF;
|
2928
|
+
}
|
2929
|
+
else
|
2930
|
+
trans->u.class = p1->class;
|
2931
|
+
if (p1->neg_classes != NULL)
|
2932
|
+
{
|
2933
|
+
for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
|
2934
|
+
trans->neg_classes =
|
2935
|
+
xmalloc(sizeof(*trans->neg_classes) * (i + 1));
|
2936
|
+
if (trans->neg_classes == NULL)
|
2937
|
+
return REG_ESPACE;
|
2938
|
+
for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
|
2939
|
+
trans->neg_classes[i] = p1->neg_classes[i];
|
2940
|
+
trans->neg_classes[i] = (tre_ctype_t)0;
|
2941
|
+
}
|
2942
|
+
else
|
2943
|
+
trans->neg_classes = NULL;
|
2944
|
+
|
2945
|
+
/* Find out how many tags this transition has. */
|
2946
|
+
i = 0;
|
2947
|
+
if (p1->tags != NULL)
|
2948
|
+
while(p1->tags[i] >= 0)
|
2949
|
+
i++;
|
2950
|
+
j = 0;
|
2951
|
+
if (p2->tags != NULL)
|
2952
|
+
while(p2->tags[j] >= 0)
|
2953
|
+
j++;
|
2954
|
+
|
2955
|
+
/* If we are overwriting a transition, free the old tag array. */
|
2956
|
+
if (trans->tags != NULL)
|
2957
|
+
xfree(trans->tags);
|
2958
|
+
trans->tags = NULL;
|
2959
|
+
|
2960
|
+
/* If there were any tags, allocate an array and fill it. */
|
2961
|
+
if (i + j > 0)
|
2962
|
+
{
|
2963
|
+
trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
|
2964
|
+
if (!trans->tags)
|
2965
|
+
return REG_ESPACE;
|
2966
|
+
i = 0;
|
2967
|
+
if (p1->tags != NULL)
|
2968
|
+
while(p1->tags[i] >= 0)
|
2969
|
+
{
|
2970
|
+
trans->tags[i] = p1->tags[i];
|
2971
|
+
i++;
|
2972
|
+
}
|
2973
|
+
l = i;
|
2974
|
+
j = 0;
|
2975
|
+
if (p2->tags != NULL)
|
2976
|
+
while (p2->tags[j] >= 0)
|
2977
|
+
{
|
2978
|
+
/* Don't add duplicates. */
|
2979
|
+
dup = 0;
|
2980
|
+
for (k = 0; k < i; k++)
|
2981
|
+
if (trans->tags[k] == p2->tags[j])
|
2982
|
+
{
|
2983
|
+
dup = 1;
|
2984
|
+
break;
|
2985
|
+
}
|
2986
|
+
if (!dup)
|
2987
|
+
trans->tags[l++] = p2->tags[j];
|
2988
|
+
j++;
|
2989
|
+
}
|
2990
|
+
trans->tags[l] = -1;
|
2991
|
+
}
|
2992
|
+
|
2993
|
+
p2++;
|
2994
|
+
}
|
2995
|
+
p1++;
|
2996
|
+
}
|
2997
|
+
else
|
2998
|
+
/* Compute a maximum limit for the number of transitions leaving
|
2999
|
+
from each state. */
|
3000
|
+
while (p1->position >= 0)
|
3001
|
+
{
|
3002
|
+
p2 = orig_p2;
|
3003
|
+
while (p2->position >= 0)
|
3004
|
+
{
|
3005
|
+
counts[p1->position]++;
|
3006
|
+
p2++;
|
3007
|
+
}
|
3008
|
+
p1++;
|
3009
|
+
}
|
3010
|
+
return REG_OK;
|
3011
|
+
}
|
3012
|
+
|
3013
|
+
/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
|
3014
|
+
labelled with one character range (there are no transitions on empty
|
3015
|
+
strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
|
3016
|
+
the regexp. */
|
3017
|
+
static reg_errcode_t
|
3018
|
+
tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
|
3019
|
+
int *counts, int *offs)
|
3020
|
+
{
|
3021
|
+
tre_union_t *uni;
|
3022
|
+
tre_catenation_t *cat;
|
3023
|
+
tre_iteration_t *iter;
|
3024
|
+
reg_errcode_t errcode = REG_OK;
|
3025
|
+
|
3026
|
+
/* XXX - recurse using a stack!. */
|
3027
|
+
switch (node->type)
|
3028
|
+
{
|
3029
|
+
case LITERAL:
|
3030
|
+
break;
|
3031
|
+
case UNION:
|
3032
|
+
uni = (tre_union_t *)node->obj;
|
3033
|
+
errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
|
3034
|
+
if (errcode != REG_OK)
|
3035
|
+
return errcode;
|
3036
|
+
errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
|
3037
|
+
break;
|
3038
|
+
|
3039
|
+
case CATENATION:
|
3040
|
+
cat = (tre_catenation_t *)node->obj;
|
3041
|
+
/* Add a transition from each position in cat->left->lastpos
|
3042
|
+
to each position in cat->right->firstpos. */
|
3043
|
+
errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
|
3044
|
+
transitions, counts, offs);
|
3045
|
+
if (errcode != REG_OK)
|
3046
|
+
return errcode;
|
3047
|
+
errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
|
3048
|
+
if (errcode != REG_OK)
|
3049
|
+
return errcode;
|
3050
|
+
errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
|
3051
|
+
break;
|
3052
|
+
|
3053
|
+
case ITERATION:
|
3054
|
+
iter = (tre_iteration_t *)node->obj;
|
3055
|
+
assert(iter->max == -1 || iter->max == 1);
|
3056
|
+
|
3057
|
+
if (iter->max == -1)
|
3058
|
+
{
|
3059
|
+
assert(iter->min == 0 || iter->min == 1);
|
3060
|
+
/* Add a transition from each last position in the iterated
|
3061
|
+
expression to each first position. */
|
3062
|
+
errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
|
3063
|
+
transitions, counts, offs);
|
3064
|
+
if (errcode != REG_OK)
|
3065
|
+
return errcode;
|
3066
|
+
}
|
3067
|
+
errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
|
3068
|
+
break;
|
3069
|
+
}
|
3070
|
+
return errcode;
|
3071
|
+
}
|
3072
|
+
|
3073
|
+
|
3074
|
+
#define ERROR_EXIT(err) \
|
3075
|
+
do \
|
3076
|
+
{ \
|
3077
|
+
errcode = err; \
|
3078
|
+
if (/*CONSTCOND*/1) \
|
3079
|
+
goto error_exit; \
|
3080
|
+
} \
|
3081
|
+
while (/*CONSTCOND*/0)
|
3082
|
+
|
3083
|
+
|
3084
|
+
int
|
3085
|
+
regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
|
3086
|
+
{
|
3087
|
+
tre_stack_t *stack;
|
3088
|
+
tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
|
3089
|
+
tre_pos_and_tags_t *p;
|
3090
|
+
int *counts = NULL, *offs = NULL;
|
3091
|
+
int i, add = 0;
|
3092
|
+
tre_tnfa_transition_t *transitions, *initial;
|
3093
|
+
tre_tnfa_t *tnfa = NULL;
|
3094
|
+
tre_submatch_data_t *submatch_data;
|
3095
|
+
tre_tag_direction_t *tag_directions = NULL;
|
3096
|
+
reg_errcode_t errcode;
|
3097
|
+
tre_mem_t mem;
|
3098
|
+
|
3099
|
+
/* Parse context. */
|
3100
|
+
tre_parse_ctx_t parse_ctx;
|
3101
|
+
|
3102
|
+
/* Allocate a stack used throughout the compilation process for various
|
3103
|
+
purposes. */
|
3104
|
+
stack = tre_stack_new(512, 10240, 128);
|
3105
|
+
if (!stack)
|
3106
|
+
return REG_ESPACE;
|
3107
|
+
/* Allocate a fast memory allocator. */
|
3108
|
+
mem = tre_mem_new();
|
3109
|
+
if (!mem)
|
3110
|
+
{
|
3111
|
+
tre_stack_destroy(stack);
|
3112
|
+
return REG_ESPACE;
|
3113
|
+
}
|
3114
|
+
|
3115
|
+
/* Parse the regexp. */
|
3116
|
+
memset(&parse_ctx, 0, sizeof(parse_ctx));
|
3117
|
+
parse_ctx.mem = mem;
|
3118
|
+
parse_ctx.stack = stack;
|
3119
|
+
parse_ctx.re = regex;
|
3120
|
+
parse_ctx.cflags = cflags;
|
3121
|
+
parse_ctx.max_backref = -1;
|
3122
|
+
errcode = tre_parse(&parse_ctx);
|
3123
|
+
if (errcode != REG_OK)
|
3124
|
+
ERROR_EXIT(errcode);
|
3125
|
+
preg->re_nsub = parse_ctx.submatch_id - 1;
|
3126
|
+
tree = parse_ctx.result;
|
3127
|
+
|
3128
|
+
/* Back references and approximate matching cannot currently be used
|
3129
|
+
in the same regexp. */
|
3130
|
+
if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
|
3131
|
+
ERROR_EXIT(REG_BADPAT);
|
3132
|
+
|
3133
|
+
#ifdef TRE_DEBUG
|
3134
|
+
tre_ast_print(tree);
|
3135
|
+
#endif /* TRE_DEBUG */
|
3136
|
+
|
3137
|
+
/* Referring to nonexistent subexpressions is illegal. */
|
3138
|
+
if (parse_ctx.max_backref > (int)preg->re_nsub)
|
3139
|
+
ERROR_EXIT(REG_ESUBREG);
|
3140
|
+
|
3141
|
+
/* Allocate the TNFA struct. */
|
3142
|
+
tnfa = xcalloc(1, sizeof(tre_tnfa_t));
|
3143
|
+
if (tnfa == NULL)
|
3144
|
+
ERROR_EXIT(REG_ESPACE);
|
3145
|
+
tnfa->have_backrefs = parse_ctx.max_backref >= 0;
|
3146
|
+
tnfa->have_approx = parse_ctx.have_approx;
|
3147
|
+
tnfa->num_submatches = parse_ctx.submatch_id;
|
3148
|
+
|
3149
|
+
/* Set up tags for submatch addressing. If REG_NOSUB is set and the
|
3150
|
+
regexp does not have back references, this can be skipped. */
|
3151
|
+
if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
|
3152
|
+
{
|
3153
|
+
|
3154
|
+
/* Figure out how many tags we will need. */
|
3155
|
+
errcode = tre_add_tags(NULL, stack, tree, tnfa);
|
3156
|
+
if (errcode != REG_OK)
|
3157
|
+
ERROR_EXIT(errcode);
|
3158
|
+
|
3159
|
+
if (tnfa->num_tags > 0)
|
3160
|
+
{
|
3161
|
+
tag_directions = xmalloc(sizeof(*tag_directions)
|
3162
|
+
* (tnfa->num_tags + 1));
|
3163
|
+
if (tag_directions == NULL)
|
3164
|
+
ERROR_EXIT(REG_ESPACE);
|
3165
|
+
tnfa->tag_directions = tag_directions;
|
3166
|
+
memset(tag_directions, -1,
|
3167
|
+
sizeof(*tag_directions) * (tnfa->num_tags + 1));
|
3168
|
+
}
|
3169
|
+
tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
|
3170
|
+
sizeof(tnfa->minimal_tags));
|
3171
|
+
if (tnfa->minimal_tags == NULL)
|
3172
|
+
ERROR_EXIT(REG_ESPACE);
|
3173
|
+
|
3174
|
+
submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
|
3175
|
+
sizeof(*submatch_data));
|
3176
|
+
if (submatch_data == NULL)
|
3177
|
+
ERROR_EXIT(REG_ESPACE);
|
3178
|
+
tnfa->submatch_data = submatch_data;
|
3179
|
+
|
3180
|
+
errcode = tre_add_tags(mem, stack, tree, tnfa);
|
3181
|
+
if (errcode != REG_OK)
|
3182
|
+
ERROR_EXIT(errcode);
|
3183
|
+
|
3184
|
+
}
|
3185
|
+
|
3186
|
+
/* Expand iteration nodes. */
|
3187
|
+
errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
|
3188
|
+
tag_directions);
|
3189
|
+
if (errcode != REG_OK)
|
3190
|
+
ERROR_EXIT(errcode);
|
3191
|
+
|
3192
|
+
/* Add a dummy node for the final state.
|
3193
|
+
XXX - For certain patterns this dummy node can be optimized away,
|
3194
|
+
for example "a*" or "ab*". Figure out a simple way to detect
|
3195
|
+
this possibility. */
|
3196
|
+
tmp_ast_l = tree;
|
3197
|
+
tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
|
3198
|
+
if (tmp_ast_r == NULL)
|
3199
|
+
ERROR_EXIT(REG_ESPACE);
|
3200
|
+
|
3201
|
+
tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
|
3202
|
+
if (tree == NULL)
|
3203
|
+
ERROR_EXIT(REG_ESPACE);
|
3204
|
+
|
3205
|
+
errcode = tre_compute_nfl(mem, stack, tree);
|
3206
|
+
if (errcode != REG_OK)
|
3207
|
+
ERROR_EXIT(errcode);
|
3208
|
+
|
3209
|
+
counts = xmalloc(sizeof(int) * parse_ctx.position);
|
3210
|
+
if (counts == NULL)
|
3211
|
+
ERROR_EXIT(REG_ESPACE);
|
3212
|
+
|
3213
|
+
offs = xmalloc(sizeof(int) * parse_ctx.position);
|
3214
|
+
if (offs == NULL)
|
3215
|
+
ERROR_EXIT(REG_ESPACE);
|
3216
|
+
|
3217
|
+
for (i = 0; i < parse_ctx.position; i++)
|
3218
|
+
counts[i] = 0;
|
3219
|
+
tre_ast_to_tnfa(tree, NULL, counts, NULL);
|
3220
|
+
|
3221
|
+
add = 0;
|
3222
|
+
for (i = 0; i < parse_ctx.position; i++)
|
3223
|
+
{
|
3224
|
+
offs[i] = add;
|
3225
|
+
add += counts[i] + 1;
|
3226
|
+
counts[i] = 0;
|
3227
|
+
}
|
3228
|
+
transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
|
3229
|
+
if (transitions == NULL)
|
3230
|
+
ERROR_EXIT(REG_ESPACE);
|
3231
|
+
tnfa->transitions = transitions;
|
3232
|
+
tnfa->num_transitions = add;
|
3233
|
+
|
3234
|
+
errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
|
3235
|
+
if (errcode != REG_OK)
|
3236
|
+
ERROR_EXIT(errcode);
|
3237
|
+
|
3238
|
+
tnfa->firstpos_chars = NULL;
|
3239
|
+
|
3240
|
+
p = tree->firstpos;
|
3241
|
+
i = 0;
|
3242
|
+
while (p->position >= 0)
|
3243
|
+
{
|
3244
|
+
i++;
|
3245
|
+
p++;
|
3246
|
+
}
|
3247
|
+
|
3248
|
+
initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
|
3249
|
+
if (initial == NULL)
|
3250
|
+
ERROR_EXIT(REG_ESPACE);
|
3251
|
+
tnfa->initial = initial;
|
3252
|
+
|
3253
|
+
i = 0;
|
3254
|
+
for (p = tree->firstpos; p->position >= 0; p++)
|
3255
|
+
{
|
3256
|
+
initial[i].state = transitions + offs[p->position];
|
3257
|
+
initial[i].state_id = p->position;
|
3258
|
+
initial[i].tags = NULL;
|
3259
|
+
/* Copy the arrays p->tags, and p->params, they are allocated
|
3260
|
+
from a tre_mem object. */
|
3261
|
+
if (p->tags)
|
3262
|
+
{
|
3263
|
+
int j;
|
3264
|
+
for (j = 0; p->tags[j] >= 0; j++);
|
3265
|
+
initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
|
3266
|
+
if (!initial[i].tags)
|
3267
|
+
ERROR_EXIT(REG_ESPACE);
|
3268
|
+
memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
|
3269
|
+
}
|
3270
|
+
initial[i].assertions = p->assertions;
|
3271
|
+
i++;
|
3272
|
+
}
|
3273
|
+
initial[i].state = NULL;
|
3274
|
+
|
3275
|
+
tnfa->num_transitions = add;
|
3276
|
+
tnfa->final = transitions + offs[tree->lastpos[0].position];
|
3277
|
+
tnfa->num_states = parse_ctx.position;
|
3278
|
+
tnfa->cflags = cflags;
|
3279
|
+
|
3280
|
+
tre_mem_destroy(mem);
|
3281
|
+
tre_stack_destroy(stack);
|
3282
|
+
xfree(counts);
|
3283
|
+
xfree(offs);
|
3284
|
+
|
3285
|
+
preg->TRE_REGEX_T_FIELD = (void *)tnfa;
|
3286
|
+
return REG_OK;
|
3287
|
+
|
3288
|
+
error_exit:
|
3289
|
+
/* Free everything that was allocated and return the error code. */
|
3290
|
+
tre_mem_destroy(mem);
|
3291
|
+
if (stack != NULL)
|
3292
|
+
tre_stack_destroy(stack);
|
3293
|
+
if (counts != NULL)
|
3294
|
+
xfree(counts);
|
3295
|
+
if (offs != NULL)
|
3296
|
+
xfree(offs);
|
3297
|
+
preg->TRE_REGEX_T_FIELD = (void *)tnfa;
|
3298
|
+
regfree(preg);
|
3299
|
+
return errcode;
|
3300
|
+
}
|
3301
|
+
|
3302
|
+
|
3303
|
+
|
3304
|
+
|
3305
|
+
void
|
3306
|
+
regfree(regex_t *preg)
|
3307
|
+
{
|
3308
|
+
tre_tnfa_t *tnfa;
|
3309
|
+
unsigned int i;
|
3310
|
+
tre_tnfa_transition_t *trans;
|
3311
|
+
|
3312
|
+
tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
3313
|
+
if (!tnfa)
|
3314
|
+
return;
|
3315
|
+
|
3316
|
+
for (i = 0; i < tnfa->num_transitions; i++)
|
3317
|
+
if (tnfa->transitions[i].state)
|
3318
|
+
{
|
3319
|
+
if (tnfa->transitions[i].tags)
|
3320
|
+
xfree(tnfa->transitions[i].tags);
|
3321
|
+
if (tnfa->transitions[i].neg_classes)
|
3322
|
+
xfree(tnfa->transitions[i].neg_classes);
|
3323
|
+
}
|
3324
|
+
if (tnfa->transitions)
|
3325
|
+
xfree(tnfa->transitions);
|
3326
|
+
|
3327
|
+
if (tnfa->initial)
|
3328
|
+
{
|
3329
|
+
for (trans = tnfa->initial; trans->state; trans++)
|
3330
|
+
{
|
3331
|
+
if (trans->tags)
|
3332
|
+
xfree(trans->tags);
|
3333
|
+
}
|
3334
|
+
xfree(tnfa->initial);
|
3335
|
+
}
|
3336
|
+
|
3337
|
+
if (tnfa->submatch_data)
|
3338
|
+
{
|
3339
|
+
for (i = 0; i < tnfa->num_submatches; i++)
|
3340
|
+
if (tnfa->submatch_data[i].parents)
|
3341
|
+
xfree(tnfa->submatch_data[i].parents);
|
3342
|
+
xfree(tnfa->submatch_data);
|
3343
|
+
}
|
3344
|
+
|
3345
|
+
if (tnfa->tag_directions)
|
3346
|
+
xfree(tnfa->tag_directions);
|
3347
|
+
if (tnfa->firstpos_chars)
|
3348
|
+
xfree(tnfa->firstpos_chars);
|
3349
|
+
if (tnfa->minimal_tags)
|
3350
|
+
xfree(tnfa->minimal_tags);
|
3351
|
+
xfree(tnfa);
|
3352
|
+
}
|