webruby 0.2.2 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (245) hide show
  1. checksums.yaml +4 -4
  2. data/lib/webruby/config.rb +4 -9
  3. data/lib/webruby/rake/files.rake +2 -2
  4. data/modules/emscripten/AUTHORS +9 -1
  5. data/modules/emscripten/CONTRIBUTING.markdown +5 -0
  6. data/modules/emscripten/ChangeLog +435 -0
  7. data/modules/emscripten/cmake/Modules/FindOpenAL.cmake +26 -0
  8. data/modules/emscripten/cmake/Platform/Emscripten.cmake +9 -2
  9. data/modules/emscripten/em++ +0 -2
  10. data/modules/emscripten/emcc +92 -32
  11. data/modules/emscripten/emlink.py +16 -13
  12. data/modules/emscripten/emmake +1 -1
  13. data/modules/emscripten/emrun +918 -0
  14. data/modules/emscripten/emrun.bat +2 -0
  15. data/modules/emscripten/emscripten.py +545 -20
  16. data/modules/emscripten/src/analyzer.js +6 -1
  17. data/modules/emscripten/src/compiler.js +25 -16
  18. data/modules/emscripten/src/emrun_postjs.js +20 -0
  19. data/modules/emscripten/{tests → src}/hello_world.js +0 -0
  20. data/modules/emscripten/src/intertyper.js +45 -16
  21. data/modules/emscripten/src/jsifier.js +78 -48
  22. data/modules/emscripten/src/library.js +381 -96
  23. data/modules/emscripten/src/library_browser.js +50 -53
  24. data/modules/emscripten/src/library_egl.js +66 -24
  25. data/modules/emscripten/src/library_fs.js +122 -90
  26. data/modules/emscripten/src/library_gl.js +739 -353
  27. data/modules/emscripten/src/library_glfw.js +9 -3
  28. data/modules/emscripten/src/library_glut.js +10 -5
  29. data/modules/emscripten/src/library_idbfs.js +14 -14
  30. data/modules/emscripten/src/library_memfs.js +65 -41
  31. data/modules/emscripten/src/library_nodefs.js +61 -9
  32. data/modules/emscripten/src/library_openal.js +4 -4
  33. data/modules/emscripten/src/library_path.js +9 -13
  34. data/modules/emscripten/src/library_sdl.js +301 -64
  35. data/modules/emscripten/src/library_sockfs.js +7 -5
  36. data/modules/emscripten/src/modules.js +62 -22
  37. data/modules/emscripten/src/parseTools.js +135 -102
  38. data/modules/emscripten/src/postamble.js +3 -4
  39. data/modules/emscripten/src/preamble.js +49 -29
  40. data/modules/emscripten/src/proxyClient.js +1 -1
  41. data/modules/emscripten/src/proxyWorker.js +10 -10
  42. data/modules/emscripten/src/relooper/Relooper.cpp +15 -4
  43. data/modules/emscripten/src/runtime.js +32 -8
  44. data/modules/emscripten/src/settings.js +25 -8
  45. data/modules/emscripten/src/shell.html +6 -3
  46. data/modules/emscripten/src/shell.js +13 -11
  47. data/modules/emscripten/src/simd.js +602 -432
  48. data/modules/emscripten/src/struct_info.json +22 -2
  49. data/modules/emscripten/src/utility.js +32 -17
  50. data/modules/emscripten/system/include/SDL/SDL_events.h +1 -0
  51. data/modules/emscripten/system/include/compat/ctype.h +17 -0
  52. data/modules/emscripten/system/include/compat/wchar.h +23 -0
  53. data/modules/emscripten/system/include/compat/wctype.h +23 -0
  54. data/modules/emscripten/system/include/emscripten/emmintrin.h +87 -0
  55. data/modules/emscripten/system/include/emscripten/emscripten.h +30 -4
  56. data/modules/emscripten/system/include/emscripten/vector.h +29 -1
  57. data/modules/emscripten/system/include/emscripten/xmmintrin.h +131 -0
  58. data/modules/emscripten/system/include/libcxx/CREDITS.TXT +9 -1
  59. data/modules/emscripten/system/include/libcxx/__bit_reference +8 -8
  60. data/modules/emscripten/system/include/libcxx/__config +95 -17
  61. data/modules/emscripten/system/include/libcxx/__debug +25 -4
  62. data/modules/emscripten/system/include/libcxx/__functional_03 +7 -7
  63. data/modules/emscripten/system/include/libcxx/__functional_base +169 -9
  64. data/modules/emscripten/system/include/libcxx/__functional_base_03 +1 -1
  65. data/modules/emscripten/system/include/libcxx/__hash_table +25 -25
  66. data/modules/emscripten/system/include/libcxx/__locale +21 -19
  67. data/modules/emscripten/system/include/libcxx/__mutex_base +2 -33
  68. data/modules/emscripten/system/include/libcxx/__split_buffer +9 -9
  69. data/modules/emscripten/system/include/libcxx/__std_stream +14 -0
  70. data/modules/emscripten/system/include/libcxx/__tree +35 -26
  71. data/modules/emscripten/system/include/libcxx/__tuple +15 -15
  72. data/modules/emscripten/system/include/libcxx/__tuple_03 +2 -2
  73. data/modules/emscripten/system/include/libcxx/__undef_min_max +8 -0
  74. data/modules/emscripten/system/include/libcxx/algorithm +121 -110
  75. data/modules/emscripten/system/include/libcxx/array +15 -15
  76. data/modules/emscripten/system/include/libcxx/bitset +4 -4
  77. data/modules/emscripten/system/include/libcxx/chrono +51 -17
  78. data/modules/emscripten/system/include/libcxx/cmath +25 -23
  79. data/modules/emscripten/system/include/libcxx/codecvt +21 -18
  80. data/modules/emscripten/system/include/libcxx/complex +48 -7
  81. data/modules/emscripten/system/include/libcxx/cstddef +1 -1
  82. data/modules/emscripten/system/include/libcxx/cstdio +8 -1
  83. data/modules/emscripten/system/include/libcxx/cstdlib +1 -1
  84. data/modules/emscripten/system/include/libcxx/cwchar +1 -1
  85. data/modules/emscripten/system/include/libcxx/deque +26 -12
  86. data/modules/emscripten/system/include/libcxx/dynarray +311 -0
  87. data/modules/emscripten/system/include/libcxx/exception +4 -4
  88. data/modules/emscripten/system/include/libcxx/ext/__hash +3 -3
  89. data/modules/emscripten/system/include/libcxx/ext/hash_map +19 -15
  90. data/modules/emscripten/system/include/libcxx/ext/hash_set +7 -3
  91. data/modules/emscripten/system/include/libcxx/forward_list +33 -7
  92. data/modules/emscripten/system/include/libcxx/fstream +4 -4
  93. data/modules/emscripten/system/include/libcxx/functional +200 -170
  94. data/modules/emscripten/system/include/libcxx/future +83 -39
  95. data/modules/emscripten/system/include/libcxx/initializer_list +24 -11
  96. data/modules/emscripten/system/include/libcxx/iomanip +147 -0
  97. data/modules/emscripten/system/include/libcxx/ios +24 -16
  98. data/modules/emscripten/system/include/libcxx/iosfwd +19 -19
  99. data/modules/emscripten/system/include/libcxx/istream +13 -8
  100. data/modules/emscripten/system/include/libcxx/iterator +108 -417
  101. data/modules/emscripten/system/include/libcxx/limits +8 -4
  102. data/modules/emscripten/system/include/libcxx/list +28 -8
  103. data/modules/emscripten/system/include/libcxx/locale +153 -390
  104. data/modules/emscripten/system/include/libcxx/map +280 -100
  105. data/modules/emscripten/system/include/libcxx/memory +49 -97
  106. data/modules/emscripten/system/include/libcxx/mutex +2 -2
  107. data/modules/emscripten/system/include/libcxx/new +43 -14
  108. data/modules/emscripten/system/include/libcxx/numeric +2 -2
  109. data/modules/emscripten/system/include/libcxx/optional +697 -0
  110. data/modules/emscripten/system/include/libcxx/ostream +17 -8
  111. data/modules/emscripten/system/include/libcxx/queue +5 -5
  112. data/modules/emscripten/system/include/libcxx/random +53 -51
  113. data/modules/emscripten/system/include/libcxx/ratio +11 -11
  114. data/modules/emscripten/system/include/libcxx/readme.txt +1 -1
  115. data/modules/emscripten/system/include/libcxx/regex +23 -20
  116. data/modules/emscripten/system/include/libcxx/scoped_allocator +1 -1
  117. data/modules/emscripten/system/include/libcxx/set +166 -2
  118. data/modules/emscripten/system/include/libcxx/shared_mutex +419 -0
  119. data/modules/emscripten/system/include/libcxx/sstream +4 -4
  120. data/modules/emscripten/system/include/libcxx/stack +3 -3
  121. data/modules/emscripten/system/include/libcxx/streambuf +5 -5
  122. data/modules/emscripten/system/include/libcxx/string +372 -324
  123. data/modules/emscripten/system/include/libcxx/support/ibm/limits.h +99 -0
  124. data/modules/emscripten/system/include/libcxx/support/ibm/support.h +54 -0
  125. data/modules/emscripten/system/include/libcxx/support/ibm/xlocale.h +326 -0
  126. data/modules/emscripten/system/include/libcxx/support/win32/limits_win32.h +6 -6
  127. data/modules/emscripten/system/include/libcxx/support/win32/locale_win32.h +15 -15
  128. data/modules/emscripten/system/include/libcxx/support/win32/math_win32.h +2 -0
  129. data/modules/emscripten/system/include/libcxx/support/win32/support.h +6 -1
  130. data/modules/emscripten/system/include/libcxx/system_error +14 -8
  131. data/modules/emscripten/system/include/libcxx/thread +7 -8
  132. data/modules/emscripten/system/include/libcxx/tuple +29 -88
  133. data/modules/emscripten/system/include/libcxx/type_traits +253 -209
  134. data/modules/emscripten/system/include/libcxx/typeindex +3 -3
  135. data/modules/emscripten/system/include/libcxx/unordered_map +162 -101
  136. data/modules/emscripten/system/include/libcxx/unordered_set +79 -2
  137. data/modules/emscripten/system/include/libcxx/utility +20 -20
  138. data/modules/emscripten/system/include/libcxx/valarray +23 -23
  139. data/modules/emscripten/system/include/libcxx/vector +114 -91
  140. data/modules/emscripten/system/lib/libc/musl/src/regex/regcomp.c +3352 -0
  141. data/modules/emscripten/system/lib/libc/musl/src/regex/regerror.c +35 -0
  142. data/modules/emscripten/system/lib/libc/musl/src/regex/regexec.c +1011 -0
  143. data/modules/emscripten/system/lib/libc/musl/src/regex/tre-mem.c +158 -0
  144. data/modules/emscripten/system/lib/libc/musl/src/regex/tre.h +231 -0
  145. data/modules/emscripten/system/lib/libcextra.symbols +7 -0
  146. data/modules/emscripten/system/lib/libcxx/CREDITS.TXT +9 -1
  147. data/modules/emscripten/system/lib/libcxx/algorithm.cpp +1 -0
  148. data/modules/emscripten/system/lib/libcxx/debug.cpp +66 -42
  149. data/modules/emscripten/system/lib/libcxx/exception.cpp +88 -16
  150. data/modules/emscripten/system/lib/libcxx/future.cpp +6 -0
  151. data/modules/emscripten/system/lib/libcxx/ios.cpp +7 -2
  152. data/modules/emscripten/system/lib/libcxx/iostream.cpp +8 -8
  153. data/modules/emscripten/system/lib/libcxx/locale.cpp +38 -11
  154. data/modules/emscripten/system/lib/libcxx/mutex.cpp +3 -0
  155. data/modules/emscripten/system/lib/libcxx/new.cpp +44 -10
  156. data/modules/emscripten/system/lib/libcxx/optional.cpp +25 -0
  157. data/modules/emscripten/system/lib/libcxx/random.cpp +26 -0
  158. data/modules/emscripten/system/lib/libcxx/readme.txt +1 -1
  159. data/modules/emscripten/system/lib/libcxx/shared_mutex.cpp +101 -0
  160. data/modules/emscripten/system/lib/libcxx/stdexcept.cpp +11 -7
  161. data/modules/emscripten/system/lib/libcxx/string.cpp +3 -1
  162. data/modules/emscripten/system/lib/libcxx/strstream.cpp +7 -7
  163. data/modules/emscripten/system/lib/libcxx/support/win32/locale_win32.cpp +12 -13
  164. data/modules/emscripten/system/lib/libcxx/support/win32/support.cpp +33 -36
  165. data/modules/emscripten/system/lib/libcxx/symbols +187 -168
  166. data/modules/emscripten/system/lib/libcxx/system_error.cpp +1 -0
  167. data/modules/emscripten/system/lib/libcxx/thread.cpp +7 -3
  168. data/modules/emscripten/system/lib/libcxx/typeinfo.cpp +9 -6
  169. data/modules/emscripten/system/lib/libcxx/valarray.cpp +2 -0
  170. data/modules/emscripten/third_party/lzma.js/doit.bat +4 -0
  171. data/modules/emscripten/third_party/lzma.js/doit.sh +9 -2
  172. data/modules/emscripten/tools/cache.py +5 -7
  173. data/modules/emscripten/tools/cache.pyc +0 -0
  174. data/modules/emscripten/tools/eliminator/asm-eliminator-test-output.js +7 -0
  175. data/modules/emscripten/tools/eliminator/asm-eliminator-test.js +9 -1
  176. data/modules/emscripten/tools/eliminator/eliminator-test-output.js +3 -0
  177. data/modules/emscripten/tools/eliminator/eliminator-test.js +9 -1
  178. data/modules/emscripten/tools/file_packager.py +93 -50
  179. data/modules/emscripten/tools/js-optimizer.js +98 -48
  180. data/modules/emscripten/tools/js_optimizer.py +4 -4
  181. data/modules/emscripten/tools/js_optimizer.pyc +0 -0
  182. data/modules/emscripten/tools/jsrun.py +1 -1
  183. data/modules/emscripten/tools/jsrun.pyc +0 -0
  184. data/modules/emscripten/tools/response_file.py +6 -0
  185. data/modules/emscripten/tools/response_file.pyc +0 -0
  186. data/modules/emscripten/tools/settings_template_readonly.py +2 -0
  187. data/modules/emscripten/tools/shared.py +88 -34
  188. data/modules/emscripten/tools/shared.pyc +0 -0
  189. data/modules/emscripten/tools/split.py +21 -13
  190. data/modules/mruby/build_config.rb +7 -1
  191. data/modules/mruby/doc/compile/README.md +5 -9
  192. data/modules/mruby/include/mrbconf.h +5 -2
  193. data/modules/mruby/include/mruby/array.h +1 -0
  194. data/modules/mruby/include/mruby/compile.h +2 -4
  195. data/modules/mruby/include/mruby/dump.h +7 -16
  196. data/modules/mruby/include/mruby/hash.h +1 -1
  197. data/modules/mruby/include/mruby/irep.h +14 -2
  198. data/modules/mruby/include/mruby/khash.h +8 -7
  199. data/modules/mruby/include/mruby/string.h +1 -0
  200. data/modules/mruby/include/mruby/value.h +5 -2
  201. data/modules/mruby/include/mruby.h +12 -13
  202. data/modules/mruby/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c +16 -6
  203. data/modules/mruby/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c +18 -30
  204. data/modules/mruby/mrbgems/mruby-fiber/src/fiber.c +21 -0
  205. data/modules/mruby/mrbgems/mruby-math/src/math.c +1 -1
  206. data/modules/mruby/mrbgems/mruby-random/src/random.c +144 -47
  207. data/modules/mruby/mrbgems/mruby-random/test/random.rb +44 -0
  208. data/modules/mruby/mrbgems/mruby-struct/src/struct.c +5 -5
  209. data/modules/mruby/mrblib/numeric.rb +99 -33
  210. data/modules/mruby/src/array.c +11 -4
  211. data/modules/mruby/src/backtrace.c +2 -2
  212. data/modules/mruby/src/class.c +49 -30
  213. data/modules/mruby/src/codegen.c +131 -79
  214. data/modules/mruby/src/debug.c +1 -1
  215. data/modules/mruby/src/dump.c +213 -163
  216. data/modules/mruby/src/error.c +17 -17
  217. data/modules/mruby/src/error.h +1 -1
  218. data/modules/mruby/src/etc.c +10 -0
  219. data/modules/mruby/src/gc.c +35 -17
  220. data/modules/mruby/src/hash.c +5 -5
  221. data/modules/mruby/src/kernel.c +36 -14
  222. data/modules/mruby/src/load.c +238 -296
  223. data/modules/mruby/src/numeric.c +18 -98
  224. data/modules/mruby/src/object.c +3 -5
  225. data/modules/mruby/src/parse.y +63 -56
  226. data/modules/mruby/src/proc.c +8 -5
  227. data/modules/mruby/src/re.h +0 -1
  228. data/modules/mruby/src/state.c +65 -27
  229. data/modules/mruby/src/string.c +3 -31
  230. data/modules/mruby/src/symbol.c +3 -3
  231. data/modules/mruby/src/variable.c +12 -5
  232. data/modules/mruby/src/vm.c +90 -72
  233. data/modules/mruby/tasks/mruby_build.rake +10 -1
  234. data/modules/mruby/tasks/toolchains/gcc.rake +12 -2
  235. data/modules/mruby/tasks/toolchains/{vs2012.rake → visualcpp.rake} +1 -1
  236. data/modules/mruby/test/driver.c +3 -3
  237. data/modules/mruby/test/t/array.rb +5 -5
  238. data/modules/mruby/test/t/class.rb +14 -1
  239. data/modules/mruby/test/t/kernel.rb +4 -0
  240. data/modules/mruby/test/t/module.rb +4 -4
  241. data/modules/mruby/test/t/nameerror.rb +1 -1
  242. data/modules/mruby/tools/mrbc/mrbc.c +23 -17
  243. data/modules/mruby/travis_config.rb +10 -1
  244. metadata +28 -5
  245. data/modules/mruby/tasks/toolchains/vs2010.rake +0 -3
@@ -0,0 +1,3352 @@
1
+ /*
2
+ regcomp.c - TRE POSIX compatible regex compilation functions.
3
+
4
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5
+ All rights reserved.
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions
9
+ are met:
10
+
11
+ 1. Redistributions of source code must retain the above copyright
12
+ notice, this list of conditions and the following disclaimer.
13
+
14
+ 2. Redistributions in binary form must reproduce the above copyright
15
+ notice, this list of conditions and the following disclaimer in the
16
+ documentation and/or other materials provided with the distribution.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ */
31
+
32
+ #include <string.h>
33
+ #include <errno.h>
34
+ #include <stdlib.h>
35
+ #include <regex.h>
36
+ #include <limits.h>
37
+ #include <stdint.h>
38
+
39
+ #include "tre.h"
40
+
41
+ #include <assert.h>
42
+
43
+ /***********************************************************************
44
+ from tre-compile.h
45
+ ***********************************************************************/
46
+
47
+ typedef struct {
48
+ int position;
49
+ int code_min;
50
+ int code_max;
51
+ int *tags;
52
+ int assertions;
53
+ tre_ctype_t class;
54
+ tre_ctype_t *neg_classes;
55
+ int backref;
56
+ } tre_pos_and_tags_t;
57
+
58
+
59
+ /***********************************************************************
60
+ from tre-ast.c and tre-ast.h
61
+ ***********************************************************************/
62
+
63
+ /* The different AST node types. */
64
+ typedef enum {
65
+ LITERAL,
66
+ CATENATION,
67
+ ITERATION,
68
+ UNION
69
+ } tre_ast_type_t;
70
+
71
+ /* Special subtypes of TRE_LITERAL. */
72
+ #define EMPTY -1 /* Empty leaf (denotes empty string). */
73
+ #define ASSERTION -2 /* Assertion leaf. */
74
+ #define TAG -3 /* Tag leaf. */
75
+ #define BACKREF -4 /* Back reference leaf. */
76
+
77
+ #define IS_SPECIAL(x) ((x)->code_min < 0)
78
+ #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79
+ #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80
+ #define IS_TAG(x) ((x)->code_min == TAG)
81
+ #define IS_BACKREF(x) ((x)->code_min == BACKREF)
82
+
83
+
84
+ /* A generic AST node. All AST nodes consist of this node on the top
85
+ level with `obj' pointing to the actual content. */
86
+ typedef struct {
87
+ tre_ast_type_t type; /* Type of the node. */
88
+ void *obj; /* Pointer to actual node. */
89
+ int nullable;
90
+ int submatch_id;
91
+ int num_submatches;
92
+ int num_tags;
93
+ tre_pos_and_tags_t *firstpos;
94
+ tre_pos_and_tags_t *lastpos;
95
+ } tre_ast_node_t;
96
+
97
+
98
+ /* A "literal" node. These are created for assertions, back references,
99
+ tags, matching parameter settings, and all expressions that match one
100
+ character. */
101
+ typedef struct {
102
+ long code_min;
103
+ long code_max;
104
+ int position;
105
+ tre_ctype_t class;
106
+ tre_ctype_t *neg_classes;
107
+ } tre_literal_t;
108
+
109
+ /* A "catenation" node. These are created when two regexps are concatenated.
110
+ If there are more than one subexpressions in sequence, the `left' part
111
+ holds all but the last, and `right' part holds the last subexpression
112
+ (catenation is left associative). */
113
+ typedef struct {
114
+ tre_ast_node_t *left;
115
+ tre_ast_node_t *right;
116
+ } tre_catenation_t;
117
+
118
+ /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119
+ operators. */
120
+ typedef struct {
121
+ /* Subexpression to match. */
122
+ tre_ast_node_t *arg;
123
+ /* Minimum number of consecutive matches. */
124
+ int min;
125
+ /* Maximum number of consecutive matches. */
126
+ int max;
127
+ /* If 0, match as many characters as possible, if 1 match as few as
128
+ possible. Note that this does not always mean the same thing as
129
+ matching as many/few repetitions as possible. */
130
+ unsigned int minimal:1;
131
+ } tre_iteration_t;
132
+
133
+ /* An "union" node. These are created for the "|" operator. */
134
+ typedef struct {
135
+ tre_ast_node_t *left;
136
+ tre_ast_node_t *right;
137
+ } tre_union_t;
138
+
139
+ static tre_ast_node_t *
140
+ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
141
+
142
+ static tre_ast_node_t *
143
+ tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
144
+
145
+ static tre_ast_node_t *
146
+ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
147
+ int minimal);
148
+
149
+ static tre_ast_node_t *
150
+ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
151
+
152
+ static tre_ast_node_t *
153
+ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
154
+ tre_ast_node_t *right);
155
+
156
+
157
+ static tre_ast_node_t *
158
+ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
159
+ {
160
+ tre_ast_node_t *node;
161
+
162
+ node = tre_mem_calloc(mem, sizeof(*node));
163
+ if (!node)
164
+ return NULL;
165
+ node->obj = tre_mem_calloc(mem, size);
166
+ if (!node->obj)
167
+ return NULL;
168
+ node->type = type;
169
+ node->nullable = -1;
170
+ node->submatch_id = -1;
171
+
172
+ return node;
173
+ }
174
+
175
+ static tre_ast_node_t *
176
+ tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
177
+ {
178
+ tre_ast_node_t *node;
179
+ tre_literal_t *lit;
180
+
181
+ node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
182
+ if (!node)
183
+ return NULL;
184
+ lit = node->obj;
185
+ lit->code_min = code_min;
186
+ lit->code_max = code_max;
187
+ lit->position = position;
188
+
189
+ return node;
190
+ }
191
+
192
+ static tre_ast_node_t *
193
+ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
194
+ int minimal)
195
+ {
196
+ tre_ast_node_t *node;
197
+ tre_iteration_t *iter;
198
+
199
+ node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
200
+ if (!node)
201
+ return NULL;
202
+ iter = node->obj;
203
+ iter->arg = arg;
204
+ iter->min = min;
205
+ iter->max = max;
206
+ iter->minimal = minimal;
207
+ node->num_submatches = arg->num_submatches;
208
+
209
+ return node;
210
+ }
211
+
212
+ static tre_ast_node_t *
213
+ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
214
+ {
215
+ tre_ast_node_t *node;
216
+
217
+ node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
218
+ if (node == NULL)
219
+ return NULL;
220
+ ((tre_union_t *)node->obj)->left = left;
221
+ ((tre_union_t *)node->obj)->right = right;
222
+ node->num_submatches = left->num_submatches + right->num_submatches;
223
+
224
+ return node;
225
+ }
226
+
227
+ static tre_ast_node_t *
228
+ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
229
+ tre_ast_node_t *right)
230
+ {
231
+ tre_ast_node_t *node;
232
+
233
+ node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
234
+ if (node == NULL)
235
+ return NULL;
236
+ ((tre_catenation_t *)node->obj)->left = left;
237
+ ((tre_catenation_t *)node->obj)->right = right;
238
+ node->num_submatches = left->num_submatches + right->num_submatches;
239
+
240
+ return node;
241
+ }
242
+
243
+
244
+ /***********************************************************************
245
+ from tre-stack.c and tre-stack.h
246
+ ***********************************************************************/
247
+
248
+ typedef struct tre_stack_rec tre_stack_t;
249
+
250
+ /* Creates a new stack object. `size' is initial size in bytes, `max_size'
251
+ is maximum size, and `increment' specifies how much more space will be
252
+ allocated with realloc() if all space gets used up. Returns the stack
253
+ object or NULL if out of memory. */
254
+ static tre_stack_t *
255
+ tre_stack_new(int size, int max_size, int increment);
256
+
257
+ /* Frees the stack object. */
258
+ static void
259
+ tre_stack_destroy(tre_stack_t *s);
260
+
261
+ /* Returns the current number of objects in the stack. */
262
+ static int
263
+ tre_stack_num_objects(tre_stack_t *s);
264
+
265
+ /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
266
+ `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
267
+ This tries to realloc() more space before failing if maximum size
268
+ has not yet been reached. Returns REG_OK if successful. */
269
+ #define declare_pushf(typetag, type) \
270
+ static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
271
+
272
+ declare_pushf(voidptr, void *);
273
+ declare_pushf(int, int);
274
+
275
+ /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
276
+ element off of stack `s' and returns it. The stack must not be
277
+ empty. */
278
+ #define declare_popf(typetag, type) \
279
+ static type tre_stack_pop_ ## typetag(tre_stack_t *s)
280
+
281
+ declare_popf(voidptr, void *);
282
+ declare_popf(int, int);
283
+
284
+ /* Just to save some typing. */
285
+ #define STACK_PUSH(s, typetag, value) \
286
+ do \
287
+ { \
288
+ status = tre_stack_push_ ## typetag(s, value); \
289
+ } \
290
+ while (/*CONSTCOND*/0)
291
+
292
+ #define STACK_PUSHX(s, typetag, value) \
293
+ { \
294
+ status = tre_stack_push_ ## typetag(s, value); \
295
+ if (status != REG_OK) \
296
+ break; \
297
+ }
298
+
299
+ #define STACK_PUSHR(s, typetag, value) \
300
+ { \
301
+ reg_errcode_t _status; \
302
+ _status = tre_stack_push_ ## typetag(s, value); \
303
+ if (_status != REG_OK) \
304
+ return _status; \
305
+ }
306
+
307
+ union tre_stack_item {
308
+ void *voidptr_value;
309
+ int int_value;
310
+ };
311
+
312
+ struct tre_stack_rec {
313
+ int size;
314
+ int max_size;
315
+ int increment;
316
+ int ptr;
317
+ union tre_stack_item *stack;
318
+ };
319
+
320
+
321
+ static tre_stack_t *
322
+ tre_stack_new(int size, int max_size, int increment)
323
+ {
324
+ tre_stack_t *s;
325
+
326
+ s = xmalloc(sizeof(*s));
327
+ if (s != NULL)
328
+ {
329
+ s->stack = xmalloc(sizeof(*s->stack) * size);
330
+ if (s->stack == NULL)
331
+ {
332
+ xfree(s);
333
+ return NULL;
334
+ }
335
+ s->size = size;
336
+ s->max_size = max_size;
337
+ s->increment = increment;
338
+ s->ptr = 0;
339
+ }
340
+ return s;
341
+ }
342
+
343
+ static void
344
+ tre_stack_destroy(tre_stack_t *s)
345
+ {
346
+ xfree(s->stack);
347
+ xfree(s);
348
+ }
349
+
350
+ static int
351
+ tre_stack_num_objects(tre_stack_t *s)
352
+ {
353
+ return s->ptr;
354
+ }
355
+
356
+ static reg_errcode_t
357
+ tre_stack_push(tre_stack_t *s, union tre_stack_item value)
358
+ {
359
+ if (s->ptr < s->size)
360
+ {
361
+ s->stack[s->ptr] = value;
362
+ s->ptr++;
363
+ }
364
+ else
365
+ {
366
+ if (s->size >= s->max_size)
367
+ {
368
+ return REG_ESPACE;
369
+ }
370
+ else
371
+ {
372
+ union tre_stack_item *new_buffer;
373
+ int new_size;
374
+ new_size = s->size + s->increment;
375
+ if (new_size > s->max_size)
376
+ new_size = s->max_size;
377
+ new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
378
+ if (new_buffer == NULL)
379
+ {
380
+ return REG_ESPACE;
381
+ }
382
+ assert(new_size > s->size);
383
+ s->size = new_size;
384
+ s->stack = new_buffer;
385
+ tre_stack_push(s, value);
386
+ }
387
+ }
388
+ return REG_OK;
389
+ }
390
+
391
+ #define define_pushf(typetag, type) \
392
+ declare_pushf(typetag, type) { \
393
+ union tre_stack_item item; \
394
+ item.typetag ## _value = value; \
395
+ return tre_stack_push(s, item); \
396
+ }
397
+
398
+ define_pushf(int, int)
399
+ define_pushf(voidptr, void *)
400
+
401
+ #define define_popf(typetag, type) \
402
+ declare_popf(typetag, type) { \
403
+ return s->stack[--s->ptr].typetag ## _value; \
404
+ }
405
+
406
+ define_popf(int, int)
407
+ define_popf(voidptr, void *)
408
+
409
+
410
+ /***********************************************************************
411
+ from tre-parse.c and tre-parse.h
412
+ ***********************************************************************/
413
+
414
+ /* Parse context. */
415
+ typedef struct {
416
+ /* Memory allocator. The AST is allocated using this. */
417
+ tre_mem_t mem;
418
+ /* Stack used for keeping track of regexp syntax. */
419
+ tre_stack_t *stack;
420
+ /* The parse result. */
421
+ tre_ast_node_t *result;
422
+ /* The regexp to parse and its length. */
423
+ const char *re;
424
+ /* The first character of the entire regexp. */
425
+ const char *re_start;
426
+ /* Current submatch ID. */
427
+ int submatch_id;
428
+ /* Current position (number of literal). */
429
+ int position;
430
+ /* The highest back reference or -1 if none seen so far. */
431
+ int max_backref;
432
+ /* This flag is set if the regexp uses approximate matching. */
433
+ int have_approx;
434
+ /* Compilation flags. */
435
+ int cflags;
436
+ /* If this flag is set the top-level submatch is not captured. */
437
+ int nofirstsub;
438
+ } tre_parse_ctx_t;
439
+
440
+ /* Parses a wide character regexp pattern into a syntax tree. This parser
441
+ handles both syntaxes (BRE and ERE), including the TRE extensions. */
442
+ static reg_errcode_t
443
+ tre_parse(tre_parse_ctx_t *ctx);
444
+
445
+
446
+ /*
447
+ This parser is just a simple recursive descent parser for POSIX.2
448
+ regexps. The parser supports both the obsolete default syntax and
449
+ the "extended" syntax, and some nonstandard extensions.
450
+ */
451
+
452
+ /* Characters with special meanings in regexp syntax. */
453
+ #define CHAR_PIPE '|'
454
+ #define CHAR_LPAREN '('
455
+ #define CHAR_RPAREN ')'
456
+ #define CHAR_LBRACE '{'
457
+ #define CHAR_RBRACE '}'
458
+ #define CHAR_LBRACKET '['
459
+ #define CHAR_RBRACKET ']'
460
+ #define CHAR_MINUS '-'
461
+ #define CHAR_STAR '*'
462
+ #define CHAR_QUESTIONMARK '?'
463
+ #define CHAR_PLUS '+'
464
+ #define CHAR_PERIOD '.'
465
+ #define CHAR_COLON ':'
466
+ #define CHAR_EQUAL '='
467
+ #define CHAR_COMMA ','
468
+ #define CHAR_CARET '^'
469
+ #define CHAR_DOLLAR '$'
470
+ #define CHAR_BACKSLASH '\\'
471
+ #define CHAR_HASH '#'
472
+ #define CHAR_TILDE '~'
473
+
474
+
475
+ /* Some macros for expanding \w, \s, etc. */
476
+ static const struct tre_macro_struct {
477
+ const char c;
478
+ const char *expansion;
479
+ } tre_macros[] =
480
+ { {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
481
+ {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
482
+ {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
483
+ {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
484
+ { 0, NULL }
485
+ };
486
+
487
+
488
+ /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
489
+ must have at least `len' items. Sets buf[0] to zero if the there
490
+ is no match in `tre_macros'. */
491
+ static const char *
492
+ tre_expand_macro(const char *regex)
493
+ {
494
+ int i;
495
+
496
+ if (!*regex)
497
+ return 0;
498
+
499
+ for (i = 0; tre_macros[i].expansion && tre_macros[i].c != *regex; i++);
500
+ return tre_macros[i].expansion;
501
+ }
502
+
503
+ static reg_errcode_t
504
+ tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
505
+ tre_ast_node_t ***items)
506
+ {
507
+ reg_errcode_t status;
508
+ tre_ast_node_t **array = *items;
509
+ /* Allocate more space if necessary. */
510
+ if (*i >= *max_i)
511
+ {
512
+ tre_ast_node_t **new_items;
513
+ /* If the array is already 1024 items large, give up -- there's
514
+ probably an error in the regexp (e.g. not a '\0' terminated
515
+ string and missing ']') */
516
+ if (*max_i > 1024)
517
+ return REG_ESPACE;
518
+ *max_i *= 2;
519
+ new_items = xrealloc(array, sizeof(*items) * *max_i);
520
+ if (new_items == NULL)
521
+ return REG_ESPACE;
522
+ *items = array = new_items;
523
+ }
524
+ array[*i] = tre_ast_new_literal(mem, min, max, -1);
525
+ status = array[*i] == NULL ? REG_ESPACE : REG_OK;
526
+ (*i)++;
527
+ return status;
528
+ }
529
+
530
+
531
+ static int
532
+ tre_compare_items(const void *a, const void *b)
533
+ {
534
+ const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
535
+ const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
536
+ tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
537
+ int a_min = l_a->code_min, b_min = l_b->code_min;
538
+
539
+ if (a_min < b_min)
540
+ return -1;
541
+ else if (a_min > b_min)
542
+ return 1;
543
+ else
544
+ return 0;
545
+ }
546
+
547
+ /* Maximum number of character classes that can occur in a negated bracket
548
+ expression. */
549
+ #define MAX_NEG_CLASSES 64
550
+
551
+ /* Maximum length of character class names. */
552
+ #define MAX_CLASS_NAME
553
+
554
+ static reg_errcode_t
555
+ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
556
+ tre_ctype_t neg_classes[], int *num_neg_classes,
557
+ tre_ast_node_t ***items, int *num_items,
558
+ int *items_size)
559
+ {
560
+ const char *re = ctx->re;
561
+ reg_errcode_t status = REG_OK;
562
+ tre_ctype_t class = (tre_ctype_t)0;
563
+ int i = *num_items;
564
+ int max_i = *items_size;
565
+ int skip;
566
+
567
+ /* Build an array of the items in the bracket expression. */
568
+ while (status == REG_OK)
569
+ {
570
+ skip = 0;
571
+ if (!*re)
572
+ {
573
+ status = REG_EBRACK;
574
+ }
575
+ else if (*re == CHAR_RBRACKET && re > ctx->re)
576
+ {
577
+ re++;
578
+ break;
579
+ }
580
+ else
581
+ {
582
+ tre_cint_t min = 0, max = 0;
583
+ wchar_t wc;
584
+ int clen = mbtowc(&wc, re, -1);
585
+
586
+ if (clen<0) clen=1, wc=WEOF;
587
+
588
+ class = (tre_ctype_t)0;
589
+ if (*(re + clen) == CHAR_MINUS && *(re + clen + 1) != CHAR_RBRACKET)
590
+ {
591
+ min = wc;
592
+ re += clen+1;
593
+ clen = mbtowc(&wc, re, -1);
594
+ if (clen<0) clen=1, wc=WEOF;
595
+ max = wc;
596
+ re += clen;
597
+ /* XXX - Should use collation order instead of encoding values
598
+ in character ranges. */
599
+ if (min > max)
600
+ status = REG_ERANGE;
601
+ }
602
+ else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
603
+ status = REG_ECOLLATE;
604
+ else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
605
+ status = REG_ECOLLATE;
606
+ else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
607
+ {
608
+ char tmp_str[64];
609
+ const char *endptr = re + 2;
610
+ int len;
611
+ while (*endptr && *endptr != CHAR_COLON)
612
+ endptr++;
613
+ if (*endptr)
614
+ {
615
+ len = MIN(endptr - re - 2, 63);
616
+ strncpy(tmp_str, re + 2, len);
617
+ tmp_str[len] = '\0';
618
+ class = tre_ctype(tmp_str);
619
+ if (!class)
620
+ status = REG_ECTYPE;
621
+ re = endptr + 2;
622
+ }
623
+ else
624
+ status = REG_ECTYPE;
625
+ min = 0;
626
+ max = TRE_CHAR_MAX;
627
+ }
628
+ else
629
+ {
630
+ if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
631
+ && ctx->re != re)
632
+ /* Two ranges are not allowed to share and endpoint. */
633
+ status = REG_ERANGE;
634
+ min = max = wc;
635
+ re += clen;
636
+ }
637
+
638
+ if (status != REG_OK)
639
+ break;
640
+
641
+ if (class && negate)
642
+ if (*num_neg_classes >= MAX_NEG_CLASSES)
643
+ status = REG_ESPACE;
644
+ else
645
+ neg_classes[(*num_neg_classes)++] = class;
646
+ else if (!skip)
647
+ {
648
+ status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
649
+ if (status != REG_OK)
650
+ break;
651
+ ((tre_literal_t*)((*items)[i-1])->obj)->class = class;
652
+ }
653
+
654
+ /* Add opposite-case counterpoints if REG_ICASE is present.
655
+ This is broken if there are more than two "same" characters. */
656
+ if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
657
+ {
658
+ tre_cint_t cmin, ccurr;
659
+
660
+ while (min <= max)
661
+ {
662
+ if (tre_islower(min))
663
+ {
664
+ cmin = ccurr = tre_toupper(min++);
665
+ while (tre_islower(min) && tre_toupper(min) == ccurr + 1
666
+ && min <= max)
667
+ ccurr = tre_toupper(min++);
668
+ status = tre_new_item(ctx->mem, cmin, ccurr,
669
+ &i, &max_i, items);
670
+ }
671
+ else if (tre_isupper(min))
672
+ {
673
+ cmin = ccurr = tre_tolower(min++);
674
+ while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
675
+ && min <= max)
676
+ ccurr = tre_tolower(min++);
677
+ status = tre_new_item(ctx->mem, cmin, ccurr,
678
+ &i, &max_i, items);
679
+ }
680
+ else min++;
681
+ if (status != REG_OK)
682
+ break;
683
+ }
684
+ if (status != REG_OK)
685
+ break;
686
+ }
687
+ }
688
+ }
689
+ *num_items = i;
690
+ *items_size = max_i;
691
+ ctx->re = re;
692
+ return status;
693
+ }
694
+
695
+ static reg_errcode_t
696
+ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
697
+ {
698
+ tre_ast_node_t *node = NULL;
699
+ int negate = 0;
700
+ reg_errcode_t status = REG_OK;
701
+ tre_ast_node_t **items, *u, *n;
702
+ int i = 0, j, max_i = 32, curr_max, curr_min;
703
+ tre_ctype_t neg_classes[MAX_NEG_CLASSES];
704
+ int num_neg_classes = 0;
705
+
706
+ /* Start off with an array of `max_i' elements. */
707
+ items = xmalloc(sizeof(*items) * max_i);
708
+ if (items == NULL)
709
+ return REG_ESPACE;
710
+
711
+ if (*ctx->re == CHAR_CARET)
712
+ {
713
+ negate = 1;
714
+ ctx->re++;
715
+ }
716
+
717
+ status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
718
+ &items, &i, &max_i);
719
+
720
+ if (status != REG_OK)
721
+ goto parse_bracket_done;
722
+
723
+ /* Sort the array if we need to negate it. */
724
+ if (negate)
725
+ qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
726
+
727
+ curr_max = curr_min = 0;
728
+ /* Build a union of the items in the array, negated if necessary. */
729
+ for (j = 0; j < i && status == REG_OK; j++)
730
+ {
731
+ int min, max;
732
+ tre_literal_t *l = items[j]->obj;
733
+ min = l->code_min;
734
+ max = l->code_max;
735
+
736
+ if (negate)
737
+ {
738
+ if (min < curr_max)
739
+ {
740
+ /* Overlap. */
741
+ curr_max = MAX(max + 1, curr_max);
742
+ l = NULL;
743
+ }
744
+ else
745
+ {
746
+ /* No overlap. */
747
+ curr_max = min - 1;
748
+ if (curr_max >= curr_min)
749
+ {
750
+ l->code_min = curr_min;
751
+ l->code_max = curr_max;
752
+ }
753
+ else
754
+ {
755
+ l = NULL;
756
+ }
757
+ curr_min = curr_max = max + 1;
758
+ }
759
+ }
760
+
761
+ if (l != NULL)
762
+ {
763
+ int k;
764
+ l->position = ctx->position;
765
+ if (num_neg_classes > 0)
766
+ {
767
+ l->neg_classes = tre_mem_alloc(ctx->mem,
768
+ (sizeof(l->neg_classes)
769
+ * (num_neg_classes + 1)));
770
+ if (l->neg_classes == NULL)
771
+ {
772
+ status = REG_ESPACE;
773
+ break;
774
+ }
775
+ for (k = 0; k < num_neg_classes; k++)
776
+ l->neg_classes[k] = neg_classes[k];
777
+ l->neg_classes[k] = (tre_ctype_t)0;
778
+ }
779
+ else
780
+ l->neg_classes = NULL;
781
+ if (node == NULL)
782
+ node = items[j];
783
+ else
784
+ {
785
+ u = tre_ast_new_union(ctx->mem, node, items[j]);
786
+ if (u == NULL)
787
+ status = REG_ESPACE;
788
+ node = u;
789
+ }
790
+ }
791
+ }
792
+
793
+ if (status != REG_OK)
794
+ goto parse_bracket_done;
795
+
796
+ if (negate)
797
+ {
798
+ int k;
799
+ n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
800
+ if (n == NULL)
801
+ status = REG_ESPACE;
802
+ else
803
+ {
804
+ tre_literal_t *l = n->obj;
805
+ if (num_neg_classes > 0)
806
+ {
807
+ l->neg_classes = tre_mem_alloc(ctx->mem,
808
+ (sizeof(l->neg_classes)
809
+ * (num_neg_classes + 1)));
810
+ if (l->neg_classes == NULL)
811
+ {
812
+ status = REG_ESPACE;
813
+ goto parse_bracket_done;
814
+ }
815
+ for (k = 0; k < num_neg_classes; k++)
816
+ l->neg_classes[k] = neg_classes[k];
817
+ l->neg_classes[k] = (tre_ctype_t)0;
818
+ }
819
+ else
820
+ l->neg_classes = NULL;
821
+ if (node == NULL)
822
+ node = n;
823
+ else
824
+ {
825
+ u = tre_ast_new_union(ctx->mem, node, n);
826
+ if (u == NULL)
827
+ status = REG_ESPACE;
828
+ node = u;
829
+ }
830
+ }
831
+ }
832
+
833
+ if (status != REG_OK)
834
+ goto parse_bracket_done;
835
+
836
+ #ifdef TRE_DEBUG
837
+ tre_ast_print(node);
838
+ #endif /* TRE_DEBUG */
839
+
840
+ parse_bracket_done:
841
+ xfree(items);
842
+ ctx->position++;
843
+ *result = node;
844
+ return status;
845
+ }
846
+
847
+
848
+ /* Parses a positive decimal integer. Returns -1 if the string does not
849
+ contain a valid number. */
850
+ static int
851
+ tre_parse_int(const char **regex)
852
+ {
853
+ int num = -1;
854
+ const char *r = *regex;
855
+ while (*r-'0'<10U)
856
+ {
857
+ if (num < 0)
858
+ num = 0;
859
+ num = num * 10 + *r - '0';
860
+ r++;
861
+ }
862
+ *regex = r;
863
+ return num;
864
+ }
865
+
866
+
867
+ static reg_errcode_t
868
+ tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
869
+ {
870
+ int min, max;
871
+ const char *r = ctx->re;
872
+ int minimal = 0;
873
+
874
+ /* Parse number (minimum repetition count). */
875
+ min = -1;
876
+ if (*r >= '0' && *r <= '9') {
877
+ min = tre_parse_int(&r);
878
+ }
879
+
880
+ /* Parse comma and second number (maximum repetition count). */
881
+ max = min;
882
+ if (*r == CHAR_COMMA)
883
+ {
884
+ r++;
885
+ max = tre_parse_int(&r);
886
+ }
887
+
888
+ /* Check that the repeat counts are sane. */
889
+ if ((max >= 0 && min > max) || max > RE_DUP_MAX)
890
+ return REG_BADBR;
891
+
892
+ /* Missing }. */
893
+ if (!*r)
894
+ return REG_EBRACE;
895
+
896
+ /* Empty contents of {}. */
897
+ if (r == ctx->re)
898
+ return REG_BADBR;
899
+
900
+ /* Parse the ending '}' or '\}'.*/
901
+ if (ctx->cflags & REG_EXTENDED)
902
+ {
903
+ if (*r != CHAR_RBRACE)
904
+ return REG_BADBR;
905
+ r++;
906
+ }
907
+ else
908
+ {
909
+ if (*r != CHAR_BACKSLASH || *(r + 1) != CHAR_RBRACE)
910
+ return REG_BADBR;
911
+ r += 2;
912
+ }
913
+
914
+ /* Create the AST node(s). */
915
+ if (min == 0 && max == 0)
916
+ {
917
+ *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
918
+ if (*result == NULL)
919
+ return REG_ESPACE;
920
+ }
921
+ else
922
+ {
923
+ if (min < 0 && max < 0)
924
+ /* Only approximate parameters set, no repetitions. */
925
+ min = max = 1;
926
+
927
+ *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
928
+ if (!*result)
929
+ return REG_ESPACE;
930
+ }
931
+
932
+ ctx->re = r;
933
+ return REG_OK;
934
+ }
935
+
936
+ typedef enum {
937
+ PARSE_RE = 0,
938
+ PARSE_ATOM,
939
+ PARSE_MARK_FOR_SUBMATCH,
940
+ PARSE_BRANCH,
941
+ PARSE_PIECE,
942
+ PARSE_CATENATION,
943
+ PARSE_POST_CATENATION,
944
+ PARSE_UNION,
945
+ PARSE_POST_UNION,
946
+ PARSE_POSTFIX,
947
+ PARSE_RESTORE_CFLAGS
948
+ } tre_parse_re_stack_symbol_t;
949
+
950
+
951
+ static reg_errcode_t
952
+ tre_parse(tre_parse_ctx_t *ctx)
953
+ {
954
+ tre_ast_node_t *result = NULL;
955
+ tre_parse_re_stack_symbol_t symbol;
956
+ reg_errcode_t status = REG_OK;
957
+ tre_stack_t *stack = ctx->stack;
958
+ int bottom = tre_stack_num_objects(stack);
959
+ int depth = 0;
960
+ wchar_t wc;
961
+ int clen;
962
+
963
+ if (!ctx->nofirstsub)
964
+ {
965
+ STACK_PUSH(stack, int, ctx->submatch_id);
966
+ STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
967
+ ctx->submatch_id++;
968
+ }
969
+ STACK_PUSH(stack, int, PARSE_RE);
970
+ ctx->re_start = ctx->re;
971
+
972
+
973
+ /* The following is basically just a recursive descent parser. I use
974
+ an explicit stack instead of recursive functions mostly because of
975
+ two reasons: compatibility with systems which have an overflowable
976
+ call stack, and efficiency (both in lines of code and speed). */
977
+ while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
978
+ {
979
+ if (status != REG_OK)
980
+ break;
981
+ symbol = tre_stack_pop_int(stack);
982
+ switch (symbol)
983
+ {
984
+ case PARSE_RE:
985
+ /* Parse a full regexp. A regexp is one or more branches,
986
+ separated by the union operator `|'. */
987
+ if (ctx->cflags & REG_EXTENDED)
988
+ STACK_PUSHX(stack, int, PARSE_UNION);
989
+ STACK_PUSHX(stack, int, PARSE_BRANCH);
990
+ break;
991
+
992
+ case PARSE_BRANCH:
993
+ /* Parse a branch. A branch is one or more pieces, concatenated.
994
+ A piece is an atom possibly followed by a postfix operator. */
995
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
996
+ STACK_PUSHX(stack, int, PARSE_PIECE);
997
+ break;
998
+
999
+ case PARSE_PIECE:
1000
+ /* Parse a piece. A piece is an atom possibly followed by one
1001
+ or more postfix operators. */
1002
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
1003
+ STACK_PUSHX(stack, int, PARSE_ATOM);
1004
+ break;
1005
+
1006
+ case PARSE_CATENATION:
1007
+ /* If the expression has not ended, parse another piece. */
1008
+ {
1009
+ tre_char_t c;
1010
+ if (!*ctx->re)
1011
+ break;
1012
+ c = *ctx->re;
1013
+ if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
1014
+ break;
1015
+ if ((ctx->cflags & REG_EXTENDED
1016
+ && c == CHAR_RPAREN && depth > 0)
1017
+ || (!(ctx->cflags & REG_EXTENDED)
1018
+ && (c == CHAR_BACKSLASH
1019
+ && *(ctx->re + 1) == CHAR_RPAREN)))
1020
+ {
1021
+ if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
1022
+ status = REG_EPAREN;
1023
+ depth--;
1024
+ if (!(ctx->cflags & REG_EXTENDED))
1025
+ ctx->re += 2;
1026
+ break;
1027
+ }
1028
+
1029
+ {
1030
+ /* Default case, left associative concatenation. */
1031
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
1032
+ STACK_PUSHX(stack, voidptr, result);
1033
+ STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
1034
+ STACK_PUSHX(stack, int, PARSE_PIECE);
1035
+ }
1036
+ break;
1037
+ }
1038
+
1039
+ case PARSE_POST_CATENATION:
1040
+ {
1041
+ tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
1042
+ tre_ast_node_t *tmp_node;
1043
+ tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
1044
+ if (!tmp_node)
1045
+ return REG_ESPACE;
1046
+ result = tmp_node;
1047
+ break;
1048
+ }
1049
+
1050
+ case PARSE_UNION:
1051
+ switch (*ctx->re)
1052
+ {
1053
+ case CHAR_PIPE:
1054
+ STACK_PUSHX(stack, int, PARSE_UNION);
1055
+ STACK_PUSHX(stack, voidptr, result);
1056
+ STACK_PUSHX(stack, int, PARSE_POST_UNION);
1057
+ STACK_PUSHX(stack, int, PARSE_BRANCH);
1058
+ ctx->re++;
1059
+ break;
1060
+
1061
+ case CHAR_RPAREN:
1062
+ ctx->re++;
1063
+ break;
1064
+
1065
+ default:
1066
+ break;
1067
+ }
1068
+ break;
1069
+
1070
+ case PARSE_POST_UNION:
1071
+ {
1072
+ tre_ast_node_t *tmp_node;
1073
+ tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
1074
+ tmp_node = tre_ast_new_union(ctx->mem, tree, result);
1075
+ if (!tmp_node)
1076
+ return REG_ESPACE;
1077
+ result = tmp_node;
1078
+ break;
1079
+ }
1080
+
1081
+ case PARSE_POSTFIX:
1082
+ /* Parse postfix operators. */
1083
+ switch (*ctx->re)
1084
+ {
1085
+ case CHAR_PLUS:
1086
+ case CHAR_QUESTIONMARK:
1087
+ if (!(ctx->cflags & REG_EXTENDED))
1088
+ break;
1089
+ /*FALLTHROUGH*/
1090
+ case CHAR_STAR:
1091
+ {
1092
+ tre_ast_node_t *tmp_node;
1093
+ int minimal = 0;
1094
+ int rep_min = 0;
1095
+ int rep_max = -1;
1096
+
1097
+ if (*ctx->re == CHAR_PLUS)
1098
+ rep_min = 1;
1099
+ if (*ctx->re == CHAR_QUESTIONMARK)
1100
+ rep_max = 1;
1101
+
1102
+ ctx->re++;
1103
+ tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
1104
+ minimal);
1105
+ if (tmp_node == NULL)
1106
+ return REG_ESPACE;
1107
+ result = tmp_node;
1108
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
1109
+ }
1110
+ break;
1111
+
1112
+ case CHAR_BACKSLASH:
1113
+ /* "\{" is special without REG_EXTENDED */
1114
+ if (!(ctx->cflags & REG_EXTENDED)
1115
+ && *(ctx->re + 1) == CHAR_LBRACE)
1116
+ {
1117
+ ctx->re++;
1118
+ goto parse_brace;
1119
+ }
1120
+ else
1121
+ break;
1122
+
1123
+ case CHAR_LBRACE:
1124
+ /* "{" is literal without REG_EXTENDED */
1125
+ if (!(ctx->cflags & REG_EXTENDED))
1126
+ break;
1127
+
1128
+ parse_brace:
1129
+ ctx->re++;
1130
+
1131
+ status = tre_parse_bound(ctx, &result);
1132
+ if (status != REG_OK)
1133
+ return status;
1134
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
1135
+ break;
1136
+ }
1137
+ break;
1138
+
1139
+ case PARSE_ATOM:
1140
+ /* Parse an atom. An atom is a regular expression enclosed in `()',
1141
+ an empty set of `()', a bracket expression, `.', `^', `$',
1142
+ a `\' followed by a character, or a single character. */
1143
+
1144
+ switch (*ctx->re)
1145
+ {
1146
+ case CHAR_LPAREN: /* parenthesized subexpression */
1147
+
1148
+ if (ctx->cflags & REG_EXTENDED)
1149
+ {
1150
+ lparen:
1151
+ depth++;
1152
+ {
1153
+ ctx->re++;
1154
+ /* First parse a whole RE, then mark the resulting tree
1155
+ for submatching. */
1156
+ STACK_PUSHX(stack, int, ctx->submatch_id);
1157
+ STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
1158
+ STACK_PUSHX(stack, int, PARSE_RE);
1159
+ ctx->submatch_id++;
1160
+ }
1161
+ }
1162
+ else
1163
+ goto parse_literal;
1164
+ break;
1165
+
1166
+ case CHAR_LBRACKET: /* bracket expression */
1167
+ ctx->re++;
1168
+ status = tre_parse_bracket(ctx, &result);
1169
+ if (status != REG_OK)
1170
+ return status;
1171
+ break;
1172
+
1173
+ case CHAR_BACKSLASH:
1174
+ /* If this is "\(" or "\)" chew off the backslash and
1175
+ try again. */
1176
+ if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_LPAREN)
1177
+ {
1178
+ ctx->re++;
1179
+ goto lparen;
1180
+ }
1181
+ if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_RPAREN)
1182
+ {
1183
+ goto empty_atom;
1184
+ }
1185
+
1186
+ /* If a macro is used, parse the expanded macro recursively. */
1187
+ {
1188
+ const char *buf = tre_expand_macro(ctx->re + 1);
1189
+ if (buf)
1190
+ {
1191
+ tre_parse_ctx_t subctx;
1192
+ memcpy(&subctx, ctx, sizeof(subctx));
1193
+ subctx.re = buf;
1194
+ subctx.nofirstsub = 1;
1195
+ status = tre_parse(&subctx);
1196
+ if (status != REG_OK)
1197
+ return status;
1198
+ ctx->re += 2;
1199
+ ctx->position = subctx.position;
1200
+ result = subctx.result;
1201
+ break;
1202
+ }
1203
+ }
1204
+
1205
+ if (!ctx->re[1])
1206
+ /* Trailing backslash. */
1207
+ return REG_EESCAPE;
1208
+
1209
+ ctx->re++;
1210
+ switch (*ctx->re)
1211
+ {
1212
+ case 'b':
1213
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
1214
+ ASSERT_AT_WB, -1);
1215
+ ctx->re++;
1216
+ break;
1217
+ case 'B':
1218
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
1219
+ ASSERT_AT_WB_NEG, -1);
1220
+ ctx->re++;
1221
+ break;
1222
+ case '<':
1223
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
1224
+ ASSERT_AT_BOW, -1);
1225
+ ctx->re++;
1226
+ break;
1227
+ case '>':
1228
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
1229
+ ASSERT_AT_EOW, -1);
1230
+ ctx->re++;
1231
+ break;
1232
+ case 'x':
1233
+ ctx->re++;
1234
+ if (ctx->re[0] != CHAR_LBRACE)
1235
+ {
1236
+ /* 8 bit hex char. */
1237
+ char tmp[3] = {0, 0, 0};
1238
+ long val;
1239
+
1240
+ if (tre_isxdigit(ctx->re[0]))
1241
+ {
1242
+ tmp[0] = (char)ctx->re[0];
1243
+ ctx->re++;
1244
+ }
1245
+ if (tre_isxdigit(ctx->re[0]))
1246
+ {
1247
+ tmp[1] = (char)ctx->re[0];
1248
+ ctx->re++;
1249
+ }
1250
+ val = strtol(tmp, NULL, 16);
1251
+ result = tre_ast_new_literal(ctx->mem, (int)val,
1252
+ (int)val, ctx->position);
1253
+ ctx->position++;
1254
+ break;
1255
+ }
1256
+ else if (*ctx->re)
1257
+ {
1258
+ /* Wide char. */
1259
+ char tmp[32];
1260
+ long val;
1261
+ int i = 0;
1262
+ ctx->re++;
1263
+ while (*ctx->re && i < sizeof tmp)
1264
+ {
1265
+ if (ctx->re[0] == CHAR_RBRACE)
1266
+ break;
1267
+ if (tre_isxdigit(ctx->re[0]))
1268
+ {
1269
+ tmp[i] = (char)ctx->re[0];
1270
+ i++;
1271
+ ctx->re++;
1272
+ continue;
1273
+ }
1274
+ return REG_EBRACE;
1275
+ }
1276
+ ctx->re++;
1277
+ tmp[i] = 0;
1278
+ val = strtol(tmp, NULL, 16);
1279
+ result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
1280
+ ctx->position);
1281
+ ctx->position++;
1282
+ break;
1283
+ }
1284
+ /*FALLTHROUGH*/
1285
+
1286
+ default:
1287
+ if (tre_isdigit(*ctx->re))
1288
+ {
1289
+ /* Back reference. */
1290
+ int val = *ctx->re - '0';
1291
+ result = tre_ast_new_literal(ctx->mem, BACKREF, val,
1292
+ ctx->position);
1293
+ if (result == NULL)
1294
+ return REG_ESPACE;
1295
+ ctx->position++;
1296
+ ctx->max_backref = MAX(val, ctx->max_backref);
1297
+ ctx->re++;
1298
+ }
1299
+ else
1300
+ {
1301
+ /* Escaped character. */
1302
+ result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
1303
+ ctx->position);
1304
+ ctx->position++;
1305
+ ctx->re++;
1306
+ }
1307
+ break;
1308
+ }
1309
+ if (result == NULL)
1310
+ return REG_ESPACE;
1311
+ break;
1312
+
1313
+ case CHAR_PERIOD: /* the any-symbol */
1314
+ if (ctx->cflags & REG_NEWLINE)
1315
+ {
1316
+ tre_ast_node_t *tmp1;
1317
+ tre_ast_node_t *tmp2;
1318
+ tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n' - 1,
1319
+ ctx->position);
1320
+ if (!tmp1)
1321
+ return REG_ESPACE;
1322
+ tmp2 = tre_ast_new_literal(ctx->mem, '\n' + 1, TRE_CHAR_MAX,
1323
+ ctx->position + 1);
1324
+ if (!tmp2)
1325
+ return REG_ESPACE;
1326
+ result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
1327
+ if (!result)
1328
+ return REG_ESPACE;
1329
+ ctx->position += 2;
1330
+ }
1331
+ else
1332
+ {
1333
+ result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
1334
+ ctx->position);
1335
+ if (!result)
1336
+ return REG_ESPACE;
1337
+ ctx->position++;
1338
+ }
1339
+ ctx->re++;
1340
+ break;
1341
+
1342
+ case CHAR_CARET: /* beginning of line assertion */
1343
+ /* '^' has a special meaning everywhere in EREs, and at
1344
+ beginning of BRE. */
1345
+ if (ctx->cflags & REG_EXTENDED
1346
+ || ctx->re == ctx->re_start)
1347
+ {
1348
+ if (!(ctx->cflags & REG_EXTENDED))
1349
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
1350
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
1351
+ ASSERT_AT_BOL, -1);
1352
+ if (result == NULL)
1353
+ return REG_ESPACE;
1354
+ ctx->re++;
1355
+ }
1356
+ else
1357
+ goto parse_literal;
1358
+ break;
1359
+
1360
+ case CHAR_DOLLAR: /* end of line assertion. */
1361
+ /* '$' is special everywhere in EREs, and in the end of the
1362
+ string in BREs. */
1363
+ if (ctx->cflags & REG_EXTENDED
1364
+ || !*(ctx->re + 1))
1365
+ {
1366
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
1367
+ ASSERT_AT_EOL, -1);
1368
+ if (result == NULL)
1369
+ return REG_ESPACE;
1370
+ ctx->re++;
1371
+ }
1372
+ else
1373
+ goto parse_literal;
1374
+ break;
1375
+
1376
+ case CHAR_RPAREN:
1377
+ if (!depth)
1378
+ goto parse_literal;
1379
+ case CHAR_STAR:
1380
+ case CHAR_PIPE:
1381
+ case CHAR_LBRACE:
1382
+ case CHAR_PLUS:
1383
+ case CHAR_QUESTIONMARK:
1384
+ if (!(ctx->cflags & REG_EXTENDED))
1385
+ goto parse_literal;
1386
+
1387
+ case 0:
1388
+ empty_atom:
1389
+ result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1390
+ if (!result)
1391
+ return REG_ESPACE;
1392
+ break;
1393
+
1394
+ default:
1395
+ parse_literal:
1396
+
1397
+ clen = mbtowc(&wc, ctx->re, -1);
1398
+ if (clen<0) clen=1, wc=WEOF;
1399
+
1400
+ /* Note that we can't use an tre_isalpha() test here, since there
1401
+ may be characters which are alphabetic but neither upper or
1402
+ lower case. */
1403
+ if (ctx->cflags & REG_ICASE
1404
+ && (tre_isupper(wc) || tre_islower(wc)))
1405
+ {
1406
+ tre_ast_node_t *tmp1;
1407
+ tre_ast_node_t *tmp2;
1408
+
1409
+ /* XXX - Can there be more than one opposite-case
1410
+ counterpoints for some character in some locale? Or
1411
+ more than two characters which all should be regarded
1412
+ the same character if case is ignored? If yes, there
1413
+ does not seem to be a portable way to detect it. I guess
1414
+ that at least for multi-character collating elements there
1415
+ could be several opposite-case counterpoints, but they
1416
+ cannot be supported portably anyway. */
1417
+ tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc),
1418
+ tre_toupper(wc),
1419
+ ctx->position);
1420
+ if (!tmp1)
1421
+ return REG_ESPACE;
1422
+ tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc),
1423
+ tre_tolower(wc),
1424
+ ctx->position);
1425
+ if (!tmp2)
1426
+ return REG_ESPACE;
1427
+ result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
1428
+ if (!result)
1429
+ return REG_ESPACE;
1430
+ }
1431
+ else
1432
+ {
1433
+ result = tre_ast_new_literal(ctx->mem, wc, wc,
1434
+ ctx->position);
1435
+ if (!result)
1436
+ return REG_ESPACE;
1437
+ }
1438
+ ctx->position++;
1439
+ ctx->re += clen;
1440
+ break;
1441
+ }
1442
+ break;
1443
+
1444
+ case PARSE_MARK_FOR_SUBMATCH:
1445
+ {
1446
+ int submatch_id = tre_stack_pop_int(stack);
1447
+
1448
+ if (result->submatch_id >= 0)
1449
+ {
1450
+ tre_ast_node_t *n, *tmp_node;
1451
+ n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1452
+ if (n == NULL)
1453
+ return REG_ESPACE;
1454
+ tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
1455
+ if (tmp_node == NULL)
1456
+ return REG_ESPACE;
1457
+ tmp_node->num_submatches = result->num_submatches;
1458
+ result = tmp_node;
1459
+ }
1460
+ result->submatch_id = submatch_id;
1461
+ result->num_submatches++;
1462
+ break;
1463
+ }
1464
+
1465
+ case PARSE_RESTORE_CFLAGS:
1466
+ ctx->cflags = tre_stack_pop_int(stack);
1467
+ break;
1468
+
1469
+ default:
1470
+ assert(0);
1471
+ break;
1472
+ }
1473
+ }
1474
+
1475
+ /* Check for missing closing parentheses. */
1476
+ if (depth > 0)
1477
+ return REG_EPAREN;
1478
+
1479
+ if (status == REG_OK)
1480
+ ctx->result = result;
1481
+
1482
+ return status;
1483
+ }
1484
+
1485
+
1486
+
1487
+ /***********************************************************************
1488
+ from tre-compile.c
1489
+ ***********************************************************************/
1490
+
1491
+
1492
+ /*
1493
+ TODO:
1494
+ - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1495
+ function calls.
1496
+ */
1497
+
1498
+ /*
1499
+ Algorithms to setup tags so that submatch addressing can be done.
1500
+ */
1501
+
1502
+
1503
+ /* Inserts a catenation node to the root of the tree given in `node'.
1504
+ As the left child a new tag with number `tag_id' to `node' is added,
1505
+ and the right child is the old root. */
1506
+ static reg_errcode_t
1507
+ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1508
+ {
1509
+ tre_catenation_t *c;
1510
+
1511
+ c = tre_mem_alloc(mem, sizeof(*c));
1512
+ if (c == NULL)
1513
+ return REG_ESPACE;
1514
+ c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1515
+ if (c->left == NULL)
1516
+ return REG_ESPACE;
1517
+ c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1518
+ if (c->right == NULL)
1519
+ return REG_ESPACE;
1520
+
1521
+ c->right->obj = node->obj;
1522
+ c->right->type = node->type;
1523
+ c->right->nullable = -1;
1524
+ c->right->submatch_id = -1;
1525
+ c->right->firstpos = NULL;
1526
+ c->right->lastpos = NULL;
1527
+ c->right->num_tags = 0;
1528
+ node->obj = c;
1529
+ node->type = CATENATION;
1530
+ return REG_OK;
1531
+ }
1532
+
1533
+ /* Inserts a catenation node to the root of the tree given in `node'.
1534
+ As the right child a new tag with number `tag_id' to `node' is added,
1535
+ and the left child is the old root. */
1536
+ static reg_errcode_t
1537
+ tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1538
+ {
1539
+ tre_catenation_t *c;
1540
+
1541
+ c = tre_mem_alloc(mem, sizeof(*c));
1542
+ if (c == NULL)
1543
+ return REG_ESPACE;
1544
+ c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1545
+ if (c->right == NULL)
1546
+ return REG_ESPACE;
1547
+ c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1548
+ if (c->left == NULL)
1549
+ return REG_ESPACE;
1550
+
1551
+ c->left->obj = node->obj;
1552
+ c->left->type = node->type;
1553
+ c->left->nullable = -1;
1554
+ c->left->submatch_id = -1;
1555
+ c->left->firstpos = NULL;
1556
+ c->left->lastpos = NULL;
1557
+ c->left->num_tags = 0;
1558
+ node->obj = c;
1559
+ node->type = CATENATION;
1560
+ return REG_OK;
1561
+ }
1562
+
1563
+ typedef enum {
1564
+ ADDTAGS_RECURSE,
1565
+ ADDTAGS_AFTER_ITERATION,
1566
+ ADDTAGS_AFTER_UNION_LEFT,
1567
+ ADDTAGS_AFTER_UNION_RIGHT,
1568
+ ADDTAGS_AFTER_CAT_LEFT,
1569
+ ADDTAGS_AFTER_CAT_RIGHT,
1570
+ ADDTAGS_SET_SUBMATCH_END
1571
+ } tre_addtags_symbol_t;
1572
+
1573
+
1574
+ typedef struct {
1575
+ int tag;
1576
+ int next_tag;
1577
+ } tre_tag_states_t;
1578
+
1579
+
1580
+ /* Go through `regset' and set submatch data for submatches that are
1581
+ using this tag. */
1582
+ static void
1583
+ tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1584
+ {
1585
+ int i;
1586
+
1587
+ for (i = 0; regset[i] >= 0; i++)
1588
+ {
1589
+ int id = regset[i] / 2;
1590
+ int start = !(regset[i] % 2);
1591
+ if (start)
1592
+ tnfa->submatch_data[id].so_tag = tag;
1593
+ else
1594
+ tnfa->submatch_data[id].eo_tag = tag;
1595
+ }
1596
+ regset[0] = -1;
1597
+ }
1598
+
1599
+
1600
+ /* Adds tags to appropriate locations in the parse tree in `tree', so that
1601
+ subexpressions marked for submatch addressing can be traced. */
1602
+ static reg_errcode_t
1603
+ tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1604
+ tre_tnfa_t *tnfa)
1605
+ {
1606
+ reg_errcode_t status = REG_OK;
1607
+ tre_addtags_symbol_t symbol;
1608
+ tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1609
+ int bottom = tre_stack_num_objects(stack);
1610
+ /* True for first pass (counting number of needed tags) */
1611
+ int first_pass = (mem == NULL || tnfa == NULL);
1612
+ int *regset, *orig_regset;
1613
+ int num_tags = 0; /* Total number of tags. */
1614
+ int num_minimals = 0; /* Number of special minimal tags. */
1615
+ int tag = 0; /* The tag that is to be added next. */
1616
+ int next_tag = 1; /* Next tag to use after this one. */
1617
+ int *parents; /* Stack of submatches the current submatch is
1618
+ contained in. */
1619
+ int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1620
+ tre_tag_states_t *saved_states;
1621
+
1622
+ tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1623
+ if (!first_pass)
1624
+ {
1625
+ tnfa->end_tag = 0;
1626
+ tnfa->minimal_tags[0] = -1;
1627
+ }
1628
+
1629
+ regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1630
+ if (regset == NULL)
1631
+ return REG_ESPACE;
1632
+ regset[0] = -1;
1633
+ orig_regset = regset;
1634
+
1635
+ parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1636
+ if (parents == NULL)
1637
+ {
1638
+ xfree(regset);
1639
+ return REG_ESPACE;
1640
+ }
1641
+ parents[0] = -1;
1642
+
1643
+ saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1644
+ if (saved_states == NULL)
1645
+ {
1646
+ xfree(regset);
1647
+ xfree(parents);
1648
+ return REG_ESPACE;
1649
+ }
1650
+ else
1651
+ {
1652
+ unsigned int i;
1653
+ for (i = 0; i <= tnfa->num_submatches; i++)
1654
+ saved_states[i].tag = -1;
1655
+ }
1656
+
1657
+ STACK_PUSH(stack, voidptr, node);
1658
+ STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1659
+
1660
+ while (tre_stack_num_objects(stack) > bottom)
1661
+ {
1662
+ if (status != REG_OK)
1663
+ break;
1664
+
1665
+ symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1666
+ switch (symbol)
1667
+ {
1668
+
1669
+ case ADDTAGS_SET_SUBMATCH_END:
1670
+ {
1671
+ int id = tre_stack_pop_int(stack);
1672
+ int i;
1673
+
1674
+ /* Add end of this submatch to regset. */
1675
+ for (i = 0; regset[i] >= 0; i++);
1676
+ regset[i] = id * 2 + 1;
1677
+ regset[i + 1] = -1;
1678
+
1679
+ /* Pop this submatch from the parents stack. */
1680
+ for (i = 0; parents[i] >= 0; i++);
1681
+ parents[i - 1] = -1;
1682
+ break;
1683
+ }
1684
+
1685
+ case ADDTAGS_RECURSE:
1686
+ node = tre_stack_pop_voidptr(stack);
1687
+
1688
+ if (node->submatch_id >= 0)
1689
+ {
1690
+ int id = node->submatch_id;
1691
+ int i;
1692
+
1693
+
1694
+ /* Add start of this submatch to regset. */
1695
+ for (i = 0; regset[i] >= 0; i++);
1696
+ regset[i] = id * 2;
1697
+ regset[i + 1] = -1;
1698
+
1699
+ if (!first_pass)
1700
+ {
1701
+ for (i = 0; parents[i] >= 0; i++);
1702
+ tnfa->submatch_data[id].parents = NULL;
1703
+ if (i > 0)
1704
+ {
1705
+ int *p = xmalloc(sizeof(*p) * (i + 1));
1706
+ if (p == NULL)
1707
+ {
1708
+ status = REG_ESPACE;
1709
+ break;
1710
+ }
1711
+ assert(tnfa->submatch_data[id].parents == NULL);
1712
+ tnfa->submatch_data[id].parents = p;
1713
+ for (i = 0; parents[i] >= 0; i++)
1714
+ p[i] = parents[i];
1715
+ p[i] = -1;
1716
+ }
1717
+ }
1718
+
1719
+ /* Add end of this submatch to regset after processing this
1720
+ node. */
1721
+ STACK_PUSHX(stack, int, node->submatch_id);
1722
+ STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1723
+ }
1724
+
1725
+ switch (node->type)
1726
+ {
1727
+ case LITERAL:
1728
+ {
1729
+ tre_literal_t *lit = node->obj;
1730
+
1731
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1732
+ {
1733
+ int i;
1734
+ if (regset[0] >= 0)
1735
+ {
1736
+ /* Regset is not empty, so add a tag before the
1737
+ literal or backref. */
1738
+ if (!first_pass)
1739
+ {
1740
+ status = tre_add_tag_left(mem, node, tag);
1741
+ tnfa->tag_directions[tag] = direction;
1742
+ if (minimal_tag >= 0)
1743
+ {
1744
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1745
+ tnfa->minimal_tags[i] = tag;
1746
+ tnfa->minimal_tags[i + 1] = minimal_tag;
1747
+ tnfa->minimal_tags[i + 2] = -1;
1748
+ minimal_tag = -1;
1749
+ num_minimals++;
1750
+ }
1751
+ tre_purge_regset(regset, tnfa, tag);
1752
+ }
1753
+ else
1754
+ {
1755
+ node->num_tags = 1;
1756
+ }
1757
+
1758
+ regset[0] = -1;
1759
+ tag = next_tag;
1760
+ num_tags++;
1761
+ next_tag++;
1762
+ }
1763
+ }
1764
+ else
1765
+ {
1766
+ assert(!IS_TAG(lit));
1767
+ }
1768
+ break;
1769
+ }
1770
+ case CATENATION:
1771
+ {
1772
+ tre_catenation_t *cat = node->obj;
1773
+ tre_ast_node_t *left = cat->left;
1774
+ tre_ast_node_t *right = cat->right;
1775
+ int reserved_tag = -1;
1776
+
1777
+
1778
+ /* After processing right child. */
1779
+ STACK_PUSHX(stack, voidptr, node);
1780
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1781
+
1782
+ /* Process right child. */
1783
+ STACK_PUSHX(stack, voidptr, right);
1784
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1785
+
1786
+ /* After processing left child. */
1787
+ STACK_PUSHX(stack, int, next_tag + left->num_tags);
1788
+ if (left->num_tags > 0 && right->num_tags > 0)
1789
+ {
1790
+ /* Reserve the next tag to the right child. */
1791
+ reserved_tag = next_tag;
1792
+ next_tag++;
1793
+ }
1794
+ STACK_PUSHX(stack, int, reserved_tag);
1795
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1796
+
1797
+ /* Process left child. */
1798
+ STACK_PUSHX(stack, voidptr, left);
1799
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1800
+
1801
+ }
1802
+ break;
1803
+ case ITERATION:
1804
+ {
1805
+ tre_iteration_t *iter = node->obj;
1806
+
1807
+ if (first_pass)
1808
+ {
1809
+ STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1810
+ }
1811
+ else
1812
+ {
1813
+ STACK_PUSHX(stack, int, tag);
1814
+ STACK_PUSHX(stack, int, iter->minimal);
1815
+ }
1816
+ STACK_PUSHX(stack, voidptr, node);
1817
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1818
+
1819
+ STACK_PUSHX(stack, voidptr, iter->arg);
1820
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1821
+
1822
+ /* Regset is not empty, so add a tag here. */
1823
+ if (regset[0] >= 0 || iter->minimal)
1824
+ {
1825
+ if (!first_pass)
1826
+ {
1827
+ int i;
1828
+ status = tre_add_tag_left(mem, node, tag);
1829
+ if (iter->minimal)
1830
+ tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1831
+ else
1832
+ tnfa->tag_directions[tag] = direction;
1833
+ if (minimal_tag >= 0)
1834
+ {
1835
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1836
+ tnfa->minimal_tags[i] = tag;
1837
+ tnfa->minimal_tags[i + 1] = minimal_tag;
1838
+ tnfa->minimal_tags[i + 2] = -1;
1839
+ minimal_tag = -1;
1840
+ num_minimals++;
1841
+ }
1842
+ tre_purge_regset(regset, tnfa, tag);
1843
+ }
1844
+
1845
+ regset[0] = -1;
1846
+ tag = next_tag;
1847
+ num_tags++;
1848
+ next_tag++;
1849
+ }
1850
+ direction = TRE_TAG_MINIMIZE;
1851
+ }
1852
+ break;
1853
+ case UNION:
1854
+ {
1855
+ tre_union_t *uni = node->obj;
1856
+ tre_ast_node_t *left = uni->left;
1857
+ tre_ast_node_t *right = uni->right;
1858
+ int left_tag;
1859
+ int right_tag;
1860
+
1861
+ if (regset[0] >= 0)
1862
+ {
1863
+ left_tag = next_tag;
1864
+ right_tag = next_tag + 1;
1865
+ }
1866
+ else
1867
+ {
1868
+ left_tag = tag;
1869
+ right_tag = next_tag;
1870
+ }
1871
+
1872
+ /* After processing right child. */
1873
+ STACK_PUSHX(stack, int, right_tag);
1874
+ STACK_PUSHX(stack, int, left_tag);
1875
+ STACK_PUSHX(stack, voidptr, regset);
1876
+ STACK_PUSHX(stack, int, regset[0] >= 0);
1877
+ STACK_PUSHX(stack, voidptr, node);
1878
+ STACK_PUSHX(stack, voidptr, right);
1879
+ STACK_PUSHX(stack, voidptr, left);
1880
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1881
+
1882
+ /* Process right child. */
1883
+ STACK_PUSHX(stack, voidptr, right);
1884
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1885
+
1886
+ /* After processing left child. */
1887
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1888
+
1889
+ /* Process left child. */
1890
+ STACK_PUSHX(stack, voidptr, left);
1891
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1892
+
1893
+ /* Regset is not empty, so add a tag here. */
1894
+ if (regset[0] >= 0)
1895
+ {
1896
+ if (!first_pass)
1897
+ {
1898
+ int i;
1899
+ status = tre_add_tag_left(mem, node, tag);
1900
+ tnfa->tag_directions[tag] = direction;
1901
+ if (minimal_tag >= 0)
1902
+ {
1903
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1904
+ tnfa->minimal_tags[i] = tag;
1905
+ tnfa->minimal_tags[i + 1] = minimal_tag;
1906
+ tnfa->minimal_tags[i + 2] = -1;
1907
+ minimal_tag = -1;
1908
+ num_minimals++;
1909
+ }
1910
+ tre_purge_regset(regset, tnfa, tag);
1911
+ }
1912
+
1913
+ regset[0] = -1;
1914
+ tag = next_tag;
1915
+ num_tags++;
1916
+ next_tag++;
1917
+ }
1918
+
1919
+ if (node->num_submatches > 0)
1920
+ {
1921
+ /* The next two tags are reserved for markers. */
1922
+ next_tag++;
1923
+ tag = next_tag;
1924
+ next_tag++;
1925
+ }
1926
+
1927
+ break;
1928
+ }
1929
+ }
1930
+
1931
+ if (node->submatch_id >= 0)
1932
+ {
1933
+ int i;
1934
+ /* Push this submatch on the parents stack. */
1935
+ for (i = 0; parents[i] >= 0; i++);
1936
+ parents[i] = node->submatch_id;
1937
+ parents[i + 1] = -1;
1938
+ }
1939
+
1940
+ break; /* end case: ADDTAGS_RECURSE */
1941
+
1942
+ case ADDTAGS_AFTER_ITERATION:
1943
+ {
1944
+ int minimal = 0;
1945
+ int enter_tag;
1946
+ node = tre_stack_pop_voidptr(stack);
1947
+ if (first_pass)
1948
+ {
1949
+ node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1950
+ + tre_stack_pop_int(stack);
1951
+ minimal_tag = -1;
1952
+ }
1953
+ else
1954
+ {
1955
+ minimal = tre_stack_pop_int(stack);
1956
+ enter_tag = tre_stack_pop_int(stack);
1957
+ if (minimal)
1958
+ minimal_tag = enter_tag;
1959
+ }
1960
+
1961
+ if (!first_pass)
1962
+ {
1963
+ if (minimal)
1964
+ direction = TRE_TAG_MINIMIZE;
1965
+ else
1966
+ direction = TRE_TAG_MAXIMIZE;
1967
+ }
1968
+ break;
1969
+ }
1970
+
1971
+ case ADDTAGS_AFTER_CAT_LEFT:
1972
+ {
1973
+ int new_tag = tre_stack_pop_int(stack);
1974
+ next_tag = tre_stack_pop_int(stack);
1975
+ if (new_tag >= 0)
1976
+ {
1977
+ tag = new_tag;
1978
+ }
1979
+ break;
1980
+ }
1981
+
1982
+ case ADDTAGS_AFTER_CAT_RIGHT:
1983
+ node = tre_stack_pop_voidptr(stack);
1984
+ if (first_pass)
1985
+ node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1986
+ + ((tre_catenation_t *)node->obj)->right->num_tags;
1987
+ break;
1988
+
1989
+ case ADDTAGS_AFTER_UNION_LEFT:
1990
+ /* Lift the bottom of the `regset' array so that when processing
1991
+ the right operand the items currently in the array are
1992
+ invisible. The original bottom was saved at ADDTAGS_UNION and
1993
+ will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1994
+ while (*regset >= 0)
1995
+ regset++;
1996
+ break;
1997
+
1998
+ case ADDTAGS_AFTER_UNION_RIGHT:
1999
+ {
2000
+ int added_tags, tag_left, tag_right;
2001
+ tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
2002
+ tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
2003
+ node = tre_stack_pop_voidptr(stack);
2004
+ added_tags = tre_stack_pop_int(stack);
2005
+ if (first_pass)
2006
+ {
2007
+ node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
2008
+ + ((tre_union_t *)node->obj)->right->num_tags + added_tags
2009
+ + ((node->num_submatches > 0) ? 2 : 0);
2010
+ }
2011
+ regset = tre_stack_pop_voidptr(stack);
2012
+ tag_left = tre_stack_pop_int(stack);
2013
+ tag_right = tre_stack_pop_int(stack);
2014
+
2015
+ /* Add tags after both children, the left child gets a smaller
2016
+ tag than the right child. This guarantees that we prefer
2017
+ the left child over the right child. */
2018
+ /* XXX - This is not always necessary (if the children have
2019
+ tags which must be seen for every match of that child). */
2020
+ /* XXX - Check if this is the only place where tre_add_tag_right
2021
+ is used. If so, use tre_add_tag_left (putting the tag before
2022
+ the child as opposed after the child) and throw away
2023
+ tre_add_tag_right. */
2024
+ if (node->num_submatches > 0)
2025
+ {
2026
+ if (!first_pass)
2027
+ {
2028
+ status = tre_add_tag_right(mem, left, tag_left);
2029
+ tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
2030
+ status = tre_add_tag_right(mem, right, tag_right);
2031
+ tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
2032
+ }
2033
+ num_tags += 2;
2034
+ }
2035
+ direction = TRE_TAG_MAXIMIZE;
2036
+ break;
2037
+ }
2038
+
2039
+ default:
2040
+ assert(0);
2041
+ break;
2042
+
2043
+ } /* end switch(symbol) */
2044
+ } /* end while(tre_stack_num_objects(stack) > bottom) */
2045
+
2046
+ if (!first_pass)
2047
+ tre_purge_regset(regset, tnfa, tag);
2048
+
2049
+ if (!first_pass && minimal_tag >= 0)
2050
+ {
2051
+ int i;
2052
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
2053
+ tnfa->minimal_tags[i] = tag;
2054
+ tnfa->minimal_tags[i + 1] = minimal_tag;
2055
+ tnfa->minimal_tags[i + 2] = -1;
2056
+ minimal_tag = -1;
2057
+ num_minimals++;
2058
+ }
2059
+
2060
+ assert(tree->num_tags == num_tags);
2061
+ tnfa->end_tag = num_tags;
2062
+ tnfa->num_tags = num_tags;
2063
+ tnfa->num_minimals = num_minimals;
2064
+ xfree(orig_regset);
2065
+ xfree(parents);
2066
+ xfree(saved_states);
2067
+ return status;
2068
+ }
2069
+
2070
+
2071
+
2072
+ /*
2073
+ AST to TNFA compilation routines.
2074
+ */
2075
+
2076
+ typedef enum {
2077
+ COPY_RECURSE,
2078
+ COPY_SET_RESULT_PTR
2079
+ } tre_copyast_symbol_t;
2080
+
2081
+ /* Flags for tre_copy_ast(). */
2082
+ #define COPY_REMOVE_TAGS 1
2083
+ #define COPY_MAXIMIZE_FIRST_TAG 2
2084
+
2085
+ static reg_errcode_t
2086
+ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
2087
+ int flags, int *pos_add, tre_tag_direction_t *tag_directions,
2088
+ tre_ast_node_t **copy, int *max_pos)
2089
+ {
2090
+ reg_errcode_t status = REG_OK;
2091
+ int bottom = tre_stack_num_objects(stack);
2092
+ int num_copied = 0;
2093
+ int first_tag = 1;
2094
+ tre_ast_node_t **result = copy;
2095
+ tre_copyast_symbol_t symbol;
2096
+
2097
+ STACK_PUSH(stack, voidptr, ast);
2098
+ STACK_PUSH(stack, int, COPY_RECURSE);
2099
+
2100
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2101
+ {
2102
+ tre_ast_node_t *node;
2103
+ if (status != REG_OK)
2104
+ break;
2105
+
2106
+ symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
2107
+ switch (symbol)
2108
+ {
2109
+ case COPY_SET_RESULT_PTR:
2110
+ result = tre_stack_pop_voidptr(stack);
2111
+ break;
2112
+ case COPY_RECURSE:
2113
+ node = tre_stack_pop_voidptr(stack);
2114
+ switch (node->type)
2115
+ {
2116
+ case LITERAL:
2117
+ {
2118
+ tre_literal_t *lit = node->obj;
2119
+ int pos = lit->position;
2120
+ int min = lit->code_min;
2121
+ int max = lit->code_max;
2122
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
2123
+ {
2124
+ /* XXX - e.g. [ab] has only one position but two
2125
+ nodes, so we are creating holes in the state space
2126
+ here. Not fatal, just wastes memory. */
2127
+ pos += *pos_add;
2128
+ num_copied++;
2129
+ }
2130
+ else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
2131
+ {
2132
+ /* Change this tag to empty. */
2133
+ min = EMPTY;
2134
+ max = pos = -1;
2135
+ }
2136
+ else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
2137
+ && first_tag)
2138
+ {
2139
+ /* Maximize the first tag. */
2140
+ tag_directions[max] = TRE_TAG_MAXIMIZE;
2141
+ first_tag = 0;
2142
+ }
2143
+ *result = tre_ast_new_literal(mem, min, max, pos);
2144
+ if (*result == NULL)
2145
+ status = REG_ESPACE;
2146
+
2147
+ if (pos > *max_pos)
2148
+ *max_pos = pos;
2149
+ break;
2150
+ }
2151
+ case UNION:
2152
+ {
2153
+ tre_union_t *uni = node->obj;
2154
+ tre_union_t *tmp;
2155
+ *result = tre_ast_new_union(mem, uni->left, uni->right);
2156
+ if (*result == NULL)
2157
+ {
2158
+ status = REG_ESPACE;
2159
+ break;
2160
+ }
2161
+ tmp = (*result)->obj;
2162
+ result = &tmp->left;
2163
+ STACK_PUSHX(stack, voidptr, uni->right);
2164
+ STACK_PUSHX(stack, int, COPY_RECURSE);
2165
+ STACK_PUSHX(stack, voidptr, &tmp->right);
2166
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
2167
+ STACK_PUSHX(stack, voidptr, uni->left);
2168
+ STACK_PUSHX(stack, int, COPY_RECURSE);
2169
+ break;
2170
+ }
2171
+ case CATENATION:
2172
+ {
2173
+ tre_catenation_t *cat = node->obj;
2174
+ tre_catenation_t *tmp;
2175
+ *result = tre_ast_new_catenation(mem, cat->left, cat->right);
2176
+ if (*result == NULL)
2177
+ {
2178
+ status = REG_ESPACE;
2179
+ break;
2180
+ }
2181
+ tmp = (*result)->obj;
2182
+ tmp->left = NULL;
2183
+ tmp->right = NULL;
2184
+ result = &tmp->left;
2185
+
2186
+ STACK_PUSHX(stack, voidptr, cat->right);
2187
+ STACK_PUSHX(stack, int, COPY_RECURSE);
2188
+ STACK_PUSHX(stack, voidptr, &tmp->right);
2189
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
2190
+ STACK_PUSHX(stack, voidptr, cat->left);
2191
+ STACK_PUSHX(stack, int, COPY_RECURSE);
2192
+ break;
2193
+ }
2194
+ case ITERATION:
2195
+ {
2196
+ tre_iteration_t *iter = node->obj;
2197
+ STACK_PUSHX(stack, voidptr, iter->arg);
2198
+ STACK_PUSHX(stack, int, COPY_RECURSE);
2199
+ *result = tre_ast_new_iter(mem, iter->arg, iter->min,
2200
+ iter->max, iter->minimal);
2201
+ if (*result == NULL)
2202
+ {
2203
+ status = REG_ESPACE;
2204
+ break;
2205
+ }
2206
+ iter = (*result)->obj;
2207
+ result = &iter->arg;
2208
+ break;
2209
+ }
2210
+ default:
2211
+ assert(0);
2212
+ break;
2213
+ }
2214
+ break;
2215
+ }
2216
+ }
2217
+ *pos_add += num_copied;
2218
+ return status;
2219
+ }
2220
+
2221
+ typedef enum {
2222
+ EXPAND_RECURSE,
2223
+ EXPAND_AFTER_ITER
2224
+ } tre_expand_ast_symbol_t;
2225
+
2226
+ /* Expands each iteration node that has a finite nonzero minimum or maximum
2227
+ iteration count to a catenated sequence of copies of the node. */
2228
+ static reg_errcode_t
2229
+ tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
2230
+ int *position, tre_tag_direction_t *tag_directions)
2231
+ {
2232
+ reg_errcode_t status = REG_OK;
2233
+ int bottom = tre_stack_num_objects(stack);
2234
+ int pos_add = 0;
2235
+ int pos_add_total = 0;
2236
+ int max_pos = 0;
2237
+ int iter_depth = 0;
2238
+
2239
+ STACK_PUSHR(stack, voidptr, ast);
2240
+ STACK_PUSHR(stack, int, EXPAND_RECURSE);
2241
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2242
+ {
2243
+ tre_ast_node_t *node;
2244
+ tre_expand_ast_symbol_t symbol;
2245
+
2246
+ if (status != REG_OK)
2247
+ break;
2248
+
2249
+ symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
2250
+ node = tre_stack_pop_voidptr(stack);
2251
+ switch (symbol)
2252
+ {
2253
+ case EXPAND_RECURSE:
2254
+ switch (node->type)
2255
+ {
2256
+ case LITERAL:
2257
+ {
2258
+ tre_literal_t *lit= node->obj;
2259
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
2260
+ {
2261
+ lit->position += pos_add;
2262
+ if (lit->position > max_pos)
2263
+ max_pos = lit->position;
2264
+ }
2265
+ break;
2266
+ }
2267
+ case UNION:
2268
+ {
2269
+ tre_union_t *uni = node->obj;
2270
+ STACK_PUSHX(stack, voidptr, uni->right);
2271
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
2272
+ STACK_PUSHX(stack, voidptr, uni->left);
2273
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
2274
+ break;
2275
+ }
2276
+ case CATENATION:
2277
+ {
2278
+ tre_catenation_t *cat = node->obj;
2279
+ STACK_PUSHX(stack, voidptr, cat->right);
2280
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
2281
+ STACK_PUSHX(stack, voidptr, cat->left);
2282
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
2283
+ break;
2284
+ }
2285
+ case ITERATION:
2286
+ {
2287
+ tre_iteration_t *iter = node->obj;
2288
+ STACK_PUSHX(stack, int, pos_add);
2289
+ STACK_PUSHX(stack, voidptr, node);
2290
+ STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
2291
+ STACK_PUSHX(stack, voidptr, iter->arg);
2292
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
2293
+ /* If we are going to expand this node at EXPAND_AFTER_ITER
2294
+ then don't increase the `pos' fields of the nodes now, it
2295
+ will get done when expanding. */
2296
+ if (iter->min > 1 || iter->max > 1)
2297
+ pos_add = 0;
2298
+ iter_depth++;
2299
+ break;
2300
+ }
2301
+ default:
2302
+ assert(0);
2303
+ break;
2304
+ }
2305
+ break;
2306
+ case EXPAND_AFTER_ITER:
2307
+ {
2308
+ tre_iteration_t *iter = node->obj;
2309
+ int pos_add_last;
2310
+ pos_add = tre_stack_pop_int(stack);
2311
+ pos_add_last = pos_add;
2312
+ if (iter->min > 1 || iter->max > 1)
2313
+ {
2314
+ tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
2315
+ int j;
2316
+ int pos_add_save = pos_add;
2317
+
2318
+ /* Create a catenated sequence of copies of the node. */
2319
+ for (j = 0; j < iter->min; j++)
2320
+ {
2321
+ tre_ast_node_t *copy;
2322
+ /* Remove tags from all but the last copy. */
2323
+ int flags = ((j + 1 < iter->min)
2324
+ ? COPY_REMOVE_TAGS
2325
+ : COPY_MAXIMIZE_FIRST_TAG);
2326
+ pos_add_save = pos_add;
2327
+ status = tre_copy_ast(mem, stack, iter->arg, flags,
2328
+ &pos_add, tag_directions, &copy,
2329
+ &max_pos);
2330
+ if (status != REG_OK)
2331
+ return status;
2332
+ if (seq1 != NULL)
2333
+ seq1 = tre_ast_new_catenation(mem, seq1, copy);
2334
+ else
2335
+ seq1 = copy;
2336
+ if (seq1 == NULL)
2337
+ return REG_ESPACE;
2338
+ }
2339
+
2340
+ if (iter->max == -1)
2341
+ {
2342
+ /* No upper limit. */
2343
+ pos_add_save = pos_add;
2344
+ status = tre_copy_ast(mem, stack, iter->arg, 0,
2345
+ &pos_add, NULL, &seq2, &max_pos);
2346
+ if (status != REG_OK)
2347
+ return status;
2348
+ seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
2349
+ if (seq2 == NULL)
2350
+ return REG_ESPACE;
2351
+ }
2352
+ else
2353
+ {
2354
+ for (j = iter->min; j < iter->max; j++)
2355
+ {
2356
+ tre_ast_node_t *tmp, *copy;
2357
+ pos_add_save = pos_add;
2358
+ status = tre_copy_ast(mem, stack, iter->arg, 0,
2359
+ &pos_add, NULL, &copy, &max_pos);
2360
+ if (status != REG_OK)
2361
+ return status;
2362
+ if (seq2 != NULL)
2363
+ seq2 = tre_ast_new_catenation(mem, copy, seq2);
2364
+ else
2365
+ seq2 = copy;
2366
+ if (seq2 == NULL)
2367
+ return REG_ESPACE;
2368
+ tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
2369
+ if (tmp == NULL)
2370
+ return REG_ESPACE;
2371
+ seq2 = tre_ast_new_union(mem, tmp, seq2);
2372
+ if (seq2 == NULL)
2373
+ return REG_ESPACE;
2374
+ }
2375
+ }
2376
+
2377
+ pos_add = pos_add_save;
2378
+ if (seq1 == NULL)
2379
+ seq1 = seq2;
2380
+ else if (seq2 != NULL)
2381
+ seq1 = tre_ast_new_catenation(mem, seq1, seq2);
2382
+ if (seq1 == NULL)
2383
+ return REG_ESPACE;
2384
+ node->obj = seq1->obj;
2385
+ node->type = seq1->type;
2386
+ }
2387
+
2388
+ iter_depth--;
2389
+ pos_add_total += pos_add - pos_add_last;
2390
+ if (iter_depth == 0)
2391
+ pos_add = pos_add_total;
2392
+
2393
+ break;
2394
+ }
2395
+ default:
2396
+ assert(0);
2397
+ break;
2398
+ }
2399
+ }
2400
+
2401
+ *position += pos_add_total;
2402
+
2403
+ /* `max_pos' should never be larger than `*position' if the above
2404
+ code works, but just an extra safeguard let's make sure
2405
+ `*position' is set large enough so enough memory will be
2406
+ allocated for the transition table. */
2407
+ if (max_pos > *position)
2408
+ *position = max_pos;
2409
+
2410
+ return status;
2411
+ }
2412
+
2413
+ static tre_pos_and_tags_t *
2414
+ tre_set_empty(tre_mem_t mem)
2415
+ {
2416
+ tre_pos_and_tags_t *new_set;
2417
+
2418
+ new_set = tre_mem_calloc(mem, sizeof(*new_set));
2419
+ if (new_set == NULL)
2420
+ return NULL;
2421
+
2422
+ new_set[0].position = -1;
2423
+ new_set[0].code_min = -1;
2424
+ new_set[0].code_max = -1;
2425
+
2426
+ return new_set;
2427
+ }
2428
+
2429
+ static tre_pos_and_tags_t *
2430
+ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2431
+ tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2432
+ {
2433
+ tre_pos_and_tags_t *new_set;
2434
+
2435
+ new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2436
+ if (new_set == NULL)
2437
+ return NULL;
2438
+
2439
+ new_set[0].position = position;
2440
+ new_set[0].code_min = code_min;
2441
+ new_set[0].code_max = code_max;
2442
+ new_set[0].class = class;
2443
+ new_set[0].neg_classes = neg_classes;
2444
+ new_set[0].backref = backref;
2445
+ new_set[1].position = -1;
2446
+ new_set[1].code_min = -1;
2447
+ new_set[1].code_max = -1;
2448
+
2449
+ return new_set;
2450
+ }
2451
+
2452
+ static tre_pos_and_tags_t *
2453
+ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2454
+ int *tags, int assertions)
2455
+ {
2456
+ int s1, s2, i, j;
2457
+ tre_pos_and_tags_t *new_set;
2458
+ int *new_tags;
2459
+ int num_tags;
2460
+
2461
+ for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2462
+ for (s1 = 0; set1[s1].position >= 0; s1++);
2463
+ for (s2 = 0; set2[s2].position >= 0; s2++);
2464
+ new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2465
+ if (!new_set )
2466
+ return NULL;
2467
+
2468
+ for (s1 = 0; set1[s1].position >= 0; s1++)
2469
+ {
2470
+ new_set[s1].position = set1[s1].position;
2471
+ new_set[s1].code_min = set1[s1].code_min;
2472
+ new_set[s1].code_max = set1[s1].code_max;
2473
+ new_set[s1].assertions = set1[s1].assertions | assertions;
2474
+ new_set[s1].class = set1[s1].class;
2475
+ new_set[s1].neg_classes = set1[s1].neg_classes;
2476
+ new_set[s1].backref = set1[s1].backref;
2477
+ if (set1[s1].tags == NULL && tags == NULL)
2478
+ new_set[s1].tags = NULL;
2479
+ else
2480
+ {
2481
+ for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2482
+ new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2483
+ * (i + num_tags + 1)));
2484
+ if (new_tags == NULL)
2485
+ return NULL;
2486
+ for (j = 0; j < i; j++)
2487
+ new_tags[j] = set1[s1].tags[j];
2488
+ for (i = 0; i < num_tags; i++)
2489
+ new_tags[j + i] = tags[i];
2490
+ new_tags[j + i] = -1;
2491
+ new_set[s1].tags = new_tags;
2492
+ }
2493
+ }
2494
+
2495
+ for (s2 = 0; set2[s2].position >= 0; s2++)
2496
+ {
2497
+ new_set[s1 + s2].position = set2[s2].position;
2498
+ new_set[s1 + s2].code_min = set2[s2].code_min;
2499
+ new_set[s1 + s2].code_max = set2[s2].code_max;
2500
+ /* XXX - why not | assertions here as well? */
2501
+ new_set[s1 + s2].assertions = set2[s2].assertions;
2502
+ new_set[s1 + s2].class = set2[s2].class;
2503
+ new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2504
+ new_set[s1 + s2].backref = set2[s2].backref;
2505
+ if (set2[s2].tags == NULL)
2506
+ new_set[s1 + s2].tags = NULL;
2507
+ else
2508
+ {
2509
+ for (i = 0; set2[s2].tags[i] >= 0; i++);
2510
+ new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2511
+ if (new_tags == NULL)
2512
+ return NULL;
2513
+ for (j = 0; j < i; j++)
2514
+ new_tags[j] = set2[s2].tags[j];
2515
+ new_tags[j] = -1;
2516
+ new_set[s1 + s2].tags = new_tags;
2517
+ }
2518
+ }
2519
+ new_set[s1 + s2].position = -1;
2520
+ return new_set;
2521
+ }
2522
+
2523
+ /* Finds the empty path through `node' which is the one that should be
2524
+ taken according to POSIX.2 rules, and adds the tags on that path to
2525
+ `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2526
+ set to the number of tags seen on the path. */
2527
+ static reg_errcode_t
2528
+ tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2529
+ int *assertions, int *num_tags_seen)
2530
+ {
2531
+ tre_literal_t *lit;
2532
+ tre_union_t *uni;
2533
+ tre_catenation_t *cat;
2534
+ tre_iteration_t *iter;
2535
+ int i;
2536
+ int bottom = tre_stack_num_objects(stack);
2537
+ reg_errcode_t status = REG_OK;
2538
+ if (num_tags_seen)
2539
+ *num_tags_seen = 0;
2540
+
2541
+ status = tre_stack_push_voidptr(stack, node);
2542
+
2543
+ /* Walk through the tree recursively. */
2544
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2545
+ {
2546
+ node = tre_stack_pop_voidptr(stack);
2547
+
2548
+ switch (node->type)
2549
+ {
2550
+ case LITERAL:
2551
+ lit = (tre_literal_t *)node->obj;
2552
+ switch (lit->code_min)
2553
+ {
2554
+ case TAG:
2555
+ if (lit->code_max >= 0)
2556
+ {
2557
+ if (tags != NULL)
2558
+ {
2559
+ /* Add the tag to `tags'. */
2560
+ for (i = 0; tags[i] >= 0; i++)
2561
+ if (tags[i] == lit->code_max)
2562
+ break;
2563
+ if (tags[i] < 0)
2564
+ {
2565
+ tags[i] = lit->code_max;
2566
+ tags[i + 1] = -1;
2567
+ }
2568
+ }
2569
+ if (num_tags_seen)
2570
+ (*num_tags_seen)++;
2571
+ }
2572
+ break;
2573
+ case ASSERTION:
2574
+ assert(lit->code_max >= 1
2575
+ || lit->code_max <= ASSERT_LAST);
2576
+ if (assertions != NULL)
2577
+ *assertions |= lit->code_max;
2578
+ break;
2579
+ case EMPTY:
2580
+ break;
2581
+ default:
2582
+ assert(0);
2583
+ break;
2584
+ }
2585
+ break;
2586
+
2587
+ case UNION:
2588
+ /* Subexpressions starting earlier take priority over ones
2589
+ starting later, so we prefer the left subexpression over the
2590
+ right subexpression. */
2591
+ uni = (tre_union_t *)node->obj;
2592
+ if (uni->left->nullable)
2593
+ STACK_PUSHX(stack, voidptr, uni->left)
2594
+ else if (uni->right->nullable)
2595
+ STACK_PUSHX(stack, voidptr, uni->right)
2596
+ else
2597
+ assert(0);
2598
+ break;
2599
+
2600
+ case CATENATION:
2601
+ /* The path must go through both children. */
2602
+ cat = (tre_catenation_t *)node->obj;
2603
+ assert(cat->left->nullable);
2604
+ assert(cat->right->nullable);
2605
+ STACK_PUSHX(stack, voidptr, cat->left);
2606
+ STACK_PUSHX(stack, voidptr, cat->right);
2607
+ break;
2608
+
2609
+ case ITERATION:
2610
+ /* A match with an empty string is preferred over no match at
2611
+ all, so we go through the argument if possible. */
2612
+ iter = (tre_iteration_t *)node->obj;
2613
+ if (iter->arg->nullable)
2614
+ STACK_PUSHX(stack, voidptr, iter->arg);
2615
+ break;
2616
+
2617
+ default:
2618
+ assert(0);
2619
+ break;
2620
+ }
2621
+ }
2622
+
2623
+ return status;
2624
+ }
2625
+
2626
+
2627
+ typedef enum {
2628
+ NFL_RECURSE,
2629
+ NFL_POST_UNION,
2630
+ NFL_POST_CATENATION,
2631
+ NFL_POST_ITERATION
2632
+ } tre_nfl_stack_symbol_t;
2633
+
2634
+
2635
+ /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2636
+ the nodes of the AST `tree'. */
2637
+ static reg_errcode_t
2638
+ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2639
+ {
2640
+ int bottom = tre_stack_num_objects(stack);
2641
+
2642
+ STACK_PUSHR(stack, voidptr, tree);
2643
+ STACK_PUSHR(stack, int, NFL_RECURSE);
2644
+
2645
+ while (tre_stack_num_objects(stack) > bottom)
2646
+ {
2647
+ tre_nfl_stack_symbol_t symbol;
2648
+ tre_ast_node_t *node;
2649
+
2650
+ symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2651
+ node = tre_stack_pop_voidptr(stack);
2652
+ switch (symbol)
2653
+ {
2654
+ case NFL_RECURSE:
2655
+ switch (node->type)
2656
+ {
2657
+ case LITERAL:
2658
+ {
2659
+ tre_literal_t *lit = (tre_literal_t *)node->obj;
2660
+ if (IS_BACKREF(lit))
2661
+ {
2662
+ /* Back references: nullable = false, firstpos = {i},
2663
+ lastpos = {i}. */
2664
+ node->nullable = 0;
2665
+ node->firstpos = tre_set_one(mem, lit->position, 0,
2666
+ TRE_CHAR_MAX, 0, NULL, -1);
2667
+ if (!node->firstpos)
2668
+ return REG_ESPACE;
2669
+ node->lastpos = tre_set_one(mem, lit->position, 0,
2670
+ TRE_CHAR_MAX, 0, NULL,
2671
+ (int)lit->code_max);
2672
+ if (!node->lastpos)
2673
+ return REG_ESPACE;
2674
+ }
2675
+ else if (lit->code_min < 0)
2676
+ {
2677
+ /* Tags, empty strings, params, and zero width assertions:
2678
+ nullable = true, firstpos = {}, and lastpos = {}. */
2679
+ node->nullable = 1;
2680
+ node->firstpos = tre_set_empty(mem);
2681
+ if (!node->firstpos)
2682
+ return REG_ESPACE;
2683
+ node->lastpos = tre_set_empty(mem);
2684
+ if (!node->lastpos)
2685
+ return REG_ESPACE;
2686
+ }
2687
+ else
2688
+ {
2689
+ /* Literal at position i: nullable = false, firstpos = {i},
2690
+ lastpos = {i}. */
2691
+ node->nullable = 0;
2692
+ node->firstpos =
2693
+ tre_set_one(mem, lit->position, (int)lit->code_min,
2694
+ (int)lit->code_max, 0, NULL, -1);
2695
+ if (!node->firstpos)
2696
+ return REG_ESPACE;
2697
+ node->lastpos = tre_set_one(mem, lit->position,
2698
+ (int)lit->code_min,
2699
+ (int)lit->code_max,
2700
+ lit->class, lit->neg_classes,
2701
+ -1);
2702
+ if (!node->lastpos)
2703
+ return REG_ESPACE;
2704
+ }
2705
+ break;
2706
+ }
2707
+
2708
+ case UNION:
2709
+ /* Compute the attributes for the two subtrees, and after that
2710
+ for this node. */
2711
+ STACK_PUSHR(stack, voidptr, node);
2712
+ STACK_PUSHR(stack, int, NFL_POST_UNION);
2713
+ STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2714
+ STACK_PUSHR(stack, int, NFL_RECURSE);
2715
+ STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2716
+ STACK_PUSHR(stack, int, NFL_RECURSE);
2717
+ break;
2718
+
2719
+ case CATENATION:
2720
+ /* Compute the attributes for the two subtrees, and after that
2721
+ for this node. */
2722
+ STACK_PUSHR(stack, voidptr, node);
2723
+ STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2724
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2725
+ STACK_PUSHR(stack, int, NFL_RECURSE);
2726
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2727
+ STACK_PUSHR(stack, int, NFL_RECURSE);
2728
+ break;
2729
+
2730
+ case ITERATION:
2731
+ /* Compute the attributes for the subtree, and after that for
2732
+ this node. */
2733
+ STACK_PUSHR(stack, voidptr, node);
2734
+ STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2735
+ STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2736
+ STACK_PUSHR(stack, int, NFL_RECURSE);
2737
+ break;
2738
+ }
2739
+ break; /* end case: NFL_RECURSE */
2740
+
2741
+ case NFL_POST_UNION:
2742
+ {
2743
+ tre_union_t *uni = (tre_union_t *)node->obj;
2744
+ node->nullable = uni->left->nullable || uni->right->nullable;
2745
+ node->firstpos = tre_set_union(mem, uni->left->firstpos,
2746
+ uni->right->firstpos, NULL, 0);
2747
+ if (!node->firstpos)
2748
+ return REG_ESPACE;
2749
+ node->lastpos = tre_set_union(mem, uni->left->lastpos,
2750
+ uni->right->lastpos, NULL, 0);
2751
+ if (!node->lastpos)
2752
+ return REG_ESPACE;
2753
+ break;
2754
+ }
2755
+
2756
+ case NFL_POST_ITERATION:
2757
+ {
2758
+ tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2759
+
2760
+ if (iter->min == 0 || iter->arg->nullable)
2761
+ node->nullable = 1;
2762
+ else
2763
+ node->nullable = 0;
2764
+ node->firstpos = iter->arg->firstpos;
2765
+ node->lastpos = iter->arg->lastpos;
2766
+ break;
2767
+ }
2768
+
2769
+ case NFL_POST_CATENATION:
2770
+ {
2771
+ int num_tags, *tags, assertions;
2772
+ reg_errcode_t status;
2773
+ tre_catenation_t *cat = node->obj;
2774
+ node->nullable = cat->left->nullable && cat->right->nullable;
2775
+
2776
+ /* Compute firstpos. */
2777
+ if (cat->left->nullable)
2778
+ {
2779
+ /* The left side matches the empty string. Make a first pass
2780
+ with tre_match_empty() to get the number of tags and
2781
+ parameters. */
2782
+ status = tre_match_empty(stack, cat->left,
2783
+ NULL, NULL, &num_tags);
2784
+ if (status != REG_OK)
2785
+ return status;
2786
+ /* Allocate arrays for the tags and parameters. */
2787
+ tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2788
+ if (!tags)
2789
+ return REG_ESPACE;
2790
+ tags[0] = -1;
2791
+ assertions = 0;
2792
+ /* Second pass with tre_mach_empty() to get the list of
2793
+ tags and parameters. */
2794
+ status = tre_match_empty(stack, cat->left, tags,
2795
+ &assertions, NULL);
2796
+ if (status != REG_OK)
2797
+ {
2798
+ xfree(tags);
2799
+ return status;
2800
+ }
2801
+ node->firstpos =
2802
+ tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2803
+ tags, assertions);
2804
+ xfree(tags);
2805
+ if (!node->firstpos)
2806
+ return REG_ESPACE;
2807
+ }
2808
+ else
2809
+ {
2810
+ node->firstpos = cat->left->firstpos;
2811
+ }
2812
+
2813
+ /* Compute lastpos. */
2814
+ if (cat->right->nullable)
2815
+ {
2816
+ /* The right side matches the empty string. Make a first pass
2817
+ with tre_match_empty() to get the number of tags and
2818
+ parameters. */
2819
+ status = tre_match_empty(stack, cat->right,
2820
+ NULL, NULL, &num_tags);
2821
+ if (status != REG_OK)
2822
+ return status;
2823
+ /* Allocate arrays for the tags and parameters. */
2824
+ tags = xmalloc(sizeof(int) * (num_tags + 1));
2825
+ if (!tags)
2826
+ return REG_ESPACE;
2827
+ tags[0] = -1;
2828
+ assertions = 0;
2829
+ /* Second pass with tre_mach_empty() to get the list of
2830
+ tags and parameters. */
2831
+ status = tre_match_empty(stack, cat->right, tags,
2832
+ &assertions, NULL);
2833
+ if (status != REG_OK)
2834
+ {
2835
+ xfree(tags);
2836
+ return status;
2837
+ }
2838
+ node->lastpos =
2839
+ tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2840
+ tags, assertions);
2841
+ xfree(tags);
2842
+ if (!node->lastpos)
2843
+ return REG_ESPACE;
2844
+ }
2845
+ else
2846
+ {
2847
+ node->lastpos = cat->right->lastpos;
2848
+ }
2849
+ break;
2850
+ }
2851
+
2852
+ default:
2853
+ assert(0);
2854
+ break;
2855
+ }
2856
+ }
2857
+
2858
+ return REG_OK;
2859
+ }
2860
+
2861
+
2862
+ /* Adds a transition from each position in `p1' to each position in `p2'. */
2863
+ static reg_errcode_t
2864
+ tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2865
+ tre_tnfa_transition_t *transitions,
2866
+ int *counts, int *offs)
2867
+ {
2868
+ tre_pos_and_tags_t *orig_p2 = p2;
2869
+ tre_tnfa_transition_t *trans;
2870
+ int i, j, k, l, dup, prev_p2_pos;
2871
+
2872
+ if (transitions != NULL)
2873
+ while (p1->position >= 0)
2874
+ {
2875
+ p2 = orig_p2;
2876
+ prev_p2_pos = -1;
2877
+ while (p2->position >= 0)
2878
+ {
2879
+ /* Optimization: if this position was already handled, skip it. */
2880
+ if (p2->position == prev_p2_pos)
2881
+ {
2882
+ p2++;
2883
+ continue;
2884
+ }
2885
+ prev_p2_pos = p2->position;
2886
+ /* Set `trans' to point to the next unused transition from
2887
+ position `p1->position'. */
2888
+ trans = transitions + offs[p1->position];
2889
+ while (trans->state != NULL)
2890
+ {
2891
+ #if 0
2892
+ /* If we find a previous transition from `p1->position' to
2893
+ `p2->position', it is overwritten. This can happen only
2894
+ if there are nested loops in the regexp, like in "((a)*)*".
2895
+ In POSIX.2 repetition using the outer loop is always
2896
+ preferred over using the inner loop. Therefore the
2897
+ transition for the inner loop is useless and can be thrown
2898
+ away. */
2899
+ /* XXX - The same position is used for all nodes in a bracket
2900
+ expression, so this optimization cannot be used (it will
2901
+ break bracket expressions) unless I figure out a way to
2902
+ detect it here. */
2903
+ if (trans->state_id == p2->position)
2904
+ {
2905
+ break;
2906
+ }
2907
+ #endif
2908
+ trans++;
2909
+ }
2910
+
2911
+ if (trans->state == NULL)
2912
+ (trans + 1)->state = NULL;
2913
+ /* Use the character ranges, assertions, etc. from `p1' for
2914
+ the transition from `p1' to `p2'. */
2915
+ trans->code_min = p1->code_min;
2916
+ trans->code_max = p1->code_max;
2917
+ trans->state = transitions + offs[p2->position];
2918
+ trans->state_id = p2->position;
2919
+ trans->assertions = p1->assertions | p2->assertions
2920
+ | (p1->class ? ASSERT_CHAR_CLASS : 0)
2921
+ | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2922
+ if (p1->backref >= 0)
2923
+ {
2924
+ assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2925
+ assert(p2->backref < 0);
2926
+ trans->u.backref = p1->backref;
2927
+ trans->assertions |= ASSERT_BACKREF;
2928
+ }
2929
+ else
2930
+ trans->u.class = p1->class;
2931
+ if (p1->neg_classes != NULL)
2932
+ {
2933
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2934
+ trans->neg_classes =
2935
+ xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2936
+ if (trans->neg_classes == NULL)
2937
+ return REG_ESPACE;
2938
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2939
+ trans->neg_classes[i] = p1->neg_classes[i];
2940
+ trans->neg_classes[i] = (tre_ctype_t)0;
2941
+ }
2942
+ else
2943
+ trans->neg_classes = NULL;
2944
+
2945
+ /* Find out how many tags this transition has. */
2946
+ i = 0;
2947
+ if (p1->tags != NULL)
2948
+ while(p1->tags[i] >= 0)
2949
+ i++;
2950
+ j = 0;
2951
+ if (p2->tags != NULL)
2952
+ while(p2->tags[j] >= 0)
2953
+ j++;
2954
+
2955
+ /* If we are overwriting a transition, free the old tag array. */
2956
+ if (trans->tags != NULL)
2957
+ xfree(trans->tags);
2958
+ trans->tags = NULL;
2959
+
2960
+ /* If there were any tags, allocate an array and fill it. */
2961
+ if (i + j > 0)
2962
+ {
2963
+ trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2964
+ if (!trans->tags)
2965
+ return REG_ESPACE;
2966
+ i = 0;
2967
+ if (p1->tags != NULL)
2968
+ while(p1->tags[i] >= 0)
2969
+ {
2970
+ trans->tags[i] = p1->tags[i];
2971
+ i++;
2972
+ }
2973
+ l = i;
2974
+ j = 0;
2975
+ if (p2->tags != NULL)
2976
+ while (p2->tags[j] >= 0)
2977
+ {
2978
+ /* Don't add duplicates. */
2979
+ dup = 0;
2980
+ for (k = 0; k < i; k++)
2981
+ if (trans->tags[k] == p2->tags[j])
2982
+ {
2983
+ dup = 1;
2984
+ break;
2985
+ }
2986
+ if (!dup)
2987
+ trans->tags[l++] = p2->tags[j];
2988
+ j++;
2989
+ }
2990
+ trans->tags[l] = -1;
2991
+ }
2992
+
2993
+ p2++;
2994
+ }
2995
+ p1++;
2996
+ }
2997
+ else
2998
+ /* Compute a maximum limit for the number of transitions leaving
2999
+ from each state. */
3000
+ while (p1->position >= 0)
3001
+ {
3002
+ p2 = orig_p2;
3003
+ while (p2->position >= 0)
3004
+ {
3005
+ counts[p1->position]++;
3006
+ p2++;
3007
+ }
3008
+ p1++;
3009
+ }
3010
+ return REG_OK;
3011
+ }
3012
+
3013
+ /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
3014
+ labelled with one character range (there are no transitions on empty
3015
+ strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
3016
+ the regexp. */
3017
+ static reg_errcode_t
3018
+ tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
3019
+ int *counts, int *offs)
3020
+ {
3021
+ tre_union_t *uni;
3022
+ tre_catenation_t *cat;
3023
+ tre_iteration_t *iter;
3024
+ reg_errcode_t errcode = REG_OK;
3025
+
3026
+ /* XXX - recurse using a stack!. */
3027
+ switch (node->type)
3028
+ {
3029
+ case LITERAL:
3030
+ break;
3031
+ case UNION:
3032
+ uni = (tre_union_t *)node->obj;
3033
+ errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
3034
+ if (errcode != REG_OK)
3035
+ return errcode;
3036
+ errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
3037
+ break;
3038
+
3039
+ case CATENATION:
3040
+ cat = (tre_catenation_t *)node->obj;
3041
+ /* Add a transition from each position in cat->left->lastpos
3042
+ to each position in cat->right->firstpos. */
3043
+ errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
3044
+ transitions, counts, offs);
3045
+ if (errcode != REG_OK)
3046
+ return errcode;
3047
+ errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
3048
+ if (errcode != REG_OK)
3049
+ return errcode;
3050
+ errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
3051
+ break;
3052
+
3053
+ case ITERATION:
3054
+ iter = (tre_iteration_t *)node->obj;
3055
+ assert(iter->max == -1 || iter->max == 1);
3056
+
3057
+ if (iter->max == -1)
3058
+ {
3059
+ assert(iter->min == 0 || iter->min == 1);
3060
+ /* Add a transition from each last position in the iterated
3061
+ expression to each first position. */
3062
+ errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
3063
+ transitions, counts, offs);
3064
+ if (errcode != REG_OK)
3065
+ return errcode;
3066
+ }
3067
+ errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
3068
+ break;
3069
+ }
3070
+ return errcode;
3071
+ }
3072
+
3073
+
3074
+ #define ERROR_EXIT(err) \
3075
+ do \
3076
+ { \
3077
+ errcode = err; \
3078
+ if (/*CONSTCOND*/1) \
3079
+ goto error_exit; \
3080
+ } \
3081
+ while (/*CONSTCOND*/0)
3082
+
3083
+
3084
+ int
3085
+ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
3086
+ {
3087
+ tre_stack_t *stack;
3088
+ tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
3089
+ tre_pos_and_tags_t *p;
3090
+ int *counts = NULL, *offs = NULL;
3091
+ int i, add = 0;
3092
+ tre_tnfa_transition_t *transitions, *initial;
3093
+ tre_tnfa_t *tnfa = NULL;
3094
+ tre_submatch_data_t *submatch_data;
3095
+ tre_tag_direction_t *tag_directions = NULL;
3096
+ reg_errcode_t errcode;
3097
+ tre_mem_t mem;
3098
+
3099
+ /* Parse context. */
3100
+ tre_parse_ctx_t parse_ctx;
3101
+
3102
+ /* Allocate a stack used throughout the compilation process for various
3103
+ purposes. */
3104
+ stack = tre_stack_new(512, 10240, 128);
3105
+ if (!stack)
3106
+ return REG_ESPACE;
3107
+ /* Allocate a fast memory allocator. */
3108
+ mem = tre_mem_new();
3109
+ if (!mem)
3110
+ {
3111
+ tre_stack_destroy(stack);
3112
+ return REG_ESPACE;
3113
+ }
3114
+
3115
+ /* Parse the regexp. */
3116
+ memset(&parse_ctx, 0, sizeof(parse_ctx));
3117
+ parse_ctx.mem = mem;
3118
+ parse_ctx.stack = stack;
3119
+ parse_ctx.re = regex;
3120
+ parse_ctx.cflags = cflags;
3121
+ parse_ctx.max_backref = -1;
3122
+ errcode = tre_parse(&parse_ctx);
3123
+ if (errcode != REG_OK)
3124
+ ERROR_EXIT(errcode);
3125
+ preg->re_nsub = parse_ctx.submatch_id - 1;
3126
+ tree = parse_ctx.result;
3127
+
3128
+ /* Back references and approximate matching cannot currently be used
3129
+ in the same regexp. */
3130
+ if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3131
+ ERROR_EXIT(REG_BADPAT);
3132
+
3133
+ #ifdef TRE_DEBUG
3134
+ tre_ast_print(tree);
3135
+ #endif /* TRE_DEBUG */
3136
+
3137
+ /* Referring to nonexistent subexpressions is illegal. */
3138
+ if (parse_ctx.max_backref > (int)preg->re_nsub)
3139
+ ERROR_EXIT(REG_ESUBREG);
3140
+
3141
+ /* Allocate the TNFA struct. */
3142
+ tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3143
+ if (tnfa == NULL)
3144
+ ERROR_EXIT(REG_ESPACE);
3145
+ tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3146
+ tnfa->have_approx = parse_ctx.have_approx;
3147
+ tnfa->num_submatches = parse_ctx.submatch_id;
3148
+
3149
+ /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3150
+ regexp does not have back references, this can be skipped. */
3151
+ if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
3152
+ {
3153
+
3154
+ /* Figure out how many tags we will need. */
3155
+ errcode = tre_add_tags(NULL, stack, tree, tnfa);
3156
+ if (errcode != REG_OK)
3157
+ ERROR_EXIT(errcode);
3158
+
3159
+ if (tnfa->num_tags > 0)
3160
+ {
3161
+ tag_directions = xmalloc(sizeof(*tag_directions)
3162
+ * (tnfa->num_tags + 1));
3163
+ if (tag_directions == NULL)
3164
+ ERROR_EXIT(REG_ESPACE);
3165
+ tnfa->tag_directions = tag_directions;
3166
+ memset(tag_directions, -1,
3167
+ sizeof(*tag_directions) * (tnfa->num_tags + 1));
3168
+ }
3169
+ tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
3170
+ sizeof(tnfa->minimal_tags));
3171
+ if (tnfa->minimal_tags == NULL)
3172
+ ERROR_EXIT(REG_ESPACE);
3173
+
3174
+ submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3175
+ sizeof(*submatch_data));
3176
+ if (submatch_data == NULL)
3177
+ ERROR_EXIT(REG_ESPACE);
3178
+ tnfa->submatch_data = submatch_data;
3179
+
3180
+ errcode = tre_add_tags(mem, stack, tree, tnfa);
3181
+ if (errcode != REG_OK)
3182
+ ERROR_EXIT(errcode);
3183
+
3184
+ }
3185
+
3186
+ /* Expand iteration nodes. */
3187
+ errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3188
+ tag_directions);
3189
+ if (errcode != REG_OK)
3190
+ ERROR_EXIT(errcode);
3191
+
3192
+ /* Add a dummy node for the final state.
3193
+ XXX - For certain patterns this dummy node can be optimized away,
3194
+ for example "a*" or "ab*". Figure out a simple way to detect
3195
+ this possibility. */
3196
+ tmp_ast_l = tree;
3197
+ tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3198
+ if (tmp_ast_r == NULL)
3199
+ ERROR_EXIT(REG_ESPACE);
3200
+
3201
+ tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3202
+ if (tree == NULL)
3203
+ ERROR_EXIT(REG_ESPACE);
3204
+
3205
+ errcode = tre_compute_nfl(mem, stack, tree);
3206
+ if (errcode != REG_OK)
3207
+ ERROR_EXIT(errcode);
3208
+
3209
+ counts = xmalloc(sizeof(int) * parse_ctx.position);
3210
+ if (counts == NULL)
3211
+ ERROR_EXIT(REG_ESPACE);
3212
+
3213
+ offs = xmalloc(sizeof(int) * parse_ctx.position);
3214
+ if (offs == NULL)
3215
+ ERROR_EXIT(REG_ESPACE);
3216
+
3217
+ for (i = 0; i < parse_ctx.position; i++)
3218
+ counts[i] = 0;
3219
+ tre_ast_to_tnfa(tree, NULL, counts, NULL);
3220
+
3221
+ add = 0;
3222
+ for (i = 0; i < parse_ctx.position; i++)
3223
+ {
3224
+ offs[i] = add;
3225
+ add += counts[i] + 1;
3226
+ counts[i] = 0;
3227
+ }
3228
+ transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3229
+ if (transitions == NULL)
3230
+ ERROR_EXIT(REG_ESPACE);
3231
+ tnfa->transitions = transitions;
3232
+ tnfa->num_transitions = add;
3233
+
3234
+ errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3235
+ if (errcode != REG_OK)
3236
+ ERROR_EXIT(errcode);
3237
+
3238
+ tnfa->firstpos_chars = NULL;
3239
+
3240
+ p = tree->firstpos;
3241
+ i = 0;
3242
+ while (p->position >= 0)
3243
+ {
3244
+ i++;
3245
+ p++;
3246
+ }
3247
+
3248
+ initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3249
+ if (initial == NULL)
3250
+ ERROR_EXIT(REG_ESPACE);
3251
+ tnfa->initial = initial;
3252
+
3253
+ i = 0;
3254
+ for (p = tree->firstpos; p->position >= 0; p++)
3255
+ {
3256
+ initial[i].state = transitions + offs[p->position];
3257
+ initial[i].state_id = p->position;
3258
+ initial[i].tags = NULL;
3259
+ /* Copy the arrays p->tags, and p->params, they are allocated
3260
+ from a tre_mem object. */
3261
+ if (p->tags)
3262
+ {
3263
+ int j;
3264
+ for (j = 0; p->tags[j] >= 0; j++);
3265
+ initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3266
+ if (!initial[i].tags)
3267
+ ERROR_EXIT(REG_ESPACE);
3268
+ memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3269
+ }
3270
+ initial[i].assertions = p->assertions;
3271
+ i++;
3272
+ }
3273
+ initial[i].state = NULL;
3274
+
3275
+ tnfa->num_transitions = add;
3276
+ tnfa->final = transitions + offs[tree->lastpos[0].position];
3277
+ tnfa->num_states = parse_ctx.position;
3278
+ tnfa->cflags = cflags;
3279
+
3280
+ tre_mem_destroy(mem);
3281
+ tre_stack_destroy(stack);
3282
+ xfree(counts);
3283
+ xfree(offs);
3284
+
3285
+ preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3286
+ return REG_OK;
3287
+
3288
+ error_exit:
3289
+ /* Free everything that was allocated and return the error code. */
3290
+ tre_mem_destroy(mem);
3291
+ if (stack != NULL)
3292
+ tre_stack_destroy(stack);
3293
+ if (counts != NULL)
3294
+ xfree(counts);
3295
+ if (offs != NULL)
3296
+ xfree(offs);
3297
+ preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3298
+ regfree(preg);
3299
+ return errcode;
3300
+ }
3301
+
3302
+
3303
+
3304
+
3305
+ void
3306
+ regfree(regex_t *preg)
3307
+ {
3308
+ tre_tnfa_t *tnfa;
3309
+ unsigned int i;
3310
+ tre_tnfa_transition_t *trans;
3311
+
3312
+ tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3313
+ if (!tnfa)
3314
+ return;
3315
+
3316
+ for (i = 0; i < tnfa->num_transitions; i++)
3317
+ if (tnfa->transitions[i].state)
3318
+ {
3319
+ if (tnfa->transitions[i].tags)
3320
+ xfree(tnfa->transitions[i].tags);
3321
+ if (tnfa->transitions[i].neg_classes)
3322
+ xfree(tnfa->transitions[i].neg_classes);
3323
+ }
3324
+ if (tnfa->transitions)
3325
+ xfree(tnfa->transitions);
3326
+
3327
+ if (tnfa->initial)
3328
+ {
3329
+ for (trans = tnfa->initial; trans->state; trans++)
3330
+ {
3331
+ if (trans->tags)
3332
+ xfree(trans->tags);
3333
+ }
3334
+ xfree(tnfa->initial);
3335
+ }
3336
+
3337
+ if (tnfa->submatch_data)
3338
+ {
3339
+ for (i = 0; i < tnfa->num_submatches; i++)
3340
+ if (tnfa->submatch_data[i].parents)
3341
+ xfree(tnfa->submatch_data[i].parents);
3342
+ xfree(tnfa->submatch_data);
3343
+ }
3344
+
3345
+ if (tnfa->tag_directions)
3346
+ xfree(tnfa->tag_directions);
3347
+ if (tnfa->firstpos_chars)
3348
+ xfree(tnfa->firstpos_chars);
3349
+ if (tnfa->minimal_tags)
3350
+ xfree(tnfa->minimal_tags);
3351
+ xfree(tnfa);
3352
+ }