johnson 2.0.0.pre1 → 2.0.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (311) hide show
  1. data/CHANGELOG.rdoc +12 -0
  2. data/Manifest.txt +4 -285
  3. data/Rakefile +13 -20
  4. data/ext/tracemonkey/global.cc +4 -1
  5. data/ext/tracemonkey/js.cc +30 -33
  6. data/ext/tracemonkey/runtime.cc +96 -6
  7. data/ext/tracemonkey/split_global.cc +0 -5
  8. data/ext/tracemonkey/tracemonkey.h +2 -2
  9. data/lib/johnson.rb +2 -2
  10. data/lib/johnson/runtime.rb +25 -15
  11. data/lib/johnson/tracemonkey/runtime.rb +6 -3
  12. data/vendor/tracemonkey/config/system-headers +1 -3
  13. data/vendor/tracemonkey/jscntxt.h +5 -2
  14. data/vendor/tracemonkey/jsdbgapi.cpp +9 -1
  15. data/vendor/tracemonkey/jsdbgapi.h +4 -0
  16. data/vendor/tracemonkey/tests/ecma/NativeObjects/browser.js +0 -0
  17. data/vendor/tracemonkey/tests/ecma/NativeObjects/jstests.list +0 -0
  18. data/vendor/tracemonkey/tests/ecma_3_1/Object/jstests.list +1 -1
  19. data/vendor/tracemonkey/tests/js1_3/misc/browser.js +0 -0
  20. data/vendor/tracemonkey/tests/js1_3/misc/jstests.list +0 -0
  21. data/vendor/tracemonkey/tests/js1_5/Regress/jstests.list +4 -4
  22. data/vendor/tracemonkey/tests/js1_5/Scope/jstests.list +1 -1
  23. data/vendor/tracemonkey/tests/js1_5/decompilation/jstests.list +2 -2
  24. data/vendor/tracemonkey/tests/js1_7/decompilation/jstests.list +1 -1
  25. data/vendor/tracemonkey/tests/shell.js +2 -1
  26. metadata +51 -309
  27. data/ext/spidermonkey/context.c +0 -116
  28. data/ext/spidermonkey/context.h +0 -19
  29. data/ext/spidermonkey/conversions.c +0 -361
  30. data/ext/spidermonkey/conversions.h +0 -31
  31. data/ext/spidermonkey/debugger.c +0 -234
  32. data/ext/spidermonkey/debugger.h +0 -10
  33. data/ext/spidermonkey/extconf.rb +0 -32
  34. data/ext/spidermonkey/extensions.c +0 -37
  35. data/ext/spidermonkey/extensions.h +0 -12
  36. data/ext/spidermonkey/global.c +0 -40
  37. data/ext/spidermonkey/global.h +0 -11
  38. data/ext/spidermonkey/idhash.c +0 -16
  39. data/ext/spidermonkey/idhash.h +0 -8
  40. data/ext/spidermonkey/immutable_node.c +0 -1153
  41. data/ext/spidermonkey/immutable_node.c.erb +0 -523
  42. data/ext/spidermonkey/immutable_node.h +0 -22
  43. data/ext/spidermonkey/jroot.h +0 -197
  44. data/ext/spidermonkey/js_land_proxy.c +0 -620
  45. data/ext/spidermonkey/js_land_proxy.h +0 -20
  46. data/ext/spidermonkey/ruby_land_proxy.c +0 -618
  47. data/ext/spidermonkey/ruby_land_proxy.h +0 -38
  48. data/ext/spidermonkey/runtime.c +0 -396
  49. data/ext/spidermonkey/runtime.h +0 -27
  50. data/ext/spidermonkey/spidermonkey.c +0 -22
  51. data/ext/spidermonkey/spidermonkey.h +0 -29
  52. data/lib/johnson/spidermonkey.rb +0 -12
  53. data/lib/johnson/spidermonkey/context.rb +0 -10
  54. data/lib/johnson/spidermonkey/debugger.rb +0 -67
  55. data/lib/johnson/spidermonkey/immutable_node.rb +0 -282
  56. data/lib/johnson/spidermonkey/js_land_proxy.rb +0 -64
  57. data/lib/johnson/spidermonkey/mutable_tree_visitor.rb +0 -242
  58. data/lib/johnson/spidermonkey/ruby_land_proxy.rb +0 -17
  59. data/lib/johnson/spidermonkey/runtime.rb +0 -74
  60. data/test/johnson/spidermonkey/context_test.rb +0 -21
  61. data/test/johnson/spidermonkey/immutable_node_test.rb +0 -34
  62. data/test/johnson/spidermonkey/js_land_proxy_test.rb +0 -273
  63. data/test/johnson/spidermonkey/ruby_land_proxy_test.rb +0 -274
  64. data/test/johnson/spidermonkey/runtime_test.rb +0 -41
  65. data/vendor/spidermonkey/.cvsignore +0 -9
  66. data/vendor/spidermonkey/Makefile.in +0 -449
  67. data/vendor/spidermonkey/Makefile.ref +0 -365
  68. data/vendor/spidermonkey/README.html +0 -820
  69. data/vendor/spidermonkey/SpiderMonkey.rsp +0 -12
  70. data/vendor/spidermonkey/Y.js +0 -19
  71. data/vendor/spidermonkey/build.mk +0 -43
  72. data/vendor/spidermonkey/config.mk +0 -192
  73. data/vendor/spidermonkey/config/AIX4.1.mk +0 -65
  74. data/vendor/spidermonkey/config/AIX4.2.mk +0 -64
  75. data/vendor/spidermonkey/config/AIX4.3.mk +0 -65
  76. data/vendor/spidermonkey/config/Darwin.mk +0 -83
  77. data/vendor/spidermonkey/config/Darwin1.3.mk +0 -81
  78. data/vendor/spidermonkey/config/Darwin1.4.mk +0 -41
  79. data/vendor/spidermonkey/config/Darwin5.2.mk +0 -81
  80. data/vendor/spidermonkey/config/Darwin5.3.mk +0 -81
  81. data/vendor/spidermonkey/config/HP-UXB.10.10.mk +0 -77
  82. data/vendor/spidermonkey/config/HP-UXB.10.20.mk +0 -77
  83. data/vendor/spidermonkey/config/HP-UXB.11.00.mk +0 -80
  84. data/vendor/spidermonkey/config/IRIX.mk +0 -87
  85. data/vendor/spidermonkey/config/IRIX5.3.mk +0 -44
  86. data/vendor/spidermonkey/config/IRIX6.1.mk +0 -44
  87. data/vendor/spidermonkey/config/IRIX6.2.mk +0 -44
  88. data/vendor/spidermonkey/config/IRIX6.3.mk +0 -44
  89. data/vendor/spidermonkey/config/IRIX6.5.mk +0 -44
  90. data/vendor/spidermonkey/config/Linux_All.mk +0 -103
  91. data/vendor/spidermonkey/config/Mac_OS10.0.mk +0 -82
  92. data/vendor/spidermonkey/config/OSF1V4.0.mk +0 -72
  93. data/vendor/spidermonkey/config/OSF1V5.0.mk +0 -69
  94. data/vendor/spidermonkey/config/SunOS4.1.4.mk +0 -101
  95. data/vendor/spidermonkey/config/SunOS5.10.mk +0 -50
  96. data/vendor/spidermonkey/config/SunOS5.3.mk +0 -91
  97. data/vendor/spidermonkey/config/SunOS5.4.mk +0 -92
  98. data/vendor/spidermonkey/config/SunOS5.5.1.mk +0 -44
  99. data/vendor/spidermonkey/config/SunOS5.5.mk +0 -87
  100. data/vendor/spidermonkey/config/SunOS5.6.mk +0 -89
  101. data/vendor/spidermonkey/config/SunOS5.7.mk +0 -44
  102. data/vendor/spidermonkey/config/SunOS5.8.mk +0 -44
  103. data/vendor/spidermonkey/config/SunOS5.9.mk +0 -44
  104. data/vendor/spidermonkey/config/WINNT4.0.mk +0 -117
  105. data/vendor/spidermonkey/config/WINNT5.0.mk +0 -117
  106. data/vendor/spidermonkey/config/WINNT5.1.mk +0 -117
  107. data/vendor/spidermonkey/config/WINNT5.2.mk +0 -117
  108. data/vendor/spidermonkey/config/WINNT6.0.mk +0 -117
  109. data/vendor/spidermonkey/config/dgux.mk +0 -64
  110. data/vendor/spidermonkey/editline/Makefile.ref +0 -144
  111. data/vendor/spidermonkey/editline/README +0 -83
  112. data/vendor/spidermonkey/editline/editline.3 +0 -175
  113. data/vendor/spidermonkey/editline/editline.c +0 -1369
  114. data/vendor/spidermonkey/editline/editline.h +0 -135
  115. data/vendor/spidermonkey/editline/sysunix.c +0 -182
  116. data/vendor/spidermonkey/editline/unix.h +0 -82
  117. data/vendor/spidermonkey/fdlibm/.cvsignore +0 -7
  118. data/vendor/spidermonkey/fdlibm/Makefile.in +0 -127
  119. data/vendor/spidermonkey/fdlibm/Makefile.ref +0 -192
  120. data/vendor/spidermonkey/fdlibm/e_acos.c +0 -147
  121. data/vendor/spidermonkey/fdlibm/e_acosh.c +0 -105
  122. data/vendor/spidermonkey/fdlibm/e_asin.c +0 -156
  123. data/vendor/spidermonkey/fdlibm/e_atan2.c +0 -165
  124. data/vendor/spidermonkey/fdlibm/e_atanh.c +0 -110
  125. data/vendor/spidermonkey/fdlibm/e_cosh.c +0 -133
  126. data/vendor/spidermonkey/fdlibm/e_exp.c +0 -202
  127. data/vendor/spidermonkey/fdlibm/e_fmod.c +0 -184
  128. data/vendor/spidermonkey/fdlibm/e_gamma.c +0 -71
  129. data/vendor/spidermonkey/fdlibm/e_gamma_r.c +0 -70
  130. data/vendor/spidermonkey/fdlibm/e_hypot.c +0 -173
  131. data/vendor/spidermonkey/fdlibm/e_j0.c +0 -524
  132. data/vendor/spidermonkey/fdlibm/e_j1.c +0 -523
  133. data/vendor/spidermonkey/fdlibm/e_jn.c +0 -315
  134. data/vendor/spidermonkey/fdlibm/e_lgamma.c +0 -71
  135. data/vendor/spidermonkey/fdlibm/e_lgamma_r.c +0 -347
  136. data/vendor/spidermonkey/fdlibm/e_log.c +0 -184
  137. data/vendor/spidermonkey/fdlibm/e_log10.c +0 -134
  138. data/vendor/spidermonkey/fdlibm/e_pow.c +0 -386
  139. data/vendor/spidermonkey/fdlibm/e_rem_pio2.c +0 -222
  140. data/vendor/spidermonkey/fdlibm/e_remainder.c +0 -120
  141. data/vendor/spidermonkey/fdlibm/e_scalb.c +0 -89
  142. data/vendor/spidermonkey/fdlibm/e_sinh.c +0 -122
  143. data/vendor/spidermonkey/fdlibm/e_sqrt.c +0 -497
  144. data/vendor/spidermonkey/fdlibm/fdlibm.h +0 -273
  145. data/vendor/spidermonkey/fdlibm/fdlibm.mak +0 -1453
  146. data/vendor/spidermonkey/fdlibm/fdlibm.mdp +0 -0
  147. data/vendor/spidermonkey/fdlibm/k_cos.c +0 -135
  148. data/vendor/spidermonkey/fdlibm/k_rem_pio2.c +0 -354
  149. data/vendor/spidermonkey/fdlibm/k_sin.c +0 -114
  150. data/vendor/spidermonkey/fdlibm/k_standard.c +0 -785
  151. data/vendor/spidermonkey/fdlibm/k_tan.c +0 -170
  152. data/vendor/spidermonkey/fdlibm/s_asinh.c +0 -101
  153. data/vendor/spidermonkey/fdlibm/s_atan.c +0 -175
  154. data/vendor/spidermonkey/fdlibm/s_cbrt.c +0 -133
  155. data/vendor/spidermonkey/fdlibm/s_ceil.c +0 -120
  156. data/vendor/spidermonkey/fdlibm/s_copysign.c +0 -72
  157. data/vendor/spidermonkey/fdlibm/s_cos.c +0 -118
  158. data/vendor/spidermonkey/fdlibm/s_erf.c +0 -356
  159. data/vendor/spidermonkey/fdlibm/s_expm1.c +0 -267
  160. data/vendor/spidermonkey/fdlibm/s_fabs.c +0 -70
  161. data/vendor/spidermonkey/fdlibm/s_finite.c +0 -71
  162. data/vendor/spidermonkey/fdlibm/s_floor.c +0 -121
  163. data/vendor/spidermonkey/fdlibm/s_frexp.c +0 -99
  164. data/vendor/spidermonkey/fdlibm/s_ilogb.c +0 -85
  165. data/vendor/spidermonkey/fdlibm/s_isnan.c +0 -74
  166. data/vendor/spidermonkey/fdlibm/s_ldexp.c +0 -66
  167. data/vendor/spidermonkey/fdlibm/s_lib_version.c +0 -73
  168. data/vendor/spidermonkey/fdlibm/s_log1p.c +0 -211
  169. data/vendor/spidermonkey/fdlibm/s_logb.c +0 -79
  170. data/vendor/spidermonkey/fdlibm/s_matherr.c +0 -64
  171. data/vendor/spidermonkey/fdlibm/s_modf.c +0 -132
  172. data/vendor/spidermonkey/fdlibm/s_nextafter.c +0 -124
  173. data/vendor/spidermonkey/fdlibm/s_rint.c +0 -131
  174. data/vendor/spidermonkey/fdlibm/s_scalbn.c +0 -107
  175. data/vendor/spidermonkey/fdlibm/s_signgam.c +0 -40
  176. data/vendor/spidermonkey/fdlibm/s_significand.c +0 -68
  177. data/vendor/spidermonkey/fdlibm/s_sin.c +0 -118
  178. data/vendor/spidermonkey/fdlibm/s_tan.c +0 -112
  179. data/vendor/spidermonkey/fdlibm/s_tanh.c +0 -122
  180. data/vendor/spidermonkey/fdlibm/w_acos.c +0 -78
  181. data/vendor/spidermonkey/fdlibm/w_acosh.c +0 -78
  182. data/vendor/spidermonkey/fdlibm/w_asin.c +0 -80
  183. data/vendor/spidermonkey/fdlibm/w_atan2.c +0 -79
  184. data/vendor/spidermonkey/fdlibm/w_atanh.c +0 -81
  185. data/vendor/spidermonkey/fdlibm/w_cosh.c +0 -77
  186. data/vendor/spidermonkey/fdlibm/w_exp.c +0 -88
  187. data/vendor/spidermonkey/fdlibm/w_fmod.c +0 -78
  188. data/vendor/spidermonkey/fdlibm/w_gamma.c +0 -85
  189. data/vendor/spidermonkey/fdlibm/w_gamma_r.c +0 -81
  190. data/vendor/spidermonkey/fdlibm/w_hypot.c +0 -78
  191. data/vendor/spidermonkey/fdlibm/w_j0.c +0 -105
  192. data/vendor/spidermonkey/fdlibm/w_j1.c +0 -106
  193. data/vendor/spidermonkey/fdlibm/w_jn.c +0 -128
  194. data/vendor/spidermonkey/fdlibm/w_lgamma.c +0 -85
  195. data/vendor/spidermonkey/fdlibm/w_lgamma_r.c +0 -81
  196. data/vendor/spidermonkey/fdlibm/w_log.c +0 -78
  197. data/vendor/spidermonkey/fdlibm/w_log10.c +0 -81
  198. data/vendor/spidermonkey/fdlibm/w_pow.c +0 -99
  199. data/vendor/spidermonkey/fdlibm/w_remainder.c +0 -77
  200. data/vendor/spidermonkey/fdlibm/w_scalb.c +0 -95
  201. data/vendor/spidermonkey/fdlibm/w_sinh.c +0 -77
  202. data/vendor/spidermonkey/fdlibm/w_sqrt.c +0 -77
  203. data/vendor/spidermonkey/javascript-trace.d +0 -73
  204. data/vendor/spidermonkey/js.c +0 -3951
  205. data/vendor/spidermonkey/js.mdp +0 -0
  206. data/vendor/spidermonkey/js.msg +0 -308
  207. data/vendor/spidermonkey/js.pkg +0 -2
  208. data/vendor/spidermonkey/js3240.rc +0 -79
  209. data/vendor/spidermonkey/jsOS240.def +0 -654
  210. data/vendor/spidermonkey/jsapi.c +0 -5836
  211. data/vendor/spidermonkey/jsapi.h +0 -2624
  212. data/vendor/spidermonkey/jsarena.c +0 -450
  213. data/vendor/spidermonkey/jsarena.h +0 -318
  214. data/vendor/spidermonkey/jsarray.c +0 -2996
  215. data/vendor/spidermonkey/jsarray.h +0 -127
  216. data/vendor/spidermonkey/jsatom.c +0 -1045
  217. data/vendor/spidermonkey/jsatom.h +0 -442
  218. data/vendor/spidermonkey/jsbit.h +0 -253
  219. data/vendor/spidermonkey/jsbool.c +0 -176
  220. data/vendor/spidermonkey/jsbool.h +0 -73
  221. data/vendor/spidermonkey/jsclist.h +0 -139
  222. data/vendor/spidermonkey/jscntxt.c +0 -1348
  223. data/vendor/spidermonkey/jscntxt.h +0 -1120
  224. data/vendor/spidermonkey/jscompat.h +0 -57
  225. data/vendor/spidermonkey/jsconfig.h +0 -248
  226. data/vendor/spidermonkey/jsconfig.mk +0 -181
  227. data/vendor/spidermonkey/jscpucfg.c +0 -396
  228. data/vendor/spidermonkey/jscpucfg.h +0 -212
  229. data/vendor/spidermonkey/jsdate.c +0 -2390
  230. data/vendor/spidermonkey/jsdate.h +0 -124
  231. data/vendor/spidermonkey/jsdbgapi.c +0 -1802
  232. data/vendor/spidermonkey/jsdbgapi.h +0 -464
  233. data/vendor/spidermonkey/jsdhash.c +0 -868
  234. data/vendor/spidermonkey/jsdhash.h +0 -592
  235. data/vendor/spidermonkey/jsdtoa.c +0 -3167
  236. data/vendor/spidermonkey/jsdtoa.h +0 -130
  237. data/vendor/spidermonkey/jsdtracef.c +0 -317
  238. data/vendor/spidermonkey/jsdtracef.h +0 -77
  239. data/vendor/spidermonkey/jsemit.c +0 -6909
  240. data/vendor/spidermonkey/jsemit.h +0 -741
  241. data/vendor/spidermonkey/jsexn.c +0 -1371
  242. data/vendor/spidermonkey/jsexn.h +0 -96
  243. data/vendor/spidermonkey/jsfile.c +0 -2736
  244. data/vendor/spidermonkey/jsfile.h +0 -56
  245. data/vendor/spidermonkey/jsfile.msg +0 -90
  246. data/vendor/spidermonkey/jsfun.c +0 -2634
  247. data/vendor/spidermonkey/jsfun.h +0 -254
  248. data/vendor/spidermonkey/jsgc.c +0 -3562
  249. data/vendor/spidermonkey/jsgc.h +0 -403
  250. data/vendor/spidermonkey/jshash.c +0 -476
  251. data/vendor/spidermonkey/jshash.h +0 -151
  252. data/vendor/spidermonkey/jsify.pl +0 -485
  253. data/vendor/spidermonkey/jsinterp.c +0 -7007
  254. data/vendor/spidermonkey/jsinterp.h +0 -525
  255. data/vendor/spidermonkey/jsinvoke.c +0 -43
  256. data/vendor/spidermonkey/jsiter.c +0 -1067
  257. data/vendor/spidermonkey/jsiter.h +0 -122
  258. data/vendor/spidermonkey/jskeyword.tbl +0 -124
  259. data/vendor/spidermonkey/jskwgen.c +0 -460
  260. data/vendor/spidermonkey/jslibmath.h +0 -266
  261. data/vendor/spidermonkey/jslock.c +0 -1309
  262. data/vendor/spidermonkey/jslock.h +0 -313
  263. data/vendor/spidermonkey/jslocko.asm +0 -60
  264. data/vendor/spidermonkey/jslog2.c +0 -94
  265. data/vendor/spidermonkey/jslong.c +0 -264
  266. data/vendor/spidermonkey/jslong.h +0 -412
  267. data/vendor/spidermonkey/jsmath.c +0 -567
  268. data/vendor/spidermonkey/jsmath.h +0 -57
  269. data/vendor/spidermonkey/jsnum.c +0 -1239
  270. data/vendor/spidermonkey/jsnum.h +0 -283
  271. data/vendor/spidermonkey/jsobj.c +0 -5282
  272. data/vendor/spidermonkey/jsobj.h +0 -709
  273. data/vendor/spidermonkey/jsopcode.c +0 -5245
  274. data/vendor/spidermonkey/jsopcode.h +0 -394
  275. data/vendor/spidermonkey/jsopcode.tbl +0 -523
  276. data/vendor/spidermonkey/jsotypes.h +0 -202
  277. data/vendor/spidermonkey/jsparse.c +0 -6704
  278. data/vendor/spidermonkey/jsparse.h +0 -511
  279. data/vendor/spidermonkey/jsprf.c +0 -1264
  280. data/vendor/spidermonkey/jsprf.h +0 -150
  281. data/vendor/spidermonkey/jsproto.tbl +0 -128
  282. data/vendor/spidermonkey/jsprvtd.h +0 -267
  283. data/vendor/spidermonkey/jspubtd.h +0 -744
  284. data/vendor/spidermonkey/jsregexp.c +0 -4364
  285. data/vendor/spidermonkey/jsregexp.h +0 -183
  286. data/vendor/spidermonkey/jsreops.tbl +0 -145
  287. data/vendor/spidermonkey/jsscan.c +0 -2012
  288. data/vendor/spidermonkey/jsscan.h +0 -387
  289. data/vendor/spidermonkey/jsscope.c +0 -1957
  290. data/vendor/spidermonkey/jsscope.h +0 -418
  291. data/vendor/spidermonkey/jsscript.c +0 -1832
  292. data/vendor/spidermonkey/jsscript.h +0 -287
  293. data/vendor/spidermonkey/jsshell.msg +0 -50
  294. data/vendor/spidermonkey/jsstddef.h +0 -83
  295. data/vendor/spidermonkey/jsstr.c +0 -5005
  296. data/vendor/spidermonkey/jsstr.h +0 -641
  297. data/vendor/spidermonkey/jstypes.h +0 -475
  298. data/vendor/spidermonkey/jsutil.c +0 -345
  299. data/vendor/spidermonkey/jsutil.h +0 -157
  300. data/vendor/spidermonkey/jsxdrapi.c +0 -800
  301. data/vendor/spidermonkey/jsxdrapi.h +0 -218
  302. data/vendor/spidermonkey/jsxml.c +0 -8476
  303. data/vendor/spidermonkey/jsxml.h +0 -349
  304. data/vendor/spidermonkey/lock_SunOS.s +0 -119
  305. data/vendor/spidermonkey/perfect.js +0 -39
  306. data/vendor/spidermonkey/plify_jsdhash.sed +0 -36
  307. data/vendor/spidermonkey/prmjtime.c +0 -846
  308. data/vendor/spidermonkey/prmjtime.h +0 -103
  309. data/vendor/spidermonkey/resource.h +0 -15
  310. data/vendor/spidermonkey/rules.mk +0 -197
  311. data/vendor/spidermonkey/win32.order +0 -384
@@ -1,4364 +0,0 @@
1
- /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2
- * vim: set sw=4 ts=8 et tw=78:
3
- *
4
- * ***** BEGIN LICENSE BLOCK *****
5
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
- *
7
- * The contents of this file are subject to the Mozilla Public License Version
8
- * 1.1 (the "License"); you may not use this file except in compliance with
9
- * the License. You may obtain a copy of the License at
10
- * http://www.mozilla.org/MPL/
11
- *
12
- * Software distributed under the License is distributed on an "AS IS" basis,
13
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
- * for the specific language governing rights and limitations under the
15
- * License.
16
- *
17
- * The Original Code is Mozilla Communicator client code, released
18
- * March 31, 1998.
19
- *
20
- * The Initial Developer of the Original Code is
21
- * Netscape Communications Corporation.
22
- * Portions created by the Initial Developer are Copyright (C) 1998
23
- * the Initial Developer. All Rights Reserved.
24
- *
25
- * Contributor(s):
26
- *
27
- * Alternatively, the contents of this file may be used under the terms of
28
- * either of the GNU General Public License Version 2 or later (the "GPL"),
29
- * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
- * in which case the provisions of the GPL or the LGPL are applicable instead
31
- * of those above. If you wish to allow use of your version of this file only
32
- * under the terms of either the GPL or the LGPL, and not to allow others to
33
- * use your version of this file under the terms of the MPL, indicate your
34
- * decision by deleting the provisions above and replace them with the notice
35
- * and other provisions required by the GPL or the LGPL. If you do not delete
36
- * the provisions above, a recipient may use your version of this file under
37
- * the terms of any one of the MPL, the GPL or the LGPL.
38
- *
39
- * ***** END LICENSE BLOCK ***** */
40
-
41
- /*
42
- * JS regular expressions, after Perl.
43
- */
44
- #include "jsstddef.h"
45
- #include <stdlib.h>
46
- #include <string.h>
47
- #include <stdarg.h>
48
- #include "jstypes.h"
49
- #include "jsarena.h" /* Added by JSIFY */
50
- #include "jsutil.h" /* Added by JSIFY */
51
- #include "jsapi.h"
52
- #include "jsarray.h"
53
- #include "jsatom.h"
54
- #include "jscntxt.h"
55
- #include "jsconfig.h"
56
- #include "jsfun.h"
57
- #include "jsgc.h"
58
- #include "jsinterp.h"
59
- #include "jslock.h"
60
- #include "jsnum.h"
61
- #include "jsobj.h"
62
- #include "jsopcode.h"
63
- #include "jsregexp.h"
64
- #include "jsscan.h"
65
- #include "jsscope.h"
66
- #include "jsstr.h"
67
-
68
- typedef enum REOp {
69
- #define REOP_DEF(opcode, name) opcode,
70
- #include "jsreops.tbl"
71
- #undef REOP_DEF
72
- REOP_LIMIT /* META: no operator >= to this */
73
- } REOp;
74
-
75
- #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
76
-
77
- #ifdef REGEXP_DEBUG
78
- const char *reop_names[] = {
79
- #define REOP_DEF(opcode, name) name,
80
- #include "jsreops.tbl"
81
- #undef REOP_DEF
82
- NULL
83
- };
84
- #endif
85
-
86
- #ifdef __GNUC__
87
- static int
88
- re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
89
- #endif
90
-
91
- #ifdef REGEXP_DEBUG
92
- static int
93
- re_debug(const char *fmt, ...)
94
- {
95
- va_list ap;
96
- int retval;
97
-
98
- va_start(ap, fmt);
99
- retval = vprintf(fmt, ap);
100
- va_end(ap);
101
- return retval;
102
- }
103
-
104
- static void
105
- re_debug_chars(const jschar *chrs, size_t length)
106
- {
107
- int i = 0;
108
-
109
- printf(" \"");
110
- while (*chrs && i++ < length) {
111
- putchar((char)*chrs++);
112
- }
113
- printf("\"");
114
- }
115
- #else /* !REGEXP_DEBUG */
116
- /* This should be optimized to a no-op by our tier-1 compilers. */
117
- static int
118
- re_debug(const char *fmt, ...)
119
- {
120
- return 0;
121
- }
122
-
123
- static void
124
- re_debug_chars(const jschar *chrs, size_t length)
125
- {
126
- }
127
- #endif /* !REGEXP_DEBUG */
128
-
129
- struct RENode {
130
- REOp op; /* r.e. op bytecode */
131
- RENode *next; /* next in concatenation order */
132
- void *kid; /* first operand */
133
- union {
134
- void *kid2; /* second operand */
135
- jsint num; /* could be a number */
136
- size_t parenIndex; /* or a parenthesis index */
137
- struct { /* or a quantifier range */
138
- uintN min;
139
- uintN max;
140
- JSPackedBool greedy;
141
- } range;
142
- struct { /* or a character class */
143
- size_t startIndex;
144
- size_t kidlen; /* length of string at kid, in jschars */
145
- size_t index; /* index into class list */
146
- uint16 bmsize; /* bitmap size, based on max char code */
147
- JSPackedBool sense;
148
- } ucclass;
149
- struct { /* or a literal sequence */
150
- jschar chr; /* of one character */
151
- size_t length; /* or many (via the kid) */
152
- } flat;
153
- struct {
154
- RENode *kid2; /* second operand from ALT */
155
- jschar ch1; /* match char for ALTPREREQ */
156
- jschar ch2; /* ditto, or class index for ALTPREREQ2 */
157
- } altprereq;
158
- } u;
159
- };
160
-
161
- #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
162
- ((c >= 'a') && (c <= 'z')) )
163
- #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
164
- (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
165
-
166
- #define CLASS_CACHE_SIZE 4
167
-
168
- typedef struct CompilerState {
169
- JSContext *context;
170
- JSTokenStream *tokenStream; /* For reporting errors */
171
- const jschar *cpbegin;
172
- const jschar *cpend;
173
- const jschar *cp;
174
- size_t parenCount;
175
- size_t classCount; /* number of [] encountered */
176
- size_t treeDepth; /* maximum depth of parse tree */
177
- size_t progLength; /* estimated bytecode length */
178
- RENode *result;
179
- size_t classBitmapsMem; /* memory to hold all class bitmaps */
180
- struct {
181
- const jschar *start; /* small cache of class strings */
182
- size_t length; /* since they're often the same */
183
- size_t index;
184
- } classCache[CLASS_CACHE_SIZE];
185
- uint16 flags;
186
- } CompilerState;
187
-
188
- typedef struct EmitStateStackEntry {
189
- jsbytecode *altHead; /* start of REOP_ALT* opcode */
190
- jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
191
- jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
192
- jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
193
- RENode *continueNode; /* original REOP_ALT* node being stacked */
194
- jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
195
- JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
196
- avoid 16-bit unsigned offset overflow */
197
- } EmitStateStackEntry;
198
-
199
- /*
200
- * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
201
- * the getters and setters take the pc of the offset, not of the opcode before
202
- * the offset.
203
- */
204
- #define ARG_LEN 2
205
- #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
206
- #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
207
- (pc)[1] = (jsbytecode) (arg))
208
-
209
- #define OFFSET_LEN ARG_LEN
210
- #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
211
- #define GET_OFFSET(pc) GET_ARG(pc)
212
-
213
- /*
214
- * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
215
- * For sanity, we limit it to 2^24 bytes.
216
- */
217
- #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
218
-
219
- /*
220
- * The maximum memory that can be allocated for class bitmaps.
221
- * For sanity, we limit it to 2^24 bytes.
222
- */
223
- #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
224
-
225
- /*
226
- * Functions to get size and write/read bytecode that represent small indexes
227
- * compactly.
228
- * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
229
- * indicates that the following byte brings more bits to the index. Otherwise
230
- * this is the last byte in the index bytecode representing highest index bits.
231
- */
232
- static size_t
233
- GetCompactIndexWidth(size_t index)
234
- {
235
- size_t width;
236
-
237
- for (width = 1; (index >>= 7) != 0; ++width) { }
238
- return width;
239
- }
240
-
241
- static JS_INLINE jsbytecode *
242
- WriteCompactIndex(jsbytecode *pc, size_t index)
243
- {
244
- size_t next;
245
-
246
- while ((next = index >> 7) != 0) {
247
- *pc++ = (jsbytecode)(index | 0x80);
248
- index = next;
249
- }
250
- *pc++ = (jsbytecode)index;
251
- return pc;
252
- }
253
-
254
- static JS_INLINE jsbytecode *
255
- ReadCompactIndex(jsbytecode *pc, size_t *result)
256
- {
257
- size_t nextByte;
258
-
259
- nextByte = *pc++;
260
- if ((nextByte & 0x80) == 0) {
261
- /*
262
- * Short-circuit the most common case when compact index <= 127.
263
- */
264
- *result = nextByte;
265
- } else {
266
- size_t shift = 7;
267
- *result = 0x7F & nextByte;
268
- do {
269
- nextByte = *pc++;
270
- *result |= (nextByte & 0x7F) << shift;
271
- shift += 7;
272
- } while ((nextByte & 0x80) != 0);
273
- }
274
- return pc;
275
- }
276
-
277
- typedef struct RECapture {
278
- ptrdiff_t index; /* start of contents, -1 for empty */
279
- size_t length; /* length of capture */
280
- } RECapture;
281
-
282
- typedef struct REMatchState {
283
- const jschar *cp;
284
- RECapture parens[1]; /* first of 're->parenCount' captures,
285
- allocated at end of this struct */
286
- } REMatchState;
287
-
288
- struct REBackTrackData;
289
-
290
- typedef struct REProgState {
291
- jsbytecode *continue_pc; /* current continuation data */
292
- jsbytecode continue_op;
293
- ptrdiff_t index; /* progress in text */
294
- size_t parenSoFar; /* highest indexed paren started */
295
- union {
296
- struct {
297
- uintN min; /* current quantifier limits */
298
- uintN max;
299
- } quantifier;
300
- struct {
301
- size_t top; /* backtrack stack state */
302
- size_t sz;
303
- } assertion;
304
- } u;
305
- } REProgState;
306
-
307
- typedef struct REBackTrackData {
308
- size_t sz; /* size of previous stack entry */
309
- jsbytecode *backtrack_pc; /* where to backtrack to */
310
- jsbytecode backtrack_op;
311
- const jschar *cp; /* index in text of match at backtrack */
312
- size_t parenIndex; /* start index of saved paren contents */
313
- size_t parenCount; /* # of saved paren contents */
314
- size_t saveStateStackTop; /* number of parent states */
315
- /* saved parent states follow */
316
- /* saved paren contents follow */
317
- } REBackTrackData;
318
-
319
- #define INITIAL_STATESTACK 100
320
- #define INITIAL_BACKTRACK 8000
321
-
322
- typedef struct REGlobalData {
323
- JSContext *cx;
324
- JSRegExp *regexp; /* the RE in execution */
325
- JSBool ok; /* runtime error (out_of_memory only?) */
326
- size_t start; /* offset to start at */
327
- ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
328
- const jschar *cpbegin; /* text base address */
329
- const jschar *cpend; /* text limit address */
330
-
331
- REProgState *stateStack; /* stack of state of current parents */
332
- size_t stateStackTop;
333
- size_t stateStackLimit;
334
-
335
- REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
336
- REBackTrackData *backTrackSP;
337
- size_t backTrackStackSize;
338
- size_t cursz; /* size of current stack entry */
339
- size_t backTrackCount; /* how many times we've backtracked */
340
- size_t backTrackLimit; /* upper limit on backtrack states */
341
-
342
- JSArenaPool pool; /* It's faster to use one malloc'd pool
343
- than to malloc/free the three items
344
- that are allocated from this pool */
345
- } REGlobalData;
346
-
347
- /*
348
- * 1. If IgnoreCase is false, return ch.
349
- * 2. Let u be ch converted to upper case as if by calling
350
- * String.prototype.toUpperCase on the one-character string ch.
351
- * 3. If u does not consist of a single character, return ch.
352
- * 4. Let cu be u's character.
353
- * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
354
- * code point value is less than decimal 128, then return ch.
355
- * 6. Return cu.
356
- */
357
- static JS_INLINE uintN
358
- upcase(uintN ch)
359
- {
360
- uintN cu;
361
-
362
- JS_ASSERT((uintN) (jschar) ch == ch);
363
- if (ch < 128) {
364
- if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
365
- ch -= (uintN) ('a' - 'A');
366
- return ch;
367
- }
368
-
369
- cu = JS_TOUPPER(ch);
370
- return (cu < 128) ? ch : cu;
371
- }
372
-
373
- static JS_INLINE uintN
374
- downcase(uintN ch)
375
- {
376
- JS_ASSERT((uintN) (jschar) ch == ch);
377
- if (ch < 128) {
378
- if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
379
- ch += (uintN) ('a' - 'A');
380
- return ch;
381
- }
382
-
383
- return JS_TOLOWER(ch);
384
- }
385
-
386
- /* Construct and initialize an RENode, returning NULL for out-of-memory */
387
- static RENode *
388
- NewRENode(CompilerState *state, REOp op)
389
- {
390
- JSContext *cx;
391
- RENode *ren;
392
-
393
- cx = state->context;
394
- JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
395
- if (!ren) {
396
- js_ReportOutOfScriptQuota(cx);
397
- return NULL;
398
- }
399
- ren->op = op;
400
- ren->next = NULL;
401
- ren->kid = NULL;
402
- return ren;
403
- }
404
-
405
- /*
406
- * Validates and converts hex ascii value.
407
- */
408
- static JSBool
409
- isASCIIHexDigit(jschar c, uintN *digit)
410
- {
411
- uintN cv = c;
412
-
413
- if (cv < '0')
414
- return JS_FALSE;
415
- if (cv <= '9') {
416
- *digit = cv - '0';
417
- return JS_TRUE;
418
- }
419
- cv |= 0x20;
420
- if (cv >= 'a' && cv <= 'f') {
421
- *digit = cv - 'a' + 10;
422
- return JS_TRUE;
423
- }
424
- return JS_FALSE;
425
- }
426
-
427
-
428
- typedef struct {
429
- REOp op;
430
- const jschar *errPos;
431
- size_t parenIndex;
432
- } REOpData;
433
-
434
- static JSBool
435
- ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
436
- const jschar *arg)
437
- {
438
- if (state->tokenStream) {
439
- return js_ReportCompileErrorNumber(state->context, state->tokenStream,
440
- NULL, JSREPORT_UC | flags,
441
- errorNumber, arg);
442
- }
443
- return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
444
- js_GetErrorMessage, NULL,
445
- errorNumber, arg);
446
- }
447
-
448
- static JSBool
449
- ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
450
- {
451
- return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
452
- }
453
-
454
- /*
455
- * Process the op against the two top operands, reducing them to a single
456
- * operand in the penultimate slot. Update progLength and treeDepth.
457
- */
458
- static JSBool
459
- ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
460
- intN operandSP)
461
- {
462
- RENode *result;
463
-
464
- switch (opData->op) {
465
- case REOP_ALT:
466
- result = NewRENode(state, REOP_ALT);
467
- if (!result)
468
- return JS_FALSE;
469
- result->kid = operandStack[operandSP - 2];
470
- result->u.kid2 = operandStack[operandSP - 1];
471
- operandStack[operandSP - 2] = result;
472
-
473
- if (state->treeDepth == TREE_DEPTH_MAX) {
474
- ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
475
- return JS_FALSE;
476
- }
477
- ++state->treeDepth;
478
-
479
- /*
480
- * Look at both alternates to see if there's a FLAT or a CLASS at
481
- * the start of each. If so, use a prerequisite match.
482
- */
483
- if (((RENode *) result->kid)->op == REOP_FLAT &&
484
- ((RENode *) result->u.kid2)->op == REOP_FLAT &&
485
- (state->flags & JSREG_FOLD) == 0) {
486
- result->op = REOP_ALTPREREQ;
487
- result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
488
- result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
489
- /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
490
- JUMP, <end> ... ENDALT */
491
- state->progLength += 13;
492
- }
493
- else
494
- if (((RENode *) result->kid)->op == REOP_CLASS &&
495
- ((RENode *) result->kid)->u.ucclass.index < 256 &&
496
- ((RENode *) result->u.kid2)->op == REOP_FLAT &&
497
- (state->flags & JSREG_FOLD) == 0) {
498
- result->op = REOP_ALTPREREQ2;
499
- result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
500
- result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
501
- /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
502
- JUMP, <end> ... ENDALT */
503
- state->progLength += 13;
504
- }
505
- else
506
- if (((RENode *) result->kid)->op == REOP_FLAT &&
507
- ((RENode *) result->u.kid2)->op == REOP_CLASS &&
508
- ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
509
- (state->flags & JSREG_FOLD) == 0) {
510
- result->op = REOP_ALTPREREQ2;
511
- result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
512
- result->u.altprereq.ch2 =
513
- ((RENode *) result->u.kid2)->u.ucclass.index;
514
- /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
515
- JUMP, <end> ... ENDALT */
516
- state->progLength += 13;
517
- }
518
- else {
519
- /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
520
- state->progLength += 7;
521
- }
522
- break;
523
-
524
- case REOP_CONCAT:
525
- result = operandStack[operandSP - 2];
526
- while (result->next)
527
- result = result->next;
528
- result->next = operandStack[operandSP - 1];
529
- break;
530
-
531
- case REOP_ASSERT:
532
- case REOP_ASSERT_NOT:
533
- case REOP_LPARENNON:
534
- case REOP_LPAREN:
535
- /* These should have been processed by a close paren. */
536
- ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
537
- opData->errPos);
538
- return JS_FALSE;
539
-
540
- default:;
541
- }
542
- return JS_TRUE;
543
- }
544
-
545
- /*
546
- * Parser forward declarations.
547
- */
548
- static JSBool ParseTerm(CompilerState *state);
549
- static JSBool ParseQuantifier(CompilerState *state);
550
- static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
551
-
552
- /*
553
- * Top-down regular expression grammar, based closely on Perl4.
554
- *
555
- * regexp: altern A regular expression is one or more
556
- * altern '|' regexp alternatives separated by vertical bar.
557
- */
558
- #define INITIAL_STACK_SIZE 128
559
-
560
- static JSBool
561
- ParseRegExp(CompilerState *state)
562
- {
563
- size_t parenIndex;
564
- RENode *operand;
565
- REOpData *operatorStack;
566
- RENode **operandStack;
567
- REOp op;
568
- intN i;
569
- JSBool result = JS_FALSE;
570
-
571
- intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
572
- intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
573
-
574
- /* Watch out for empty regexp */
575
- if (state->cp == state->cpend) {
576
- state->result = NewRENode(state, REOP_EMPTY);
577
- return (state->result != NULL);
578
- }
579
-
580
- operatorStack = (REOpData *)
581
- JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
582
- if (!operatorStack)
583
- return JS_FALSE;
584
-
585
- operandStack = (RENode **)
586
- JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
587
- if (!operandStack)
588
- goto out;
589
-
590
- for (;;) {
591
- parenIndex = state->parenCount;
592
- if (state->cp == state->cpend) {
593
- /*
594
- * If we are at the end of the regexp and we're short one or more
595
- * operands, the regexp must have the form /x|/ or some such, with
596
- * left parentheses making us short more than one operand.
597
- */
598
- if (operatorSP >= operandSP) {
599
- operand = NewRENode(state, REOP_EMPTY);
600
- if (!operand)
601
- goto out;
602
- goto pushOperand;
603
- }
604
- } else {
605
- switch (*state->cp) {
606
- case '(':
607
- ++state->cp;
608
- if (state->cp + 1 < state->cpend &&
609
- *state->cp == '?' &&
610
- (state->cp[1] == '=' ||
611
- state->cp[1] == '!' ||
612
- state->cp[1] == ':')) {
613
- switch (state->cp[1]) {
614
- case '=':
615
- op = REOP_ASSERT;
616
- /* ASSERT, <next>, ... ASSERTTEST */
617
- state->progLength += 4;
618
- break;
619
- case '!':
620
- op = REOP_ASSERT_NOT;
621
- /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
622
- state->progLength += 4;
623
- break;
624
- default:
625
- op = REOP_LPARENNON;
626
- break;
627
- }
628
- state->cp += 2;
629
- } else {
630
- op = REOP_LPAREN;
631
- /* LPAREN, <index>, ... RPAREN, <index> */
632
- state->progLength
633
- += 2 * (1 + GetCompactIndexWidth(parenIndex));
634
- state->parenCount++;
635
- if (state->parenCount == 65535) {
636
- ReportRegExpError(state, JSREPORT_ERROR,
637
- JSMSG_TOO_MANY_PARENS);
638
- goto out;
639
- }
640
- }
641
- goto pushOperator;
642
-
643
- case ')':
644
- /*
645
- * If there's no stacked open parenthesis, throw syntax error.
646
- */
647
- for (i = operatorSP - 1; ; i--) {
648
- if (i < 0) {
649
- ReportRegExpError(state, JSREPORT_ERROR,
650
- JSMSG_UNMATCHED_RIGHT_PAREN);
651
- goto out;
652
- }
653
- if (operatorStack[i].op == REOP_ASSERT ||
654
- operatorStack[i].op == REOP_ASSERT_NOT ||
655
- operatorStack[i].op == REOP_LPARENNON ||
656
- operatorStack[i].op == REOP_LPAREN) {
657
- break;
658
- }
659
- }
660
- /* FALL THROUGH */
661
-
662
- case '|':
663
- /* Expected an operand before these, so make an empty one */
664
- operand = NewRENode(state, REOP_EMPTY);
665
- if (!operand)
666
- goto out;
667
- goto pushOperand;
668
-
669
- default:
670
- if (!ParseTerm(state))
671
- goto out;
672
- operand = state->result;
673
- pushOperand:
674
- if (operandSP == operandStackSize) {
675
- RENode **tmp;
676
- operandStackSize += operandStackSize;
677
- tmp = (RENode **)
678
- JS_realloc(state->context, operandStack,
679
- sizeof(RENode *) * operandStackSize);
680
- if (!tmp)
681
- goto out;
682
- operandStack = tmp;
683
- }
684
- operandStack[operandSP++] = operand;
685
- break;
686
- }
687
- }
688
-
689
- /* At the end; process remaining operators. */
690
- restartOperator:
691
- if (state->cp == state->cpend) {
692
- while (operatorSP) {
693
- --operatorSP;
694
- if (!ProcessOp(state, &operatorStack[operatorSP],
695
- operandStack, operandSP))
696
- goto out;
697
- --operandSP;
698
- }
699
- JS_ASSERT(operandSP == 1);
700
- state->result = operandStack[0];
701
- result = JS_TRUE;
702
- goto out;
703
- }
704
-
705
- switch (*state->cp) {
706
- case '|':
707
- /* Process any stacked 'concat' operators */
708
- ++state->cp;
709
- while (operatorSP &&
710
- operatorStack[operatorSP - 1].op == REOP_CONCAT) {
711
- --operatorSP;
712
- if (!ProcessOp(state, &operatorStack[operatorSP],
713
- operandStack, operandSP)) {
714
- goto out;
715
- }
716
- --operandSP;
717
- }
718
- op = REOP_ALT;
719
- goto pushOperator;
720
-
721
- case ')':
722
- /*
723
- * If there's no stacked open parenthesis, throw syntax error.
724
- */
725
- for (i = operatorSP - 1; ; i--) {
726
- if (i < 0) {
727
- ReportRegExpError(state, JSREPORT_ERROR,
728
- JSMSG_UNMATCHED_RIGHT_PAREN);
729
- goto out;
730
- }
731
- if (operatorStack[i].op == REOP_ASSERT ||
732
- operatorStack[i].op == REOP_ASSERT_NOT ||
733
- operatorStack[i].op == REOP_LPARENNON ||
734
- operatorStack[i].op == REOP_LPAREN) {
735
- break;
736
- }
737
- }
738
- ++state->cp;
739
-
740
- /* Process everything on the stack until the open parenthesis. */
741
- for (;;) {
742
- JS_ASSERT(operatorSP);
743
- --operatorSP;
744
- switch (operatorStack[operatorSP].op) {
745
- case REOP_ASSERT:
746
- case REOP_ASSERT_NOT:
747
- case REOP_LPAREN:
748
- operand = NewRENode(state, operatorStack[operatorSP].op);
749
- if (!operand)
750
- goto out;
751
- operand->u.parenIndex =
752
- operatorStack[operatorSP].parenIndex;
753
- JS_ASSERT(operandSP);
754
- operand->kid = operandStack[operandSP - 1];
755
- operandStack[operandSP - 1] = operand;
756
- if (state->treeDepth == TREE_DEPTH_MAX) {
757
- ReportRegExpError(state, JSREPORT_ERROR,
758
- JSMSG_REGEXP_TOO_COMPLEX);
759
- goto out;
760
- }
761
- ++state->treeDepth;
762
- /* FALL THROUGH */
763
-
764
- case REOP_LPARENNON:
765
- state->result = operandStack[operandSP - 1];
766
- if (!ParseQuantifier(state))
767
- goto out;
768
- operandStack[operandSP - 1] = state->result;
769
- goto restartOperator;
770
- default:
771
- if (!ProcessOp(state, &operatorStack[operatorSP],
772
- operandStack, operandSP))
773
- goto out;
774
- --operandSP;
775
- break;
776
- }
777
- }
778
- break;
779
-
780
- case '{':
781
- {
782
- const jschar *errp = state->cp;
783
-
784
- if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
785
- /*
786
- * This didn't even scan correctly as a quantifier, so we should
787
- * treat it as flat.
788
- */
789
- op = REOP_CONCAT;
790
- goto pushOperator;
791
- }
792
-
793
- state->cp = errp;
794
- /* FALL THROUGH */
795
- }
796
-
797
- case '+':
798
- case '*':
799
- case '?':
800
- ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
801
- state->cp);
802
- result = JS_FALSE;
803
- goto out;
804
-
805
- default:
806
- /* Anything else is the start of the next term. */
807
- op = REOP_CONCAT;
808
- pushOperator:
809
- if (operatorSP == operatorStackSize) {
810
- REOpData *tmp;
811
- operatorStackSize += operatorStackSize;
812
- tmp = (REOpData *)
813
- JS_realloc(state->context, operatorStack,
814
- sizeof(REOpData) * operatorStackSize);
815
- if (!tmp)
816
- goto out;
817
- operatorStack = tmp;
818
- }
819
- operatorStack[operatorSP].op = op;
820
- operatorStack[operatorSP].errPos = state->cp;
821
- operatorStack[operatorSP++].parenIndex = parenIndex;
822
- break;
823
- }
824
- }
825
- out:
826
- if (operatorStack)
827
- JS_free(state->context, operatorStack);
828
- if (operandStack)
829
- JS_free(state->context, operandStack);
830
- return result;
831
- }
832
-
833
- /*
834
- * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
835
- * its being on the stack, and to propagate errors to its callers.
836
- */
837
- #define JSREG_FIND_PAREN_COUNT 0x8000
838
- #define JSREG_FIND_PAREN_ERROR 0x4000
839
-
840
- /*
841
- * Magic return value from FindParenCount and GetDecimalValue, to indicate
842
- * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
843
- * its findMax parameter is non-null.
844
- */
845
- #define OVERFLOW_VALUE ((uintN)-1)
846
-
847
- static uintN
848
- FindParenCount(CompilerState *state)
849
- {
850
- CompilerState temp;
851
- int i;
852
-
853
- if (state->flags & JSREG_FIND_PAREN_COUNT)
854
- return OVERFLOW_VALUE;
855
-
856
- /*
857
- * Copy state into temp, flag it so we never report an invalid backref,
858
- * and reset its members to parse the entire regexp. This is obviously
859
- * suboptimal, but GetDecimalValue calls us only if a backref appears to
860
- * refer to a forward parenthetical, which is rare.
861
- */
862
- temp = *state;
863
- temp.flags |= JSREG_FIND_PAREN_COUNT;
864
- temp.cp = temp.cpbegin;
865
- temp.parenCount = 0;
866
- temp.classCount = 0;
867
- temp.progLength = 0;
868
- temp.treeDepth = 0;
869
- temp.classBitmapsMem = 0;
870
- for (i = 0; i < CLASS_CACHE_SIZE; i++)
871
- temp.classCache[i].start = NULL;
872
-
873
- if (!ParseRegExp(&temp)) {
874
- state->flags |= JSREG_FIND_PAREN_ERROR;
875
- return OVERFLOW_VALUE;
876
- }
877
- return temp.parenCount;
878
- }
879
-
880
- /*
881
- * Extract and return a decimal value at state->cp. The initial character c
882
- * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
883
- * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
884
- * state->flags to discover whether an error occurred under findMax.
885
- */
886
- static uintN
887
- GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
888
- CompilerState *state)
889
- {
890
- uintN value = JS7_UNDEC(c);
891
- JSBool overflow = (value > max && (!findMax || value > findMax(state)));
892
-
893
- /* The following restriction allows simpler overflow checks. */
894
- JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
895
- while (state->cp < state->cpend) {
896
- c = *state->cp;
897
- if (!JS7_ISDEC(c))
898
- break;
899
- value = 10 * value + JS7_UNDEC(c);
900
- if (!overflow && value > max && (!findMax || value > findMax(state)))
901
- overflow = JS_TRUE;
902
- ++state->cp;
903
- }
904
- return overflow ? OVERFLOW_VALUE : value;
905
- }
906
-
907
- /*
908
- * Calculate the total size of the bitmap required for a class expression.
909
- */
910
- static JSBool
911
- CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
912
- const jschar *end)
913
- {
914
- uintN max = 0;
915
- JSBool inRange = JS_FALSE;
916
- jschar c, rangeStart = 0;
917
- uintN n, digit, nDigits, i;
918
-
919
- target->u.ucclass.bmsize = 0;
920
- target->u.ucclass.sense = JS_TRUE;
921
-
922
- if (src == end)
923
- return JS_TRUE;
924
-
925
- if (*src == '^') {
926
- ++src;
927
- target->u.ucclass.sense = JS_FALSE;
928
- }
929
-
930
- while (src != end) {
931
- JSBool canStartRange = JS_TRUE;
932
- uintN localMax = 0;
933
-
934
- switch (*src) {
935
- case '\\':
936
- ++src;
937
- c = *src++;
938
- switch (c) {
939
- case 'b':
940
- localMax = 0x8;
941
- break;
942
- case 'f':
943
- localMax = 0xC;
944
- break;
945
- case 'n':
946
- localMax = 0xA;
947
- break;
948
- case 'r':
949
- localMax = 0xD;
950
- break;
951
- case 't':
952
- localMax = 0x9;
953
- break;
954
- case 'v':
955
- localMax = 0xB;
956
- break;
957
- case 'c':
958
- if (src < end && RE_IS_LETTER(*src)) {
959
- localMax = (uintN) (*src++) & 0x1F;
960
- } else {
961
- --src;
962
- localMax = '\\';
963
- }
964
- break;
965
- case 'x':
966
- nDigits = 2;
967
- goto lexHex;
968
- case 'u':
969
- nDigits = 4;
970
- lexHex:
971
- n = 0;
972
- for (i = 0; (i < nDigits) && (src < end); i++) {
973
- c = *src++;
974
- if (!isASCIIHexDigit(c, &digit)) {
975
- /*
976
- * Back off to accepting the original
977
- *'\' as a literal.
978
- */
979
- src -= i + 1;
980
- n = '\\';
981
- break;
982
- }
983
- n = (n << 4) | digit;
984
- }
985
- localMax = n;
986
- break;
987
- case 'd':
988
- canStartRange = JS_FALSE;
989
- if (inRange) {
990
- JS_ReportErrorNumber(state->context,
991
- js_GetErrorMessage, NULL,
992
- JSMSG_BAD_CLASS_RANGE);
993
- return JS_FALSE;
994
- }
995
- localMax = '9';
996
- break;
997
- case 'D':
998
- case 's':
999
- case 'S':
1000
- case 'w':
1001
- case 'W':
1002
- canStartRange = JS_FALSE;
1003
- if (inRange) {
1004
- JS_ReportErrorNumber(state->context,
1005
- js_GetErrorMessage, NULL,
1006
- JSMSG_BAD_CLASS_RANGE);
1007
- return JS_FALSE;
1008
- }
1009
- max = 65535;
1010
-
1011
- /*
1012
- * If this is the start of a range, ensure that it's less than
1013
- * the end.
1014
- */
1015
- localMax = 0;
1016
- break;
1017
- case '0':
1018
- case '1':
1019
- case '2':
1020
- case '3':
1021
- case '4':
1022
- case '5':
1023
- case '6':
1024
- case '7':
1025
- /*
1026
- * This is a non-ECMA extension - decimal escapes (in this
1027
- * case, octal!) are supposed to be an error inside class
1028
- * ranges, but supported here for backwards compatibility.
1029
- *
1030
- */
1031
- n = JS7_UNDEC(c);
1032
- c = *src;
1033
- if ('0' <= c && c <= '7') {
1034
- src++;
1035
- n = 8 * n + JS7_UNDEC(c);
1036
- c = *src;
1037
- if ('0' <= c && c <= '7') {
1038
- src++;
1039
- i = 8 * n + JS7_UNDEC(c);
1040
- if (i <= 0377)
1041
- n = i;
1042
- else
1043
- src--;
1044
- }
1045
- }
1046
- localMax = n;
1047
- break;
1048
-
1049
- default:
1050
- localMax = c;
1051
- break;
1052
- }
1053
- break;
1054
- default:
1055
- localMax = *src++;
1056
- break;
1057
- }
1058
-
1059
- if (inRange) {
1060
- /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1061
- if (rangeStart > localMax) {
1062
- JS_ReportErrorNumber(state->context,
1063
- js_GetErrorMessage, NULL,
1064
- JSMSG_BAD_CLASS_RANGE);
1065
- return JS_FALSE;
1066
- }
1067
- inRange = JS_FALSE;
1068
- } else {
1069
- if (canStartRange && src < end - 1) {
1070
- if (*src == '-') {
1071
- ++src;
1072
- inRange = JS_TRUE;
1073
- rangeStart = (jschar)localMax;
1074
- continue;
1075
- }
1076
- }
1077
- if (state->flags & JSREG_FOLD)
1078
- rangeStart = localMax; /* one run of the uc/dc loop below */
1079
- }
1080
-
1081
- if (state->flags & JSREG_FOLD) {
1082
- jschar maxch = localMax;
1083
-
1084
- for (i = rangeStart; i <= localMax; i++) {
1085
- jschar uch, dch;
1086
-
1087
- uch = upcase(i);
1088
- dch = downcase(i);
1089
- maxch = JS_MAX(maxch, uch);
1090
- maxch = JS_MAX(maxch, dch);
1091
- }
1092
- localMax = maxch;
1093
- }
1094
-
1095
- if (localMax > max)
1096
- max = localMax;
1097
- }
1098
- target->u.ucclass.bmsize = max;
1099
- return JS_TRUE;
1100
- }
1101
-
1102
- /*
1103
- * item: assertion An item is either an assertion or
1104
- * quantatom a quantified atom.
1105
- *
1106
- * assertion: '^' Assertions match beginning of string
1107
- * (or line if the class static property
1108
- * RegExp.multiline is true).
1109
- * '$' End of string (or line if the class
1110
- * static property RegExp.multiline is
1111
- * true).
1112
- * '\b' Word boundary (between \w and \W).
1113
- * '\B' Word non-boundary.
1114
- *
1115
- * quantatom: atom An unquantified atom.
1116
- * quantatom '{' n ',' m '}'
1117
- * Atom must occur between n and m times.
1118
- * quantatom '{' n ',' '}' Atom must occur at least n times.
1119
- * quantatom '{' n '}' Atom must occur exactly n times.
1120
- * quantatom '*' Zero or more times (same as {0,}).
1121
- * quantatom '+' One or more times (same as {1,}).
1122
- * quantatom '?' Zero or one time (same as {0,1}).
1123
- *
1124
- * any of which can be optionally followed by '?' for ungreedy
1125
- *
1126
- * atom: '(' regexp ')' A parenthesized regexp (what matched
1127
- * can be addressed using a backreference,
1128
- * see '\' n below).
1129
- * '.' Matches any char except '\n'.
1130
- * '[' classlist ']' A character class.
1131
- * '[' '^' classlist ']' A negated character class.
1132
- * '\f' Form Feed.
1133
- * '\n' Newline (Line Feed).
1134
- * '\r' Carriage Return.
1135
- * '\t' Horizontal Tab.
1136
- * '\v' Vertical Tab.
1137
- * '\d' A digit (same as [0-9]).
1138
- * '\D' A non-digit.
1139
- * '\w' A word character, [0-9a-z_A-Z].
1140
- * '\W' A non-word character.
1141
- * '\s' A whitespace character, [ \b\f\n\r\t\v].
1142
- * '\S' A non-whitespace character.
1143
- * '\' n A backreference to the nth (n decimal
1144
- * and positive) parenthesized expression.
1145
- * '\' octal An octal escape sequence (octal must be
1146
- * two or three digits long, unless it is
1147
- * 0 for the null character).
1148
- * '\x' hex A hex escape (hex must be two digits).
1149
- * '\u' unicode A unicode escape (must be four digits).
1150
- * '\c' ctrl A control character, ctrl is a letter.
1151
- * '\' literalatomchar Any character except one of the above
1152
- * that follow '\' in an atom.
1153
- * otheratomchar Any character not first among the other
1154
- * atom right-hand sides.
1155
- */
1156
- static JSBool
1157
- ParseTerm(CompilerState *state)
1158
- {
1159
- jschar c = *state->cp++;
1160
- uintN nDigits;
1161
- uintN num, tmp, n, i;
1162
- const jschar *termStart;
1163
-
1164
- switch (c) {
1165
- /* assertions and atoms */
1166
- case '^':
1167
- state->result = NewRENode(state, REOP_BOL);
1168
- if (!state->result)
1169
- return JS_FALSE;
1170
- state->progLength++;
1171
- return JS_TRUE;
1172
- case '$':
1173
- state->result = NewRENode(state, REOP_EOL);
1174
- if (!state->result)
1175
- return JS_FALSE;
1176
- state->progLength++;
1177
- return JS_TRUE;
1178
- case '\\':
1179
- if (state->cp >= state->cpend) {
1180
- /* a trailing '\' is an error */
1181
- ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1182
- return JS_FALSE;
1183
- }
1184
- c = *state->cp++;
1185
- switch (c) {
1186
- /* assertion escapes */
1187
- case 'b' :
1188
- state->result = NewRENode(state, REOP_WBDRY);
1189
- if (!state->result)
1190
- return JS_FALSE;
1191
- state->progLength++;
1192
- return JS_TRUE;
1193
- case 'B':
1194
- state->result = NewRENode(state, REOP_WNONBDRY);
1195
- if (!state->result)
1196
- return JS_FALSE;
1197
- state->progLength++;
1198
- return JS_TRUE;
1199
- /* Decimal escape */
1200
- case '0':
1201
- /* Give a strict warning. See also the note below. */
1202
- if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
1203
- JSMSG_INVALID_BACKREF)) {
1204
- return JS_FALSE;
1205
- }
1206
- doOctal:
1207
- num = 0;
1208
- while (state->cp < state->cpend) {
1209
- c = *state->cp;
1210
- if (c < '0' || '7' < c)
1211
- break;
1212
- state->cp++;
1213
- tmp = 8 * num + (uintN)JS7_UNDEC(c);
1214
- if (tmp > 0377)
1215
- break;
1216
- num = tmp;
1217
- }
1218
- c = (jschar)num;
1219
- doFlat:
1220
- state->result = NewRENode(state, REOP_FLAT);
1221
- if (!state->result)
1222
- return JS_FALSE;
1223
- state->result->u.flat.chr = c;
1224
- state->result->u.flat.length = 1;
1225
- state->progLength += 3;
1226
- break;
1227
- case '1':
1228
- case '2':
1229
- case '3':
1230
- case '4':
1231
- case '5':
1232
- case '6':
1233
- case '7':
1234
- case '8':
1235
- case '9':
1236
- termStart = state->cp - 1;
1237
- num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1238
- if (state->flags & JSREG_FIND_PAREN_ERROR)
1239
- return JS_FALSE;
1240
- if (num == OVERFLOW_VALUE) {
1241
- /* Give a strict mode warning. */
1242
- if (!ReportRegExpError(state,
1243
- JSREPORT_WARNING | JSREPORT_STRICT,
1244
- (c >= '8')
1245
- ? JSMSG_INVALID_BACKREF
1246
- : JSMSG_BAD_BACKREF)) {
1247
- return JS_FALSE;
1248
- }
1249
-
1250
- /*
1251
- * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1252
- * error here. However, for compatibility with IE, we treat the
1253
- * whole backref as flat if the first character in it is not a
1254
- * valid octal character, and as an octal escape otherwise.
1255
- */
1256
- state->cp = termStart;
1257
- if (c >= '8') {
1258
- /* Treat this as flat. termStart - 1 is the \. */
1259
- c = '\\';
1260
- goto asFlat;
1261
- }
1262
-
1263
- /* Treat this as an octal escape. */
1264
- goto doOctal;
1265
- }
1266
- JS_ASSERT(1 <= num && num <= 0x10000);
1267
- state->result = NewRENode(state, REOP_BACKREF);
1268
- if (!state->result)
1269
- return JS_FALSE;
1270
- state->result->u.parenIndex = num - 1;
1271
- state->progLength
1272
- += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1273
- break;
1274
- /* Control escape */
1275
- case 'f':
1276
- c = 0xC;
1277
- goto doFlat;
1278
- case 'n':
1279
- c = 0xA;
1280
- goto doFlat;
1281
- case 'r':
1282
- c = 0xD;
1283
- goto doFlat;
1284
- case 't':
1285
- c = 0x9;
1286
- goto doFlat;
1287
- case 'v':
1288
- c = 0xB;
1289
- goto doFlat;
1290
- /* Control letter */
1291
- case 'c':
1292
- if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1293
- c = (jschar) (*state->cp++ & 0x1F);
1294
- } else {
1295
- /* back off to accepting the original '\' as a literal */
1296
- --state->cp;
1297
- c = '\\';
1298
- }
1299
- goto doFlat;
1300
- /* HexEscapeSequence */
1301
- case 'x':
1302
- nDigits = 2;
1303
- goto lexHex;
1304
- /* UnicodeEscapeSequence */
1305
- case 'u':
1306
- nDigits = 4;
1307
- lexHex:
1308
- n = 0;
1309
- for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1310
- uintN digit;
1311
- c = *state->cp++;
1312
- if (!isASCIIHexDigit(c, &digit)) {
1313
- /*
1314
- * Back off to accepting the original 'u' or 'x' as a
1315
- * literal.
1316
- */
1317
- state->cp -= i + 2;
1318
- n = *state->cp++;
1319
- break;
1320
- }
1321
- n = (n << 4) | digit;
1322
- }
1323
- c = (jschar) n;
1324
- goto doFlat;
1325
- /* Character class escapes */
1326
- case 'd':
1327
- state->result = NewRENode(state, REOP_DIGIT);
1328
- doSimple:
1329
- if (!state->result)
1330
- return JS_FALSE;
1331
- state->progLength++;
1332
- break;
1333
- case 'D':
1334
- state->result = NewRENode(state, REOP_NONDIGIT);
1335
- goto doSimple;
1336
- case 's':
1337
- state->result = NewRENode(state, REOP_SPACE);
1338
- goto doSimple;
1339
- case 'S':
1340
- state->result = NewRENode(state, REOP_NONSPACE);
1341
- goto doSimple;
1342
- case 'w':
1343
- state->result = NewRENode(state, REOP_ALNUM);
1344
- goto doSimple;
1345
- case 'W':
1346
- state->result = NewRENode(state, REOP_NONALNUM);
1347
- goto doSimple;
1348
- /* IdentityEscape */
1349
- default:
1350
- state->result = NewRENode(state, REOP_FLAT);
1351
- if (!state->result)
1352
- return JS_FALSE;
1353
- state->result->u.flat.chr = c;
1354
- state->result->u.flat.length = 1;
1355
- state->result->kid = (void *) (state->cp - 1);
1356
- state->progLength += 3;
1357
- break;
1358
- }
1359
- break;
1360
- case '[':
1361
- state->result = NewRENode(state, REOP_CLASS);
1362
- if (!state->result)
1363
- return JS_FALSE;
1364
- termStart = state->cp;
1365
- state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1366
- for (;;) {
1367
- if (state->cp == state->cpend) {
1368
- ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1369
- JSMSG_UNTERM_CLASS, termStart);
1370
-
1371
- return JS_FALSE;
1372
- }
1373
- if (*state->cp == '\\') {
1374
- state->cp++;
1375
- if (state->cp != state->cpend)
1376
- state->cp++;
1377
- continue;
1378
- }
1379
- if (*state->cp == ']') {
1380
- state->result->u.ucclass.kidlen = state->cp - termStart;
1381
- break;
1382
- }
1383
- state->cp++;
1384
- }
1385
- for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1386
- if (!state->classCache[i].start) {
1387
- state->classCache[i].start = termStart;
1388
- state->classCache[i].length = state->result->u.ucclass.kidlen;
1389
- state->classCache[i].index = state->classCount;
1390
- break;
1391
- }
1392
- if (state->classCache[i].length ==
1393
- state->result->u.ucclass.kidlen) {
1394
- for (n = 0; ; n++) {
1395
- if (n == state->classCache[i].length) {
1396
- state->result->u.ucclass.index
1397
- = state->classCache[i].index;
1398
- goto claim;
1399
- }
1400
- if (state->classCache[i].start[n] != termStart[n])
1401
- break;
1402
- }
1403
- }
1404
- }
1405
- state->result->u.ucclass.index = state->classCount++;
1406
-
1407
- claim:
1408
- /*
1409
- * Call CalculateBitmapSize now as we want any errors it finds
1410
- * to be reported during the parse phase, not at execution.
1411
- */
1412
- if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1413
- return JS_FALSE;
1414
- /*
1415
- * Update classBitmapsMem with number of bytes to hold bmsize bits,
1416
- * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1417
- * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1418
- */
1419
- n = (state->result->u.ucclass.bmsize >> 3) + 1;
1420
- if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1421
- ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1422
- return JS_FALSE;
1423
- }
1424
- state->classBitmapsMem += n;
1425
- /* CLASS, <index> */
1426
- state->progLength
1427
- += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1428
- break;
1429
-
1430
- case '.':
1431
- state->result = NewRENode(state, REOP_DOT);
1432
- goto doSimple;
1433
-
1434
- case '{':
1435
- {
1436
- const jschar *errp = state->cp--;
1437
- intN err;
1438
-
1439
- err = ParseMinMaxQuantifier(state, JS_TRUE);
1440
- state->cp = errp;
1441
-
1442
- if (err < 0)
1443
- goto asFlat;
1444
-
1445
- /* FALL THROUGH */
1446
- }
1447
- case '*':
1448
- case '+':
1449
- case '?':
1450
- ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1451
- JSMSG_BAD_QUANTIFIER, state->cp - 1);
1452
- return JS_FALSE;
1453
- default:
1454
- asFlat:
1455
- state->result = NewRENode(state, REOP_FLAT);
1456
- if (!state->result)
1457
- return JS_FALSE;
1458
- state->result->u.flat.chr = c;
1459
- state->result->u.flat.length = 1;
1460
- state->result->kid = (void *) (state->cp - 1);
1461
- state->progLength += 3;
1462
- break;
1463
- }
1464
- return ParseQuantifier(state);
1465
- }
1466
-
1467
- static JSBool
1468
- ParseQuantifier(CompilerState *state)
1469
- {
1470
- RENode *term;
1471
- term = state->result;
1472
- if (state->cp < state->cpend) {
1473
- switch (*state->cp) {
1474
- case '+':
1475
- state->result = NewRENode(state, REOP_QUANT);
1476
- if (!state->result)
1477
- return JS_FALSE;
1478
- state->result->u.range.min = 1;
1479
- state->result->u.range.max = (uintN)-1;
1480
- /* <PLUS>, <next> ... <ENDCHILD> */
1481
- state->progLength += 4;
1482
- goto quantifier;
1483
- case '*':
1484
- state->result = NewRENode(state, REOP_QUANT);
1485
- if (!state->result)
1486
- return JS_FALSE;
1487
- state->result->u.range.min = 0;
1488
- state->result->u.range.max = (uintN)-1;
1489
- /* <STAR>, <next> ... <ENDCHILD> */
1490
- state->progLength += 4;
1491
- goto quantifier;
1492
- case '?':
1493
- state->result = NewRENode(state, REOP_QUANT);
1494
- if (!state->result)
1495
- return JS_FALSE;
1496
- state->result->u.range.min = 0;
1497
- state->result->u.range.max = 1;
1498
- /* <OPT>, <next> ... <ENDCHILD> */
1499
- state->progLength += 4;
1500
- goto quantifier;
1501
- case '{': /* balance '}' */
1502
- {
1503
- intN err;
1504
- const jschar *errp = state->cp;
1505
-
1506
- err = ParseMinMaxQuantifier(state, JS_FALSE);
1507
- if (err == 0)
1508
- goto quantifier;
1509
- if (err == -1)
1510
- return JS_TRUE;
1511
-
1512
- ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1513
- return JS_FALSE;
1514
- }
1515
- default:;
1516
- }
1517
- }
1518
- return JS_TRUE;
1519
-
1520
- quantifier:
1521
- if (state->treeDepth == TREE_DEPTH_MAX) {
1522
- ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1523
- return JS_FALSE;
1524
- }
1525
-
1526
- ++state->treeDepth;
1527
- ++state->cp;
1528
- state->result->kid = term;
1529
- if (state->cp < state->cpend && *state->cp == '?') {
1530
- ++state->cp;
1531
- state->result->u.range.greedy = JS_FALSE;
1532
- } else {
1533
- state->result->u.range.greedy = JS_TRUE;
1534
- }
1535
- return JS_TRUE;
1536
- }
1537
-
1538
- static intN
1539
- ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1540
- {
1541
- uintN min, max;
1542
- jschar c;
1543
- const jschar *errp = state->cp++;
1544
-
1545
- c = *state->cp;
1546
- if (JS7_ISDEC(c)) {
1547
- ++state->cp;
1548
- min = GetDecimalValue(c, 0xFFFF, NULL, state);
1549
- c = *state->cp;
1550
-
1551
- if (!ignoreValues && min == OVERFLOW_VALUE)
1552
- return JSMSG_MIN_TOO_BIG;
1553
-
1554
- if (c == ',') {
1555
- c = *++state->cp;
1556
- if (JS7_ISDEC(c)) {
1557
- ++state->cp;
1558
- max = GetDecimalValue(c, 0xFFFF, NULL, state);
1559
- c = *state->cp;
1560
- if (!ignoreValues && max == OVERFLOW_VALUE)
1561
- return JSMSG_MAX_TOO_BIG;
1562
- if (!ignoreValues && min > max)
1563
- return JSMSG_OUT_OF_ORDER;
1564
- } else {
1565
- max = (uintN)-1;
1566
- }
1567
- } else {
1568
- max = min;
1569
- }
1570
- if (c == '}') {
1571
- state->result = NewRENode(state, REOP_QUANT);
1572
- if (!state->result)
1573
- return JSMSG_OUT_OF_MEMORY;
1574
- state->result->u.range.min = min;
1575
- state->result->u.range.max = max;
1576
- /*
1577
- * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1578
- * where <max> is written as compact(max+1) to make
1579
- * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1580
- */
1581
- state->progLength += (1 + GetCompactIndexWidth(min)
1582
- + GetCompactIndexWidth(max + 1)
1583
- +3);
1584
- return 0;
1585
- }
1586
- }
1587
-
1588
- state->cp = errp;
1589
- return -1;
1590
- }
1591
-
1592
- static JSBool
1593
- SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1594
- {
1595
- ptrdiff_t offset = target - jump;
1596
-
1597
- /* Check that target really points forward. */
1598
- JS_ASSERT(offset >= 2);
1599
- if ((size_t)offset > OFFSET_MAX)
1600
- return JS_FALSE;
1601
-
1602
- jump[0] = JUMP_OFFSET_HI(offset);
1603
- jump[1] = JUMP_OFFSET_LO(offset);
1604
- return JS_TRUE;
1605
- }
1606
-
1607
/*
 * Generate bytecode for the tree rooted at t using an explicit stack instead
 * of recursion.
 *
 *   state     - compiler state; supplies the context used for allocation and
 *               error reporting, the regexp flags and the source base
 *               pointer (cpbegin).
 *   re        - regexp under construction; re->program is used as the base
 *               for the program-size check and re->classList entries are
 *               filled in for REOP_CLASS nodes.
 *   treeDepth - number of EmitStateStackEntry slots to allocate; 0 means no
 *               stack is allocated.
 *   pc        - where the first byte of bytecode is written.
 *   t         - root of the node tree to emit.
 *
 * Returns the address just past the last byte written, or NULL if the stack
 * could not be allocated or a jump offset could not be encoded (in the latter
 * case JSMSG_REGEXP_TOO_COMPLEX is reported first).  The temporary stack is
 * released on the cleanup path shared by normal completion and jump_too_big.
 *
 * Each loop iteration stores op at *pc and then lets the switch refine or
 * extend it.  Node kinds that have a child push an entry recording the node
 * and the "continue" opcode to emit once the child list is exhausted, then
 * descend with `continue`.  When a sibling list ends (t->next is NULL) the top
 * entry is popped and its continueOp is processed with emitStateSP still
 * pointing at that popped entry, which is why the continuation cases read
 * their fixup addresses through emitStateSP.
 */
static jsbytecode *
EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
               jsbytecode *pc, RENode *t)
{
    EmitStateStackEntry *emitStateSP, *emitStateStack;
    RECharSet *charSet;
    REOp op;

    if (treeDepth == 0) {
        emitStateStack = NULL;
    } else {
        emitStateStack =
            (EmitStateStackEntry *)JS_malloc(state->context,
                                             sizeof(EmitStateStackEntry) *
                                             treeDepth);
        if (!emitStateStack)
            return NULL;
    }
    emitStateSP = emitStateStack;
    op = t->op;
    JS_ASSERT(op < REOP_LIMIT);

    for (;;) {
        *pc++ = op;
        switch (op) {
          case REOP_EMPTY:
            /* Emits nothing: undo the opcode byte just written. */
            --pc;
            break;

          case REOP_ALTPREREQ2:
          case REOP_ALTPREREQ:
            /*
             * Layout: op, <endTermFixup offset>, ch1, ch2, <nextAltFixup
             * offset>, then the first alternative's code.
             */
            JS_ASSERT(emitStateSP);
            emitStateSP->altHead = pc - 1;
            emitStateSP->endTermFixup = pc;
            pc += OFFSET_LEN;
            SET_ARG(pc, t->u.altprereq.ch1);
            pc += ARG_LEN;
            SET_ARG(pc, t->u.altprereq.ch2);
            pc += ARG_LEN;

            emitStateSP->nextAltFixup = pc;    /* offset to next alternate */
            pc += OFFSET_LEN;

            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_JUMP;
            emitStateSP->jumpToJumpFlag = JS_FALSE;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            JS_ASSERT(op < REOP_LIMIT);
            continue;

          case REOP_JUMP:
            /*
             * Continuation after the first alternative: reserve the jump
             * offset, point the pending next-alternate fixup here, re-push
             * the same entry with REOP_ENDALT and descend into kid2.
             */
            emitStateSP->nextTermFixup = pc;    /* offset to following term */
            pc += OFFSET_LEN;
            if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
                goto jump_too_big;
            emitStateSP->continueOp = REOP_ENDALT;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->u.kid2;
            op = t->op;
            JS_ASSERT(op < REOP_LIMIT);
            continue;

          case REOP_ENDALT:
            /*
             * If we already patched emitStateSP->nextTermFixup to jump to
             * a nearer jump, to avoid 16-bit immediate offset overflow, we
             * are done here.
             */
            if (emitStateSP->jumpToJumpFlag)
                break;

            /*
             * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
             * REOP_ENDALT is executed only on successful match of the last
             * alternate in a group.
             */
            if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
                goto jump_too_big;
            if (t->op != REOP_ALT) {
                if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
                    goto jump_too_big;
            }

            /*
             * If the program is bigger than the REOP_JUMP offset range, then
             * we must check for alternates before this one that are part of
             * the same group, and fix up their jump offsets to target jumps
             * close enough to fit in a 16-bit unsigned offset immediate.
             */
            if ((size_t)(pc - re->program) > OFFSET_MAX &&
                emitStateSP > emitStateStack) {
                EmitStateStackEntry *esp, *esp2;
                jsbytecode *alt, *jump;
                ptrdiff_t span, header;

                esp2 = emitStateSP;
                alt = esp2->altHead;
                /* Walk outward through the enclosing stack entries. */
                for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
                    if (esp->continueOp == REOP_ENDALT &&
                        !esp->jumpToJumpFlag &&
                        esp->nextTermFixup + OFFSET_LEN == alt &&
                        (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
                                       ? esp->endTermFixup
                                       : esp->nextTermFixup)) > OFFSET_MAX) {
                        alt = esp->altHead;
                        jump = esp->nextTermFixup;

                        /*
                         * The span must be 1 less than the distance from
                         * jump offset to jump offset, so we actually jump
                         * to a REOP_JUMP bytecode, not to its offset!
                         */
                        for (;;) {
                            JS_ASSERT(jump < esp2->nextTermFixup);
                            span = esp2->nextTermFixup - jump - 1;
                            if ((size_t)span <= OFFSET_MAX)
                                break;
                            do {
                                if (--esp2 == esp)
                                    goto jump_too_big;
                            } while (esp2->continueOp != REOP_ENDALT);
                        }

                        jump[0] = JUMP_OFFSET_HI(span);
                        jump[1] = JUMP_OFFSET_LO(span);

                        if (esp->continueNode->op != REOP_ALT) {
                            /*
                             * We must patch the offset at esp->endTermFixup
                             * as well, for the REOP_ALTPREREQ{,2} opcodes.
                             * If we're unlucky and endTermFixup is more than
                             * OFFSET_MAX bytes from its target, we cheat by
                             * jumping 6 bytes to the jump whose offset is at
                             * esp->nextTermFixup, which has the same target.
                             */
                            jump = esp->endTermFixup;
                            header = esp->nextTermFixup - jump;
                            span += header;
                            if ((size_t)span > OFFSET_MAX)
                                span = header;

                            jump[0] = JUMP_OFFSET_HI(span);
                            jump[1] = JUMP_OFFSET_LO(span);
                        }

                        esp->jumpToJumpFlag = JS_TRUE;
                    }
                }
            }
            break;

          case REOP_ALT:
            /* Layout: op, <nextAltFixup offset>, first alternative's code. */
            JS_ASSERT(emitStateSP);
            emitStateSP->altHead = pc - 1;
            emitStateSP->nextAltFixup = pc;     /* offset to next alternate */
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_JUMP;
            emitStateSP->jumpToJumpFlag = JS_FALSE;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            JS_ASSERT(op < REOP_LIMIT);
            continue;

          case REOP_FLAT:
            /*
             * Coalesce FLATs if possible and if it would not increase bytecode
             * beyond preallocated limit. The latter happens only when bytecode
             * size for coalesced string with offset p and length 2 exceeds 6
             * bytes preallocated for 2 single char nodes, i.e. when
             * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
             * GetCompactIndexWidth(p) > 4.
             * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
             * nodes strictly decreases bytecode size, the check has to be
             * done only for the first coalescing.
             */
            if (t->kid &&
                GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
            {
                /* Merge following FLAT nodes whose text is contiguous. */
                while (t->next &&
                       t->next->op == REOP_FLAT &&
                       (jschar*)t->kid + t->u.flat.length ==
                       (jschar*)t->next->kid) {
                    t->u.flat.length += t->next->u.flat.length;
                    t->next = t->next->next;
                }
            }
            /*
             * Pick the encoding: (offset, length) into the source for runs,
             * a single byte for chars below 256, otherwise a full ARG_LEN
             * character.  The "i" opcode variants are used under JSREG_FOLD.
             */
            if (t->kid && t->u.flat.length > 1) {
                pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
                pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
                pc = WriteCompactIndex(pc, t->u.flat.length);
            } else if (t->u.flat.chr < 256) {
                pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
                *pc++ = (jsbytecode) t->u.flat.chr;
            } else {
                pc[-1] = (state->flags & JSREG_FOLD)
                         ? REOP_UCFLAT1i
                         : REOP_UCFLAT1;
                SET_ARG(pc, t->u.flat.chr);
                pc += ARG_LEN;
            }
            break;

          case REOP_LPAREN:
            /* op, compact(parenIndex), child code, then REOP_RPAREN. */
            JS_ASSERT(emitStateSP);
            pc = WriteCompactIndex(pc, t->u.parenIndex);
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_RPAREN;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_RPAREN:
            pc = WriteCompactIndex(pc, t->u.parenIndex);
            break;

          case REOP_BACKREF:
            pc = WriteCompactIndex(pc, t->u.parenIndex);
            break;

          case REOP_ASSERT:
            /* op, <offset past the assertion>, child, REOP_ASSERTTEST. */
            JS_ASSERT(emitStateSP);
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ASSERTTEST;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_ASSERTTEST:
          case REOP_ASSERTNOTTEST:
            /* Resolve the offset reserved by the matching ASSERT opcode. */
            if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
                goto jump_too_big;
            break;

          case REOP_ASSERT_NOT:
            /* Same shape as REOP_ASSERT, closed by REOP_ASSERTNOTTEST. */
            JS_ASSERT(emitStateSP);
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ASSERTNOTTEST;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_QUANT:
            /*
             * The common {0,inf}, {0,1} and {1,inf} ranges get dedicated
             * opcodes with no operands; everything else keeps QUANT (or
             * MINIMALQUANT when non-greedy) followed by min and max + 1.
             */
            JS_ASSERT(emitStateSP);
            if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
                pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
            } else if (t->u.range.min == 0 && t->u.range.max == 1) {
                pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
            } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
                pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
            } else {
                if (!t->u.range.greedy)
                    pc[-1] = REOP_MINIMALQUANT;
                pc = WriteCompactIndex(pc, t->u.range.min);
                /*
                 * Write max + 1 to avoid using size_t(max) + 1 bytes
                 * for (uintN)-1 sentinel.
                 */
                pc = WriteCompactIndex(pc, t->u.range.max + 1);
            }
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ENDCHILD;
            ++emitStateSP;
            JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *) t->kid;
            op = t->op;
            continue;

          case REOP_ENDCHILD:
            /* Resolve the offset reserved by the quantifier opcode. */
            if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
                goto jump_too_big;
            break;

          case REOP_CLASS:
            /*
             * op (NCLASS for a negated class), compact(class index).  The
             * class itself is only recorded here as a source range; its
             * entry is marked unconverted.
             */
            if (!t->u.ucclass.sense)
                pc[-1] = REOP_NCLASS;
            pc = WriteCompactIndex(pc, t->u.ucclass.index);
            charSet = &re->classList[t->u.ucclass.index];
            charSet->converted = JS_FALSE;
            charSet->length = t->u.ucclass.bmsize;
            charSet->u.src.startIndex = t->u.ucclass.startIndex;
            charSet->u.src.length = t->u.ucclass.kidlen;
            charSet->sense = t->u.ucclass.sense;
            break;

          default:
            /* Opcodes without operands: the byte written above suffices. */
            break;
        }

        /*
         * Advance to the next sibling; when the list is exhausted, pop the
         * enclosing entry and emit its continuation opcode, or finish if
         * the stack is empty.
         */
        t = t->next;
        if (t) {
            op = t->op;
        } else {
            if (emitStateSP == emitStateStack)
                break;
            --emitStateSP;
            t = emitStateSP->continueNode;
            op = (REOp) emitStateSP->continueOp;
        }
    }

  cleanup:
    if (emitStateStack)
        JS_free(state->context, emitStateStack);
    return pc;

  jump_too_big:
    ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
    pc = NULL;
    goto cleanup;
}
1940
-
1941
-
1942
/*
 * Compile the regular expression source in str into a new JSRegExp.
 *
 *   cx    - context used for allocation; its tempPool is marked on entry and
 *           released on every exit path.
 *   ts    - stored in the compiler state as tokenStream (presumably for
 *           error reporting; its consumers are outside this view).
 *   str   - regexp source; also stored as re->source on success.
 *   flags - JSREG_* flags, stored in both the compiler state and the result.
 *   flat  - when true and str is non-empty, the whole string is compiled as
 *           one literal REOP_FLAT node instead of being parsed.  An empty
 *           string goes through ParseRegExp even when flat is set.
 *
 * Returns the new regexp with nrefs == 1, or NULL on failure.
 */
JSRegExp *
js_NewRegExp(JSContext *cx, JSTokenStream *ts,
             JSString *str, uintN flags, JSBool flat)
{
    JSRegExp *re;
    void *mark;
    CompilerState state;
    size_t resize;
    jsbytecode *endPC;
    uintN i;
    size_t len;

    re = NULL;
    mark = JS_ARENA_MARK(&cx->tempPool);
    len = JSSTRING_LENGTH(str);

    state.context = cx;
    state.tokenStream = ts;
    state.cp = js_UndependString(cx, str);
    if (!state.cp)
        goto out;
    state.cpbegin = state.cp;
    state.cpend = state.cp + len;
    state.flags = flags;
    state.parenCount = 0;
    state.classCount = 0;
    state.progLength = 0;
    state.treeDepth = 0;
    state.classBitmapsMem = 0;
    for (i = 0; i < CLASS_CACHE_SIZE; i++)
        state.classCache[i].start = NULL;

    if (len != 0 && flat) {
        state.result = NewRENode(&state, REOP_FLAT);
        if (!state.result)
            goto out;
        state.result->u.flat.chr = *state.cpbegin;
        state.result->u.flat.length = len;
        state.result->kid = (void *) state.cpbegin;
        /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
        state.progLength += 1 + GetCompactIndexWidth(0)
                          + GetCompactIndexWidth(len);
    } else {
        if (!ParseRegExp(&state))
            goto out;
    }
    /* Header plus estimated program plus one byte for the final REOP_END. */
    resize = offsetof(JSRegExp, program) + state.progLength + 1;
    re = (JSRegExp *) JS_malloc(cx, resize);
    if (!re)
        goto out;

    re->nrefs = 1;
    JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
    re->classCount = state.classCount;
    if (re->classCount) {
        re->classList = (RECharSet *)
            JS_malloc(cx, re->classCount * sizeof(RECharSet));
        if (!re->classList) {
            js_DestroyRegExp(cx, re);
            re = NULL;
            goto out;
        }
        /*
         * Mark every entry unconverted up front; js_DestroyRegExp consults
         * this flag before freeing an entry's bitmap.
         */
        for (i = 0; i < re->classCount; i++)
            re->classList[i].converted = JS_FALSE;
    } else {
        re->classList = NULL;
    }
    endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
    if (!endPC) {
        js_DestroyRegExp(cx, re);
        re = NULL;
        goto out;
    }
    *endPC++ = REOP_END;
    /*
     * Check whether size was overestimated and shrink using realloc.
     * This is safe since no pointers to newly parsed regexp or its parts
     * besides re exist here.
     */
    if ((size_t)(endPC - re->program) != state.progLength + 1) {
        JSRegExp *tmp;
        JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
        resize = offsetof(JSRegExp, program) + (endPC - re->program);
        tmp = (JSRegExp *) JS_realloc(cx, re, resize);
        /* A failed shrink is harmless: keep the original allocation. */
        if (tmp)
            re = tmp;
    }

    re->flags = flags;
    re->parenCount = state.parenCount;
    re->source = str;

out:
    JS_ARENA_RELEASE(&cx->tempPool, mark);
    return re;
}
2038
-
2039
- JSRegExp *
2040
- js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat)
2041
- {
2042
- uintN flags;
2043
- jschar *s;
2044
- size_t i, n;
2045
- char charBuf[2];
2046
-
2047
- flags = 0;
2048
- if (opt) {
2049
- JSSTRING_CHARS_AND_LENGTH(opt, s, n);
2050
- for (i = 0; i < n; i++) {
2051
- switch (s[i]) {
2052
- case 'g':
2053
- flags |= JSREG_GLOB;
2054
- break;
2055
- case 'i':
2056
- flags |= JSREG_FOLD;
2057
- break;
2058
- case 'm':
2059
- flags |= JSREG_MULTILINE;
2060
- break;
2061
- case 'y':
2062
- flags |= JSREG_STICKY;
2063
- break;
2064
- default:
2065
- charBuf[0] = (char)s[i];
2066
- charBuf[1] = '\0';
2067
- JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
2068
- js_GetErrorMessage, NULL,
2069
- JSMSG_BAD_FLAG, charBuf);
2070
- return NULL;
2071
- }
2072
- }
2073
- }
2074
- return js_NewRegExp(cx, NULL, str, flags, flat);
2075
- }
2076
-
2077
/*
 * Save the current state of the match - the position in the input
 * text as well as the position in the bytecode. The state of any
 * parent expressions is also saved (preceding state).
 * Contents of parenCount parentheses from parenIndex are also saved.
 *
 * The new record is placed gData->cursz bytes after the current top record
 * and is followed in memory by a copy of the live part of the state stack
 * and then by the saved captures.  If the record does not fit, the backtrack
 * stack is grown through the arena.  After saving, the captures in
 * [parenIndex, parenIndex + parenCount) are reset to index -1.
 *
 * Returns the new record, or NULL if the stack could not be grown; in that
 * case the failure has been reported and gData->ok is set to JS_FALSE.
 */
static REBackTrackData *
PushBackTrackState(REGlobalData *gData, REOp op,
                   jsbytecode *target, REMatchState *x, const jschar *cp,
                   size_t parenIndex, size_t parenCount)
{
    size_t i;
    /* Address of the new record: just past the current top record. */
    REBackTrackData *result =
        (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);

    /* Total size: header + saved state-stack entries + saved captures. */
    size_t sz = sizeof(REBackTrackData) +
                gData->stateStackTop * sizeof(REProgState) +
                parenCount * sizeof(RECapture);

    /* btincr > 0 means the new record would run past the allocated stack. */
    ptrdiff_t btsize = gData->backTrackStackSize;
    ptrdiff_t btincr = ((char *)result + sz) -
                       ((char *)gData->backTrackStack + btsize);

    re_debug("\tBT_Push: %lu,%lu",
             (unsigned long) parenIndex, (unsigned long) parenCount);

    JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
    if (btincr > 0) {
        /*
         * Remember the record's position as an offset so it can be rebuilt
         * from the (possibly changed) base pointer after growing.
         */
        ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;

        JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
        btincr = JS_ROUNDUP(btincr, btsize);
        JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
                           &gData->pool, btsize, btincr);
        if (!gData->backTrackStack) {
            js_ReportOutOfScriptQuota(gData->cx);
            gData->ok = JS_FALSE;
            return NULL;
        }
        gData->backTrackStackSize = btsize + btincr;
        result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
    }
    /*
     * Each record stores the size of the record below it (result->sz), and
     * cursz tracks the size of the current top record.
     */
    gData->backTrackSP = result;
    result->sz = gData->cursz;
    gData->cursz = sz;

    result->backtrack_op = op;
    result->backtrack_pc = target;
    result->cp = cp;
    result->parenCount = parenCount;
    result->parenIndex = parenIndex;

    result->saveStateStackTop = gData->stateStackTop;
    JS_ASSERT(gData->stateStackTop);
    /* The state-stack snapshot lives immediately after the header. */
    memcpy(result + 1, gData->stateStack,
           sizeof(REProgState) * result->saveStateStackTop);

    if (parenCount != 0) {
        /* Captures follow the state-stack snapshot. */
        memcpy((char *)(result + 1) +
               sizeof(REProgState) * result->saveStateStackTop,
               &x->parens[parenIndex],
               sizeof(RECapture) * parenCount);
        for (i = 0; i != parenCount; i++)
            x->parens[parenIndex + i].index = -1;
    }

    return result;
}
2145
-
2146
-
2147
/*
 * Consecutive literal characters.
 *
 * This case-sensitive matcher is compiled out by the #if 0 below.  It
 * returns x with x->cp advanced by length when the next length input
 * characters equal matchChars, and NULL otherwise.
 *
 * NOTE(review): the bounds test compares length with the pointer
 * difference without the (size_t) cast that FlatNIMatcher applies; fix that
 * before re-enabling this code.
 */
#if 0
static REMatchState *
FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
             size_t length)
{
    size_t i;
    if (length > gData->cpend - x->cp)
        return NULL;
    for (i = 0; i != length; i++) {
        if (matchChars[i] != x->cp[i])
            return NULL;
    }
    x->cp += length;
    return x;
}
#endif
2166
-
2167
- static JS_INLINE REMatchState *
2168
- FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2169
- size_t length)
2170
- {
2171
- size_t i;
2172
- JS_ASSERT(gData->cpend >= x->cp);
2173
- if (length > (size_t)(gData->cpend - x->cp))
2174
- return NULL;
2175
- for (i = 0; i != length; i++) {
2176
- if (upcase(matchChars[i]) != upcase(x->cp[i]))
2177
- return NULL;
2178
- }
2179
- x->cp += length;
2180
- return x;
2181
- }
2182
-
2183
- /*
2184
- * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2185
- * 2. If E is not a character then go to step 6.
2186
- * 3. Let ch be E's character.
2187
- * 4. Let A be a one-element RECharSet containing the character ch.
2188
- * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2189
- * 6. E must be an integer. Let n be that integer.
2190
- * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2191
- * 8. Return an internal Matcher closure that takes two arguments, a State x
2192
- * and a Continuation c, and performs the following:
2193
- * 1. Let cap be x's captures internal array.
2194
- * 2. Let s be cap[n].
2195
- * 3. If s is undefined, then call c(x) and return its result.
2196
- * 4. Let e be x's endIndex.
2197
- * 5. Let len be s's length.
2198
- * 6. Let f be e+len.
2199
- * 7. If f>InputLength, return failure.
2200
- * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2201
- * such that Canonicalize(s[i]) is not the same character as
2202
- * Canonicalize(Input [e+i]), then return failure.
2203
- * 9. Let y be the State (f, cap).
2204
- * 10. Call c(y) and return its result.
2205
- */
2206
- static REMatchState *
2207
- BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2208
- {
2209
- size_t len, i;
2210
- const jschar *parenContent;
2211
- RECapture *cap = &x->parens[parenIndex];
2212
-
2213
- if (cap->index == -1)
2214
- return x;
2215
-
2216
- len = cap->length;
2217
- if (x->cp + len > gData->cpend)
2218
- return NULL;
2219
-
2220
- parenContent = &gData->cpbegin[cap->index];
2221
- if (gData->regexp->flags & JSREG_FOLD) {
2222
- for (i = 0; i < len; i++) {
2223
- if (upcase(parenContent[i]) != upcase(x->cp[i]))
2224
- return NULL;
2225
- }
2226
- } else {
2227
- for (i = 0; i < len; i++) {
2228
- if (parenContent[i] != x->cp[i])
2229
- return NULL;
2230
- }
2231
- }
2232
- x->cp += len;
2233
- return x;
2234
- }
2235
-
2236
-
2237
- /* Add a single character to the RECharSet */
2238
- static void
2239
- AddCharacterToCharSet(RECharSet *cs, jschar c)
2240
- {
2241
- uintN byteIndex = (uintN)(c >> 3);
2242
- JS_ASSERT(c <= cs->length);
2243
- cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2244
- }
2245
-
2246
-
2247
- /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2248
- static void
2249
- AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2)
2250
- {
2251
- uintN i;
2252
-
2253
- uintN byteIndex1 = c1 >> 3;
2254
- uintN byteIndex2 = c2 >> 3;
2255
-
2256
- JS_ASSERT(c2 <= cs->length && c1 <= c2);
2257
-
2258
- c1 &= 0x7;
2259
- c2 &= 0x7;
2260
-
2261
- if (byteIndex1 == byteIndex2) {
2262
- cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2263
- } else {
2264
- cs->u.bits[byteIndex1] |= 0xFF << c1;
2265
- for (i = byteIndex1 + 1; i < byteIndex2; i++)
2266
- cs->u.bits[i] = 0xFF;
2267
- cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2268
- }
2269
- }
2270
-
2271
/*
 * Compile the source of the class into a RECharSet
 *
 * On entry charSet describes the class as a range of the regexp source
 * (u.src.startIndex / u.src.length, the text between '[' and ']').  This
 * function allocates a zeroed bitmap of (charSet->length >> 3) + 1 bytes,
 * stores it in charSet->u.bits and sets a bit for every character the class
 * source names, folding case when the regexp has JSREG_FOLD.
 *
 * Returns JS_TRUE on success.  On allocation failure it reports out of
 * memory, sets gData->ok to JS_FALSE and returns JS_FALSE.  Note that
 * charSet->converted has already been set to JS_TRUE by then.
 */
static JSBool
ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
{
    const jschar *src, *end;
    JSBool inRange = JS_FALSE;
    jschar rangeStart = 0;
    uintN byteLength, n;
    jschar c, thisCh;
    intN nDigits, i;

    JS_ASSERT(!charSet->converted);
    /*
     * Assert that startIndex and length points to chars inside [] inside
     * source string.
     */
    JS_ASSERT(1 <= charSet->u.src.startIndex);
    JS_ASSERT(charSet->u.src.startIndex
              < JSSTRING_LENGTH(gData->regexp->source));
    JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
                                       - 1 - charSet->u.src.startIndex);

    charSet->converted = JS_TRUE;
    /*
     * u.src is read into src/end before u.bits is assigned below.
     * NOTE(review): u looks like a union, in which case this order is
     * required -- confirm against the RECharSet declaration.
     */
    src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
    end = src + charSet->u.src.length;
    JS_ASSERT(src[-1] == '[');
    JS_ASSERT(end[0] == ']');

    byteLength = (charSet->length >> 3) + 1;
    charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
    if (!charSet->u.bits) {
        JS_ReportOutOfMemory(gData->cx);
        gData->ok = JS_FALSE;
        return JS_FALSE;
    }
    memset(charSet->u.bits, 0, byteLength);

    /* Empty class source: leave the bitmap all zero. */
    if (src == end)
        return JS_TRUE;

    /* A leading '^' only negates the class; the sense was recorded earlier. */
    if (*src == '^') {
        JS_ASSERT(charSet->sense == JS_FALSE);
        ++src;
    } else {
        JS_ASSERT(charSet->sense == JS_TRUE);
    }

    /*
     * Each iteration decodes one class atom into thisCh and then either adds
     * it alone or closes a pending "a-b" range.  Multi-character escapes
     * (\d, \s, \w and their negations) add their members directly and
     * `continue`, skipping the range handling.
     */
    while (src != end) {
        switch (*src) {
          case '\\':
            ++src;
            c = *src++;
            switch (c) {
              case 'b':
                thisCh = 0x8;
                break;
              case 'f':
                thisCh = 0xC;
                break;
              case 'n':
                thisCh = 0xA;
                break;
              case 'r':
                thisCh = 0xD;
                break;
              case 't':
                thisCh = 0x9;
                break;
              case 'v':
                thisCh = 0xB;
                break;
              case 'c':
                /*
                 * Control escape: the low five bits of the following word
                 * character.  Otherwise step back so the 'c' is rescanned
                 * and treat the backslash as a literal.
                 */
                if (src < end && JS_ISWORD(*src)) {
                    thisCh = (jschar)(*src++ & 0x1F);
                } else {
                    --src;
                    thisCh = '\\';
                }
                break;
              case 'x':
                nDigits = 2;
                goto lexHex;
              case 'u':
                nDigits = 4;
            lexHex:
                /* Accumulate up to nDigits hex digits, stopping at end. */
                n = 0;
                for (i = 0; (i < nDigits) && (src < end); i++) {
                    uintN digit;
                    c = *src++;
                    if (!isASCIIHexDigit(c, &digit)) {
                        /*
                         * Back off to accepting the original '\'
                         * as a literal
                         */
                        src -= i + 1;
                        n = '\\';
                        break;
                    }
                    n = (n << 4) | digit;
                }
                thisCh = (jschar)n;
                break;
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                /*
                 * This is a non-ECMA extension - decimal escapes (in this
                 * case, octal!) are supposed to be an error inside class
                 * ranges, but supported here for backwards compatibility.
                 */
                n = JS7_UNDEC(c);
                c = *src;
                if ('0' <= c && c <= '7') {
                    src++;
                    n = 8 * n + JS7_UNDEC(c);
                    c = *src;
                    if ('0' <= c && c <= '7') {
                        src++;
                        /* Keep a third digit only if the value fits in 0377. */
                        i = 8 * n + JS7_UNDEC(c);
                        if (i <= 0377)
                            n = i;
                        else
                            src--;
                    }
                }
                thisCh = (jschar)n;
                break;

              case 'd':
                AddCharacterRangeToCharSet(charSet, '0', '9');
                continue;   /* don't need range processing */
              case 'D':
                /* Everything below '0' and everything above '9'. */
                AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
                AddCharacterRangeToCharSet(charSet,
                                           (jschar)('9' + 1),
                                           (jschar)charSet->length);
                continue;
              case 's':
                for (i = (intN)charSet->length; i >= 0; i--)
                    if (JS_ISSPACE(i))
                        AddCharacterToCharSet(charSet, (jschar)i);
                continue;
              case 'S':
                for (i = (intN)charSet->length; i >= 0; i--)
                    if (!JS_ISSPACE(i))
                        AddCharacterToCharSet(charSet, (jschar)i);
                continue;
              case 'w':
                for (i = (intN)charSet->length; i >= 0; i--)
                    if (JS_ISWORD(i))
                        AddCharacterToCharSet(charSet, (jschar)i);
                continue;
              case 'W':
                for (i = (intN)charSet->length; i >= 0; i--)
                    if (!JS_ISWORD(i))
                        AddCharacterToCharSet(charSet, (jschar)i);
                continue;
              default:
                /* Any other escaped character stands for itself. */
                thisCh = c;
                break;

            }
            break;

          default:
            thisCh = *src++;
            break;

        }
        if (inRange) {
            /* thisCh closes the range opened at rangeStart. */
            if (gData->regexp->flags & JSREG_FOLD) {
                /* This int i shadows the function-level intN i. */
                int i;

                JS_ASSERT(rangeStart <= thisCh);
                for (i = rangeStart; i <= thisCh; i++) {
                    jschar uch, dch;

                    AddCharacterToCharSet(charSet, i);
                    uch = upcase(i);
                    dch = downcase(i);
                    if (i != uch)
                        AddCharacterToCharSet(charSet, uch);
                    if (i != dch)
                        AddCharacterToCharSet(charSet, dch);
                }
            } else {
                AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
            }
            inRange = JS_FALSE;
        } else {
            if (gData->regexp->flags & JSREG_FOLD) {
                AddCharacterToCharSet(charSet, upcase(thisCh));
                AddCharacterToCharSet(charSet, downcase(thisCh));
            } else {
                AddCharacterToCharSet(charSet, thisCh);
            }
            /*
             * A '-' opens a range only if at least one more character
             * follows it; a '-' in the last position is handled as an
             * ordinary character on the next iteration.
             */
            if (src < end - 1) {
                if (*src == '-') {
                    ++src;
                    inRange = JS_TRUE;
                    rangeStart = thisCh;
                }
            }
        }
    }
    return JS_TRUE;
}
2483
-
2484
- void
2485
- js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2486
- {
2487
- if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2488
- if (re->classList) {
2489
- uintN i;
2490
- for (i = 0; i < re->classCount; i++) {
2491
- if (re->classList[i].converted)
2492
- JS_free(cx, re->classList[i].u.bits);
2493
- re->classList[i].u.bits = NULL;
2494
- }
2495
- JS_free(cx, re->classList);
2496
- }
2497
- JS_free(cx, re);
2498
- }
2499
- }
2500
-
2501
- static JSBool
2502
- ReallocStateStack(REGlobalData *gData)
2503
- {
2504
- size_t limit = gData->stateStackLimit;
2505
- size_t sz = sizeof(REProgState) * limit;
2506
-
2507
- JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
2508
- if (!gData->stateStack) {
2509
- js_ReportOutOfScriptQuota(gData->cx);
2510
- gData->ok = JS_FALSE;
2511
- return JS_FALSE;
2512
- }
2513
- gData->stateStackLimit = limit + limit;
2514
- return JS_TRUE;
2515
- }
2516
-
2517
/*
 * Advance (data)->stateStackTop by one.  If the top then equals
 * stateStackLimit, grow the stack with ReallocStateStack; if that fails,
 * execute `return NULL` in the function that used the macro.  It is
 * therefore only usable inside functions that return a pointer.
 */
#define PUSH_STATE_STACK(data)                                                \
    JS_BEGIN_MACRO                                                            \
        ++(data)->stateStackTop;                                              \
        if ((data)->stateStackTop == (data)->stateStackLimit &&               \
            !ReallocStateStack((data))) {                                     \
            return NULL;                                                      \
        }                                                                     \
    JS_END_MACRO
2525
-
2526
- /*
2527
- * Apply the current op against the given input to see if it's going to match
2528
- * or fail. Return false if we don't get a match, true if we do. If updatecp is
2529
- * true, then update the current state's cp. Always update startpc to the next
2530
- * op.
2531
- */
2532
- static JS_INLINE REMatchState *
2533
- SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2534
- jsbytecode **startpc, JSBool updatecp)
2535
- {
2536
- REMatchState *result = NULL;
2537
- jschar matchCh;
2538
- size_t parenIndex;
2539
- size_t offset, length, index;
2540
- jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2541
- jschar *source;
2542
- const jschar *startcp = x->cp;
2543
- jschar ch;
2544
- RECharSet *charSet;
2545
-
2546
- #ifdef REGEXP_DEBUG
2547
- const char *opname = reop_names[op];
2548
- re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
2549
- gData->stateStackTop * 2, "", opname);
2550
- #endif
2551
- switch (op) {
2552
- case REOP_EMPTY:
2553
- result = x;
2554
- break;
2555
- case REOP_BOL:
2556
- if (x->cp != gData->cpbegin) {
2557
- if (!gData->cx->regExpStatics.multiline &&
2558
- !(gData->regexp->flags & JSREG_MULTILINE)) {
2559
- break;
2560
- }
2561
- if (!RE_IS_LINE_TERM(x->cp[-1]))
2562
- break;
2563
- }
2564
- result = x;
2565
- break;
2566
- case REOP_EOL:
2567
- if (x->cp != gData->cpend) {
2568
- if (!gData->cx->regExpStatics.multiline &&
2569
- !(gData->regexp->flags & JSREG_MULTILINE)) {
2570
- break;
2571
- }
2572
- if (!RE_IS_LINE_TERM(*x->cp))
2573
- break;
2574
- }
2575
- result = x;
2576
- break;
2577
- case REOP_WBDRY:
2578
- if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2579
- !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2580
- result = x;
2581
- }
2582
- break;
2583
- case REOP_WNONBDRY:
2584
- if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2585
- (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2586
- result = x;
2587
- }
2588
- break;
2589
- case REOP_DOT:
2590
- if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2591
- result = x;
2592
- result->cp++;
2593
- }
2594
- break;
2595
- case REOP_DIGIT:
2596
- if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2597
- result = x;
2598
- result->cp++;
2599
- }
2600
- break;
2601
- case REOP_NONDIGIT:
2602
- if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2603
- result = x;
2604
- result->cp++;
2605
- }
2606
- break;
2607
- case REOP_ALNUM:
2608
- if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2609
- result = x;
2610
- result->cp++;
2611
- }
2612
- break;
2613
- case REOP_NONALNUM:
2614
- if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2615
- result = x;
2616
- result->cp++;
2617
- }
2618
- break;
2619
- case REOP_SPACE:
2620
- if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2621
- result = x;
2622
- result->cp++;
2623
- }
2624
- break;
2625
- case REOP_NONSPACE:
2626
- if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2627
- result = x;
2628
- result->cp++;
2629
- }
2630
- break;
2631
- case REOP_BACKREF:
2632
- pc = ReadCompactIndex(pc, &parenIndex);
2633
- JS_ASSERT(parenIndex < gData->regexp->parenCount);
2634
- result = BackrefMatcher(gData, x, parenIndex);
2635
- break;
2636
- case REOP_FLAT:
2637
- pc = ReadCompactIndex(pc, &offset);
2638
- JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2639
- pc = ReadCompactIndex(pc, &length);
2640
- JS_ASSERT(1 <= length);
2641
- JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2642
- if (length <= (size_t)(gData->cpend - x->cp)) {
2643
- source = JSSTRING_CHARS(gData->regexp->source) + offset;
2644
- re_debug_chars(source, length);
2645
- for (index = 0; index != length; index++) {
2646
- if (source[index] != x->cp[index])
2647
- return NULL;
2648
- }
2649
- x->cp += length;
2650
- result = x;
2651
- }
2652
- break;
2653
- case REOP_FLAT1:
2654
- matchCh = *pc++;
2655
- re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
2656
- if (x->cp != gData->cpend && *x->cp == matchCh) {
2657
- result = x;
2658
- result->cp++;
2659
- }
2660
- break;
2661
- case REOP_FLATi:
2662
- pc = ReadCompactIndex(pc, &offset);
2663
- JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2664
- pc = ReadCompactIndex(pc, &length);
2665
- JS_ASSERT(1 <= length);
2666
- JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2667
- source = JSSTRING_CHARS(gData->regexp->source);
2668
- result = FlatNIMatcher(gData, x, source + offset, length);
2669
- break;
2670
- case REOP_FLAT1i:
2671
- matchCh = *pc++;
2672
- if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2673
- result = x;
2674
- result->cp++;
2675
- }
2676
- break;
2677
- case REOP_UCFLAT1:
2678
- matchCh = GET_ARG(pc);
2679
- re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
2680
- pc += ARG_LEN;
2681
- if (x->cp != gData->cpend && *x->cp == matchCh) {
2682
- result = x;
2683
- result->cp++;
2684
- }
2685
- break;
2686
- case REOP_UCFLAT1i:
2687
- matchCh = GET_ARG(pc);
2688
- pc += ARG_LEN;
2689
- if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2690
- result = x;
2691
- result->cp++;
2692
- }
2693
- break;
2694
- case REOP_CLASS:
2695
- pc = ReadCompactIndex(pc, &index);
2696
- JS_ASSERT(index < gData->regexp->classCount);
2697
- if (x->cp != gData->cpend) {
2698
- charSet = &gData->regexp->classList[index];
2699
- JS_ASSERT(charSet->converted);
2700
- ch = *x->cp;
2701
- index = ch >> 3;
2702
- if (charSet->length != 0 &&
2703
- ch <= charSet->length &&
2704
- (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2705
- result = x;
2706
- result->cp++;
2707
- }
2708
- }
2709
- break;
2710
- case REOP_NCLASS:
2711
- pc = ReadCompactIndex(pc, &index);
2712
- JS_ASSERT(index < gData->regexp->classCount);
2713
- if (x->cp != gData->cpend) {
2714
- charSet = &gData->regexp->classList[index];
2715
- JS_ASSERT(charSet->converted);
2716
- ch = *x->cp;
2717
- index = ch >> 3;
2718
- if (charSet->length == 0 ||
2719
- ch > charSet->length ||
2720
- !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2721
- result = x;
2722
- result->cp++;
2723
- }
2724
- }
2725
- break;
2726
-
2727
- default:
2728
- JS_ASSERT(JS_FALSE);
2729
- }
2730
- if (result) {
2731
- if (!updatecp)
2732
- x->cp = startcp;
2733
- *startpc = pc;
2734
- re_debug(" * ");
2735
- return result;
2736
- }
2737
- x->cp = startcp;
2738
- return NULL;
2739
- }
2740
-
2741
- static JS_INLINE REMatchState *
2742
- ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2743
- {
2744
- REMatchState *result = NULL;
2745
- REBackTrackData *backTrackData;
2746
- jsbytecode *nextpc, *testpc;
2747
- REOp nextop;
2748
- RECapture *cap;
2749
- REProgState *curState;
2750
- const jschar *startcp;
2751
- size_t parenIndex, k;
2752
- size_t parenSoFar = 0;
2753
-
2754
- jschar matchCh1, matchCh2;
2755
- RECharSet *charSet;
2756
-
2757
- JSBool anchor;
2758
- jsbytecode *pc = gData->regexp->program;
2759
- REOp op = (REOp) *pc++;
2760
-
2761
- /*
2762
- * If the first node is a simple match, step the index into the string
2763
- * until that match is made, or fail if it can't be found at all.
2764
- */
2765
- if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2766
- anchor = JS_FALSE;
2767
- while (x->cp <= gData->cpend) {
2768
- nextpc = pc; /* reset back to start each time */
2769
- result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2770
- if (result) {
2771
- anchor = JS_TRUE;
2772
- x = result;
2773
- pc = nextpc; /* accept skip to next opcode */
2774
- op = (REOp) *pc++;
2775
- JS_ASSERT(op < REOP_LIMIT);
2776
- break;
2777
- }
2778
- gData->skipped++;
2779
- x->cp++;
2780
- }
2781
- if (!anchor)
2782
- goto bad;
2783
- }
2784
-
2785
- for (;;) {
2786
- #ifdef REGEXP_DEBUG
2787
- const char *opname = reop_names[op];
2788
- re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
2789
- gData->stateStackTop * 2, "", opname);
2790
- #endif
2791
- if (REOP_IS_SIMPLE(op)) {
2792
- result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2793
- } else {
2794
- curState = &gData->stateStack[gData->stateStackTop];
2795
- switch (op) {
2796
- case REOP_END:
2797
- goto good;
2798
- case REOP_ALTPREREQ2:
2799
- nextpc = pc + GET_OFFSET(pc); /* start of next op */
2800
- pc += ARG_LEN;
2801
- matchCh2 = GET_ARG(pc);
2802
- pc += ARG_LEN;
2803
- k = GET_ARG(pc);
2804
- pc += ARG_LEN;
2805
-
2806
- if (x->cp != gData->cpend) {
2807
- if (*x->cp == matchCh2)
2808
- goto doAlt;
2809
-
2810
- charSet = &gData->regexp->classList[k];
2811
- if (!charSet->converted && !ProcessCharSet(gData, charSet))
2812
- goto bad;
2813
- matchCh1 = *x->cp;
2814
- k = matchCh1 >> 3;
2815
- if ((charSet->length == 0 ||
2816
- matchCh1 > charSet->length ||
2817
- !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2818
- charSet->sense) {
2819
- goto doAlt;
2820
- }
2821
- }
2822
- result = NULL;
2823
- break;
2824
-
2825
- case REOP_ALTPREREQ:
2826
- nextpc = pc + GET_OFFSET(pc); /* start of next op */
2827
- pc += ARG_LEN;
2828
- matchCh1 = GET_ARG(pc);
2829
- pc += ARG_LEN;
2830
- matchCh2 = GET_ARG(pc);
2831
- pc += ARG_LEN;
2832
- if (x->cp == gData->cpend ||
2833
- (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2834
- result = NULL;
2835
- break;
2836
- }
2837
- /* else false thru... */
2838
-
2839
- case REOP_ALT:
2840
- doAlt:
2841
- nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2842
- pc += ARG_LEN; /* start of this alternate */
2843
- curState->parenSoFar = parenSoFar;
2844
- PUSH_STATE_STACK(gData);
2845
- op = (REOp) *pc++;
2846
- startcp = x->cp;
2847
- if (REOP_IS_SIMPLE(op)) {
2848
- if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2849
- op = (REOp) *nextpc++;
2850
- pc = nextpc;
2851
- continue;
2852
- }
2853
- result = x;
2854
- op = (REOp) *pc++;
2855
- }
2856
- nextop = (REOp) *nextpc++;
2857
- if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2858
- goto bad;
2859
- continue;
2860
-
2861
- /*
2862
- * Occurs at (successful) end of REOP_ALT,
2863
- */
2864
- case REOP_JUMP:
2865
- /*
2866
- * If we have not gotten a result here, it is because of an
2867
- * empty match. Do the same thing REOP_EMPTY would do.
2868
- */
2869
- if (!result)
2870
- result = x;
2871
-
2872
- --gData->stateStackTop;
2873
- pc += GET_OFFSET(pc);
2874
- op = (REOp) *pc++;
2875
- continue;
2876
-
2877
- /*
2878
- * Occurs at last (successful) end of REOP_ALT,
2879
- */
2880
- case REOP_ENDALT:
2881
- /*
2882
- * If we have not gotten a result here, it is because of an
2883
- * empty match. Do the same thing REOP_EMPTY would do.
2884
- */
2885
- if (!result)
2886
- result = x;
2887
-
2888
- --gData->stateStackTop;
2889
- op = (REOp) *pc++;
2890
- continue;
2891
-
2892
- case REOP_LPAREN:
2893
- pc = ReadCompactIndex(pc, &parenIndex);
2894
- re_debug("[ %lu ]", (unsigned long) parenIndex);
2895
- JS_ASSERT(parenIndex < gData->regexp->parenCount);
2896
- if (parenIndex + 1 > parenSoFar)
2897
- parenSoFar = parenIndex + 1;
2898
- x->parens[parenIndex].index = x->cp - gData->cpbegin;
2899
- x->parens[parenIndex].length = 0;
2900
- op = (REOp) *pc++;
2901
- continue;
2902
-
2903
- case REOP_RPAREN:
2904
- {
2905
- ptrdiff_t delta;
2906
-
2907
- pc = ReadCompactIndex(pc, &parenIndex);
2908
- JS_ASSERT(parenIndex < gData->regexp->parenCount);
2909
- cap = &x->parens[parenIndex];
2910
- delta = x->cp - (gData->cpbegin + cap->index);
2911
- cap->length = (delta < 0) ? 0 : (size_t) delta;
2912
- op = (REOp) *pc++;
2913
-
2914
- if (!result)
2915
- result = x;
2916
- continue;
2917
- }
2918
- case REOP_ASSERT:
2919
- nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2920
- pc += ARG_LEN; /* start of ASSERT child */
2921
- op = (REOp) *pc++;
2922
- testpc = pc;
2923
- if (REOP_IS_SIMPLE(op) &&
2924
- !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
2925
- result = NULL;
2926
- break;
2927
- }
2928
- curState->u.assertion.top =
2929
- (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2930
- curState->u.assertion.sz = gData->cursz;
2931
- curState->index = x->cp - gData->cpbegin;
2932
- curState->parenSoFar = parenSoFar;
2933
- PUSH_STATE_STACK(gData);
2934
- if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2935
- nextpc, x, x->cp, 0, 0)) {
2936
- goto bad;
2937
- }
2938
- continue;
2939
-
2940
- case REOP_ASSERT_NOT:
2941
- nextpc = pc + GET_OFFSET(pc);
2942
- pc += ARG_LEN;
2943
- op = (REOp) *pc++;
2944
- testpc = pc;
2945
- if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2946
- SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
2947
- *testpc == REOP_ASSERTNOTTEST) {
2948
- result = NULL;
2949
- break;
2950
- }
2951
- curState->u.assertion.top
2952
- = (char *)gData->backTrackSP -
2953
- (char *)gData->backTrackStack;
2954
- curState->u.assertion.sz = gData->cursz;
2955
- curState->index = x->cp - gData->cpbegin;
2956
- curState->parenSoFar = parenSoFar;
2957
- PUSH_STATE_STACK(gData);
2958
- if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2959
- nextpc, x, x->cp, 0, 0)) {
2960
- goto bad;
2961
- }
2962
- continue;
2963
-
2964
- case REOP_ASSERTTEST:
2965
- --gData->stateStackTop;
2966
- --curState;
2967
- x->cp = gData->cpbegin + curState->index;
2968
- gData->backTrackSP =
2969
- (REBackTrackData *) ((char *)gData->backTrackStack +
2970
- curState->u.assertion.top);
2971
- gData->cursz = curState->u.assertion.sz;
2972
- if (result)
2973
- result = x;
2974
- break;
2975
-
2976
- case REOP_ASSERTNOTTEST:
2977
- --gData->stateStackTop;
2978
- --curState;
2979
- x->cp = gData->cpbegin + curState->index;
2980
- gData->backTrackSP =
2981
- (REBackTrackData *) ((char *)gData->backTrackStack +
2982
- curState->u.assertion.top);
2983
- gData->cursz = curState->u.assertion.sz;
2984
- result = (!result) ? x : NULL;
2985
- break;
2986
- case REOP_STAR:
2987
- curState->u.quantifier.min = 0;
2988
- curState->u.quantifier.max = (uintN)-1;
2989
- goto quantcommon;
2990
- case REOP_PLUS:
2991
- curState->u.quantifier.min = 1;
2992
- curState->u.quantifier.max = (uintN)-1;
2993
- goto quantcommon;
2994
- case REOP_OPT:
2995
- curState->u.quantifier.min = 0;
2996
- curState->u.quantifier.max = 1;
2997
- goto quantcommon;
2998
- case REOP_QUANT:
2999
- pc = ReadCompactIndex(pc, &k);
3000
- curState->u.quantifier.min = k;
3001
- pc = ReadCompactIndex(pc, &k);
3002
- /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
3003
- curState->u.quantifier.max = k - 1;
3004
- JS_ASSERT(curState->u.quantifier.min
3005
- <= curState->u.quantifier.max);
3006
- quantcommon:
3007
- if (curState->u.quantifier.max == 0) {
3008
- pc = pc + GET_OFFSET(pc);
3009
- op = (REOp) *pc++;
3010
- result = x;
3011
- continue;
3012
- }
3013
- /* Step over <next> */
3014
- nextpc = pc + ARG_LEN;
3015
- op = (REOp) *nextpc++;
3016
- startcp = x->cp;
3017
- if (REOP_IS_SIMPLE(op)) {
3018
- if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
3019
- if (curState->u.quantifier.min == 0)
3020
- result = x;
3021
- else
3022
- result = NULL;
3023
- pc = pc + GET_OFFSET(pc);
3024
- break;
3025
- }
3026
- op = (REOp) *nextpc++;
3027
- result = x;
3028
- }
3029
- curState->index = startcp - gData->cpbegin;
3030
- curState->continue_op = REOP_REPEAT;
3031
- curState->continue_pc = pc;
3032
- curState->parenSoFar = parenSoFar;
3033
- PUSH_STATE_STACK(gData);
3034
- if (curState->u.quantifier.min == 0 &&
3035
- !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
3036
- 0, 0)) {
3037
- goto bad;
3038
- }
3039
- pc = nextpc;
3040
- continue;
3041
-
3042
- case REOP_ENDCHILD: /* marks the end of a quantifier child */
3043
- pc = curState[-1].continue_pc;
3044
- op = (REOp) curState[-1].continue_op;
3045
-
3046
- if (!result)
3047
- result = x;
3048
- continue;
3049
-
3050
- case REOP_REPEAT:
3051
- --curState;
3052
- do {
3053
- --gData->stateStackTop;
3054
- if (!result) {
3055
- /* Failed, see if we have enough children. */
3056
- if (curState->u.quantifier.min == 0)
3057
- goto repeatDone;
3058
- goto break_switch;
3059
- }
3060
- if (curState->u.quantifier.min == 0 &&
3061
- x->cp == gData->cpbegin + curState->index) {
3062
- /* matched an empty string, that'll get us nowhere */
3063
- result = NULL;
3064
- goto break_switch;
3065
- }
3066
- if (curState->u.quantifier.min != 0)
3067
- curState->u.quantifier.min--;
3068
- if (curState->u.quantifier.max != (uintN) -1)
3069
- curState->u.quantifier.max--;
3070
- if (curState->u.quantifier.max == 0)
3071
- goto repeatDone;
3072
- nextpc = pc + ARG_LEN;
3073
- nextop = (REOp) *nextpc;
3074
- startcp = x->cp;
3075
- if (REOP_IS_SIMPLE(nextop)) {
3076
- nextpc++;
3077
- if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
3078
- if (curState->u.quantifier.min == 0)
3079
- goto repeatDone;
3080
- result = NULL;
3081
- goto break_switch;
3082
- }
3083
- result = x;
3084
- }
3085
- curState->index = startcp - gData->cpbegin;
3086
- PUSH_STATE_STACK(gData);
3087
- if (curState->u.quantifier.min == 0 &&
3088
- !PushBackTrackState(gData, REOP_REPEAT,
3089
- pc, x, startcp,
3090
- curState->parenSoFar,
3091
- parenSoFar -
3092
- curState->parenSoFar)) {
3093
- goto bad;
3094
- }
3095
- } while (*nextpc == REOP_ENDCHILD);
3096
- pc = nextpc;
3097
- op = (REOp) *pc++;
3098
- parenSoFar = curState->parenSoFar;
3099
- continue;
3100
-
3101
- repeatDone:
3102
- result = x;
3103
- pc += GET_OFFSET(pc);
3104
- goto break_switch;
3105
-
3106
- case REOP_MINIMALSTAR:
3107
- curState->u.quantifier.min = 0;
3108
- curState->u.quantifier.max = (uintN)-1;
3109
- goto minimalquantcommon;
3110
- case REOP_MINIMALPLUS:
3111
- curState->u.quantifier.min = 1;
3112
- curState->u.quantifier.max = (uintN)-1;
3113
- goto minimalquantcommon;
3114
- case REOP_MINIMALOPT:
3115
- curState->u.quantifier.min = 0;
3116
- curState->u.quantifier.max = 1;
3117
- goto minimalquantcommon;
3118
- case REOP_MINIMALQUANT:
3119
- pc = ReadCompactIndex(pc, &k);
3120
- curState->u.quantifier.min = k;
3121
- pc = ReadCompactIndex(pc, &k);
3122
- /* See REOP_QUANT comments about k - 1. */
3123
- curState->u.quantifier.max = k - 1;
3124
- JS_ASSERT(curState->u.quantifier.min
3125
- <= curState->u.quantifier.max);
3126
- minimalquantcommon:
3127
- curState->index = x->cp - gData->cpbegin;
3128
- curState->parenSoFar = parenSoFar;
3129
- PUSH_STATE_STACK(gData);
3130
- if (curState->u.quantifier.min != 0) {
3131
- curState->continue_op = REOP_MINIMALREPEAT;
3132
- curState->continue_pc = pc;
3133
- /* step over <next> */
3134
- pc += OFFSET_LEN;
3135
- op = (REOp) *pc++;
3136
- } else {
3137
- if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3138
- pc, x, x->cp, 0, 0)) {
3139
- goto bad;
3140
- }
3141
- --gData->stateStackTop;
3142
- pc = pc + GET_OFFSET(pc);
3143
- op = (REOp) *pc++;
3144
- }
3145
- continue;
3146
-
3147
- case REOP_MINIMALREPEAT:
3148
- --gData->stateStackTop;
3149
- --curState;
3150
-
3151
- re_debug("{%d,%d}", curState->u.quantifier.min,
3152
- curState->u.quantifier.max);
3153
- #define PREPARE_REPEAT() \
3154
- JS_BEGIN_MACRO \
3155
- curState->index = x->cp - gData->cpbegin; \
3156
- curState->continue_op = REOP_MINIMALREPEAT; \
3157
- curState->continue_pc = pc; \
3158
- pc += ARG_LEN; \
3159
- for (k = curState->parenSoFar; k < parenSoFar; k++) \
3160
- x->parens[k].index = -1; \
3161
- PUSH_STATE_STACK(gData); \
3162
- op = (REOp) *pc++; \
3163
- JS_ASSERT(op < REOP_LIMIT); \
3164
- JS_END_MACRO
3165
-
3166
- if (!result) {
3167
- re_debug(" - ");
3168
- /*
3169
- * Non-greedy failure - try to consume another child.
3170
- */
3171
- if (curState->u.quantifier.max == (uintN) -1 ||
3172
- curState->u.quantifier.max > 0) {
3173
- PREPARE_REPEAT();
3174
- continue;
3175
- }
3176
- /* Don't need to adjust pc since we're going to pop. */
3177
- break;
3178
- }
3179
- if (curState->u.quantifier.min == 0 &&
3180
- x->cp == gData->cpbegin + curState->index) {
3181
- /* Matched an empty string, that'll get us nowhere. */
3182
- result = NULL;
3183
- break;
3184
- }
3185
- if (curState->u.quantifier.min != 0)
3186
- curState->u.quantifier.min--;
3187
- if (curState->u.quantifier.max != (uintN) -1)
3188
- curState->u.quantifier.max--;
3189
- if (curState->u.quantifier.min != 0) {
3190
- PREPARE_REPEAT();
3191
- continue;
3192
- }
3193
- curState->index = x->cp - gData->cpbegin;
3194
- curState->parenSoFar = parenSoFar;
3195
- PUSH_STATE_STACK(gData);
3196
- if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3197
- pc, x, x->cp,
3198
- curState->parenSoFar,
3199
- parenSoFar - curState->parenSoFar)) {
3200
- goto bad;
3201
- }
3202
- --gData->stateStackTop;
3203
- pc = pc + GET_OFFSET(pc);
3204
- op = (REOp) *pc++;
3205
- JS_ASSERT(op < REOP_LIMIT);
3206
- continue;
3207
- default:
3208
- JS_ASSERT(JS_FALSE);
3209
- result = NULL;
3210
- }
3211
- break_switch:;
3212
- }
3213
-
3214
- /*
3215
- * If the match failed and there's a backtrack option, take it.
3216
- * Otherwise this is a complete and utter failure.
3217
- */
3218
- if (!result) {
3219
- if (gData->cursz == 0)
3220
- return NULL;
3221
- if (!JS_CHECK_OPERATION_LIMIT(gData->cx, JSOW_JUMP)) {
3222
- gData->ok = JS_FALSE;
3223
- return NULL;
3224
- }
3225
-
3226
- /* Potentially detect explosive regex here. */
3227
- gData->backTrackCount++;
3228
- if (gData->backTrackLimit &&
3229
- gData->backTrackCount >= gData->backTrackLimit) {
3230
- JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3231
- JSMSG_REGEXP_TOO_COMPLEX);
3232
- gData->ok = JS_FALSE;
3233
- return NULL;
3234
- }
3235
-
3236
- backTrackData = gData->backTrackSP;
3237
- gData->cursz = backTrackData->sz;
3238
- gData->backTrackSP =
3239
- (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3240
- x->cp = backTrackData->cp;
3241
- pc = backTrackData->backtrack_pc;
3242
- op = (REOp) backTrackData->backtrack_op;
3243
- JS_ASSERT(op < REOP_LIMIT);
3244
- gData->stateStackTop = backTrackData->saveStateStackTop;
3245
- JS_ASSERT(gData->stateStackTop);
3246
-
3247
- memcpy(gData->stateStack, backTrackData + 1,
3248
- sizeof(REProgState) * backTrackData->saveStateStackTop);
3249
- curState = &gData->stateStack[gData->stateStackTop - 1];
3250
-
3251
- if (backTrackData->parenCount) {
3252
- memcpy(&x->parens[backTrackData->parenIndex],
3253
- (char *)(backTrackData + 1) +
3254
- sizeof(REProgState) * backTrackData->saveStateStackTop,
3255
- sizeof(RECapture) * backTrackData->parenCount);
3256
- parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3257
- } else {
3258
- for (k = curState->parenSoFar; k < parenSoFar; k++)
3259
- x->parens[k].index = -1;
3260
- parenSoFar = curState->parenSoFar;
3261
- }
3262
-
3263
- re_debug("\tBT_Pop: %ld,%ld",
3264
- (unsigned long) backTrackData->parenIndex,
3265
- (unsigned long) backTrackData->parenCount);
3266
- continue;
3267
- }
3268
- x = result;
3269
-
3270
- /*
3271
- * Continue with the expression.
3272
- */
3273
- op = (REOp)*pc++;
3274
- JS_ASSERT(op < REOP_LIMIT);
3275
- }
3276
-
3277
- bad:
3278
- re_debug("\n");
3279
- return NULL;
3280
-
3281
- good:
3282
- re_debug("\n");
3283
- return x;
3284
- }
3285
-
3286
- static REMatchState *
3287
- MatchRegExp(REGlobalData *gData, REMatchState *x)
3288
- {
3289
- REMatchState *result;
3290
- const jschar *cp = x->cp;
3291
- const jschar *cp2;
3292
- uintN j;
3293
-
3294
- /*
3295
- * Have to include the position beyond the last character
3296
- * in order to detect end-of-input/line condition.
3297
- */
3298
- for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3299
- gData->skipped = cp2 - cp;
3300
- x->cp = cp2;
3301
- for (j = 0; j < gData->regexp->parenCount; j++)
3302
- x->parens[j].index = -1;
3303
- result = ExecuteREBytecode(gData, x);
3304
- if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3305
- return result;
3306
- gData->backTrackSP = gData->backTrackStack;
3307
- gData->cursz = 0;
3308
- gData->stateStackTop = 0;
3309
- cp2 = cp + gData->skipped;
3310
- }
3311
- return NULL;
3312
- }
3313
-
3314
- #define MIN_BACKTRACK_LIMIT 400000
3315
-
3316
- static REMatchState *
3317
- InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3318
- {
3319
- REMatchState *result;
3320
- uintN i;
3321
-
3322
- gData->backTrackStackSize = INITIAL_BACKTRACK;
3323
- JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3324
- &gData->pool,
3325
- INITIAL_BACKTRACK);
3326
- if (!gData->backTrackStack)
3327
- goto bad;
3328
-
3329
- gData->backTrackSP = gData->backTrackStack;
3330
- gData->cursz = 0;
3331
- gData->backTrackCount = 0;
3332
- gData->backTrackLimit = 0;
3333
- if (JS_GetOptions(cx) & JSOPTION_RELIMIT) {
3334
- gData->backTrackLimit = length * length * length; /* O(n^3) */
3335
- if (gData->backTrackLimit < MIN_BACKTRACK_LIMIT)
3336
- gData->backTrackLimit = MIN_BACKTRACK_LIMIT;
3337
- }
3338
-
3339
- gData->stateStackLimit = INITIAL_STATESTACK;
3340
- JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3341
- &gData->pool,
3342
- sizeof(REProgState) * INITIAL_STATESTACK);
3343
- if (!gData->stateStack)
3344
- goto bad;
3345
-
3346
- gData->stateStackTop = 0;
3347
- gData->cx = cx;
3348
- gData->regexp = re;
3349
- gData->ok = JS_TRUE;
3350
-
3351
- JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3352
- &gData->pool,
3353
- offsetof(REMatchState, parens)
3354
- + re->parenCount * sizeof(RECapture));
3355
- if (!result)
3356
- goto bad;
3357
-
3358
- for (i = 0; i < re->classCount; i++) {
3359
- if (!re->classList[i].converted &&
3360
- !ProcessCharSet(gData, &re->classList[i])) {
3361
- return NULL;
3362
- }
3363
- }
3364
-
3365
- return result;
3366
-
3367
- bad:
3368
- js_ReportOutOfScriptQuota(cx);
3369
- gData->ok = JS_FALSE;
3370
- return NULL;
3371
- }
3372
-
3373
- JSBool
3374
- js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3375
- JSBool test, jsval *rval)
3376
- {
3377
- REGlobalData gData;
3378
- REMatchState *x, *result;
3379
-
3380
- const jschar *cp, *ep;
3381
- size_t i, length, start;
3382
- JSSubString *morepar;
3383
- JSBool ok;
3384
- JSRegExpStatics *res;
3385
- ptrdiff_t matchlen;
3386
- uintN num, morenum;
3387
- JSString *parstr, *matchstr;
3388
- JSObject *obj;
3389
-
3390
- RECapture *parsub = NULL;
3391
-
3392
- /*
3393
- * It's safe to load from cp because JSStrings have a zero at the end,
3394
- * and we never let cp get beyond cpend.
3395
- */
3396
- start = *indexp;
3397
- JSSTRING_CHARS_AND_LENGTH(str, cp, length);
3398
- if (start > length)
3399
- start = length;
3400
- gData.cpbegin = cp;
3401
- gData.cpend = cp + length;
3402
- cp += start;
3403
- gData.start = start;
3404
- gData.skipped = 0;
3405
-
3406
- /*
3407
- * To avoid multiple allocations in InitMatch(), the arena size parameter
3408
- * should be at least as big as:
3409
- * INITIAL_BACKTRACK
3410
- * + (sizeof(REProgState) * INITIAL_STATESTACK)
3411
- * + (offsetof(REMatchState, parens) + avgParanSize * sizeof(RECapture))
3412
- */
3413
- JS_INIT_ARENA_POOL(&gData.pool, "RegExpPool", 12288, 4,
3414
- &cx->scriptStackQuota);
3415
- x = InitMatch(cx, &gData, re, length);
3416
-
3417
- if (!x) {
3418
- ok = JS_FALSE;
3419
- goto out;
3420
- }
3421
- x->cp = cp;
3422
-
3423
- /*
3424
- * Call the recursive matcher to do the real work. Return null on mismatch
3425
- * whether testing or not. On match, return an extended Array object.
3426
- */
3427
- result = MatchRegExp(&gData, x);
3428
- ok = gData.ok;
3429
- if (!ok)
3430
- goto out;
3431
- if (!result) {
3432
- *rval = JSVAL_NULL;
3433
- goto out;
3434
- }
3435
- cp = result->cp;
3436
- i = cp - gData.cpbegin;
3437
- *indexp = i;
3438
- matchlen = i - (start + gData.skipped);
3439
- ep = cp;
3440
- cp -= matchlen;
3441
-
3442
- if (test) {
3443
- /*
3444
- * Testing for a match and updating cx->regExpStatics: don't allocate
3445
- * an array object, do return true.
3446
- */
3447
- *rval = JSVAL_TRUE;
3448
-
3449
- /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3450
- obj = NULL;
3451
- } else {
3452
- /*
3453
- * The array returned on match has element 0 bound to the matched
3454
- * string, elements 1 through state.parenCount bound to the paren
3455
- * matches, an index property telling the length of the left context,
3456
- * and an input property referring to the input string.
3457
- */
3458
- obj = js_NewSlowArrayObject(cx);
3459
- if (!obj) {
3460
- ok = JS_FALSE;
3461
- goto out;
3462
- }
3463
- *rval = OBJECT_TO_JSVAL(obj);
3464
-
3465
- #define DEFVAL(val, id) { \
3466
- ok = js_DefineProperty(cx, obj, id, val, \
3467
- JS_PropertyStub, JS_PropertyStub, \
3468
- JSPROP_ENUMERATE, NULL); \
3469
- if (!ok) { \
3470
- cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
3471
- cx->weakRoots.newborn[GCX_STRING] = NULL; \
3472
- goto out; \
3473
- } \
3474
- }
3475
-
3476
- matchstr = js_NewStringCopyN(cx, cp, matchlen);
3477
- if (!matchstr) {
3478
- cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3479
- ok = JS_FALSE;
3480
- goto out;
3481
- }
3482
- DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
3483
- }
3484
-
3485
- res = &cx->regExpStatics;
3486
- res->input = str;
3487
- res->parenCount = re->parenCount;
3488
- if (re->parenCount == 0) {
3489
- res->lastParen = js_EmptySubString;
3490
- } else {
3491
- for (num = 0; num < re->parenCount; num++) {
3492
- parsub = &result->parens[num];
3493
- if (num < 9) {
3494
- if (parsub->index == -1) {
3495
- res->parens[num].chars = NULL;
3496
- res->parens[num].length = 0;
3497
- } else {
3498
- res->parens[num].chars = gData.cpbegin + parsub->index;
3499
- res->parens[num].length = parsub->length;
3500
- }
3501
- } else {
3502
- morenum = num - 9;
3503
- morepar = res->moreParens;
3504
- if (!morepar) {
3505
- res->moreLength = 10;
3506
- morepar = (JSSubString*)
3507
- JS_malloc(cx, 10 * sizeof(JSSubString));
3508
- } else if (morenum >= res->moreLength) {
3509
- res->moreLength += 10;
3510
- morepar = (JSSubString*)
3511
- JS_realloc(cx, morepar,
3512
- res->moreLength * sizeof(JSSubString));
3513
- }
3514
- if (!morepar) {
3515
- cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3516
- cx->weakRoots.newborn[GCX_STRING] = NULL;
3517
- ok = JS_FALSE;
3518
- goto out;
3519
- }
3520
- res->moreParens = morepar;
3521
- if (parsub->index == -1) {
3522
- morepar[morenum].chars = NULL;
3523
- morepar[morenum].length = 0;
3524
- } else {
3525
- morepar[morenum].chars = gData.cpbegin + parsub->index;
3526
- morepar[morenum].length = parsub->length;
3527
- }
3528
- }
3529
- if (test)
3530
- continue;
3531
- if (parsub->index == -1) {
3532
- ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3533
- JSVAL_VOID, NULL, NULL,
3534
- JSPROP_ENUMERATE, NULL);
3535
- } else {
3536
- parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3537
- parsub->length);
3538
- if (!parstr) {
3539
- cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3540
- cx->weakRoots.newborn[GCX_STRING] = NULL;
3541
- ok = JS_FALSE;
3542
- goto out;
3543
- }
3544
- ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3545
- STRING_TO_JSVAL(parstr), NULL, NULL,
3546
- JSPROP_ENUMERATE, NULL);
3547
- }
3548
- if (!ok) {
3549
- cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3550
- cx->weakRoots.newborn[GCX_STRING] = NULL;
3551
- goto out;
3552
- }
3553
- }
3554
- if (parsub->index == -1) {
3555
- res->lastParen = js_EmptySubString;
3556
- } else {
3557
- res->lastParen.chars = gData.cpbegin + parsub->index;
3558
- res->lastParen.length = parsub->length;
3559
- }
3560
- }
3561
-
3562
- if (!test) {
3563
- /*
3564
- * Define the index and input properties last for better for/in loop
3565
- * order (so they come after the elements).
3566
- */
3567
- DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3568
- ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
3569
- DEFVAL(STRING_TO_JSVAL(str),
3570
- ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
3571
- }
3572
-
3573
- #undef DEFVAL
3574
-
3575
- res->lastMatch.chars = cp;
3576
- res->lastMatch.length = matchlen;
3577
-
3578
- /*
3579
- * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3580
- *
3581
- * js1.3 "hi", "hi there" "hihitherehi therebye"
3582
- */
3583
- res->leftContext.chars = JSSTRING_CHARS(str);
3584
- res->leftContext.length = start + gData.skipped;
3585
- res->rightContext.chars = ep;
3586
- res->rightContext.length = gData.cpend - ep;
3587
-
3588
- out:
3589
- JS_FinishArenaPool(&gData.pool);
3590
- return ok;
3591
- }
3592
-
3593
- /************************************************************************/
3594
-
3595
- enum regexp_tinyid {
3596
- REGEXP_SOURCE = -1,
3597
- REGEXP_GLOBAL = -2,
3598
- REGEXP_IGNORE_CASE = -3,
3599
- REGEXP_LAST_INDEX = -4,
3600
- REGEXP_MULTILINE = -5,
3601
- REGEXP_STICKY = -6
3602
- };
3603
-
3604
- #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED)
3605
- #define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY)
3606
-
3607
- static JSPropertySpec regexp_props[] = {
3608
- {"source", REGEXP_SOURCE, RO_REGEXP_PROP_ATTRS,0,0},
3609
- {"global", REGEXP_GLOBAL, RO_REGEXP_PROP_ATTRS,0,0},
3610
- {"ignoreCase", REGEXP_IGNORE_CASE, RO_REGEXP_PROP_ATTRS,0,0},
3611
- {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
3612
- {"multiline", REGEXP_MULTILINE, RO_REGEXP_PROP_ATTRS,0,0},
3613
- {"sticky", REGEXP_STICKY, RO_REGEXP_PROP_ATTRS,0,0},
3614
- {0,0,0,0,0}
3615
- };
3616
-
3617
- static JSBool
3618
- regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3619
- {
3620
- jsint slot;
3621
- JSRegExp *re;
3622
-
3623
- if (!JSVAL_IS_INT(id))
3624
- return JS_TRUE;
3625
- while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
3626
- obj = OBJ_GET_PROTO(cx, obj);
3627
- if (!obj)
3628
- return JS_TRUE;
3629
- }
3630
- slot = JSVAL_TO_INT(id);
3631
- if (slot == REGEXP_LAST_INDEX)
3632
- return JS_GetReservedSlot(cx, obj, 0, vp);
3633
-
3634
- JS_LOCK_OBJ(cx, obj);
3635
- re = (JSRegExp *) JS_GetPrivate(cx, obj);
3636
- if (re) {
3637
- switch (slot) {
3638
- case REGEXP_SOURCE:
3639
- *vp = STRING_TO_JSVAL(re->source);
3640
- break;
3641
- case REGEXP_GLOBAL:
3642
- *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3643
- break;
3644
- case REGEXP_IGNORE_CASE:
3645
- *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3646
- break;
3647
- case REGEXP_MULTILINE:
3648
- *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3649
- break;
3650
- case REGEXP_STICKY:
3651
- *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0);
3652
- break;
3653
- }
3654
- }
3655
- JS_UNLOCK_OBJ(cx, obj);
3656
- return JS_TRUE;
3657
- }
3658
-
3659
- static JSBool
3660
- regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3661
- {
3662
- JSBool ok;
3663
- jsint slot;
3664
- jsdouble lastIndex;
3665
-
3666
- ok = JS_TRUE;
3667
- if (!JSVAL_IS_INT(id))
3668
- return ok;
3669
- while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
3670
- obj = OBJ_GET_PROTO(cx, obj);
3671
- if (!obj)
3672
- return JS_TRUE;
3673
- }
3674
- slot = JSVAL_TO_INT(id);
3675
- if (slot == REGEXP_LAST_INDEX) {
3676
- if (!JS_ValueToNumber(cx, *vp, &lastIndex))
3677
- return JS_FALSE;
3678
- lastIndex = js_DoubleToInteger(lastIndex);
3679
- ok = JS_NewNumberValue(cx, lastIndex, vp) &&
3680
- JS_SetReservedSlot(cx, obj, 0, *vp);
3681
- }
3682
- return ok;
3683
- }
3684
-
3685
- /*
3686
- * RegExp class static properties and their Perl counterparts:
3687
- *
3688
- * RegExp.input $_
3689
- * RegExp.multiline $*
3690
- * RegExp.lastMatch $&
3691
- * RegExp.lastParen $+
3692
- * RegExp.leftContext $`
3693
- * RegExp.rightContext $'
3694
- */
3695
- enum regexp_static_tinyid {
3696
- REGEXP_STATIC_INPUT = -1,
3697
- REGEXP_STATIC_MULTILINE = -2,
3698
- REGEXP_STATIC_LAST_MATCH = -3,
3699
- REGEXP_STATIC_LAST_PAREN = -4,
3700
- REGEXP_STATIC_LEFT_CONTEXT = -5,
3701
- REGEXP_STATIC_RIGHT_CONTEXT = -6
3702
- };
3703
-
3704
- JSBool
3705
- js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3706
- {
3707
- JS_ClearRegExpStatics(cx);
3708
- return js_AddRoot(cx, &res->input, "res->input");
3709
- }
3710
-
3711
- void
3712
- js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3713
- {
3714
- if (res->moreParens) {
3715
- JS_free(cx, res->moreParens);
3716
- res->moreParens = NULL;
3717
- }
3718
- js_RemoveRoot(cx->runtime, &res->input);
3719
- }
3720
-
3721
- static JSBool
3722
- regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3723
- {
3724
- jsint slot;
3725
- JSRegExpStatics *res;
3726
- JSString *str;
3727
- JSSubString *sub;
3728
-
3729
- res = &cx->regExpStatics;
3730
- if (!JSVAL_IS_INT(id))
3731
- return JS_TRUE;
3732
- slot = JSVAL_TO_INT(id);
3733
- switch (slot) {
3734
- case REGEXP_STATIC_INPUT:
3735
- *vp = res->input ? STRING_TO_JSVAL(res->input)
3736
- : JS_GetEmptyStringValue(cx);
3737
- return JS_TRUE;
3738
- case REGEXP_STATIC_MULTILINE:
3739
- *vp = BOOLEAN_TO_JSVAL(res->multiline);
3740
- return JS_TRUE;
3741
- case REGEXP_STATIC_LAST_MATCH:
3742
- sub = &res->lastMatch;
3743
- break;
3744
- case REGEXP_STATIC_LAST_PAREN:
3745
- sub = &res->lastParen;
3746
- break;
3747
- case REGEXP_STATIC_LEFT_CONTEXT:
3748
- sub = &res->leftContext;
3749
- break;
3750
- case REGEXP_STATIC_RIGHT_CONTEXT:
3751
- sub = &res->rightContext;
3752
- break;
3753
- default:
3754
- sub = REGEXP_PAREN_SUBSTRING(res, slot);
3755
- break;
3756
- }
3757
- str = js_NewStringCopyN(cx, sub->chars, sub->length);
3758
- if (!str)
3759
- return JS_FALSE;
3760
- *vp = STRING_TO_JSVAL(str);
3761
- return JS_TRUE;
3762
- }
3763
-
3764
- static JSBool
3765
- regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3766
- {
3767
- JSRegExpStatics *res;
3768
-
3769
- if (!JSVAL_IS_INT(id))
3770
- return JS_TRUE;
3771
- res = &cx->regExpStatics;
3772
- /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3773
- if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3774
- if (!JSVAL_IS_STRING(*vp) &&
3775
- !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3776
- return JS_FALSE;
3777
- }
3778
- res->input = JSVAL_TO_STRING(*vp);
3779
- } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3780
- if (!JSVAL_IS_BOOLEAN(*vp) &&
3781
- !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3782
- return JS_FALSE;
3783
- }
3784
- res->multiline = JSVAL_TO_BOOLEAN(*vp);
3785
- }
3786
- return JS_TRUE;
3787
- }
3788
- #define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
3789
- #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
3790
-
3791
- static JSPropertySpec regexp_static_props[] = {
3792
- {"input",
3793
- REGEXP_STATIC_INPUT,
3794
- REGEXP_STATIC_PROP_ATTRS,
3795
- regexp_static_getProperty, regexp_static_setProperty},
3796
- {"multiline",
3797
- REGEXP_STATIC_MULTILINE,
3798
- REGEXP_STATIC_PROP_ATTRS,
3799
- regexp_static_getProperty, regexp_static_setProperty},
3800
- {"lastMatch",
3801
- REGEXP_STATIC_LAST_MATCH,
3802
- RO_REGEXP_STATIC_PROP_ATTRS,
3803
- regexp_static_getProperty, regexp_static_getProperty},
3804
- {"lastParen",
3805
- REGEXP_STATIC_LAST_PAREN,
3806
- RO_REGEXP_STATIC_PROP_ATTRS,
3807
- regexp_static_getProperty, regexp_static_getProperty},
3808
- {"leftContext",
3809
- REGEXP_STATIC_LEFT_CONTEXT,
3810
- RO_REGEXP_STATIC_PROP_ATTRS,
3811
- regexp_static_getProperty, regexp_static_getProperty},
3812
- {"rightContext",
3813
- REGEXP_STATIC_RIGHT_CONTEXT,
3814
- RO_REGEXP_STATIC_PROP_ATTRS,
3815
- regexp_static_getProperty, regexp_static_getProperty},
3816
-
3817
- /* XXX should have block scope and local $1, etc. */
3818
- {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS,
3819
- regexp_static_getProperty, regexp_static_getProperty},
3820
- {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS,
3821
- regexp_static_getProperty, regexp_static_getProperty},
3822
- {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS,
3823
- regexp_static_getProperty, regexp_static_getProperty},
3824
- {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS,
3825
- regexp_static_getProperty, regexp_static_getProperty},
3826
- {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS,
3827
- regexp_static_getProperty, regexp_static_getProperty},
3828
- {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS,
3829
- regexp_static_getProperty, regexp_static_getProperty},
3830
- {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS,
3831
- regexp_static_getProperty, regexp_static_getProperty},
3832
- {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS,
3833
- regexp_static_getProperty, regexp_static_getProperty},
3834
- {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS,
3835
- regexp_static_getProperty, regexp_static_getProperty},
3836
-
3837
- {0,0,0,0,0}
3838
- };
3839
-
3840
- static void
3841
- regexp_finalize(JSContext *cx, JSObject *obj)
3842
- {
3843
- JSRegExp *re;
3844
-
3845
- re = (JSRegExp *) JS_GetPrivate(cx, obj);
3846
- if (!re)
3847
- return;
3848
- js_DestroyRegExp(cx, re);
3849
- }
3850
-
3851
- /* Forward static prototype. */
3852
- static JSBool
3853
- regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3854
- JSBool test, jsval *rval);
3855
-
3856
- static JSBool
3857
- regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3858
- {
3859
- return regexp_exec_sub(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv,
3860
- JS_FALSE, rval);
3861
- }
3862
-
3863
- #if JS_HAS_XDR
3864
-
3865
- #include "jsxdrapi.h"
3866
-
3867
- static JSBool
3868
- regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
3869
- {
3870
- JSRegExp *re;
3871
- JSString *source;
3872
- uint32 flagsword;
3873
- JSObject *obj;
3874
-
3875
- if (xdr->mode == JSXDR_ENCODE) {
3876
- re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
3877
- if (!re)
3878
- return JS_FALSE;
3879
- source = re->source;
3880
- flagsword = (uint32)re->flags;
3881
- }
3882
- if (!JS_XDRString(xdr, &source) ||
3883
- !JS_XDRUint32(xdr, &flagsword)) {
3884
- return JS_FALSE;
3885
- }
3886
- if (xdr->mode == JSXDR_DECODE) {
3887
- obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL, 0);
3888
- if (!obj)
3889
- return JS_FALSE;
3890
- STOBJ_SET_PARENT(obj, NULL);
3891
- STOBJ_SET_PROTO(obj, NULL);
3892
- re = js_NewRegExp(xdr->cx, NULL, source, (uint8)flagsword, JS_FALSE);
3893
- if (!re)
3894
- return JS_FALSE;
3895
- if (!JS_SetPrivate(xdr->cx, obj, re) ||
3896
- !js_SetLastIndex(xdr->cx, obj, 0)) {
3897
- js_DestroyRegExp(xdr->cx, re);
3898
- return JS_FALSE;
3899
- }
3900
- *objp = obj;
3901
- }
3902
- return JS_TRUE;
3903
- }
3904
-
3905
- #else /* !JS_HAS_XDR */
3906
-
3907
- #define regexp_xdrObject NULL
3908
-
3909
- #endif /* !JS_HAS_XDR */
3910
-
3911
- static void
3912
- regexp_trace(JSTracer *trc, JSObject *obj)
3913
- {
3914
- JSRegExp *re;
3915
-
3916
- re = (JSRegExp *) JS_GetPrivate(trc->context, obj);
3917
- if (re && re->source)
3918
- JS_CALL_STRING_TRACER(trc, re->source, "source");
3919
- }
3920
-
3921
- JSClass js_RegExpClass = {
3922
- js_RegExp_str,
3923
- JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
3924
- JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
3925
- JS_PropertyStub, JS_PropertyStub,
3926
- regexp_getProperty, regexp_setProperty,
3927
- JS_EnumerateStub, JS_ResolveStub,
3928
- JS_ConvertStub, regexp_finalize,
3929
- NULL, NULL,
3930
- regexp_call, NULL,
3931
- regexp_xdrObject, NULL,
3932
- JS_CLASS_TRACE(regexp_trace), 0
3933
- };
3934
-
3935
- static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3936
-
3937
- JSBool
3938
- js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp)
3939
- {
3940
- JSRegExp *re;
3941
- const jschar *source;
3942
- jschar *chars;
3943
- size_t length, nflags;
3944
- uintN flags;
3945
- JSString *str;
3946
-
3947
- if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2))
3948
- return JS_FALSE;
3949
- JS_LOCK_OBJ(cx, obj);
3950
- re = (JSRegExp *) JS_GetPrivate(cx, obj);
3951
- if (!re) {
3952
- JS_UNLOCK_OBJ(cx, obj);
3953
- *vp = STRING_TO_JSVAL(cx->runtime->emptyString);
3954
- return JS_TRUE;
3955
- }
3956
-
3957
- JSSTRING_CHARS_AND_LENGTH(re->source, source, length);
3958
- if (length == 0) {
3959
- source = empty_regexp_ucstr;
3960
- length = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1;
3961
- }
3962
- length += 2;
3963
- nflags = 0;
3964
- for (flags = re->flags; flags != 0; flags &= flags - 1)
3965
- nflags++;
3966
- chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
3967
- if (!chars) {
3968
- JS_UNLOCK_OBJ(cx, obj);
3969
- return JS_FALSE;
3970
- }
3971
-
3972
- chars[0] = '/';
3973
- js_strncpy(&chars[1], source, length - 2);
3974
- chars[length-1] = '/';
3975
- if (nflags) {
3976
- if (re->flags & JSREG_GLOB)
3977
- chars[length++] = 'g';
3978
- if (re->flags & JSREG_FOLD)
3979
- chars[length++] = 'i';
3980
- if (re->flags & JSREG_MULTILINE)
3981
- chars[length++] = 'm';
3982
- if (re->flags & JSREG_STICKY)
3983
- chars[length++] = 'y';
3984
- }
3985
- JS_UNLOCK_OBJ(cx, obj);
3986
- chars[length] = 0;
3987
-
3988
- str = js_NewString(cx, chars, length);
3989
- if (!str) {
3990
- JS_free(cx, chars);
3991
- return JS_FALSE;
3992
- }
3993
- *vp = STRING_TO_JSVAL(str);
3994
- return JS_TRUE;
3995
- }
3996
-
3997
- static JSBool
3998
- regexp_toString(JSContext *cx, uintN argc, jsval *vp)
3999
- {
4000
- JSObject *obj;
4001
-
4002
- obj = JS_THIS_OBJECT(cx, vp);
4003
- return obj && js_regexp_toString(cx, obj, vp);
4004
- }
4005
-
4006
- static JSBool
4007
- regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4008
- jsval *rval)
4009
- {
4010
- JSString *opt, *str;
4011
- JSRegExp *oldre, *re;
4012
- JSBool ok, ok2;
4013
- JSObject *obj2;
4014
- size_t length, nbytes;
4015
- const jschar *cp, *start, *end;
4016
- jschar *nstart, *ncp, *tmp;
4017
-
4018
- if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
4019
- return JS_FALSE;
4020
- opt = NULL;
4021
- if (argc == 0) {
4022
- str = cx->runtime->emptyString;
4023
- } else {
4024
- if (JSVAL_IS_OBJECT(argv[0])) {
4025
- /*
4026
- * If we get passed in a RegExp object we construct a new
4027
- * RegExp that is a duplicate of it by re-compiling the
4028
- * original source code. ECMA requires that it be an error
4029
- * here if the flags are specified. (We must use the flags
4030
- * from the original RegExp also).
4031
- */
4032
- obj2 = JSVAL_TO_OBJECT(argv[0]);
4033
- if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
4034
- if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
4035
- JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4036
- JSMSG_NEWREGEXP_FLAGGED);
4037
- return JS_FALSE;
4038
- }
4039
- JS_LOCK_OBJ(cx, obj2);
4040
- re = (JSRegExp *) JS_GetPrivate(cx, obj2);
4041
- if (!re) {
4042
- JS_UNLOCK_OBJ(cx, obj2);
4043
- return JS_FALSE;
4044
- }
4045
- re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
4046
- JS_UNLOCK_OBJ(cx, obj2);
4047
- goto created;
4048
- }
4049
- }
4050
- str = js_ValueToString(cx, argv[0]);
4051
- if (!str)
4052
- return JS_FALSE;
4053
- argv[0] = STRING_TO_JSVAL(str);
4054
- if (argc > 1) {
4055
- if (JSVAL_IS_VOID(argv[1])) {
4056
- opt = NULL;
4057
- } else {
4058
- opt = js_ValueToString(cx, argv[1]);
4059
- if (!opt)
4060
- return JS_FALSE;
4061
- argv[1] = STRING_TO_JSVAL(opt);
4062
- }
4063
- }
4064
-
4065
- /* Escape any naked slashes in the regexp source. */
4066
- JSSTRING_CHARS_AND_LENGTH(str, start, length);
4067
- end = start + length;
4068
- nstart = ncp = NULL;
4069
- for (cp = start; cp < end; cp++) {
4070
- if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
4071
- nbytes = (++length + 1) * sizeof(jschar);
4072
- if (!nstart) {
4073
- nstart = (jschar *) JS_malloc(cx, nbytes);
4074
- if (!nstart)
4075
- return JS_FALSE;
4076
- ncp = nstart + (cp - start);
4077
- js_strncpy(nstart, start, cp - start);
4078
- } else {
4079
- tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
4080
- if (!tmp) {
4081
- JS_free(cx, nstart);
4082
- return JS_FALSE;
4083
- }
4084
- ncp = tmp + (ncp - nstart);
4085
- nstart = tmp;
4086
- }
4087
- *ncp++ = '\\';
4088
- }
4089
- if (nstart)
4090
- *ncp++ = *cp;
4091
- }
4092
-
4093
- if (nstart) {
4094
- /* Don't forget to store the backstop after the new string. */
4095
- JS_ASSERT((size_t)(ncp - nstart) == length);
4096
- *ncp = 0;
4097
- str = js_NewString(cx, nstart, length);
4098
- if (!str) {
4099
- JS_free(cx, nstart);
4100
- return JS_FALSE;
4101
- }
4102
- argv[0] = STRING_TO_JSVAL(str);
4103
- }
4104
- }
4105
-
4106
- re = js_NewRegExpOpt(cx, str, opt, JS_FALSE);
4107
- created:
4108
- if (!re)
4109
- return JS_FALSE;
4110
- JS_LOCK_OBJ(cx, obj);
4111
- oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
4112
- ok = JS_SetPrivate(cx, obj, re);
4113
- ok2 = js_SetLastIndex(cx, obj, 0);
4114
- JS_UNLOCK_OBJ(cx, obj);
4115
- if (!ok) {
4116
- js_DestroyRegExp(cx, re);
4117
- return JS_FALSE;
4118
- }
4119
- if (oldre)
4120
- js_DestroyRegExp(cx, oldre);
4121
- *rval = OBJECT_TO_JSVAL(obj);
4122
- return ok2;
4123
- }
4124
-
4125
- static JSBool
4126
- regexp_compile(JSContext *cx, uintN argc, jsval *vp)
4127
- {
4128
- JSObject *obj;
4129
-
4130
- obj = JS_THIS_OBJECT(cx, vp);
4131
- return obj && regexp_compile_sub(cx, obj, argc, vp + 2, vp);
4132
- }
4133
-
4134
- static JSBool
4135
- regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4136
- JSBool test, jsval *rval)
4137
- {
4138
- JSBool ok, sticky;
4139
- JSRegExp *re;
4140
- jsdouble lastIndex;
4141
- JSString *str;
4142
- size_t i;
4143
-
4144
- ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
4145
- if (!ok)
4146
- return JS_FALSE;
4147
- JS_LOCK_OBJ(cx, obj);
4148
- re = (JSRegExp *) JS_GetPrivate(cx, obj);
4149
- if (!re) {
4150
- JS_UNLOCK_OBJ(cx, obj);
4151
- return JS_TRUE;
4152
- }
4153
-
4154
- /* NB: we must reach out: after this paragraph, in order to drop re. */
4155
- HOLD_REGEXP(cx, re);
4156
- sticky = (re->flags & JSREG_STICKY) != 0;
4157
- if (re->flags & (JSREG_GLOB | JSREG_STICKY)) {
4158
- ok = js_GetLastIndex(cx, obj, &lastIndex);
4159
- } else {
4160
- lastIndex = 0;
4161
- }
4162
- JS_UNLOCK_OBJ(cx, obj);
4163
- if (!ok)
4164
- goto out;
4165
-
4166
- /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4167
- if (argc == 0) {
4168
- str = cx->regExpStatics.input;
4169
- if (!str) {
4170
- const char *bytes = js_GetStringBytes(cx, re->source);
4171
-
4172
- if (bytes) {
4173
- JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4174
- JSMSG_NO_INPUT,
4175
- bytes,
4176
- (re->flags & JSREG_GLOB) ? "g" : "",
4177
- (re->flags & JSREG_FOLD) ? "i" : "",
4178
- (re->flags & JSREG_MULTILINE) ? "m" : "",
4179
- (re->flags & JSREG_STICKY) ? "y" : "");
4180
- }
4181
- ok = JS_FALSE;
4182
- goto out;
4183
- }
4184
- } else {
4185
- str = js_ValueToString(cx, argv[0]);
4186
- if (!str) {
4187
- ok = JS_FALSE;
4188
- goto out;
4189
- }
4190
- argv[0] = STRING_TO_JSVAL(str);
4191
- }
4192
-
4193
- if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4194
- ok = js_SetLastIndex(cx, obj, 0);
4195
- *rval = JSVAL_NULL;
4196
- } else {
4197
- i = (size_t) lastIndex;
4198
- ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4199
- if (ok &&
4200
- ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) {
4201
- ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4202
- }
4203
- }
4204
-
4205
- out:
4206
- DROP_REGEXP(cx, re);
4207
- return ok;
4208
- }
4209
-
4210
- static JSBool
4211
- regexp_exec(JSContext *cx, uintN argc, jsval *vp)
4212
- {
4213
- return regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_FALSE,
4214
- vp);
4215
- }
4216
-
4217
- static JSBool
4218
- regexp_test(JSContext *cx, uintN argc, jsval *vp)
4219
- {
4220
- if (!regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_TRUE, vp))
4221
- return JS_FALSE;
4222
- if (*vp != JSVAL_TRUE)
4223
- *vp = JSVAL_FALSE;
4224
- return JS_TRUE;
4225
- }
4226
-
4227
- static JSFunctionSpec regexp_methods[] = {
4228
- #if JS_HAS_TOSOURCE
4229
- JS_FN(js_toSource_str, regexp_toString, 0,0,0),
4230
- #endif
4231
- JS_FN(js_toString_str, regexp_toString, 0,0,0),
4232
- JS_FN("compile", regexp_compile, 0,2,0),
4233
- JS_FN("exec", regexp_exec, 0,1,0),
4234
- JS_FN("test", regexp_test, 0,1,0),
4235
- JS_FS_END
4236
- };
4237
-
4238
- static JSBool
4239
- RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4240
- {
4241
- if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
4242
- /*
4243
- * If first arg is regexp and no flags are given, just return the arg.
4244
- * (regexp_compile_sub detects the regexp + flags case and throws a
4245
- * TypeError.) See 10.15.3.1.
4246
- */
4247
- if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4248
- !JSVAL_IS_PRIMITIVE(argv[0]) &&
4249
- OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4250
- *rval = argv[0];
4251
- return JS_TRUE;
4252
- }
4253
-
4254
- /* Otherwise, replace obj with a new RegExp object. */
4255
- obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4256
- if (!obj)
4257
- return JS_FALSE;
4258
-
4259
- /*
4260
- * regexp_compile_sub does not use rval to root its temporaries so we
4261
- * can use it to root obj.
4262
- */
4263
- *rval = OBJECT_TO_JSVAL(obj);
4264
- }
4265
- return regexp_compile_sub(cx, obj, argc, argv, rval);
4266
- }
4267
-
4268
- JSObject *
4269
- js_InitRegExpClass(JSContext *cx, JSObject *obj)
4270
- {
4271
- JSObject *proto, *ctor;
4272
- jsval rval;
4273
-
4274
- proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4275
- regexp_props, regexp_methods,
4276
- regexp_static_props, NULL);
4277
-
4278
- if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4279
- return NULL;
4280
- if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4281
- !JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4282
- !JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4283
- !JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4284
- !JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4285
- !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4286
- goto bad;
4287
- }
4288
-
4289
- /* Give RegExp.prototype private data so it matches the empty string. */
4290
- if (!regexp_compile_sub(cx, proto, 0, NULL, &rval))
4291
- goto bad;
4292
- return proto;
4293
-
4294
- bad:
4295
- JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4296
- return NULL;
4297
- }
4298
-
4299
- JSObject *
4300
- js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4301
- jschar *chars, size_t length, uintN flags)
4302
- {
4303
- JSString *str;
4304
- JSObject *obj;
4305
- JSRegExp *re;
4306
- JSTempValueRooter tvr;
4307
-
4308
- str = js_NewStringCopyN(cx, chars, length);
4309
- if (!str)
4310
- return NULL;
4311
- JS_PUSH_TEMP_ROOT_STRING(cx, str, &tvr);
4312
- re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4313
- if (!re) {
4314
- JS_POP_TEMP_ROOT(cx, &tvr);
4315
- return NULL;
4316
- }
4317
- obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4318
- if (!obj || !JS_SetPrivate(cx, obj, re)) {
4319
- js_DestroyRegExp(cx, re);
4320
- obj = NULL;
4321
- }
4322
- if (obj && !js_SetLastIndex(cx, obj, 0))
4323
- obj = NULL;
4324
- JS_POP_TEMP_ROOT(cx, &tvr);
4325
- return obj;
4326
- }
4327
-
4328
- JSObject *
4329
- js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4330
- {
4331
- JSObject *clone;
4332
- JSRegExp *re;
4333
-
4334
- JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4335
- clone = js_NewObject(cx, &js_RegExpClass, NULL, parent, 0);
4336
- if (!clone)
4337
- return NULL;
4338
- re = (JSRegExp *) JS_GetPrivate(cx, obj);
4339
- if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4340
- cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4341
- return NULL;
4342
- }
4343
- HOLD_REGEXP(cx, re);
4344
- return clone;
4345
- }
4346
-
4347
- JSBool
4348
- js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4349
- {
4350
- jsval v;
4351
-
4352
- return JS_GetReservedSlot(cx, obj, 0, &v) &&
4353
- JS_ValueToNumber(cx, v, lastIndex);
4354
- }
4355
-
4356
- JSBool
4357
- js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4358
- {
4359
- jsval v;
4360
-
4361
- return JS_NewNumberValue(cx, lastIndex, &v) &&
4362
- JS_SetReservedSlot(cx, obj, 0, v);
4363
- }
4364
-