johnson 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (371) hide show
  1. data/.autotest +14 -0
  2. data/CHANGELOG.rdoc +11 -0
  3. data/Manifest.txt +370 -0
  4. data/README.rdoc +60 -0
  5. data/Rakefile +42 -0
  6. data/bin/johnson +108 -0
  7. data/docs/MINGW32.mk +124 -0
  8. data/docs/cross-compile.txt +38 -0
  9. data/ext/spidermonkey/context.c +115 -0
  10. data/ext/spidermonkey/context.h +19 -0
  11. data/ext/spidermonkey/conversions.c +320 -0
  12. data/ext/spidermonkey/conversions.h +18 -0
  13. data/ext/spidermonkey/debugger.c +226 -0
  14. data/ext/spidermonkey/debugger.h +9 -0
  15. data/ext/spidermonkey/extconf.rb +30 -0
  16. data/ext/spidermonkey/extensions.c +37 -0
  17. data/ext/spidermonkey/extensions.h +12 -0
  18. data/ext/spidermonkey/global.c +40 -0
  19. data/ext/spidermonkey/global.h +11 -0
  20. data/ext/spidermonkey/idhash.c +16 -0
  21. data/ext/spidermonkey/idhash.h +8 -0
  22. data/ext/spidermonkey/immutable_node.c +1153 -0
  23. data/ext/spidermonkey/immutable_node.c.erb +523 -0
  24. data/ext/spidermonkey/immutable_node.h +22 -0
  25. data/ext/spidermonkey/jroot.h +187 -0
  26. data/ext/spidermonkey/js_land_proxy.c +610 -0
  27. data/ext/spidermonkey/js_land_proxy.h +20 -0
  28. data/ext/spidermonkey/ruby_land_proxy.c +543 -0
  29. data/ext/spidermonkey/ruby_land_proxy.h +17 -0
  30. data/ext/spidermonkey/runtime.c +330 -0
  31. data/ext/spidermonkey/runtime.h +25 -0
  32. data/ext/spidermonkey/spidermonkey.c +20 -0
  33. data/ext/spidermonkey/spidermonkey.h +29 -0
  34. data/johnson.gemspec +44 -0
  35. data/js/johnson/cli.js +30 -0
  36. data/js/johnson/prelude.js +80 -0
  37. data/lib/johnson.rb +55 -0
  38. data/lib/johnson/cli.rb +7 -0
  39. data/lib/johnson/cli/options.rb +67 -0
  40. data/lib/johnson/error.rb +4 -0
  41. data/lib/johnson/nodes.rb +7 -0
  42. data/lib/johnson/nodes/binary_node.rb +65 -0
  43. data/lib/johnson/nodes/for.rb +14 -0
  44. data/lib/johnson/nodes/for_in.rb +12 -0
  45. data/lib/johnson/nodes/function.rb +13 -0
  46. data/lib/johnson/nodes/list.rb +28 -0
  47. data/lib/johnson/nodes/node.rb +68 -0
  48. data/lib/johnson/nodes/ternary_node.rb +20 -0
  49. data/lib/johnson/parser.rb +21 -0
  50. data/lib/johnson/parser/syntax_error.rb +13 -0
  51. data/lib/johnson/runtime.rb +63 -0
  52. data/lib/johnson/spidermonkey/context.rb +10 -0
  53. data/lib/johnson/spidermonkey/debugger.rb +67 -0
  54. data/lib/johnson/spidermonkey/immutable_node.rb +282 -0
  55. data/lib/johnson/spidermonkey/js_land_proxy.rb +62 -0
  56. data/lib/johnson/spidermonkey/mutable_tree_visitor.rb +242 -0
  57. data/lib/johnson/spidermonkey/ruby_land_proxy.rb +54 -0
  58. data/lib/johnson/spidermonkey/runtime.rb +103 -0
  59. data/lib/johnson/version.rb +3 -0
  60. data/lib/johnson/visitable.rb +16 -0
  61. data/lib/johnson/visitors.rb +5 -0
  62. data/lib/johnson/visitors/dot_visitor.rb +169 -0
  63. data/lib/johnson/visitors/ecma_visitor.rb +323 -0
  64. data/lib/johnson/visitors/enumerating_visitor.rb +15 -0
  65. data/lib/johnson/visitors/sexp_visitor.rb +174 -0
  66. data/lib/johnson/visitors/visitor.rb +91 -0
  67. data/lib/rails/init.rb +37 -0
  68. data/lib/tasks/gem.rake +9 -0
  69. data/lib/tasks/parsing.rake +37 -0
  70. data/lib/tasks/testing.rake +36 -0
  71. data/lib/tasks/vendor.rake +20 -0
  72. data/test/helper.rb +55 -0
  73. data/test/johnson/browser_test.rb +43 -0
  74. data/test/johnson/conversions/array_test.rb +32 -0
  75. data/test/johnson/conversions/boolean_test.rb +17 -0
  76. data/test/johnson/conversions/callable_test.rb +34 -0
  77. data/test/johnson/conversions/file_test.rb +15 -0
  78. data/test/johnson/conversions/nil_test.rb +20 -0
  79. data/test/johnson/conversions/number_test.rb +34 -0
  80. data/test/johnson/conversions/regexp_test.rb +24 -0
  81. data/test/johnson/conversions/string_test.rb +26 -0
  82. data/test/johnson/conversions/struct_test.rb +15 -0
  83. data/test/johnson/conversions/symbol_test.rb +19 -0
  84. data/test/johnson/conversions/thread_test.rb +24 -0
  85. data/test/johnson/error_test.rb +9 -0
  86. data/test/johnson/extensions_test.rb +56 -0
  87. data/test/johnson/nodes/array_literal_test.rb +57 -0
  88. data/test/johnson/nodes/array_node_test.rb +26 -0
  89. data/test/johnson/nodes/binary_node_test.rb +61 -0
  90. data/test/johnson/nodes/bracket_access_test.rb +16 -0
  91. data/test/johnson/nodes/delete_test.rb +11 -0
  92. data/test/johnson/nodes/do_while_test.rb +12 -0
  93. data/test/johnson/nodes/dot_accessor_test.rb +15 -0
  94. data/test/johnson/nodes/export_test.rb +9 -0
  95. data/test/johnson/nodes/for_test.rb +54 -0
  96. data/test/johnson/nodes/function_test.rb +71 -0
  97. data/test/johnson/nodes/if_test.rb +41 -0
  98. data/test/johnson/nodes/import_test.rb +13 -0
  99. data/test/johnson/nodes/label_test.rb +19 -0
  100. data/test/johnson/nodes/let_test.rb +31 -0
  101. data/test/johnson/nodes/object_literal_test.rb +110 -0
  102. data/test/johnson/nodes/return_test.rb +16 -0
  103. data/test/johnson/nodes/semi_test.rb +8 -0
  104. data/test/johnson/nodes/switch_test.rb +55 -0
  105. data/test/johnson/nodes/ternary_test.rb +25 -0
  106. data/test/johnson/nodes/throw_test.rb +9 -0
  107. data/test/johnson/nodes/try_node_test.rb +59 -0
  108. data/test/johnson/nodes/typeof_test.rb +11 -0
  109. data/test/johnson/nodes/unary_node_test.rb +23 -0
  110. data/test/johnson/nodes/void_test.rb +11 -0
  111. data/test/johnson/nodes/while_test.rb +26 -0
  112. data/test/johnson/nodes/with_test.rb +10 -0
  113. data/test/johnson/prelude_test.rb +56 -0
  114. data/test/johnson/runtime_test.rb +64 -0
  115. data/test/johnson/spidermonkey/context_test.rb +21 -0
  116. data/test/johnson/spidermonkey/immutable_node_test.rb +34 -0
  117. data/test/johnson/spidermonkey/js_land_proxy_test.rb +236 -0
  118. data/test/johnson/spidermonkey/ruby_land_proxy_test.rb +240 -0
  119. data/test/johnson/spidermonkey/runtime_test.rb +17 -0
  120. data/test/johnson/version_test.rb +13 -0
  121. data/test/johnson/visitors/dot_visitor_test.rb +39 -0
  122. data/test/johnson/visitors/enumerating_visitor_test.rb +12 -0
  123. data/test/johnson_test.rb +16 -0
  124. data/test/parser_test.rb +276 -0
  125. data/vendor/spidermonkey/.cvsignore +9 -0
  126. data/vendor/spidermonkey/Makefile.in +449 -0
  127. data/vendor/spidermonkey/Makefile.ref +365 -0
  128. data/vendor/spidermonkey/README.html +820 -0
  129. data/vendor/spidermonkey/SpiderMonkey.rsp +12 -0
  130. data/vendor/spidermonkey/Y.js +19 -0
  131. data/vendor/spidermonkey/build.mk +43 -0
  132. data/vendor/spidermonkey/config.mk +192 -0
  133. data/vendor/spidermonkey/config/AIX4.1.mk +65 -0
  134. data/vendor/spidermonkey/config/AIX4.2.mk +64 -0
  135. data/vendor/spidermonkey/config/AIX4.3.mk +65 -0
  136. data/vendor/spidermonkey/config/Darwin.mk +83 -0
  137. data/vendor/spidermonkey/config/Darwin1.3.mk +81 -0
  138. data/vendor/spidermonkey/config/Darwin1.4.mk +41 -0
  139. data/vendor/spidermonkey/config/Darwin5.2.mk +81 -0
  140. data/vendor/spidermonkey/config/Darwin5.3.mk +81 -0
  141. data/vendor/spidermonkey/config/HP-UXB.10.10.mk +77 -0
  142. data/vendor/spidermonkey/config/HP-UXB.10.20.mk +77 -0
  143. data/vendor/spidermonkey/config/HP-UXB.11.00.mk +80 -0
  144. data/vendor/spidermonkey/config/IRIX.mk +87 -0
  145. data/vendor/spidermonkey/config/IRIX5.3.mk +44 -0
  146. data/vendor/spidermonkey/config/IRIX6.1.mk +44 -0
  147. data/vendor/spidermonkey/config/IRIX6.2.mk +44 -0
  148. data/vendor/spidermonkey/config/IRIX6.3.mk +44 -0
  149. data/vendor/spidermonkey/config/IRIX6.5.mk +44 -0
  150. data/vendor/spidermonkey/config/Linux_All.mk +103 -0
  151. data/vendor/spidermonkey/config/Mac_OS10.0.mk +82 -0
  152. data/vendor/spidermonkey/config/OSF1V4.0.mk +72 -0
  153. data/vendor/spidermonkey/config/OSF1V5.0.mk +69 -0
  154. data/vendor/spidermonkey/config/SunOS4.1.4.mk +101 -0
  155. data/vendor/spidermonkey/config/SunOS5.10.mk +50 -0
  156. data/vendor/spidermonkey/config/SunOS5.3.mk +91 -0
  157. data/vendor/spidermonkey/config/SunOS5.4.mk +92 -0
  158. data/vendor/spidermonkey/config/SunOS5.5.1.mk +44 -0
  159. data/vendor/spidermonkey/config/SunOS5.5.mk +87 -0
  160. data/vendor/spidermonkey/config/SunOS5.6.mk +89 -0
  161. data/vendor/spidermonkey/config/SunOS5.7.mk +44 -0
  162. data/vendor/spidermonkey/config/SunOS5.8.mk +44 -0
  163. data/vendor/spidermonkey/config/SunOS5.9.mk +44 -0
  164. data/vendor/spidermonkey/config/WINNT4.0.mk +117 -0
  165. data/vendor/spidermonkey/config/WINNT5.0.mk +117 -0
  166. data/vendor/spidermonkey/config/WINNT5.1.mk +117 -0
  167. data/vendor/spidermonkey/config/WINNT5.2.mk +117 -0
  168. data/vendor/spidermonkey/config/WINNT6.0.mk +117 -0
  169. data/vendor/spidermonkey/config/dgux.mk +64 -0
  170. data/vendor/spidermonkey/editline/Makefile.ref +144 -0
  171. data/vendor/spidermonkey/editline/README +83 -0
  172. data/vendor/spidermonkey/editline/editline.3 +175 -0
  173. data/vendor/spidermonkey/editline/editline.c +1369 -0
  174. data/vendor/spidermonkey/editline/editline.h +135 -0
  175. data/vendor/spidermonkey/editline/sysunix.c +182 -0
  176. data/vendor/spidermonkey/editline/unix.h +82 -0
  177. data/vendor/spidermonkey/fdlibm/.cvsignore +7 -0
  178. data/vendor/spidermonkey/fdlibm/Makefile.in +127 -0
  179. data/vendor/spidermonkey/fdlibm/Makefile.ref +192 -0
  180. data/vendor/spidermonkey/fdlibm/e_acos.c +147 -0
  181. data/vendor/spidermonkey/fdlibm/e_acosh.c +105 -0
  182. data/vendor/spidermonkey/fdlibm/e_asin.c +156 -0
  183. data/vendor/spidermonkey/fdlibm/e_atan2.c +165 -0
  184. data/vendor/spidermonkey/fdlibm/e_atanh.c +110 -0
  185. data/vendor/spidermonkey/fdlibm/e_cosh.c +133 -0
  186. data/vendor/spidermonkey/fdlibm/e_exp.c +202 -0
  187. data/vendor/spidermonkey/fdlibm/e_fmod.c +184 -0
  188. data/vendor/spidermonkey/fdlibm/e_gamma.c +71 -0
  189. data/vendor/spidermonkey/fdlibm/e_gamma_r.c +70 -0
  190. data/vendor/spidermonkey/fdlibm/e_hypot.c +173 -0
  191. data/vendor/spidermonkey/fdlibm/e_j0.c +524 -0
  192. data/vendor/spidermonkey/fdlibm/e_j1.c +523 -0
  193. data/vendor/spidermonkey/fdlibm/e_jn.c +315 -0
  194. data/vendor/spidermonkey/fdlibm/e_lgamma.c +71 -0
  195. data/vendor/spidermonkey/fdlibm/e_lgamma_r.c +347 -0
  196. data/vendor/spidermonkey/fdlibm/e_log.c +184 -0
  197. data/vendor/spidermonkey/fdlibm/e_log10.c +134 -0
  198. data/vendor/spidermonkey/fdlibm/e_pow.c +386 -0
  199. data/vendor/spidermonkey/fdlibm/e_rem_pio2.c +222 -0
  200. data/vendor/spidermonkey/fdlibm/e_remainder.c +120 -0
  201. data/vendor/spidermonkey/fdlibm/e_scalb.c +89 -0
  202. data/vendor/spidermonkey/fdlibm/e_sinh.c +122 -0
  203. data/vendor/spidermonkey/fdlibm/e_sqrt.c +497 -0
  204. data/vendor/spidermonkey/fdlibm/fdlibm.h +273 -0
  205. data/vendor/spidermonkey/fdlibm/fdlibm.mak +1453 -0
  206. data/vendor/spidermonkey/fdlibm/fdlibm.mdp +0 -0
  207. data/vendor/spidermonkey/fdlibm/k_cos.c +135 -0
  208. data/vendor/spidermonkey/fdlibm/k_rem_pio2.c +354 -0
  209. data/vendor/spidermonkey/fdlibm/k_sin.c +114 -0
  210. data/vendor/spidermonkey/fdlibm/k_standard.c +785 -0
  211. data/vendor/spidermonkey/fdlibm/k_tan.c +170 -0
  212. data/vendor/spidermonkey/fdlibm/s_asinh.c +101 -0
  213. data/vendor/spidermonkey/fdlibm/s_atan.c +175 -0
  214. data/vendor/spidermonkey/fdlibm/s_cbrt.c +133 -0
  215. data/vendor/spidermonkey/fdlibm/s_ceil.c +120 -0
  216. data/vendor/spidermonkey/fdlibm/s_copysign.c +72 -0
  217. data/vendor/spidermonkey/fdlibm/s_cos.c +118 -0
  218. data/vendor/spidermonkey/fdlibm/s_erf.c +356 -0
  219. data/vendor/spidermonkey/fdlibm/s_expm1.c +267 -0
  220. data/vendor/spidermonkey/fdlibm/s_fabs.c +70 -0
  221. data/vendor/spidermonkey/fdlibm/s_finite.c +71 -0
  222. data/vendor/spidermonkey/fdlibm/s_floor.c +121 -0
  223. data/vendor/spidermonkey/fdlibm/s_frexp.c +99 -0
  224. data/vendor/spidermonkey/fdlibm/s_ilogb.c +85 -0
  225. data/vendor/spidermonkey/fdlibm/s_isnan.c +74 -0
  226. data/vendor/spidermonkey/fdlibm/s_ldexp.c +66 -0
  227. data/vendor/spidermonkey/fdlibm/s_lib_version.c +73 -0
  228. data/vendor/spidermonkey/fdlibm/s_log1p.c +211 -0
  229. data/vendor/spidermonkey/fdlibm/s_logb.c +79 -0
  230. data/vendor/spidermonkey/fdlibm/s_matherr.c +64 -0
  231. data/vendor/spidermonkey/fdlibm/s_modf.c +132 -0
  232. data/vendor/spidermonkey/fdlibm/s_nextafter.c +124 -0
  233. data/vendor/spidermonkey/fdlibm/s_rint.c +131 -0
  234. data/vendor/spidermonkey/fdlibm/s_scalbn.c +107 -0
  235. data/vendor/spidermonkey/fdlibm/s_signgam.c +40 -0
  236. data/vendor/spidermonkey/fdlibm/s_significand.c +68 -0
  237. data/vendor/spidermonkey/fdlibm/s_sin.c +118 -0
  238. data/vendor/spidermonkey/fdlibm/s_tan.c +112 -0
  239. data/vendor/spidermonkey/fdlibm/s_tanh.c +122 -0
  240. data/vendor/spidermonkey/fdlibm/w_acos.c +78 -0
  241. data/vendor/spidermonkey/fdlibm/w_acosh.c +78 -0
  242. data/vendor/spidermonkey/fdlibm/w_asin.c +80 -0
  243. data/vendor/spidermonkey/fdlibm/w_atan2.c +79 -0
  244. data/vendor/spidermonkey/fdlibm/w_atanh.c +81 -0
  245. data/vendor/spidermonkey/fdlibm/w_cosh.c +77 -0
  246. data/vendor/spidermonkey/fdlibm/w_exp.c +88 -0
  247. data/vendor/spidermonkey/fdlibm/w_fmod.c +78 -0
  248. data/vendor/spidermonkey/fdlibm/w_gamma.c +85 -0
  249. data/vendor/spidermonkey/fdlibm/w_gamma_r.c +81 -0
  250. data/vendor/spidermonkey/fdlibm/w_hypot.c +78 -0
  251. data/vendor/spidermonkey/fdlibm/w_j0.c +105 -0
  252. data/vendor/spidermonkey/fdlibm/w_j1.c +106 -0
  253. data/vendor/spidermonkey/fdlibm/w_jn.c +128 -0
  254. data/vendor/spidermonkey/fdlibm/w_lgamma.c +85 -0
  255. data/vendor/spidermonkey/fdlibm/w_lgamma_r.c +81 -0
  256. data/vendor/spidermonkey/fdlibm/w_log.c +78 -0
  257. data/vendor/spidermonkey/fdlibm/w_log10.c +81 -0
  258. data/vendor/spidermonkey/fdlibm/w_pow.c +99 -0
  259. data/vendor/spidermonkey/fdlibm/w_remainder.c +77 -0
  260. data/vendor/spidermonkey/fdlibm/w_scalb.c +95 -0
  261. data/vendor/spidermonkey/fdlibm/w_sinh.c +77 -0
  262. data/vendor/spidermonkey/fdlibm/w_sqrt.c +77 -0
  263. data/vendor/spidermonkey/javascript-trace.d +73 -0
  264. data/vendor/spidermonkey/js.c +3951 -0
  265. data/vendor/spidermonkey/js.mdp +0 -0
  266. data/vendor/spidermonkey/js.msg +308 -0
  267. data/vendor/spidermonkey/js3240.rc +79 -0
  268. data/vendor/spidermonkey/jsOS240.def +654 -0
  269. data/vendor/spidermonkey/jsapi.c +5836 -0
  270. data/vendor/spidermonkey/jsapi.h +2624 -0
  271. data/vendor/spidermonkey/jsarena.c +450 -0
  272. data/vendor/spidermonkey/jsarena.h +318 -0
  273. data/vendor/spidermonkey/jsarray.c +2996 -0
  274. data/vendor/spidermonkey/jsarray.h +127 -0
  275. data/vendor/spidermonkey/jsatom.c +1045 -0
  276. data/vendor/spidermonkey/jsatom.h +442 -0
  277. data/vendor/spidermonkey/jsbit.h +253 -0
  278. data/vendor/spidermonkey/jsbool.c +176 -0
  279. data/vendor/spidermonkey/jsbool.h +73 -0
  280. data/vendor/spidermonkey/jsclist.h +139 -0
  281. data/vendor/spidermonkey/jscntxt.c +1348 -0
  282. data/vendor/spidermonkey/jscntxt.h +1120 -0
  283. data/vendor/spidermonkey/jscompat.h +57 -0
  284. data/vendor/spidermonkey/jsconfig.h +248 -0
  285. data/vendor/spidermonkey/jsconfig.mk +181 -0
  286. data/vendor/spidermonkey/jscpucfg.c +396 -0
  287. data/vendor/spidermonkey/jscpucfg.h +212 -0
  288. data/vendor/spidermonkey/jsdate.c +2390 -0
  289. data/vendor/spidermonkey/jsdate.h +124 -0
  290. data/vendor/spidermonkey/jsdbgapi.c +1802 -0
  291. data/vendor/spidermonkey/jsdbgapi.h +464 -0
  292. data/vendor/spidermonkey/jsdhash.c +868 -0
  293. data/vendor/spidermonkey/jsdhash.h +592 -0
  294. data/vendor/spidermonkey/jsdtoa.c +3167 -0
  295. data/vendor/spidermonkey/jsdtoa.h +130 -0
  296. data/vendor/spidermonkey/jsdtracef.c +317 -0
  297. data/vendor/spidermonkey/jsdtracef.h +77 -0
  298. data/vendor/spidermonkey/jsemit.c +6909 -0
  299. data/vendor/spidermonkey/jsemit.h +741 -0
  300. data/vendor/spidermonkey/jsexn.c +1371 -0
  301. data/vendor/spidermonkey/jsexn.h +96 -0
  302. data/vendor/spidermonkey/jsfile.c +2736 -0
  303. data/vendor/spidermonkey/jsfile.h +56 -0
  304. data/vendor/spidermonkey/jsfile.msg +90 -0
  305. data/vendor/spidermonkey/jsfun.c +2634 -0
  306. data/vendor/spidermonkey/jsfun.h +254 -0
  307. data/vendor/spidermonkey/jsgc.c +3562 -0
  308. data/vendor/spidermonkey/jsgc.h +403 -0
  309. data/vendor/spidermonkey/jshash.c +476 -0
  310. data/vendor/spidermonkey/jshash.h +151 -0
  311. data/vendor/spidermonkey/jsify.pl +485 -0
  312. data/vendor/spidermonkey/jsinterp.c +7007 -0
  313. data/vendor/spidermonkey/jsinterp.h +525 -0
  314. data/vendor/spidermonkey/jsinvoke.c +43 -0
  315. data/vendor/spidermonkey/jsiter.c +1067 -0
  316. data/vendor/spidermonkey/jsiter.h +122 -0
  317. data/vendor/spidermonkey/jskeyword.tbl +124 -0
  318. data/vendor/spidermonkey/jskwgen.c +460 -0
  319. data/vendor/spidermonkey/jslibmath.h +266 -0
  320. data/vendor/spidermonkey/jslock.c +1309 -0
  321. data/vendor/spidermonkey/jslock.h +313 -0
  322. data/vendor/spidermonkey/jslocko.asm +60 -0
  323. data/vendor/spidermonkey/jslog2.c +94 -0
  324. data/vendor/spidermonkey/jslong.c +264 -0
  325. data/vendor/spidermonkey/jslong.h +412 -0
  326. data/vendor/spidermonkey/jsmath.c +567 -0
  327. data/vendor/spidermonkey/jsmath.h +57 -0
  328. data/vendor/spidermonkey/jsnum.c +1239 -0
  329. data/vendor/spidermonkey/jsnum.h +283 -0
  330. data/vendor/spidermonkey/jsobj.c +5282 -0
  331. data/vendor/spidermonkey/jsobj.h +709 -0
  332. data/vendor/spidermonkey/jsopcode.c +5245 -0
  333. data/vendor/spidermonkey/jsopcode.h +394 -0
  334. data/vendor/spidermonkey/jsopcode.tbl +523 -0
  335. data/vendor/spidermonkey/jsotypes.h +202 -0
  336. data/vendor/spidermonkey/jsparse.c +6704 -0
  337. data/vendor/spidermonkey/jsparse.h +511 -0
  338. data/vendor/spidermonkey/jsprf.c +1262 -0
  339. data/vendor/spidermonkey/jsprf.h +150 -0
  340. data/vendor/spidermonkey/jsproto.tbl +128 -0
  341. data/vendor/spidermonkey/jsprvtd.h +267 -0
  342. data/vendor/spidermonkey/jspubtd.h +744 -0
  343. data/vendor/spidermonkey/jsregexp.c +4364 -0
  344. data/vendor/spidermonkey/jsregexp.h +183 -0
  345. data/vendor/spidermonkey/jsreops.tbl +145 -0
  346. data/vendor/spidermonkey/jsscan.c +2012 -0
  347. data/vendor/spidermonkey/jsscan.h +387 -0
  348. data/vendor/spidermonkey/jsscope.c +1957 -0
  349. data/vendor/spidermonkey/jsscope.h +418 -0
  350. data/vendor/spidermonkey/jsscript.c +1832 -0
  351. data/vendor/spidermonkey/jsscript.h +287 -0
  352. data/vendor/spidermonkey/jsshell.msg +50 -0
  353. data/vendor/spidermonkey/jsstddef.h +83 -0
  354. data/vendor/spidermonkey/jsstr.c +5005 -0
  355. data/vendor/spidermonkey/jsstr.h +641 -0
  356. data/vendor/spidermonkey/jstypes.h +475 -0
  357. data/vendor/spidermonkey/jsutil.c +345 -0
  358. data/vendor/spidermonkey/jsutil.h +157 -0
  359. data/vendor/spidermonkey/jsxdrapi.c +800 -0
  360. data/vendor/spidermonkey/jsxdrapi.h +218 -0
  361. data/vendor/spidermonkey/jsxml.c +8476 -0
  362. data/vendor/spidermonkey/jsxml.h +349 -0
  363. data/vendor/spidermonkey/lock_SunOS.s +119 -0
  364. data/vendor/spidermonkey/perfect.js +39 -0
  365. data/vendor/spidermonkey/plify_jsdhash.sed +36 -0
  366. data/vendor/spidermonkey/prmjtime.c +846 -0
  367. data/vendor/spidermonkey/prmjtime.h +103 -0
  368. data/vendor/spidermonkey/resource.h +15 -0
  369. data/vendor/spidermonkey/rules.mk +197 -0
  370. data/vendor/spidermonkey/win32.order +384 -0
  371. metadata +513 -0
@@ -0,0 +1,4364 @@
1
+ /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2
+ * vim: set sw=4 ts=8 et tw=78:
3
+ *
4
+ * ***** BEGIN LICENSE BLOCK *****
5
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
+ *
7
+ * The contents of this file are subject to the Mozilla Public License Version
8
+ * 1.1 (the "License"); you may not use this file except in compliance with
9
+ * the License. You may obtain a copy of the License at
10
+ * http://www.mozilla.org/MPL/
11
+ *
12
+ * Software distributed under the License is distributed on an "AS IS" basis,
13
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
+ * for the specific language governing rights and limitations under the
15
+ * License.
16
+ *
17
+ * The Original Code is Mozilla Communicator client code, released
18
+ * March 31, 1998.
19
+ *
20
+ * The Initial Developer of the Original Code is
21
+ * Netscape Communications Corporation.
22
+ * Portions created by the Initial Developer are Copyright (C) 1998
23
+ * the Initial Developer. All Rights Reserved.
24
+ *
25
+ * Contributor(s):
26
+ *
27
+ * Alternatively, the contents of this file may be used under the terms of
28
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
29
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
+ * in which case the provisions of the GPL or the LGPL are applicable instead
31
+ * of those above. If you wish to allow use of your version of this file only
32
+ * under the terms of either the GPL or the LGPL, and not to allow others to
33
+ * use your version of this file under the terms of the MPL, indicate your
34
+ * decision by deleting the provisions above and replace them with the notice
35
+ * and other provisions required by the GPL or the LGPL. If you do not delete
36
+ * the provisions above, a recipient may use your version of this file under
37
+ * the terms of any one of the MPL, the GPL or the LGPL.
38
+ *
39
+ * ***** END LICENSE BLOCK ***** */
40
+
41
+ /*
42
+ * JS regular expressions, after Perl.
43
+ */
44
+ #include "jsstddef.h"
45
+ #include <stdlib.h>
46
+ #include <string.h>
47
+ #include <stdarg.h>
48
+ #include "jstypes.h"
49
+ #include "jsarena.h" /* Added by JSIFY */
50
+ #include "jsutil.h" /* Added by JSIFY */
51
+ #include "jsapi.h"
52
+ #include "jsarray.h"
53
+ #include "jsatom.h"
54
+ #include "jscntxt.h"
55
+ #include "jsconfig.h"
56
+ #include "jsfun.h"
57
+ #include "jsgc.h"
58
+ #include "jsinterp.h"
59
+ #include "jslock.h"
60
+ #include "jsnum.h"
61
+ #include "jsobj.h"
62
+ #include "jsopcode.h"
63
+ #include "jsregexp.h"
64
+ #include "jsscan.h"
65
+ #include "jsscope.h"
66
+ #include "jsstr.h"
67
+
68
+ typedef enum REOp { /* regexp opcode numbers, one enumerator per REOP_DEF entry */
69
+ #define REOP_DEF(opcode, name) opcode, /* keep only the opcode identifier */
70
+ #include "jsreops.tbl" /* presumably a list of REOP_DEF(...) rows -- confirm */
71
+ #undef REOP_DEF
72
+ REOP_LIMIT /* META: sentinel; no real opcode is >= this value */
73
+ } REOp;
74
+
75
+ #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS) /* true for opcodes numbered at or below REOP_NCLASS */
76
+
77
+ #ifdef REGEXP_DEBUG
78
+ const char *reop_names[] = { /* opcode name strings for debug builds, in jsreops.tbl order */
79
+ #define REOP_DEF(opcode, name) name, /* keep only the name field */
80
+ #include "jsreops.tbl"
81
+ #undef REOP_DEF
82
+ NULL /* terminator after the last name */
83
+ };
84
+ #endif
85
+
86
+ #ifdef __GNUC__ /* let GCC check re_debug arguments against the printf-style format */
87
+ static int
88
+ re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
89
+ #endif
90
+
91
+ #ifdef REGEXP_DEBUG
92
/*
 * Debug-build tracing helper: forwards a printf-style format string and its
 * variadic arguments to vprintf and hands back vprintf's result.
 */
static int
re_debug(const char *fmt, ...)
{
    va_list args;
    int written;

    va_start(args, fmt);
    written = vprintf(fmt, args);
    va_end(args);

    return written;
}
103
+
104
+ static void
105
+ re_debug_chars(const jschar *chrs, size_t length)
106
+ {
107
+ int i = 0;
108
+
109
+ printf(" \"");
110
+ while (*chrs && i++ < length) {
111
+ putchar((char)*chrs++);
112
+ }
113
+ printf("\"");
114
+ }
115
+ #else /* !REGEXP_DEBUG */
116
/*
 * Non-debug stand-in for re_debug: the arguments are ignored and zero is
 * reported.  This should be optimized to a no-op by our tier-1 compilers.
 */
static int
re_debug(const char *fmt, ...)
{
    const int nothingPrinted = 0;

    return nothingPrinted;
}
122
+
123
+ static void
124
+ re_debug_chars(const jschar *chrs, size_t length)
125
+ {
126
+ }
127
+ #endif /* !REGEXP_DEBUG */
128
+
129
+ struct RENode { /* regexp tree node; created by NewRENode */
130
+ REOp op; /* r.e. op bytecode */
131
+ RENode *next; /* next in concatenation order */
132
+ void *kid; /* first operand */
133
+ union { /* left uninitialized by NewRENode; presumably selected by op -- confirm */
134
+ void *kid2; /* second operand */
135
+ jsint num; /* could be a number */
136
+ size_t parenIndex; /* or a parenthesis index */
137
+ struct { /* or a quantifier range */
138
+ uintN min; /* lower bound of the range */
139
+ uintN max; /* upper bound of the range */
140
+ JSPackedBool greedy; /* presumably greedy vs. minimal matching -- confirm */
141
+ } range;
142
+ struct { /* or a character class */
143
+ size_t startIndex; /* NOTE(review): presumably where the class text starts -- confirm */
144
+ size_t kidlen; /* length of string at kid, in jschars */
145
+ size_t index; /* index into class list */
146
+ uint16 bmsize; /* bitmap size, based on max char code */
147
+ JSPackedBool sense; /* NOTE(review): presumably false for a negated class -- confirm */
148
+ } ucclass;
149
+ struct { /* or a literal sequence */
150
+ jschar chr; /* of one character */
151
+ size_t length; /* or many (via the kid) */
152
+ } flat;
153
+ struct { /* or data for an ALTPREREQ alternative */
154
+ RENode *kid2; /* second operand from ALT */
155
+ jschar ch1; /* match char for ALTPREREQ */
156
+ jschar ch2; /* ditto, or class index for ALTPREREQ2 */
157
+ } altprereq;
158
+ } u;
159
+ };
160
+
161
/*
 * Character classification helpers.  Each use of the argument is
 * parenthesized so that an expression argument (for example a conditional
 * expression) is classified as a whole.  The argument is still evaluated
 * more than once, so callers must not pass expressions with side effects.
 */
#define RE_IS_LETTER(c)     ((((c) >= 'A') && ((c) <= 'Z')) ||                \
                             (((c) >= 'a') && ((c) <= 'z')))
#define RE_IS_LINE_TERM(c)  (((c) == '\n') || ((c) == '\r') ||                \
                             ((c) == LINE_SEPARATOR) || ((c) == PARA_SEPARATOR))
165
+
166
+ #define CLASS_CACHE_SIZE 4 /* number of entries in CompilerState.classCache */
167
+
168
+ typedef struct CompilerState { /* state shared by the regexp compiler routines */
169
+ JSContext *context; /* NewRENode allocates from its tempPool; also used for error reports */
170
+ JSTokenStream *tokenStream; /* For reporting errors; when NULL, ReportRegExpErrorHelper reports without it */
171
+ const jschar *cpbegin; /* NOTE(review): presumably start of the regexp source -- confirm */
172
+ const jschar *cpend; /* NOTE(review): presumably end of the regexp source -- confirm */
173
+ const jschar *cp; /* NOTE(review): presumably the current parse position -- confirm */
174
+ size_t parenCount; /* NOTE(review): presumably number of capturing parens seen -- confirm */
175
+ size_t classCount; /* number of [] encountered */
176
+ size_t treeDepth; /* maximum depth of parse tree */
177
+ size_t progLength; /* estimated bytecode length */
178
+ RENode *result; /* NOTE(review): presumably the root of the parsed tree -- confirm */
179
+ size_t classBitmapsMem; /* memory to hold all class bitmaps */
180
+ struct {
181
+ const jschar *start; /* small cache of class strings */
182
+ size_t length; /* since they're often the same */
183
+ size_t index;
184
+ } classCache[CLASS_CACHE_SIZE];
185
+ uint16 flags;
186
+ } CompilerState;
187
+
188
+ typedef struct EmitStateStackEntry { /* one entry of the emit-state stack; TREE_DEPTH_MAX is derived from its size */
189
+ jsbytecode *altHead; /* start of REOP_ALT* opcode */
190
+ jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
191
+ jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
192
+ jsbytecode *endTermFixup; /* fixup ptr. to REOP_ALTPREREQ* offset */
193
+ RENode *continueNode; /* original REOP_ALT* node being stacked */
194
+ jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
195
+ JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
196
+ avoid 16-bit unsigned offset overflow */
197
+ } EmitStateStackEntry;
198
+
199
+ /*
200
+ * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
201
+ * the getters and setters take the pc of the offset, not of the opcode before
202
+ * the offset.
203
+ */
204
+ #define ARG_LEN 2 /* bytes occupied by one immediate operand */
205
+ #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1])) /* reads two bytes, high byte first */
206
+ #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
207
+ (pc)[1] = (jsbytecode) (arg)) /* stores the high byte at pc[0], the low byte at pc[1] */
208
+
209
+ #define OFFSET_LEN ARG_LEN /* offsets use the same width as other operands */
210
+ #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1) /* largest offset, assuming JS_BIT(n) is 1 << n -- confirm */
211
+ #define GET_OFFSET(pc) GET_ARG(pc)
212
+
213
+ /*
214
+ * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
215
+ * For sanity, we limit it to 2^24 bytes.
216
+ */
217
+ #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry)) /* a count of stack entries, not bytes */
218
+
219
+ /*
220
+ * The maximum memory that can be allocated for class bitmaps.
221
+ * For sanity, we limit it to 2^24 bytes.
222
+ */
223
+ #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24) /* in bytes, per the comment above */
224
+
225
+ /*
226
+ * Functions to get size and write/read bytecode that represent small indexes
227
+ * compactly.
228
+ * Each byte in the code represents a 7-bit chunk of the index. The 8th bit, when
229
+ * set, indicates that the following byte brings more bits to the index. Otherwise
230
+ * this is the last byte of the index bytecode, holding the highest index bits.
231
+ */
232
/*
 * Return how many bytes the compact encoding of index occupies: one byte for
 * every 7 bits of the value, with a minimum of one byte.
 */
static size_t
GetCompactIndexWidth(size_t index)
{
    size_t nbytes = 1;

    while (index > 0x7F) {
        index >>= 7;
        nbytes++;
    }
    return nbytes;
}
240
+
241
+ static JS_INLINE jsbytecode *
242
+ WriteCompactIndex(jsbytecode *pc, size_t index)
243
+ {
244
+ size_t next;
245
+
246
+ while ((next = index >> 7) != 0) {
247
+ *pc++ = (jsbytecode)(index | 0x80);
248
+ index = next;
249
+ }
250
+ *pc++ = (jsbytecode)index;
251
+ return pc;
252
+ }
253
+
254
+ static JS_INLINE jsbytecode *
255
+ ReadCompactIndex(jsbytecode *pc, size_t *result)
256
+ {
257
+ size_t nextByte;
258
+
259
+ nextByte = *pc++;
260
+ if ((nextByte & 0x80) == 0) {
261
+ /*
262
+ * Short-circuit the most common case when compact index <= 127.
263
+ */
264
+ *result = nextByte;
265
+ } else {
266
+ size_t shift = 7;
267
+ *result = 0x7F & nextByte;
268
+ do {
269
+ nextByte = *pc++;
270
+ *result |= (nextByte & 0x7F) << shift;
271
+ shift += 7;
272
+ } while ((nextByte & 0x80) != 0);
273
+ }
274
+ return pc;
275
+ }
276
+
277
+ typedef struct RECapture { /* one captured substring, as an index/length pair */
278
+ ptrdiff_t index; /* start of contents, -1 for empty */
279
+ size_t length; /* length of capture */
280
+ } RECapture;
281
+
282
+ typedef struct REMatchState { /* variable-size: the parens array extends past the struct */
283
+ const jschar *cp; /* NOTE(review): presumably the current position in the text -- confirm */
284
+ RECapture parens[1]; /* first of 're->parenCount' captures,
285
+ allocated at end of this struct */
286
+ } REMatchState;
287
+
288
+ struct REBackTrackData;
289
+
290
+ typedef struct REProgState { /* element type of REGlobalData.stateStack */
291
+ jsbytecode *continue_pc; /* current continuation data */
292
+ jsbytecode continue_op; /* opcode paired with continue_pc */
293
+ ptrdiff_t index; /* progress in text */
294
+ size_t parenSoFar; /* highest indexed paren started */
295
+ union {
296
+ struct {
297
+ uintN min; /* current quantifier limits */
298
+ uintN max;
299
+ } quantifier;
300
+ struct {
301
+ size_t top; /* backtrack stack state */
302
+ size_t sz;
303
+ } assertion;
304
+ } u;
305
+ } REProgState;
306
+
307
+ typedef struct REBackTrackData { /* header of one backtrack-stack entry; variable-size data follows it */
308
+ size_t sz; /* size of previous stack entry */
309
+ jsbytecode *backtrack_pc; /* where to backtrack to */
310
+ jsbytecode backtrack_op; /* opcode paired with backtrack_pc */
311
+ const jschar *cp; /* index in text of match at backtrack */
312
+ size_t parenIndex; /* start index of saved paren contents */
313
+ size_t parenCount; /* # of saved paren contents */
314
+ size_t saveStateStackTop; /* number of parent states */
315
+ /* saved parent states follow */
316
+ /* saved paren contents follow */
317
+ } REBackTrackData;
318
+
319
+ #define INITIAL_STATESTACK 100 /* NOTE(review): presumably the starting stateStack capacity, in entries -- confirm */
320
+ #define INITIAL_BACKTRACK 8000 /* NOTE(review): presumably the starting backtrack stack size -- confirm units */
321
+
322
+ typedef struct REGlobalData { /* state for one regexp execution */
323
+ JSContext *cx;
324
+ JSRegExp *regexp; /* the RE in execution */
325
+ JSBool ok; /* runtime error (out_of_memory only?) */
326
+ size_t start; /* offset to start at */
327
+ ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
328
+ const jschar *cpbegin; /* text base address */
329
+ const jschar *cpend; /* text limit address */
330
+
331
+ REProgState *stateStack; /* stack of state of current parents */
332
+ size_t stateStackTop; /* NOTE(review): presumably the number of entries in use -- confirm */
333
+ size_t stateStackLimit; /* NOTE(review): presumably the allocated capacity -- confirm */
334
+
335
+ REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
336
+ REBackTrackData *backTrackSP; /* NOTE(review): presumably the current top entry -- confirm */
337
+ size_t backTrackStackSize;
338
+ size_t cursz; /* size of current stack entry */
339
+ size_t backTrackCount; /* how many times we've backtracked */
340
+ size_t backTrackLimit; /* upper limit on backtrack states */
341
+
342
+ JSArenaPool pool; /* It's faster to use one malloc'd pool
343
+ than to malloc/free the three items
344
+ that are allocated from this pool */
345
+ } REGlobalData;
346
+
347
+ /*
348
+ * 1. If IgnoreCase is false, return ch.
349
+ * 2. Let u be ch converted to upper case as if by calling
350
+ * String.prototype.toUpperCase on the one-character string ch.
351
+ * 3. If u does not consist of a single character, return ch.
352
+ * 4. Let cu be u's character.
353
+ * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
354
+ * code point value is less than decimal 128, then return ch.
355
+ * 6. Return cu.
356
+ */
357
+ static JS_INLINE uintN
358
+ upcase(uintN ch)
359
+ {
360
+ uintN cu;
361
+
362
+ JS_ASSERT((uintN) (jschar) ch == ch);
363
+ if (ch < 128) {
364
+ if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
365
+ ch -= (uintN) ('a' - 'A');
366
+ return ch;
367
+ }
368
+
369
+ cu = JS_TOUPPER(ch);
370
+ return (cu < 128) ? ch : cu;
371
+ }
372
+
373
+ static JS_INLINE uintN
374
+ downcase(uintN ch)
375
+ {
376
+ JS_ASSERT((uintN) (jschar) ch == ch);
377
+ if (ch < 128) {
378
+ if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
379
+ ch += (uintN) ('a' - 'A');
380
+ return ch;
381
+ }
382
+
383
+ return JS_TOLOWER(ch);
384
+ }
385
+
386
+ /* Construct and initialize an RENode, returning NULL for out-of-memory */
387
+ static RENode *
388
+ NewRENode(CompilerState *state, REOp op)
389
+ {
390
+ JSContext *cx;
391
+ RENode *ren;
392
+
393
+ cx = state->context;
394
+ JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
395
+ if (!ren) {
396
+ js_ReportOutOfScriptQuota(cx);
397
+ return NULL;
398
+ }
399
+ ren->op = op;
400
+ ren->next = NULL;
401
+ ren->kid = NULL;
402
+ return ren;
403
+ }
404
+
405
+ /*
406
+ * Validates and converts hex ascii value.
407
+ */
408
+ static JSBool
409
+ isASCIIHexDigit(jschar c, uintN *digit)
410
+ {
411
+ uintN cv = c;
412
+
413
+ if (cv < '0')
414
+ return JS_FALSE;
415
+ if (cv <= '9') {
416
+ *digit = cv - '0';
417
+ return JS_TRUE;
418
+ }
419
+ cv |= 0x20;
420
+ if (cv >= 'a' && cv <= 'f') {
421
+ *digit = cv - 'a' + 10;
422
+ return JS_TRUE;
423
+ }
424
+ return JS_FALSE;
425
+ }
426
+
427
+
428
/*
 * One entry on ParseRegExp's operator stack.
 */
typedef struct {
    REOp op;                /* pending operator (ALT, CONCAT or an open
                               paren/assertion kind) */
    const jschar *errPos;   /* source position when the operator was pushed;
                               passed to the error reporter by ProcessOp for
                               unclosed parens */
    size_t parenIndex;      /* state->parenCount sampled at the top of the
                               parse-loop iteration that pushed this entry */
} REOpData;
433
+
434
+ static JSBool
435
+ ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
436
+ const jschar *arg)
437
+ {
438
+ if (state->tokenStream) {
439
+ return js_ReportCompileErrorNumber(state->context, state->tokenStream,
440
+ NULL, JSREPORT_UC | flags,
441
+ errorNumber, arg);
442
+ }
443
+ return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
444
+ js_GetErrorMessage, NULL,
445
+ errorNumber, arg);
446
+ }
447
+
448
+ static JSBool
449
+ ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
450
+ {
451
+ return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
452
+ }
453
+
454
+ /*
455
+ * Process the op against the two top operands, reducing them to a single
456
+ * operand in the penultimate slot. Update progLength and treeDepth.
457
+ */
458
+ static JSBool
459
+ ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
460
+ intN operandSP)
461
+ {
462
+ RENode *result;
463
+
464
+ switch (opData->op) {
465
+ case REOP_ALT:
466
+ result = NewRENode(state, REOP_ALT);
467
+ if (!result)
468
+ return JS_FALSE;
469
+ result->kid = operandStack[operandSP - 2];
470
+ result->u.kid2 = operandStack[operandSP - 1];
471
+ operandStack[operandSP - 2] = result;
472
+
473
+ if (state->treeDepth == TREE_DEPTH_MAX) {
474
+ ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
475
+ return JS_FALSE;
476
+ }
477
+ ++state->treeDepth;
478
+
479
+ /*
480
+ * Look at both alternates to see if there's a FLAT or a CLASS at
481
+ * the start of each. If so, use a prerequisite match.
482
+ */
483
+ if (((RENode *) result->kid)->op == REOP_FLAT &&
484
+ ((RENode *) result->u.kid2)->op == REOP_FLAT &&
485
+ (state->flags & JSREG_FOLD) == 0) {
486
+ result->op = REOP_ALTPREREQ;
487
+ result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
488
+ result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
489
+ /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
490
+ JUMP, <end> ... ENDALT */
491
+ state->progLength += 13;
492
+ }
493
+ else
494
+ if (((RENode *) result->kid)->op == REOP_CLASS &&
495
+ ((RENode *) result->kid)->u.ucclass.index < 256 &&
496
+ ((RENode *) result->u.kid2)->op == REOP_FLAT &&
497
+ (state->flags & JSREG_FOLD) == 0) {
498
+ result->op = REOP_ALTPREREQ2;
499
+ result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
500
+ result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
501
+ /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
502
+ JUMP, <end> ... ENDALT */
503
+ state->progLength += 13;
504
+ }
505
+ else
506
+ if (((RENode *) result->kid)->op == REOP_FLAT &&
507
+ ((RENode *) result->u.kid2)->op == REOP_CLASS &&
508
+ ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
509
+ (state->flags & JSREG_FOLD) == 0) {
510
+ result->op = REOP_ALTPREREQ2;
511
+ result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
512
+ result->u.altprereq.ch2 =
513
+ ((RENode *) result->u.kid2)->u.ucclass.index;
514
+ /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
515
+ JUMP, <end> ... ENDALT */
516
+ state->progLength += 13;
517
+ }
518
+ else {
519
+ /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
520
+ state->progLength += 7;
521
+ }
522
+ break;
523
+
524
+ case REOP_CONCAT:
525
+ result = operandStack[operandSP - 2];
526
+ while (result->next)
527
+ result = result->next;
528
+ result->next = operandStack[operandSP - 1];
529
+ break;
530
+
531
+ case REOP_ASSERT:
532
+ case REOP_ASSERT_NOT:
533
+ case REOP_LPARENNON:
534
+ case REOP_LPAREN:
535
+ /* These should have been processed by a close paren. */
536
+ ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
537
+ opData->errPos);
538
+ return JS_FALSE;
539
+
540
+ default:;
541
+ }
542
+ return JS_TRUE;
543
+ }
544
+
545
+ /*
546
+ * Parser forward declarations.
547
+ */
548
+ static JSBool ParseTerm(CompilerState *state);
549
+ static JSBool ParseQuantifier(CompilerState *state);
550
+ static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
551
+
552
/*
 * Top-down regular expression grammar, based closely on Perl4.
 *
 *  regexp:     altern                  A regular expression is one or more
 *              altern '|' regexp       alternatives separated by vertical bar.
 */
#define INITIAL_STACK_SIZE  128

/*
 * Parse the source text between state->cp and state->cpend into an RENode
 * tree, using two explicit stacks: one of pending operators (REOpData) and
 * one of operands (RENode *).  Both start at INITIAL_STACK_SIZE entries and
 * double when full.
 *
 * Each iteration of the main loop has two halves:
 *   1. operand position: an open paren pushes an operator and jumps to the
 *      bottom of the loop; ')' or '|' (or end of input while short of
 *      operands) pushes an EMPTY operand; anything else is parsed by
 *      ParseTerm.
 *   2. operator position (restartOperator): '|' and ')' reduce stacked
 *      operators through ProcessOp; any other character implies CONCAT.
 *
 * On success the finished tree is stored in state->result and JS_TRUE is
 * returned.  On failure JS_FALSE is returned.  Both stacks are freed on every
 * path that reaches `out`.
 */
static JSBool
ParseRegExp(CompilerState *state)
{
    size_t parenIndex;
    RENode *operand;
    REOpData *operatorStack;
    RENode **operandStack;
    REOp op;
    intN i;
    JSBool result = JS_FALSE;

    intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
    intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;

    /* Watch out for empty regexp */
    if (state->cp == state->cpend) {
        state->result = NewRENode(state, REOP_EMPTY);
        return (state->result != NULL);
    }

    operatorStack = (REOpData *)
        JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
    if (!operatorStack)
        return JS_FALSE;

    operandStack = (RENode **)
        JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
    if (!operandStack)
        goto out;

    for (;;) {
        /* Sampled before any '(' below bumps parenCount. */
        parenIndex = state->parenCount;
        if (state->cp == state->cpend) {
            /*
             * If we are at the end of the regexp and we're short one or more
             * operands, the regexp must have the form /x|/ or some such, with
             * left parentheses making us short more than one operand.
             */
            if (operatorSP >= operandSP) {
                operand = NewRENode(state, REOP_EMPTY);
                if (!operand)
                    goto out;
                goto pushOperand;
            }
        } else {
            switch (*state->cp) {
              case '(':
                ++state->cp;
                /* "(?=", "(?!" and "(?:" need two more characters. */
                if (state->cp + 1 < state->cpend &&
                    *state->cp == '?' &&
                    (state->cp[1] == '=' ||
                     state->cp[1] == '!' ||
                     state->cp[1] == ':')) {
                    switch (state->cp[1]) {
                      case '=':
                        op = REOP_ASSERT;
                        /* ASSERT, <next>, ... ASSERTTEST */
                        state->progLength += 4;
                        break;
                      case '!':
                        op = REOP_ASSERT_NOT;
                        /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
                        state->progLength += 4;
                        break;
                      default:
                        op = REOP_LPARENNON;
                        break;
                    }
                    state->cp += 2;
                } else {
                    op = REOP_LPAREN;
                    /* LPAREN, <index>, ... RPAREN, <index> */
                    state->progLength
                        += 2 * (1 + GetCompactIndexWidth(parenIndex));
                    state->parenCount++;
                    if (state->parenCount == 65535) {
                        ReportRegExpError(state, JSREPORT_ERROR,
                                          JSMSG_TOO_MANY_PARENS);
                        goto out;
                    }
                }
                goto pushOperator;

              case ')':
                /*
                 * If there's no stacked open parenthesis, throw syntax error.
                 */
                for (i = operatorSP - 1; ; i--) {
                    if (i < 0) {
                        ReportRegExpError(state, JSREPORT_ERROR,
                                          JSMSG_UNMATCHED_RIGHT_PAREN);
                        goto out;
                    }
                    if (operatorStack[i].op == REOP_ASSERT ||
                        operatorStack[i].op == REOP_ASSERT_NOT ||
                        operatorStack[i].op == REOP_LPARENNON ||
                        operatorStack[i].op == REOP_LPAREN) {
                        break;
                    }
                }
                /* FALL THROUGH */

              case '|':
                /*
                 * Expected an operand before these, so make an empty one.
                 * The character itself is not consumed here; the operator
                 * half of the loop handles it.
                 */
                operand = NewRENode(state, REOP_EMPTY);
                if (!operand)
                    goto out;
                goto pushOperand;

              default:
                if (!ParseTerm(state))
                    goto out;
                operand = state->result;
              pushOperand:
                /* Double the operand stack when it is full. */
                if (operandSP == operandStackSize) {
                    RENode **tmp;
                    operandStackSize += operandStackSize;
                    tmp = (RENode **)
                        JS_realloc(state->context, operandStack,
                                   sizeof(RENode *) * operandStackSize);
                    if (!tmp)
                        goto out;
                    operandStack = tmp;
                }
                operandStack[operandSP++] = operand;
                break;
            }
        }

        /* At the end; process remaining operators. */
      restartOperator:
        if (state->cp == state->cpend) {
            while (operatorSP) {
                --operatorSP;
                if (!ProcessOp(state, &operatorStack[operatorSP],
                               operandStack, operandSP))
                    goto out;
                --operandSP;
            }
            JS_ASSERT(operandSP == 1);
            state->result = operandStack[0];
            result = JS_TRUE;
            goto out;
        }

        switch (*state->cp) {
          case '|':
            /* Process any stacked 'concat' operators */
            ++state->cp;
            while (operatorSP &&
                   operatorStack[operatorSP - 1].op == REOP_CONCAT) {
                --operatorSP;
                if (!ProcessOp(state, &operatorStack[operatorSP],
                               operandStack, operandSP)) {
                    goto out;
                }
                --operandSP;
            }
            op = REOP_ALT;
            goto pushOperator;

          case ')':
            /*
             * If there's no stacked open parenthesis, throw syntax error.
             */
            for (i = operatorSP - 1; ; i--) {
                if (i < 0) {
                    ReportRegExpError(state, JSREPORT_ERROR,
                                      JSMSG_UNMATCHED_RIGHT_PAREN);
                    goto out;
                }
                if (operatorStack[i].op == REOP_ASSERT ||
                    operatorStack[i].op == REOP_ASSERT_NOT ||
                    operatorStack[i].op == REOP_LPARENNON ||
                    operatorStack[i].op == REOP_LPAREN) {
                    break;
                }
            }
            ++state->cp;

            /* Process everything on the stack until the open parenthesis. */
            for (;;) {
                JS_ASSERT(operatorSP);
                --operatorSP;
                switch (operatorStack[operatorSP].op) {
                  case REOP_ASSERT:
                  case REOP_ASSERT_NOT:
                  case REOP_LPAREN:
                    /* Wrap the group's contents in a node of the paren kind. */
                    operand = NewRENode(state, operatorStack[operatorSP].op);
                    if (!operand)
                        goto out;
                    operand->u.parenIndex =
                        operatorStack[operatorSP].parenIndex;
                    JS_ASSERT(operandSP);
                    operand->kid = operandStack[operandSP - 1];
                    operandStack[operandSP - 1] = operand;
                    if (state->treeDepth == TREE_DEPTH_MAX) {
                        ReportRegExpError(state, JSREPORT_ERROR,
                                          JSMSG_REGEXP_TOO_COMPLEX);
                        goto out;
                    }
                    ++state->treeDepth;
                    /* FALL THROUGH */

                  case REOP_LPARENNON:
                    /* The closed group may itself be quantified. */
                    state->result = operandStack[operandSP - 1];
                    if (!ParseQuantifier(state))
                        goto out;
                    operandStack[operandSP - 1] = state->result;
                    goto restartOperator;
                  default:
                    if (!ProcessOp(state, &operatorStack[operatorSP],
                                   operandStack, operandSP))
                        goto out;
                    --operandSP;
                    break;
                }
            }
            break;

          case '{':
          {
            const jschar *errp = state->cp;

            if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
                /*
                 * This didn't even scan correctly as a quantifier, so we should
                 * treat it as flat.
                 */
                op = REOP_CONCAT;
                goto pushOperator;
            }

            state->cp = errp;
            /* FALL THROUGH */
          }

          case '+':
          case '*':
          case '?':
            /* A quantifier in operator position has nothing to quantify. */
            ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
                                    state->cp);
            result = JS_FALSE;
            goto out;

          default:
            /* Anything else is the start of the next term. */
            op = REOP_CONCAT;
          pushOperator:
            /* Double the operator stack when it is full. */
            if (operatorSP == operatorStackSize) {
                REOpData *tmp;
                operatorStackSize += operatorStackSize;
                tmp = (REOpData *)
                    JS_realloc(state->context, operatorStack,
                               sizeof(REOpData) * operatorStackSize);
                if (!tmp)
                    goto out;
                operatorStack = tmp;
            }
            operatorStack[operatorSP].op = op;
            operatorStack[operatorSP].errPos = state->cp;
            operatorStack[operatorSP++].parenIndex = parenIndex;
            break;
        }
    }
out:
    /* Single exit: release both stacks whether we succeeded or failed. */
    if (operatorStack)
        JS_free(state->context, operatorStack);
    if (operandStack)
        JS_free(state->context, operandStack);
    return result;
}
832
+
833
+ /*
834
+ * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
835
+ * its being on the stack, and to propagate errors to its callers.
836
+ */
837
+ #define JSREG_FIND_PAREN_COUNT 0x8000
838
+ #define JSREG_FIND_PAREN_ERROR 0x4000
839
+
840
+ /*
841
+ * Magic return value from FindParenCount and GetDecimalValue, to indicate
842
+ * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
843
+ * its findMax parameter is non-null.
844
+ */
845
+ #define OVERFLOW_VALUE ((uintN)-1)
846
+
847
+ static uintN
848
+ FindParenCount(CompilerState *state)
849
+ {
850
+ CompilerState temp;
851
+ int i;
852
+
853
+ if (state->flags & JSREG_FIND_PAREN_COUNT)
854
+ return OVERFLOW_VALUE;
855
+
856
+ /*
857
+ * Copy state into temp, flag it so we never report an invalid backref,
858
+ * and reset its members to parse the entire regexp. This is obviously
859
+ * suboptimal, but GetDecimalValue calls us only if a backref appears to
860
+ * refer to a forward parenthetical, which is rare.
861
+ */
862
+ temp = *state;
863
+ temp.flags |= JSREG_FIND_PAREN_COUNT;
864
+ temp.cp = temp.cpbegin;
865
+ temp.parenCount = 0;
866
+ temp.classCount = 0;
867
+ temp.progLength = 0;
868
+ temp.treeDepth = 0;
869
+ temp.classBitmapsMem = 0;
870
+ for (i = 0; i < CLASS_CACHE_SIZE; i++)
871
+ temp.classCache[i].start = NULL;
872
+
873
+ if (!ParseRegExp(&temp)) {
874
+ state->flags |= JSREG_FIND_PAREN_ERROR;
875
+ return OVERFLOW_VALUE;
876
+ }
877
+ return temp.parenCount;
878
+ }
879
+
880
+ /*
881
+ * Extract and return a decimal value at state->cp. The initial character c
882
+ * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
883
+ * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
884
+ * state->flags to discover whether an error occurred under findMax.
885
+ */
886
+ static uintN
887
+ GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
888
+ CompilerState *state)
889
+ {
890
+ uintN value = JS7_UNDEC(c);
891
+ JSBool overflow = (value > max && (!findMax || value > findMax(state)));
892
+
893
+ /* The following restriction allows simpler overflow checks. */
894
+ JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
895
+ while (state->cp < state->cpend) {
896
+ c = *state->cp;
897
+ if (!JS7_ISDEC(c))
898
+ break;
899
+ value = 10 * value + JS7_UNDEC(c);
900
+ if (!overflow && value > max && (!findMax || value > findMax(state)))
901
+ overflow = JS_TRUE;
902
+ ++state->cp;
903
+ }
904
+ return overflow ? OVERFLOW_VALUE : value;
905
+ }
906
+
907
/*
 * Calculate the total size of the bitmap required for a class expression.
 *
 * Scans the class body [src, end) -- the text between '[' and ']' -- and
 * stores in target->u.ucclass.bmsize the largest character code the class
 * can mention (ParseTerm turns that into a byte count).  Also sets
 * target->u.ucclass.sense to JS_FALSE when the body starts with '^'.
 *
 * Reports JSMSG_BAD_CLASS_RANGE and returns JS_FALSE when a class escape
 * (\d \D \s \S \w \W) is used as the end of a range, or when a range's start
 * is greater than its end.  Returns JS_TRUE otherwise.
 */
static JSBool
CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
                    const jschar *end)
{
    uintN max = 0;                  /* largest code seen so far */
    JSBool inRange = JS_FALSE;      /* just consumed "x-", expecting range end */
    jschar c, rangeStart = 0;
    uintN n, digit, nDigits, i;

    target->u.ucclass.bmsize = 0;
    target->u.ucclass.sense = JS_TRUE;

    if (src == end)
        return JS_TRUE;

    if (*src == '^') {
        ++src;
        target->u.ucclass.sense = JS_FALSE;
    }

    while (src != end) {
        JSBool canStartRange = JS_TRUE;
        uintN localMax = 0;         /* code of the item lexed this iteration */

        switch (*src) {
          case '\\':
            ++src;
            c = *src++;
            switch (c) {
              case 'b':
                localMax = 0x8;
                break;
              case 'f':
                localMax = 0xC;
                break;
              case 'n':
                localMax = 0xA;
                break;
              case 'r':
                localMax = 0xD;
                break;
              case 't':
                localMax = 0x9;
                break;
              case 'v':
                localMax = 0xB;
                break;
              case 'c':
                if (src < end && RE_IS_LETTER(*src)) {
                    localMax = (uintN) (*src++) & 0x1F;
                } else {
                    /* Not a control letter: count the '\' and rescan 'c'. */
                    --src;
                    localMax = '\\';
                }
                break;
              case 'x':
                nDigits = 2;
                goto lexHex;
              case 'u':
                nDigits = 4;
              lexHex:
                /* Stops early, keeping the partial value, if end is reached. */
                n = 0;
                for (i = 0; (i < nDigits) && (src < end); i++) {
                    c = *src++;
                    if (!isASCIIHexDigit(c, &digit)) {
                        /*
                         * Back off to accepting the original
                         *'\' as a literal.
                         */
                        src -= i + 1;
                        n = '\\';
                        break;
                    }
                    n = (n << 4) | digit;
                }
                localMax = n;
                break;
              case 'd':
                canStartRange = JS_FALSE;
                if (inRange) {
                    JS_ReportErrorNumber(state->context,
                                         js_GetErrorMessage, NULL,
                                         JSMSG_BAD_CLASS_RANGE);
                    return JS_FALSE;
                }
                localMax = '9';
                break;
              case 'D':
              case 's':
              case 'S':
              case 'w':
              case 'W':
                canStartRange = JS_FALSE;
                if (inRange) {
                    JS_ReportErrorNumber(state->context,
                                         js_GetErrorMessage, NULL,
                                         JSMSG_BAD_CLASS_RANGE);
                    return JS_FALSE;
                }
                /* These escapes force the maximum directly to 65535. */
                max = 65535;

                /*
                 * If this is the start of a range, ensure that it's less than
                 * the end.
                 */
                localMax = 0;
                break;
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                /*
                 * This is a non-ECMA extension - decimal escapes (in this
                 * case, octal!) are supposed to be an error inside class
                 * ranges, but supported here for backwards compatibility.
                 *
                 */
                n = JS7_UNDEC(c);
                c = *src;
                if ('0' <= c && c <= '7') {
                    src++;
                    n = 8 * n + JS7_UNDEC(c);
                    c = *src;
                    if ('0' <= c && c <= '7') {
                        src++;
                        i = 8 * n + JS7_UNDEC(c);
                        /* Keep the third digit only if the value fits 0377. */
                        if (i <= 0377)
                            n = i;
                        else
                            src--;
                    }
                }
                localMax = n;
                break;

              default:
                localMax = c;
                break;
            }
            break;
          default:
            localMax = *src++;
            break;
        }

        if (inRange) {
            /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
            if (rangeStart > localMax) {
                JS_ReportErrorNumber(state->context,
                                     js_GetErrorMessage, NULL,
                                     JSMSG_BAD_CLASS_RANGE);
                return JS_FALSE;
            }
            inRange = JS_FALSE;
        } else {
            /* A '-' that is the last character of the body is a literal. */
            if (canStartRange && src < end - 1) {
                if (*src == '-') {
                    ++src;
                    inRange = JS_TRUE;
                    rangeStart = (jschar)localMax;
                    continue;
                }
            }
            if (state->flags & JSREG_FOLD)
                rangeStart = localMax;  /* one run of the uc/dc loop below */
        }

        if (state->flags & JSREG_FOLD) {
            /*
             * Case-insensitive: the bitmap must also cover the upper- and
             * lower-case forms of every character in [rangeStart, localMax].
             */
            jschar maxch = localMax;

            for (i = rangeStart; i <= localMax; i++) {
                jschar uch, dch;

                uch = upcase(i);
                dch = downcase(i);
                maxch = JS_MAX(maxch, uch);
                maxch = JS_MAX(maxch, dch);
            }
            localMax = maxch;
        }

        if (localMax > max)
            max = localMax;
    }
    target->u.ucclass.bmsize = max;
    return JS_TRUE;
}
1101
+
1102
/*
 *  item:       assertion               An item is either an assertion or
 *              quantatom               a quantified atom.
 *
 *  assertion:  '^'                     Assertions match beginning of string
 *                                      (or line if the class static property
 *                                      RegExp.multiline is true).
 *              '$'                     End of string (or line if the class
 *                                      static property RegExp.multiline is
 *                                      true).
 *              '\b'                    Word boundary (between \w and \W).
 *              '\B'                    Word non-boundary.
 *
 *  quantatom:  atom                    An unquantified atom.
 *              quantatom '{' n ',' m '}'
 *                                      Atom must occur between n and m times.
 *              quantatom '{' n ',' '}' Atom must occur at least n times.
 *              quantatom '{' n '}'     Atom must occur exactly n times.
 *              quantatom '*'           Zero or more times (same as {0,}).
 *              quantatom '+'           One or more times (same as {1,}).
 *              quantatom '?'           Zero or one time (same as {0,1}).
 *
 *              any of which can be optionally followed by '?' for ungreedy
 *
 *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
 *                                      can be addressed using a backreference,
 *                                      see '\' n below).
 *              '.'                     Matches any char except '\n'.
 *              '[' classlist ']'       A character class.
 *              '[' '^' classlist ']'   A negated character class.
 *              '\f'                    Form Feed.
 *              '\n'                    Newline (Line Feed).
 *              '\r'                    Carriage Return.
 *              '\t'                    Horizontal Tab.
 *              '\v'                    Vertical Tab.
 *              '\d'                    A digit (same as [0-9]).
 *              '\D'                    A non-digit.
 *              '\w'                    A word character, [0-9a-z_A-Z].
 *              '\W'                    A non-word character.
 *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
 *              '\S'                    A non-whitespace character.
 *              '\' n                   A backreference to the nth (n decimal
 *                                      and positive) parenthesized expression.
 *              '\' octal               An octal escape sequence (octal must be
 *                                      two or three digits long, unless it is
 *                                      0 for the null character).
 *              '\x' hex                A hex escape (hex must be two digits).
 *              '\u' unicode            A unicode escape (must be four digits).
 *              '\c' ctrl               A control character, ctrl is a letter.
 *              '\' literalatomchar     Any character except one of the above
 *                                      that follow '\' in an atom.
 *              otheratomchar           Any character not first among the other
 *                                      atom right-hand sides.
 *
 * ParseTerm consumes one item starting at state->cp, leaves its node in
 * state->result and adds the item's bytecode size to state->progLength.
 * Assertions (^, $, \b, \B) return immediately; every atom is handed to
 * ParseQuantifier so that a following quantifier can wrap it.  Returns
 * JS_FALSE after an error (allocation failure or a reported syntax error).
 * The caller must ensure state->cp < state->cpend on entry, since the first
 * character is read unconditionally.
 */
static JSBool
ParseTerm(CompilerState *state)
{
    jschar c = *state->cp++;
    uintN nDigits;
    uintN num, tmp, n, i;
    const jschar *termStart;

    switch (c) {
    /* assertions and atoms */
      case '^':
        state->result = NewRENode(state, REOP_BOL);
        if (!state->result)
            return JS_FALSE;
        state->progLength++;
        return JS_TRUE;
      case '$':
        state->result = NewRENode(state, REOP_EOL);
        if (!state->result)
            return JS_FALSE;
        state->progLength++;
        return JS_TRUE;
      case '\\':
        if (state->cp >= state->cpend) {
            /* a trailing '\' is an error */
            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
            return JS_FALSE;
        }
        c = *state->cp++;
        switch (c) {
        /* assertion escapes */
          case 'b' :
            state->result = NewRENode(state, REOP_WBDRY);
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            return JS_TRUE;
          case 'B':
            state->result = NewRENode(state, REOP_WNONBDRY);
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            return JS_TRUE;
          /* Decimal escape */
          case '0':
            /* Give a strict warning. See also the note below. */
            if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
                                   JSMSG_INVALID_BACKREF)) {
                return JS_FALSE;
            }
          doOctal:
            /* Accumulate octal digits while the value stays <= 0377. */
            num = 0;
            while (state->cp < state->cpend) {
                c = *state->cp;
                if (c < '0' || '7' < c)
                    break;
                state->cp++;
                tmp = 8 * num + (uintN)JS7_UNDEC(c);
                if (tmp > 0377)
                    break;
                num = tmp;
            }
            c = (jschar)num;
          doFlat:
            /* Emit a one-character FLAT node for the character in c. */
            state->result = NewRENode(state, REOP_FLAT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.flat.chr = c;
            state->result->u.flat.length = 1;
            state->progLength += 3;
            break;
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
            /* termStart points at the first digit, so the '\' is before it. */
            termStart = state->cp - 1;
            num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
            if (state->flags & JSREG_FIND_PAREN_ERROR)
                return JS_FALSE;
            if (num == OVERFLOW_VALUE) {
                /* Give a strict mode warning. */
                if (!ReportRegExpError(state,
                                       JSREPORT_WARNING | JSREPORT_STRICT,
                                       (c >= '8')
                                       ? JSMSG_INVALID_BACKREF
                                       : JSMSG_BAD_BACKREF)) {
                    return JS_FALSE;
                }

                /*
                 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
                 * error here. However, for compatibility with IE, we treat the
                 * whole backref as flat if the first character in it is not a
                 * valid octal character, and as an octal escape otherwise.
                 */
                state->cp = termStart;
                if (c >= '8') {
                    /* Treat this as flat. termStart - 1 is the \. */
                    c = '\\';
                    goto asFlat;
                }

                /* Treat this as an octal escape. */
                goto doOctal;
            }
            JS_ASSERT(1 <= num && num <= 0x10000);
            state->result = NewRENode(state, REOP_BACKREF);
            if (!state->result)
                return JS_FALSE;
            /* Backreferences are 1-based in source, 0-based in the node. */
            state->result->u.parenIndex = num - 1;
            state->progLength
                += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
            break;
          /* Control escape */
          case 'f':
            c = 0xC;
            goto doFlat;
          case 'n':
            c = 0xA;
            goto doFlat;
          case 'r':
            c = 0xD;
            goto doFlat;
          case 't':
            c = 0x9;
            goto doFlat;
          case 'v':
            c = 0xB;
            goto doFlat;
          /* Control letter */
          case 'c':
            if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
                c = (jschar) (*state->cp++ & 0x1F);
            } else {
                /* back off to accepting the original '\' as a literal */
                --state->cp;
                c = '\\';
            }
            goto doFlat;
          /* HexEscapeSequence */
          case 'x':
            nDigits = 2;
            goto lexHex;
          /* UnicodeEscapeSequence */
          case 'u':
            nDigits = 4;
          lexHex:
            /* Stops early, keeping the partial value, if cpend is reached. */
            n = 0;
            for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
                uintN digit;
                c = *state->cp++;
                if (!isASCIIHexDigit(c, &digit)) {
                    /*
                     * Back off to accepting the original 'u' or 'x' as a
                     * literal.
                     */
                    state->cp -= i + 2;
                    n = *state->cp++;
                    break;
                }
                n = (n << 4) | digit;
            }
            c = (jschar) n;
            goto doFlat;
          /* Character class escapes */
          case 'd':
            state->result = NewRENode(state, REOP_DIGIT);
          doSimple:
            /* Shared tail for single-opcode atoms with no operands. */
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            break;
          case 'D':
            state->result = NewRENode(state, REOP_NONDIGIT);
            goto doSimple;
          case 's':
            state->result = NewRENode(state, REOP_SPACE);
            goto doSimple;
          case 'S':
            state->result = NewRENode(state, REOP_NONSPACE);
            goto doSimple;
          case 'w':
            state->result = NewRENode(state, REOP_ALNUM);
            goto doSimple;
          case 'W':
            state->result = NewRENode(state, REOP_NONALNUM);
            goto doSimple;
          /* IdentityEscape */
          default:
            state->result = NewRENode(state, REOP_FLAT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.flat.chr = c;
            state->result->u.flat.length = 1;
            /* kid records where the character sits in the source text. */
            state->result->kid = (void *) (state->cp - 1);
            state->progLength += 3;
            break;
        }
        break;
      case '[':
        state->result = NewRENode(state, REOP_CLASS);
        if (!state->result)
            return JS_FALSE;
        termStart = state->cp;
        state->result->u.ucclass.startIndex = termStart - state->cpbegin;
        /* Find the closing ']', stepping over backslash-escaped characters. */
        for (;;) {
            if (state->cp == state->cpend) {
                ReportRegExpErrorHelper(state, JSREPORT_ERROR,
                                        JSMSG_UNTERM_CLASS, termStart);

                return JS_FALSE;
            }
            if (*state->cp == '\\') {
                state->cp++;
                if (state->cp != state->cpend)
                    state->cp++;
                continue;
            }
            if (*state->cp == ']') {
                state->result->u.ucclass.kidlen = state->cp - termStart;
                break;
            }
            state->cp++;
        }
        /*
         * Look the class text up in the small class cache so that textually
         * identical classes share one index.  The first empty slot records
         * this class; if every slot is taken and none matches, the class
         * simply gets a fresh index without being cached.
         */
        for (i = 0; i < CLASS_CACHE_SIZE; i++) {
            if (!state->classCache[i].start) {
                state->classCache[i].start = termStart;
                state->classCache[i].length = state->result->u.ucclass.kidlen;
                state->classCache[i].index = state->classCount;
                break;
            }
            if (state->classCache[i].length ==
                state->result->u.ucclass.kidlen) {
                for (n = 0; ; n++) {
                    if (n == state->classCache[i].length) {
                        state->result->u.ucclass.index
                            = state->classCache[i].index;
                        goto claim;
                    }
                    if (state->classCache[i].start[n] != termStart[n])
                        break;
                }
            }
        }
        state->result->u.ucclass.index = state->classCount++;

      claim:
        /*
         * Call CalculateBitmapSize now as we want any errors it finds
         * to be reported during the parse phase, not at execution.
         * state->cp is on the ']' here: it is passed as the exclusive end
         * of the class body and then stepped over.
         */
        if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
            return JS_FALSE;
        /*
         * Update classBitmapsMem with number of bytes to hold bmsize bits,
         * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
         * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
         */
        n = (state->result->u.ucclass.bmsize >> 3) + 1;
        if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
            return JS_FALSE;
        }
        state->classBitmapsMem += n;
        /* CLASS, <index> */
        state->progLength
            += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
        break;

      case '.':
        state->result = NewRENode(state, REOP_DOT);
        goto doSimple;

      case '{':
      {
        /* errp is just past the '{'; cp is stepped back onto the '{'. */
        const jschar *errp = state->cp--;
        intN err;

        err = ParseMinMaxQuantifier(state, JS_TRUE);
        state->cp = errp;

        /* Not a well-formed quantifier: the '{' is an ordinary character. */
        if (err < 0)
            goto asFlat;

        /* FALL THROUGH */
      }
      case '*':
      case '+':
      case '?':
        /* A quantifier with no atom before it. */
        ReportRegExpErrorHelper(state, JSREPORT_ERROR,
                                JSMSG_BAD_QUANTIFIER, state->cp - 1);
        return JS_FALSE;
      default:
      asFlat:
        state->result = NewRENode(state, REOP_FLAT);
        if (!state->result)
            return JS_FALSE;
        state->result->u.flat.chr = c;
        state->result->u.flat.length = 1;
        /* kid records where the character sits in the source text. */
        state->result->kid = (void *) (state->cp - 1);
        state->progLength += 3;
        break;
    }
    return ParseQuantifier(state);
}
1466
+
1467
+ static JSBool
1468
+ ParseQuantifier(CompilerState *state)
1469
+ {
1470
+ RENode *term;
1471
+ term = state->result;
1472
+ if (state->cp < state->cpend) {
1473
+ switch (*state->cp) {
1474
+ case '+':
1475
+ state->result = NewRENode(state, REOP_QUANT);
1476
+ if (!state->result)
1477
+ return JS_FALSE;
1478
+ state->result->u.range.min = 1;
1479
+ state->result->u.range.max = (uintN)-1;
1480
+ /* <PLUS>, <next> ... <ENDCHILD> */
1481
+ state->progLength += 4;
1482
+ goto quantifier;
1483
+ case '*':
1484
+ state->result = NewRENode(state, REOP_QUANT);
1485
+ if (!state->result)
1486
+ return JS_FALSE;
1487
+ state->result->u.range.min = 0;
1488
+ state->result->u.range.max = (uintN)-1;
1489
+ /* <STAR>, <next> ... <ENDCHILD> */
1490
+ state->progLength += 4;
1491
+ goto quantifier;
1492
+ case '?':
1493
+ state->result = NewRENode(state, REOP_QUANT);
1494
+ if (!state->result)
1495
+ return JS_FALSE;
1496
+ state->result->u.range.min = 0;
1497
+ state->result->u.range.max = 1;
1498
+ /* <OPT>, <next> ... <ENDCHILD> */
1499
+ state->progLength += 4;
1500
+ goto quantifier;
1501
+ case '{': /* balance '}' */
1502
+ {
1503
+ intN err;
1504
+ const jschar *errp = state->cp;
1505
+
1506
+ err = ParseMinMaxQuantifier(state, JS_FALSE);
1507
+ if (err == 0)
1508
+ goto quantifier;
1509
+ if (err == -1)
1510
+ return JS_TRUE;
1511
+
1512
+ ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1513
+ return JS_FALSE;
1514
+ }
1515
+ default:;
1516
+ }
1517
+ }
1518
+ return JS_TRUE;
1519
+
1520
+ quantifier:
1521
+ if (state->treeDepth == TREE_DEPTH_MAX) {
1522
+ ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1523
+ return JS_FALSE;
1524
+ }
1525
+
1526
+ ++state->treeDepth;
1527
+ ++state->cp;
1528
+ state->result->kid = term;
1529
+ if (state->cp < state->cpend && *state->cp == '?') {
1530
+ ++state->cp;
1531
+ state->result->u.range.greedy = JS_FALSE;
1532
+ } else {
1533
+ state->result->u.range.greedy = JS_TRUE;
1534
+ }
1535
+ return JS_TRUE;
1536
+ }
1537
+
1538
+ static intN
1539
+ ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1540
+ {
1541
+ uintN min, max;
1542
+ jschar c;
1543
+ const jschar *errp = state->cp++;
1544
+
1545
+ c = *state->cp;
1546
+ if (JS7_ISDEC(c)) {
1547
+ ++state->cp;
1548
+ min = GetDecimalValue(c, 0xFFFF, NULL, state);
1549
+ c = *state->cp;
1550
+
1551
+ if (!ignoreValues && min == OVERFLOW_VALUE)
1552
+ return JSMSG_MIN_TOO_BIG;
1553
+
1554
+ if (c == ',') {
1555
+ c = *++state->cp;
1556
+ if (JS7_ISDEC(c)) {
1557
+ ++state->cp;
1558
+ max = GetDecimalValue(c, 0xFFFF, NULL, state);
1559
+ c = *state->cp;
1560
+ if (!ignoreValues && max == OVERFLOW_VALUE)
1561
+ return JSMSG_MAX_TOO_BIG;
1562
+ if (!ignoreValues && min > max)
1563
+ return JSMSG_OUT_OF_ORDER;
1564
+ } else {
1565
+ max = (uintN)-1;
1566
+ }
1567
+ } else {
1568
+ max = min;
1569
+ }
1570
+ if (c == '}') {
1571
+ state->result = NewRENode(state, REOP_QUANT);
1572
+ if (!state->result)
1573
+ return JSMSG_OUT_OF_MEMORY;
1574
+ state->result->u.range.min = min;
1575
+ state->result->u.range.max = max;
1576
+ /*
1577
+ * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1578
+ * where <max> is written as compact(max+1) to make
1579
+ * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1580
+ */
1581
+ state->progLength += (1 + GetCompactIndexWidth(min)
1582
+ + GetCompactIndexWidth(max + 1)
1583
+ +3);
1584
+ return 0;
1585
+ }
1586
+ }
1587
+
1588
+ state->cp = errp;
1589
+ return -1;
1590
+ }
1591
+
1592
+ static JSBool
1593
+ SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1594
+ {
1595
+ ptrdiff_t offset = target - jump;
1596
+
1597
+ /* Check that target really points forward. */
1598
+ JS_ASSERT(offset >= 2);
1599
+ if ((size_t)offset > OFFSET_MAX)
1600
+ return JS_FALSE;
1601
+
1602
+ jump[0] = JUMP_OFFSET_HI(offset);
1603
+ jump[1] = JUMP_OFFSET_LO(offset);
1604
+ return JS_TRUE;
1605
+ }
1606
+
1607
+ /*
1608
+ * Generate bytecode for the tree rooted at t using an explicit stack instead
1609
+ * of recursion.
1610
+ */
1611
+ static jsbytecode *
1612
+ EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1613
+ jsbytecode *pc, RENode *t)
1614
+ {
1615
+ EmitStateStackEntry *emitStateSP, *emitStateStack;
1616
+ RECharSet *charSet;
1617
+ REOp op;
1618
+
1619
+ if (treeDepth == 0) {
1620
+ emitStateStack = NULL;
1621
+ } else {
1622
+ emitStateStack =
1623
+ (EmitStateStackEntry *)JS_malloc(state->context,
1624
+ sizeof(EmitStateStackEntry) *
1625
+ treeDepth);
1626
+ if (!emitStateStack)
1627
+ return NULL;
1628
+ }
1629
+ emitStateSP = emitStateStack;
1630
+ op = t->op;
1631
+ JS_ASSERT(op < REOP_LIMIT);
1632
+
1633
+ for (;;) {
1634
+ *pc++ = op;
1635
+ switch (op) {
1636
+ case REOP_EMPTY:
1637
+ --pc;
1638
+ break;
1639
+
1640
+ case REOP_ALTPREREQ2:
1641
+ case REOP_ALTPREREQ:
1642
+ JS_ASSERT(emitStateSP);
1643
+ emitStateSP->altHead = pc - 1;
1644
+ emitStateSP->endTermFixup = pc;
1645
+ pc += OFFSET_LEN;
1646
+ SET_ARG(pc, t->u.altprereq.ch1);
1647
+ pc += ARG_LEN;
1648
+ SET_ARG(pc, t->u.altprereq.ch2);
1649
+ pc += ARG_LEN;
1650
+
1651
+ emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1652
+ pc += OFFSET_LEN;
1653
+
1654
+ emitStateSP->continueNode = t;
1655
+ emitStateSP->continueOp = REOP_JUMP;
1656
+ emitStateSP->jumpToJumpFlag = JS_FALSE;
1657
+ ++emitStateSP;
1658
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1659
+ t = (RENode *) t->kid;
1660
+ op = t->op;
1661
+ JS_ASSERT(op < REOP_LIMIT);
1662
+ continue;
1663
+
1664
+ case REOP_JUMP:
1665
+ emitStateSP->nextTermFixup = pc; /* offset to following term */
1666
+ pc += OFFSET_LEN;
1667
+ if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1668
+ goto jump_too_big;
1669
+ emitStateSP->continueOp = REOP_ENDALT;
1670
+ ++emitStateSP;
1671
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1672
+ t = (RENode *) t->u.kid2;
1673
+ op = t->op;
1674
+ JS_ASSERT(op < REOP_LIMIT);
1675
+ continue;
1676
+
1677
+ case REOP_ENDALT:
1678
+ /*
1679
+ * If we already patched emitStateSP->nextTermFixup to jump to
1680
+ * a nearer jump, to avoid 16-bit immediate offset overflow, we
1681
+ * are done here.
1682
+ */
1683
+ if (emitStateSP->jumpToJumpFlag)
1684
+ break;
1685
+
1686
+ /*
1687
+ * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1688
+ * REOP_ENDALT is executed only on successful match of the last
1689
+ * alternate in a group.
1690
+ */
1691
+ if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1692
+ goto jump_too_big;
1693
+ if (t->op != REOP_ALT) {
1694
+ if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1695
+ goto jump_too_big;
1696
+ }
1697
+
1698
+ /*
1699
+ * If the program is bigger than the REOP_JUMP offset range, then
1700
+ * we must check for alternates before this one that are part of
1701
+ * the same group, and fix up their jump offsets to target jumps
1702
+ * close enough to fit in a 16-bit unsigned offset immediate.
1703
+ */
1704
+ if ((size_t)(pc - re->program) > OFFSET_MAX &&
1705
+ emitStateSP > emitStateStack) {
1706
+ EmitStateStackEntry *esp, *esp2;
1707
+ jsbytecode *alt, *jump;
1708
+ ptrdiff_t span, header;
1709
+
1710
+ esp2 = emitStateSP;
1711
+ alt = esp2->altHead;
1712
+ for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1713
+ if (esp->continueOp == REOP_ENDALT &&
1714
+ !esp->jumpToJumpFlag &&
1715
+ esp->nextTermFixup + OFFSET_LEN == alt &&
1716
+ (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1717
+ ? esp->endTermFixup
1718
+ : esp->nextTermFixup)) > OFFSET_MAX) {
1719
+ alt = esp->altHead;
1720
+ jump = esp->nextTermFixup;
1721
+
1722
+ /*
1723
+ * The span must be 1 less than the distance from
1724
+ * jump offset to jump offset, so we actually jump
1725
+ * to a REOP_JUMP bytecode, not to its offset!
1726
+ */
1727
+ for (;;) {
1728
+ JS_ASSERT(jump < esp2->nextTermFixup);
1729
+ span = esp2->nextTermFixup - jump - 1;
1730
+ if ((size_t)span <= OFFSET_MAX)
1731
+ break;
1732
+ do {
1733
+ if (--esp2 == esp)
1734
+ goto jump_too_big;
1735
+ } while (esp2->continueOp != REOP_ENDALT);
1736
+ }
1737
+
1738
+ jump[0] = JUMP_OFFSET_HI(span);
1739
+ jump[1] = JUMP_OFFSET_LO(span);
1740
+
1741
+ if (esp->continueNode->op != REOP_ALT) {
1742
+ /*
1743
+ * We must patch the offset at esp->endTermFixup
1744
+ * as well, for the REOP_ALTPREREQ{,2} opcodes.
1745
+ * If we're unlucky and endTermFixup is more than
1746
+ * OFFSET_MAX bytes from its target, we cheat by
1747
+ * jumping 6 bytes to the jump whose offset is at
1748
+ * esp->nextTermFixup, which has the same target.
1749
+ */
1750
+ jump = esp->endTermFixup;
1751
+ header = esp->nextTermFixup - jump;
1752
+ span += header;
1753
+ if ((size_t)span > OFFSET_MAX)
1754
+ span = header;
1755
+
1756
+ jump[0] = JUMP_OFFSET_HI(span);
1757
+ jump[1] = JUMP_OFFSET_LO(span);
1758
+ }
1759
+
1760
+ esp->jumpToJumpFlag = JS_TRUE;
1761
+ }
1762
+ }
1763
+ }
1764
+ break;
1765
+
1766
+ case REOP_ALT:
1767
+ JS_ASSERT(emitStateSP);
1768
+ emitStateSP->altHead = pc - 1;
1769
+ emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1770
+ pc += OFFSET_LEN;
1771
+ emitStateSP->continueNode = t;
1772
+ emitStateSP->continueOp = REOP_JUMP;
1773
+ emitStateSP->jumpToJumpFlag = JS_FALSE;
1774
+ ++emitStateSP;
1775
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1776
+ t = (RENode *) t->kid;
1777
+ op = t->op;
1778
+ JS_ASSERT(op < REOP_LIMIT);
1779
+ continue;
1780
+
1781
+ case REOP_FLAT:
1782
+ /*
1783
+ * Coalesce FLATs if possible and if it would not increase bytecode
1784
+ * beyond preallocated limit. The latter happens only when bytecode
1785
+ * size for coalesced string with offset p and length 2 exceeds 6
1786
+ * bytes preallocated for 2 single char nodes, i.e. when
1787
+ * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1788
+ * GetCompactIndexWidth(p) > 4.
1789
+ * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1790
+ * nodes strictly decreases bytecode size, the check has to be
1791
+ * done only for the first coalescing.
1792
+ */
1793
+ if (t->kid &&
1794
+ GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1795
+ {
1796
+ while (t->next &&
1797
+ t->next->op == REOP_FLAT &&
1798
+ (jschar*)t->kid + t->u.flat.length ==
1799
+ (jschar*)t->next->kid) {
1800
+ t->u.flat.length += t->next->u.flat.length;
1801
+ t->next = t->next->next;
1802
+ }
1803
+ }
1804
+ if (t->kid && t->u.flat.length > 1) {
1805
+ pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1806
+ pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1807
+ pc = WriteCompactIndex(pc, t->u.flat.length);
1808
+ } else if (t->u.flat.chr < 256) {
1809
+ pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1810
+ *pc++ = (jsbytecode) t->u.flat.chr;
1811
+ } else {
1812
+ pc[-1] = (state->flags & JSREG_FOLD)
1813
+ ? REOP_UCFLAT1i
1814
+ : REOP_UCFLAT1;
1815
+ SET_ARG(pc, t->u.flat.chr);
1816
+ pc += ARG_LEN;
1817
+ }
1818
+ break;
1819
+
1820
+ case REOP_LPAREN:
1821
+ JS_ASSERT(emitStateSP);
1822
+ pc = WriteCompactIndex(pc, t->u.parenIndex);
1823
+ emitStateSP->continueNode = t;
1824
+ emitStateSP->continueOp = REOP_RPAREN;
1825
+ ++emitStateSP;
1826
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1827
+ t = (RENode *) t->kid;
1828
+ op = t->op;
1829
+ continue;
1830
+
1831
+ case REOP_RPAREN:
1832
+ pc = WriteCompactIndex(pc, t->u.parenIndex);
1833
+ break;
1834
+
1835
+ case REOP_BACKREF:
1836
+ pc = WriteCompactIndex(pc, t->u.parenIndex);
1837
+ break;
1838
+
1839
+ case REOP_ASSERT:
1840
+ JS_ASSERT(emitStateSP);
1841
+ emitStateSP->nextTermFixup = pc;
1842
+ pc += OFFSET_LEN;
1843
+ emitStateSP->continueNode = t;
1844
+ emitStateSP->continueOp = REOP_ASSERTTEST;
1845
+ ++emitStateSP;
1846
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1847
+ t = (RENode *) t->kid;
1848
+ op = t->op;
1849
+ continue;
1850
+
1851
+ case REOP_ASSERTTEST:
1852
+ case REOP_ASSERTNOTTEST:
1853
+ if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1854
+ goto jump_too_big;
1855
+ break;
1856
+
1857
+ case REOP_ASSERT_NOT:
1858
+ JS_ASSERT(emitStateSP);
1859
+ emitStateSP->nextTermFixup = pc;
1860
+ pc += OFFSET_LEN;
1861
+ emitStateSP->continueNode = t;
1862
+ emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1863
+ ++emitStateSP;
1864
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1865
+ t = (RENode *) t->kid;
1866
+ op = t->op;
1867
+ continue;
1868
+
1869
+ case REOP_QUANT:
1870
+ JS_ASSERT(emitStateSP);
1871
+ if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1872
+ pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1873
+ } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1874
+ pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1875
+ } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1876
+ pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1877
+ } else {
1878
+ if (!t->u.range.greedy)
1879
+ pc[-1] = REOP_MINIMALQUANT;
1880
+ pc = WriteCompactIndex(pc, t->u.range.min);
1881
+ /*
1882
+ * Write max + 1 to avoid using size_t(max) + 1 bytes
1883
+ * for (uintN)-1 sentinel.
1884
+ */
1885
+ pc = WriteCompactIndex(pc, t->u.range.max + 1);
1886
+ }
1887
+ emitStateSP->nextTermFixup = pc;
1888
+ pc += OFFSET_LEN;
1889
+ emitStateSP->continueNode = t;
1890
+ emitStateSP->continueOp = REOP_ENDCHILD;
1891
+ ++emitStateSP;
1892
+ JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1893
+ t = (RENode *) t->kid;
1894
+ op = t->op;
1895
+ continue;
1896
+
1897
+ case REOP_ENDCHILD:
1898
+ if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1899
+ goto jump_too_big;
1900
+ break;
1901
+
1902
+ case REOP_CLASS:
1903
+ if (!t->u.ucclass.sense)
1904
+ pc[-1] = REOP_NCLASS;
1905
+ pc = WriteCompactIndex(pc, t->u.ucclass.index);
1906
+ charSet = &re->classList[t->u.ucclass.index];
1907
+ charSet->converted = JS_FALSE;
1908
+ charSet->length = t->u.ucclass.bmsize;
1909
+ charSet->u.src.startIndex = t->u.ucclass.startIndex;
1910
+ charSet->u.src.length = t->u.ucclass.kidlen;
1911
+ charSet->sense = t->u.ucclass.sense;
1912
+ break;
1913
+
1914
+ default:
1915
+ break;
1916
+ }
1917
+
1918
+ t = t->next;
1919
+ if (t) {
1920
+ op = t->op;
1921
+ } else {
1922
+ if (emitStateSP == emitStateStack)
1923
+ break;
1924
+ --emitStateSP;
1925
+ t = emitStateSP->continueNode;
1926
+ op = (REOp) emitStateSP->continueOp;
1927
+ }
1928
+ }
1929
+
1930
+ cleanup:
1931
+ if (emitStateStack)
1932
+ JS_free(state->context, emitStateStack);
1933
+ return pc;
1934
+
1935
+ jump_too_big:
1936
+ ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1937
+ pc = NULL;
1938
+ goto cleanup;
1939
+ }
1940
+
1941
+
1942
+ JSRegExp *
1943
+ js_NewRegExp(JSContext *cx, JSTokenStream *ts,
1944
+ JSString *str, uintN flags, JSBool flat)
1945
+ {
1946
+ JSRegExp *re;
1947
+ void *mark;
1948
+ CompilerState state;
1949
+ size_t resize;
1950
+ jsbytecode *endPC;
1951
+ uintN i;
1952
+ size_t len;
1953
+
1954
+ re = NULL;
1955
+ mark = JS_ARENA_MARK(&cx->tempPool);
1956
+ len = JSSTRING_LENGTH(str);
1957
+
1958
+ state.context = cx;
1959
+ state.tokenStream = ts;
1960
+ state.cp = js_UndependString(cx, str);
1961
+ if (!state.cp)
1962
+ goto out;
1963
+ state.cpbegin = state.cp;
1964
+ state.cpend = state.cp + len;
1965
+ state.flags = flags;
1966
+ state.parenCount = 0;
1967
+ state.classCount = 0;
1968
+ state.progLength = 0;
1969
+ state.treeDepth = 0;
1970
+ state.classBitmapsMem = 0;
1971
+ for (i = 0; i < CLASS_CACHE_SIZE; i++)
1972
+ state.classCache[i].start = NULL;
1973
+
1974
+ if (len != 0 && flat) {
1975
+ state.result = NewRENode(&state, REOP_FLAT);
1976
+ if (!state.result)
1977
+ goto out;
1978
+ state.result->u.flat.chr = *state.cpbegin;
1979
+ state.result->u.flat.length = len;
1980
+ state.result->kid = (void *) state.cpbegin;
1981
+ /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1982
+ state.progLength += 1 + GetCompactIndexWidth(0)
1983
+ + GetCompactIndexWidth(len);
1984
+ } else {
1985
+ if (!ParseRegExp(&state))
1986
+ goto out;
1987
+ }
1988
+ resize = offsetof(JSRegExp, program) + state.progLength + 1;
1989
+ re = (JSRegExp *) JS_malloc(cx, resize);
1990
+ if (!re)
1991
+ goto out;
1992
+
1993
+ re->nrefs = 1;
1994
+ JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
1995
+ re->classCount = state.classCount;
1996
+ if (re->classCount) {
1997
+ re->classList = (RECharSet *)
1998
+ JS_malloc(cx, re->classCount * sizeof(RECharSet));
1999
+ if (!re->classList) {
2000
+ js_DestroyRegExp(cx, re);
2001
+ re = NULL;
2002
+ goto out;
2003
+ }
2004
+ for (i = 0; i < re->classCount; i++)
2005
+ re->classList[i].converted = JS_FALSE;
2006
+ } else {
2007
+ re->classList = NULL;
2008
+ }
2009
+ endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
2010
+ if (!endPC) {
2011
+ js_DestroyRegExp(cx, re);
2012
+ re = NULL;
2013
+ goto out;
2014
+ }
2015
+ *endPC++ = REOP_END;
2016
+ /*
2017
+ * Check whether size was overestimated and shrink using realloc.
2018
+ * This is safe since no pointers to newly parsed regexp or its parts
2019
+ * besides re exist here.
2020
+ */
2021
+ if ((size_t)(endPC - re->program) != state.progLength + 1) {
2022
+ JSRegExp *tmp;
2023
+ JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
2024
+ resize = offsetof(JSRegExp, program) + (endPC - re->program);
2025
+ tmp = (JSRegExp *) JS_realloc(cx, re, resize);
2026
+ if (tmp)
2027
+ re = tmp;
2028
+ }
2029
+
2030
+ re->flags = flags;
2031
+ re->parenCount = state.parenCount;
2032
+ re->source = str;
2033
+
2034
+ out:
2035
+ JS_ARENA_RELEASE(&cx->tempPool, mark);
2036
+ return re;
2037
+ }
2038
+
2039
+ JSRegExp *
2040
+ js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat)
2041
+ {
2042
+ uintN flags;
2043
+ jschar *s;
2044
+ size_t i, n;
2045
+ char charBuf[2];
2046
+
2047
+ flags = 0;
2048
+ if (opt) {
2049
+ JSSTRING_CHARS_AND_LENGTH(opt, s, n);
2050
+ for (i = 0; i < n; i++) {
2051
+ switch (s[i]) {
2052
+ case 'g':
2053
+ flags |= JSREG_GLOB;
2054
+ break;
2055
+ case 'i':
2056
+ flags |= JSREG_FOLD;
2057
+ break;
2058
+ case 'm':
2059
+ flags |= JSREG_MULTILINE;
2060
+ break;
2061
+ case 'y':
2062
+ flags |= JSREG_STICKY;
2063
+ break;
2064
+ default:
2065
+ charBuf[0] = (char)s[i];
2066
+ charBuf[1] = '\0';
2067
+ JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
2068
+ js_GetErrorMessage, NULL,
2069
+ JSMSG_BAD_FLAG, charBuf);
2070
+ return NULL;
2071
+ }
2072
+ }
2073
+ }
2074
+ return js_NewRegExp(cx, NULL, str, flags, flat);
2075
+ }
2076
+
2077
+ /*
2078
+ * Save the current state of the match - the position in the input
2079
+ * text as well as the position in the bytecode. The state of any
2080
+ * parent expressions is also saved (preceding state).
2081
+ * Contents of parenCount parentheses from parenIndex are also saved.
2082
+ */
2083
+ static REBackTrackData *
2084
+ PushBackTrackState(REGlobalData *gData, REOp op,
2085
+ jsbytecode *target, REMatchState *x, const jschar *cp,
2086
+ size_t parenIndex, size_t parenCount)
2087
+ {
2088
+ size_t i;
2089
+ REBackTrackData *result =
2090
+ (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
2091
+
2092
+ size_t sz = sizeof(REBackTrackData) +
2093
+ gData->stateStackTop * sizeof(REProgState) +
2094
+ parenCount * sizeof(RECapture);
2095
+
2096
+ ptrdiff_t btsize = gData->backTrackStackSize;
2097
+ ptrdiff_t btincr = ((char *)result + sz) -
2098
+ ((char *)gData->backTrackStack + btsize);
2099
+
2100
+ re_debug("\tBT_Push: %lu,%lu",
2101
+ (unsigned long) parenIndex, (unsigned long) parenCount);
2102
+
2103
+ JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
2104
+ if (btincr > 0) {
2105
+ ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2106
+
2107
+ JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
2108
+ btincr = JS_ROUNDUP(btincr, btsize);
2109
+ JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
2110
+ &gData->pool, btsize, btincr);
2111
+ if (!gData->backTrackStack) {
2112
+ js_ReportOutOfScriptQuota(gData->cx);
2113
+ gData->ok = JS_FALSE;
2114
+ return NULL;
2115
+ }
2116
+ gData->backTrackStackSize = btsize + btincr;
2117
+ result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2118
+ }
2119
+ gData->backTrackSP = result;
2120
+ result->sz = gData->cursz;
2121
+ gData->cursz = sz;
2122
+
2123
+ result->backtrack_op = op;
2124
+ result->backtrack_pc = target;
2125
+ result->cp = cp;
2126
+ result->parenCount = parenCount;
2127
+ result->parenIndex = parenIndex;
2128
+
2129
+ result->saveStateStackTop = gData->stateStackTop;
2130
+ JS_ASSERT(gData->stateStackTop);
2131
+ memcpy(result + 1, gData->stateStack,
2132
+ sizeof(REProgState) * result->saveStateStackTop);
2133
+
2134
+ if (parenCount != 0) {
2135
+ memcpy((char *)(result + 1) +
2136
+ sizeof(REProgState) * result->saveStateStackTop,
2137
+ &x->parens[parenIndex],
2138
+ sizeof(RECapture) * parenCount);
2139
+ for (i = 0; i != parenCount; i++)
2140
+ x->parens[parenIndex + i].index = -1;
2141
+ }
2142
+
2143
+ return result;
2144
+ }
2145
+
2146
+
2147
+ /*
2148
+ * Consecutive literal characters.
2149
+ */
2150
#if 0
/*
 * Case-sensitive match of length literal characters from matchChars against
 * the input at x->cp. Returns x with cp advanced on success, or NULL if the
 * input is too short or a character differs.
 *
 * Currently compiled out. Kept in step with FlatNIMatcher, including the
 * unsigned remaining-length comparison.
 */
static REMatchState *
FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
             size_t length)
{
    size_t i;

    JS_ASSERT(gData->cpend >= x->cp);
    /* Compare as size_t so the signed pointer difference is not mixed in. */
    if (length > (size_t)(gData->cpend - x->cp))
        return NULL;
    for (i = 0; i != length; i++) {
        if (matchChars[i] != x->cp[i])
            return NULL;
    }
    x->cp += length;
    return x;
}
#endif
2166
+
2167
+ static JS_INLINE REMatchState *
2168
+ FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2169
+ size_t length)
2170
+ {
2171
+ size_t i;
2172
+ JS_ASSERT(gData->cpend >= x->cp);
2173
+ if (length > (size_t)(gData->cpend - x->cp))
2174
+ return NULL;
2175
+ for (i = 0; i != length; i++) {
2176
+ if (upcase(matchChars[i]) != upcase(x->cp[i]))
2177
+ return NULL;
2178
+ }
2179
+ x->cp += length;
2180
+ return x;
2181
+ }
2182
+
2183
+ /*
2184
+ * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2185
+ * 2. If E is not a character then go to step 6.
2186
+ * 3. Let ch be E's character.
2187
+ * 4. Let A be a one-element RECharSet containing the character ch.
2188
+ * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2189
+ * 6. E must be an integer. Let n be that integer.
2190
+ * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2191
+ * 8. Return an internal Matcher closure that takes two arguments, a State x
2192
+ * and a Continuation c, and performs the following:
2193
+ * 1. Let cap be x's captures internal array.
2194
+ * 2. Let s be cap[n].
2195
+ * 3. If s is undefined, then call c(x) and return its result.
2196
+ * 4. Let e be x's endIndex.
2197
+ * 5. Let len be s's length.
2198
+ * 6. Let f be e+len.
2199
+ * 7. If f>InputLength, return failure.
2200
+ * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2201
+ * such that Canonicalize(s[i]) is not the same character as
2202
+ * Canonicalize(Input [e+i]), then return failure.
2203
+ * 9. Let y be the State (f, cap).
2204
+ * 10. Call c(y) and return its result.
2205
+ */
2206
+ static REMatchState *
2207
+ BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2208
+ {
2209
+ size_t len, i;
2210
+ const jschar *parenContent;
2211
+ RECapture *cap = &x->parens[parenIndex];
2212
+
2213
+ if (cap->index == -1)
2214
+ return x;
2215
+
2216
+ len = cap->length;
2217
+ if (x->cp + len > gData->cpend)
2218
+ return NULL;
2219
+
2220
+ parenContent = &gData->cpbegin[cap->index];
2221
+ if (gData->regexp->flags & JSREG_FOLD) {
2222
+ for (i = 0; i < len; i++) {
2223
+ if (upcase(parenContent[i]) != upcase(x->cp[i]))
2224
+ return NULL;
2225
+ }
2226
+ } else {
2227
+ for (i = 0; i < len; i++) {
2228
+ if (parenContent[i] != x->cp[i])
2229
+ return NULL;
2230
+ }
2231
+ }
2232
+ x->cp += len;
2233
+ return x;
2234
+ }
2235
+
2236
+
2237
+ /* Add a single character to the RECharSet */
2238
+ static void
2239
+ AddCharacterToCharSet(RECharSet *cs, jschar c)
2240
+ {
2241
+ uintN byteIndex = (uintN)(c >> 3);
2242
+ JS_ASSERT(c <= cs->length);
2243
+ cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2244
+ }
2245
+
2246
+
2247
+ /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2248
+ static void
2249
+ AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2)
2250
+ {
2251
+ uintN i;
2252
+
2253
+ uintN byteIndex1 = c1 >> 3;
2254
+ uintN byteIndex2 = c2 >> 3;
2255
+
2256
+ JS_ASSERT(c2 <= cs->length && c1 <= c2);
2257
+
2258
+ c1 &= 0x7;
2259
+ c2 &= 0x7;
2260
+
2261
+ if (byteIndex1 == byteIndex2) {
2262
+ cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2263
+ } else {
2264
+ cs->u.bits[byteIndex1] |= 0xFF << c1;
2265
+ for (i = byteIndex1 + 1; i < byteIndex2; i++)
2266
+ cs->u.bits[i] = 0xFF;
2267
+ cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2268
+ }
2269
+ }
2270
+
2271
+ /* Compile the source of the class into a RECharSet */
2272
+ static JSBool
2273
+ ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2274
+ {
2275
+ const jschar *src, *end;
2276
+ JSBool inRange = JS_FALSE;
2277
+ jschar rangeStart = 0;
2278
+ uintN byteLength, n;
2279
+ jschar c, thisCh;
2280
+ intN nDigits, i;
2281
+
2282
+ JS_ASSERT(!charSet->converted);
2283
+ /*
2284
+ * Assert that startIndex and length points to chars inside [] inside
2285
+ * source string.
2286
+ */
2287
+ JS_ASSERT(1 <= charSet->u.src.startIndex);
2288
+ JS_ASSERT(charSet->u.src.startIndex
2289
+ < JSSTRING_LENGTH(gData->regexp->source));
2290
+ JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
2291
+ - 1 - charSet->u.src.startIndex);
2292
+
2293
+ charSet->converted = JS_TRUE;
2294
+ src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
2295
+ end = src + charSet->u.src.length;
2296
+ JS_ASSERT(src[-1] == '[');
2297
+ JS_ASSERT(end[0] == ']');
2298
+
2299
+ byteLength = (charSet->length >> 3) + 1;
2300
+ charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
2301
+ if (!charSet->u.bits) {
2302
+ JS_ReportOutOfMemory(gData->cx);
2303
+ gData->ok = JS_FALSE;
2304
+ return JS_FALSE;
2305
+ }
2306
+ memset(charSet->u.bits, 0, byteLength);
2307
+
2308
+ if (src == end)
2309
+ return JS_TRUE;
2310
+
2311
+ if (*src == '^') {
2312
+ JS_ASSERT(charSet->sense == JS_FALSE);
2313
+ ++src;
2314
+ } else {
2315
+ JS_ASSERT(charSet->sense == JS_TRUE);
2316
+ }
2317
+
2318
+ while (src != end) {
2319
+ switch (*src) {
2320
+ case '\\':
2321
+ ++src;
2322
+ c = *src++;
2323
+ switch (c) {
2324
+ case 'b':
2325
+ thisCh = 0x8;
2326
+ break;
2327
+ case 'f':
2328
+ thisCh = 0xC;
2329
+ break;
2330
+ case 'n':
2331
+ thisCh = 0xA;
2332
+ break;
2333
+ case 'r':
2334
+ thisCh = 0xD;
2335
+ break;
2336
+ case 't':
2337
+ thisCh = 0x9;
2338
+ break;
2339
+ case 'v':
2340
+ thisCh = 0xB;
2341
+ break;
2342
+ case 'c':
2343
+ if (src < end && JS_ISWORD(*src)) {
2344
+ thisCh = (jschar)(*src++ & 0x1F);
2345
+ } else {
2346
+ --src;
2347
+ thisCh = '\\';
2348
+ }
2349
+ break;
2350
+ case 'x':
2351
+ nDigits = 2;
2352
+ goto lexHex;
2353
+ case 'u':
2354
+ nDigits = 4;
2355
+ lexHex:
2356
+ n = 0;
2357
+ for (i = 0; (i < nDigits) && (src < end); i++) {
2358
+ uintN digit;
2359
+ c = *src++;
2360
+ if (!isASCIIHexDigit(c, &digit)) {
2361
+ /*
2362
+ * Back off to accepting the original '\'
2363
+ * as a literal
2364
+ */
2365
+ src -= i + 1;
2366
+ n = '\\';
2367
+ break;
2368
+ }
2369
+ n = (n << 4) | digit;
2370
+ }
2371
+ thisCh = (jschar)n;
2372
+ break;
2373
+ case '0':
2374
+ case '1':
2375
+ case '2':
2376
+ case '3':
2377
+ case '4':
2378
+ case '5':
2379
+ case '6':
2380
+ case '7':
2381
+ /*
2382
+ * This is a non-ECMA extension - decimal escapes (in this
2383
+ * case, octal!) are supposed to be an error inside class
2384
+ * ranges, but supported here for backwards compatibility.
2385
+ */
2386
+ n = JS7_UNDEC(c);
2387
+ c = *src;
2388
+ if ('0' <= c && c <= '7') {
2389
+ src++;
2390
+ n = 8 * n + JS7_UNDEC(c);
2391
+ c = *src;
2392
+ if ('0' <= c && c <= '7') {
2393
+ src++;
2394
+ i = 8 * n + JS7_UNDEC(c);
2395
+ if (i <= 0377)
2396
+ n = i;
2397
+ else
2398
+ src--;
2399
+ }
2400
+ }
2401
+ thisCh = (jschar)n;
2402
+ break;
2403
+
2404
+ case 'd':
2405
+ AddCharacterRangeToCharSet(charSet, '0', '9');
2406
+ continue; /* don't need range processing */
2407
+ case 'D':
2408
+ AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2409
+ AddCharacterRangeToCharSet(charSet,
2410
+ (jschar)('9' + 1),
2411
+ (jschar)charSet->length);
2412
+ continue;
2413
+ case 's':
2414
+ for (i = (intN)charSet->length; i >= 0; i--)
2415
+ if (JS_ISSPACE(i))
2416
+ AddCharacterToCharSet(charSet, (jschar)i);
2417
+ continue;
2418
+ case 'S':
2419
+ for (i = (intN)charSet->length; i >= 0; i--)
2420
+ if (!JS_ISSPACE(i))
2421
+ AddCharacterToCharSet(charSet, (jschar)i);
2422
+ continue;
2423
+ case 'w':
2424
+ for (i = (intN)charSet->length; i >= 0; i--)
2425
+ if (JS_ISWORD(i))
2426
+ AddCharacterToCharSet(charSet, (jschar)i);
2427
+ continue;
2428
+ case 'W':
2429
+ for (i = (intN)charSet->length; i >= 0; i--)
2430
+ if (!JS_ISWORD(i))
2431
+ AddCharacterToCharSet(charSet, (jschar)i);
2432
+ continue;
2433
+ default:
2434
+ thisCh = c;
2435
+ break;
2436
+
2437
+ }
2438
+ break;
2439
+
2440
+ default:
2441
+ thisCh = *src++;
2442
+ break;
2443
+
2444
+ }
2445
+ if (inRange) {
2446
+ if (gData->regexp->flags & JSREG_FOLD) {
2447
+ int i;
2448
+
2449
+ JS_ASSERT(rangeStart <= thisCh);
2450
+ for (i = rangeStart; i <= thisCh; i++) {
2451
+ jschar uch, dch;
2452
+
2453
+ AddCharacterToCharSet(charSet, i);
2454
+ uch = upcase(i);
2455
+ dch = downcase(i);
2456
+ if (i != uch)
2457
+ AddCharacterToCharSet(charSet, uch);
2458
+ if (i != dch)
2459
+ AddCharacterToCharSet(charSet, dch);
2460
+ }
2461
+ } else {
2462
+ AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2463
+ }
2464
+ inRange = JS_FALSE;
2465
+ } else {
2466
+ if (gData->regexp->flags & JSREG_FOLD) {
2467
+ AddCharacterToCharSet(charSet, upcase(thisCh));
2468
+ AddCharacterToCharSet(charSet, downcase(thisCh));
2469
+ } else {
2470
+ AddCharacterToCharSet(charSet, thisCh);
2471
+ }
2472
+ if (src < end - 1) {
2473
+ if (*src == '-') {
2474
+ ++src;
2475
+ inRange = JS_TRUE;
2476
+ rangeStart = thisCh;
2477
+ }
2478
+ }
2479
+ }
2480
+ }
2481
+ return JS_TRUE;
2482
+ }
2483
+
2484
+ void
2485
+ js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2486
+ {
2487
+ if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2488
+ if (re->classList) {
2489
+ uintN i;
2490
+ for (i = 0; i < re->classCount; i++) {
2491
+ if (re->classList[i].converted)
2492
+ JS_free(cx, re->classList[i].u.bits);
2493
+ re->classList[i].u.bits = NULL;
2494
+ }
2495
+ JS_free(cx, re->classList);
2496
+ }
2497
+ JS_free(cx, re);
2498
+ }
2499
+ }
2500
+
2501
+ static JSBool
2502
+ ReallocStateStack(REGlobalData *gData)
2503
+ {
2504
+ size_t limit = gData->stateStackLimit;
2505
+ size_t sz = sizeof(REProgState) * limit;
2506
+
2507
+ JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
2508
+ if (!gData->stateStack) {
2509
+ js_ReportOutOfScriptQuota(gData->cx);
2510
+ gData->ok = JS_FALSE;
2511
+ return JS_FALSE;
2512
+ }
2513
+ gData->stateStackLimit = limit + limit;
2514
+ return JS_TRUE;
2515
+ }
2516
+
2517
/*
 * Advance the state stack top by one, growing the stack when the top reaches
 * the limit. NOTE: on growth failure this expands to a `return NULL` in the
 * calling function, so it may only be used where that is a valid return.
 */
#define PUSH_STATE_STACK(data)                                                \
    JS_BEGIN_MACRO                                                            \
        if (++(data)->stateStackTop == (data)->stateStackLimit) {             \
            if (!ReallocStateStack((data)))                                   \
                return NULL;                                                  \
        }                                                                     \
    JS_END_MACRO
2525
+
2526
+ /*
2527
+ * Apply the current op against the given input to see if it's going to match
2528
+ * or fail. Return false if we don't get a match, true if we do. If updatecp is
2529
+ * true, then update the current state's cp. Always update startpc to the next
2530
+ * op.
2531
+ */
2532
+ static JS_INLINE REMatchState *
2533
+ SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2534
+ jsbytecode **startpc, JSBool updatecp)
2535
+ {
2536
+ REMatchState *result = NULL;
2537
+ jschar matchCh;
2538
+ size_t parenIndex;
2539
+ size_t offset, length, index;
2540
+ jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2541
+ jschar *source;
2542
+ const jschar *startcp = x->cp;
2543
+ jschar ch;
2544
+ RECharSet *charSet;
2545
+
2546
+ #ifdef REGEXP_DEBUG
2547
+ const char *opname = reop_names[op];
2548
+ re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
2549
+ gData->stateStackTop * 2, "", opname);
2550
+ #endif
2551
+ switch (op) {
2552
+ case REOP_EMPTY:
2553
+ result = x;
2554
+ break;
2555
+ case REOP_BOL:
2556
+ if (x->cp != gData->cpbegin) {
2557
+ if (!gData->cx->regExpStatics.multiline &&
2558
+ !(gData->regexp->flags & JSREG_MULTILINE)) {
2559
+ break;
2560
+ }
2561
+ if (!RE_IS_LINE_TERM(x->cp[-1]))
2562
+ break;
2563
+ }
2564
+ result = x;
2565
+ break;
2566
+ case REOP_EOL:
2567
+ if (x->cp != gData->cpend) {
2568
+ if (!gData->cx->regExpStatics.multiline &&
2569
+ !(gData->regexp->flags & JSREG_MULTILINE)) {
2570
+ break;
2571
+ }
2572
+ if (!RE_IS_LINE_TERM(*x->cp))
2573
+ break;
2574
+ }
2575
+ result = x;
2576
+ break;
2577
+ case REOP_WBDRY:
2578
+ if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2579
+ !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2580
+ result = x;
2581
+ }
2582
+ break;
2583
+ case REOP_WNONBDRY:
2584
+ if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2585
+ (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2586
+ result = x;
2587
+ }
2588
+ break;
2589
+ case REOP_DOT:
2590
+ if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2591
+ result = x;
2592
+ result->cp++;
2593
+ }
2594
+ break;
2595
+ case REOP_DIGIT:
2596
+ if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2597
+ result = x;
2598
+ result->cp++;
2599
+ }
2600
+ break;
2601
+ case REOP_NONDIGIT:
2602
+ if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2603
+ result = x;
2604
+ result->cp++;
2605
+ }
2606
+ break;
2607
+ case REOP_ALNUM:
2608
+ if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2609
+ result = x;
2610
+ result->cp++;
2611
+ }
2612
+ break;
2613
+ case REOP_NONALNUM:
2614
+ if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2615
+ result = x;
2616
+ result->cp++;
2617
+ }
2618
+ break;
2619
+ case REOP_SPACE:
2620
+ if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2621
+ result = x;
2622
+ result->cp++;
2623
+ }
2624
+ break;
2625
+ case REOP_NONSPACE:
2626
+ if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2627
+ result = x;
2628
+ result->cp++;
2629
+ }
2630
+ break;
2631
+ case REOP_BACKREF:
2632
+ pc = ReadCompactIndex(pc, &parenIndex);
2633
+ JS_ASSERT(parenIndex < gData->regexp->parenCount);
2634
+ result = BackrefMatcher(gData, x, parenIndex);
2635
+ break;
2636
+ case REOP_FLAT:
2637
+ pc = ReadCompactIndex(pc, &offset);
2638
+ JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2639
+ pc = ReadCompactIndex(pc, &length);
2640
+ JS_ASSERT(1 <= length);
2641
+ JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2642
+ if (length <= (size_t)(gData->cpend - x->cp)) {
2643
+ source = JSSTRING_CHARS(gData->regexp->source) + offset;
2644
+ re_debug_chars(source, length);
2645
+ for (index = 0; index != length; index++) {
2646
+ if (source[index] != x->cp[index])
2647
+ return NULL;
2648
+ }
2649
+ x->cp += length;
2650
+ result = x;
2651
+ }
2652
+ break;
2653
+ case REOP_FLAT1:
2654
+ matchCh = *pc++;
2655
+ re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
2656
+ if (x->cp != gData->cpend && *x->cp == matchCh) {
2657
+ result = x;
2658
+ result->cp++;
2659
+ }
2660
+ break;
2661
+ case REOP_FLATi:
2662
+ pc = ReadCompactIndex(pc, &offset);
2663
+ JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2664
+ pc = ReadCompactIndex(pc, &length);
2665
+ JS_ASSERT(1 <= length);
2666
+ JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2667
+ source = JSSTRING_CHARS(gData->regexp->source);
2668
+ result = FlatNIMatcher(gData, x, source + offset, length);
2669
+ break;
2670
+ case REOP_FLAT1i:
2671
+ matchCh = *pc++;
2672
+ if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2673
+ result = x;
2674
+ result->cp++;
2675
+ }
2676
+ break;
2677
+ case REOP_UCFLAT1:
2678
+ matchCh = GET_ARG(pc);
2679
+ re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
2680
+ pc += ARG_LEN;
2681
+ if (x->cp != gData->cpend && *x->cp == matchCh) {
2682
+ result = x;
2683
+ result->cp++;
2684
+ }
2685
+ break;
2686
+ case REOP_UCFLAT1i:
2687
+ matchCh = GET_ARG(pc);
2688
+ pc += ARG_LEN;
2689
+ if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2690
+ result = x;
2691
+ result->cp++;
2692
+ }
2693
+ break;
2694
+ case REOP_CLASS:
2695
+ pc = ReadCompactIndex(pc, &index);
2696
+ JS_ASSERT(index < gData->regexp->classCount);
2697
+ if (x->cp != gData->cpend) {
2698
+ charSet = &gData->regexp->classList[index];
2699
+ JS_ASSERT(charSet->converted);
2700
+ ch = *x->cp;
2701
+ index = ch >> 3;
2702
+ if (charSet->length != 0 &&
2703
+ ch <= charSet->length &&
2704
+ (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2705
+ result = x;
2706
+ result->cp++;
2707
+ }
2708
+ }
2709
+ break;
2710
+ case REOP_NCLASS:
2711
+ pc = ReadCompactIndex(pc, &index);
2712
+ JS_ASSERT(index < gData->regexp->classCount);
2713
+ if (x->cp != gData->cpend) {
2714
+ charSet = &gData->regexp->classList[index];
2715
+ JS_ASSERT(charSet->converted);
2716
+ ch = *x->cp;
2717
+ index = ch >> 3;
2718
+ if (charSet->length == 0 ||
2719
+ ch > charSet->length ||
2720
+ !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2721
+ result = x;
2722
+ result->cp++;
2723
+ }
2724
+ }
2725
+ break;
2726
+
2727
+ default:
2728
+ JS_ASSERT(JS_FALSE);
2729
+ }
2730
+ if (result) {
2731
+ if (!updatecp)
2732
+ x->cp = startcp;
2733
+ *startpc = pc;
2734
+ re_debug(" * ");
2735
+ return result;
2736
+ }
2737
+ x->cp = startcp;
2738
+ return NULL;
2739
+ }
2740
+
2741
+ static JS_INLINE REMatchState * /* Interprets gData->regexp->program against the input starting at x->cp. */
2742
+ ExecuteREBytecode(REGlobalData *gData, REMatchState *x) /* Returns x when REOP_END is reached, NULL otherwise; gData->ok is cleared on the error paths below. */
2743
+ {
2744
+ REMatchState *result = NULL;
2745
+ REBackTrackData *backTrackData;
2746
+ jsbytecode *nextpc, *testpc;
2747
+ REOp nextop;
2748
+ RECapture *cap;
2749
+ REProgState *curState;
2750
+ const jschar *startcp;
2751
+ size_t parenIndex, k;
2752
+ size_t parenSoFar = 0; /* one past the highest paren index opened so far (see REOP_LPAREN) */
2753
+
2754
+ jschar matchCh1, matchCh2;
2755
+ RECharSet *charSet;
2756
+
2757
+ JSBool anchor;
2758
+ jsbytecode *pc = gData->regexp->program;
2759
+ REOp op = (REOp) *pc++;
2760
+
2761
+ /*
2762
+ * If the first node is a simple match, step the index into the string
2763
+ * until that match is made, or fail if it can't be found at all.
2764
+ */
2765
+ if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2766
+ anchor = JS_FALSE;
2767
+ while (x->cp <= gData->cpend) {
2768
+ nextpc = pc; /* reset back to start each time */
2769
+ result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2770
+ if (result) {
2771
+ anchor = JS_TRUE;
2772
+ x = result;
2773
+ pc = nextpc; /* accept skip to next opcode */
2774
+ op = (REOp) *pc++;
2775
+ JS_ASSERT(op < REOP_LIMIT);
2776
+ break;
2777
+ }
2778
+ gData->skipped++; /* one more input position passed over by this scan */
2779
+ x->cp++;
2780
+ }
2781
+ if (!anchor) /* the first op matched nowhere up to and including cpend */
2782
+ goto bad;
2783
+ }
2784
+
2785
+ for (;;) { /* main dispatch loop: one iteration per opcode or backtrack step */
2786
+ #ifdef REGEXP_DEBUG
2787
+ const char *opname = reop_names[op];
2788
+ re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
2789
+ gData->stateStackTop * 2, "", opname);
2790
+ #endif
2791
+ if (REOP_IS_SIMPLE(op)) {
2792
+ result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2793
+ } else {
2794
+ curState = &gData->stateStack[gData->stateStackTop]; /* slot at the current top of the state stack */
2795
+ switch (op) {
2796
+ case REOP_END:
2797
+ goto good;
2798
+ case REOP_ALTPREREQ2:
2799
+ nextpc = pc + GET_OFFSET(pc); /* start of next op */
2800
+ pc += ARG_LEN;
2801
+ matchCh2 = GET_ARG(pc);
2802
+ pc += ARG_LEN;
2803
+ k = GET_ARG(pc);
2804
+ pc += ARG_LEN;
2805
+
2806
+ if (x->cp != gData->cpend) {
2807
+ if (*x->cp == matchCh2)
2808
+ goto doAlt;
2809
+
2810
+ charSet = &gData->regexp->classList[k];
2811
+ if (!charSet->converted && !ProcessCharSet(gData, charSet))
2812
+ goto bad;
2813
+ matchCh1 = *x->cp;
2814
+ k = matchCh1 >> 3;
2815
+ if ((charSet->length == 0 ||
2816
+ matchCh1 > charSet->length ||
2817
+ !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2818
+ charSet->sense) {
2819
+ goto doAlt;
2820
+ }
2821
+ }
2822
+ result = NULL;
2823
+ break;
2824
+
2825
+ case REOP_ALTPREREQ:
2826
+ nextpc = pc + GET_OFFSET(pc); /* start of next op */
2827
+ pc += ARG_LEN;
2828
+ matchCh1 = GET_ARG(pc);
2829
+ pc += ARG_LEN;
2830
+ matchCh2 = GET_ARG(pc);
2831
+ pc += ARG_LEN;
2832
+ if (x->cp == gData->cpend ||
2833
+ (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2834
+ result = NULL;
2835
+ break;
2836
+ }
2837
+ /* else fall thru... */
2838
+
2839
+ case REOP_ALT:
2840
+ doAlt:
2841
+ nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2842
+ pc += ARG_LEN; /* start of this alternate */
2843
+ curState->parenSoFar = parenSoFar;
2844
+ PUSH_STATE_STACK(gData); /* returns NULL from this function if the state stack cannot grow */
2845
+ op = (REOp) *pc++;
2846
+ startcp = x->cp;
2847
+ if (REOP_IS_SIMPLE(op)) {
2848
+ if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2849
+ op = (REOp) *nextpc++;
2850
+ pc = nextpc;
2851
+ continue;
2852
+ }
2853
+ result = x;
2854
+ op = (REOp) *pc++;
2855
+ }
2856
+ nextop = (REOp) *nextpc++;
2857
+ if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0)) /* record the next alternate as a backtrack target */
2858
+ goto bad;
2859
+ continue;
2860
+
2861
+ /*
2862
+ * Occurs at (successful) end of REOP_ALT,
2863
+ */
2864
+ case REOP_JUMP:
2865
+ /*
2866
+ * If we have not gotten a result here, it is because of an
2867
+ * empty match. Do the same thing REOP_EMPTY would do.
2868
+ */
2869
+ if (!result)
2870
+ result = x;
2871
+
2872
+ --gData->stateStackTop;
2873
+ pc += GET_OFFSET(pc);
2874
+ op = (REOp) *pc++;
2875
+ continue;
2876
+
2877
+ /*
2878
+ * Occurs at last (successful) end of REOP_ALT,
2879
+ */
2880
+ case REOP_ENDALT:
2881
+ /*
2882
+ * If we have not gotten a result here, it is because of an
2883
+ * empty match. Do the same thing REOP_EMPTY would do.
2884
+ */
2885
+ if (!result)
2886
+ result = x;
2887
+
2888
+ --gData->stateStackTop;
2889
+ op = (REOp) *pc++;
2890
+ continue;
2891
+
2892
+ case REOP_LPAREN:
2893
+ pc = ReadCompactIndex(pc, &parenIndex);
2894
+ re_debug("[ %lu ]", (unsigned long) parenIndex);
2895
+ JS_ASSERT(parenIndex < gData->regexp->parenCount);
2896
+ if (parenIndex + 1 > parenSoFar)
2897
+ parenSoFar = parenIndex + 1;
2898
+ x->parens[parenIndex].index = x->cp - gData->cpbegin; /* capture start, as an offset from cpbegin */
2899
+ x->parens[parenIndex].length = 0;
2900
+ op = (REOp) *pc++;
2901
+ continue;
2902
+
2903
+ case REOP_RPAREN:
2904
+ {
2905
+ ptrdiff_t delta;
2906
+
2907
+ pc = ReadCompactIndex(pc, &parenIndex);
2908
+ JS_ASSERT(parenIndex < gData->regexp->parenCount);
2909
+ cap = &x->parens[parenIndex];
2910
+ delta = x->cp - (gData->cpbegin + cap->index);
2911
+ cap->length = (delta < 0) ? 0 : (size_t) delta; /* a negative span is stored as an empty capture */
2912
+ op = (REOp) *pc++;
2913
+
2914
+ if (!result)
2915
+ result = x;
2916
+ continue;
2917
+ }
2918
+ case REOP_ASSERT:
2919
+ nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2920
+ pc += ARG_LEN; /* start of ASSERT child */
2921
+ op = (REOp) *pc++;
2922
+ testpc = pc;
2923
+ if (REOP_IS_SIMPLE(op) &&
2924
+ !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
2925
+ result = NULL;
2926
+ break;
2927
+ }
2928
+ curState->u.assertion.top =
2929
+ (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2930
+ curState->u.assertion.sz = gData->cursz;
2931
+ curState->index = x->cp - gData->cpbegin;
2932
+ curState->parenSoFar = parenSoFar;
2933
+ PUSH_STATE_STACK(gData);
2934
+ if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2935
+ nextpc, x, x->cp, 0, 0)) {
2936
+ goto bad;
2937
+ }
2938
+ continue;
2939
+
2940
+ case REOP_ASSERT_NOT:
2941
+ nextpc = pc + GET_OFFSET(pc);
2942
+ pc += ARG_LEN;
2943
+ op = (REOp) *pc++;
2944
+ testpc = pc;
2945
+ if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2946
+ SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
2947
+ *testpc == REOP_ASSERTNOTTEST) {
2948
+ result = NULL;
2949
+ break;
2950
+ }
2951
+ curState->u.assertion.top
2952
+ = (char *)gData->backTrackSP -
2953
+ (char *)gData->backTrackStack;
2954
+ curState->u.assertion.sz = gData->cursz;
2955
+ curState->index = x->cp - gData->cpbegin;
2956
+ curState->parenSoFar = parenSoFar;
2957
+ PUSH_STATE_STACK(gData);
2958
+ if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2959
+ nextpc, x, x->cp, 0, 0)) {
2960
+ goto bad;
2961
+ }
2962
+ continue;
2963
+
2964
+ case REOP_ASSERTTEST:
2965
+ --gData->stateStackTop;
2966
+ --curState;
2967
+ x->cp = gData->cpbegin + curState->index; /* the input position goes back to the saved index */
2968
+ gData->backTrackSP =
2969
+ (REBackTrackData *) ((char *)gData->backTrackStack +
2970
+ curState->u.assertion.top);
2971
+ gData->cursz = curState->u.assertion.sz;
2972
+ if (result)
2973
+ result = x;
2974
+ break;
2975
+
2976
+ case REOP_ASSERTNOTTEST:
2977
+ --gData->stateStackTop;
2978
+ --curState;
2979
+ x->cp = gData->cpbegin + curState->index;
2980
+ gData->backTrackSP =
2981
+ (REBackTrackData *) ((char *)gData->backTrackStack +
2982
+ curState->u.assertion.top);
2983
+ gData->cursz = curState->u.assertion.sz;
2984
+ result = (!result) ? x : NULL; /* negated assertion: succeed only if the child failed */
2985
+ break;
2986
+ case REOP_STAR:
2987
+ curState->u.quantifier.min = 0;
2988
+ curState->u.quantifier.max = (uintN)-1; /* (uintN)-1 is the "no upper bound" sentinel */
2989
+ goto quantcommon;
2990
+ case REOP_PLUS:
2991
+ curState->u.quantifier.min = 1;
2992
+ curState->u.quantifier.max = (uintN)-1;
2993
+ goto quantcommon;
2994
+ case REOP_OPT:
2995
+ curState->u.quantifier.min = 0;
2996
+ curState->u.quantifier.max = 1;
2997
+ goto quantcommon;
2998
+ case REOP_QUANT:
2999
+ pc = ReadCompactIndex(pc, &k);
3000
+ curState->u.quantifier.min = k;
3001
+ pc = ReadCompactIndex(pc, &k);
3002
+ /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
3003
+ curState->u.quantifier.max = k - 1;
3004
+ JS_ASSERT(curState->u.quantifier.min
3005
+ <= curState->u.quantifier.max);
3006
+ quantcommon:
3007
+ if (curState->u.quantifier.max == 0) { /* nothing may be consumed: jump by the stored offset and succeed */
3008
+ pc = pc + GET_OFFSET(pc);
3009
+ op = (REOp) *pc++;
3010
+ result = x;
3011
+ continue;
3012
+ }
3013
+ /* Step over <next> */
3014
+ nextpc = pc + ARG_LEN;
3015
+ op = (REOp) *nextpc++;
3016
+ startcp = x->cp;
3017
+ if (REOP_IS_SIMPLE(op)) {
3018
+ if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
3019
+ if (curState->u.quantifier.min == 0)
3020
+ result = x;
3021
+ else
3022
+ result = NULL;
3023
+ pc = pc + GET_OFFSET(pc);
3024
+ break;
3025
+ }
3026
+ op = (REOp) *nextpc++;
3027
+ result = x;
3028
+ }
3029
+ curState->index = startcp - gData->cpbegin;
3030
+ curState->continue_op = REOP_REPEAT;
3031
+ curState->continue_pc = pc;
3032
+ curState->parenSoFar = parenSoFar;
3033
+ PUSH_STATE_STACK(gData);
3034
+ if (curState->u.quantifier.min == 0 &&
3035
+ !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
3036
+ 0, 0)) {
3037
+ goto bad;
3038
+ }
3039
+ pc = nextpc;
3040
+ continue;
3041
+
3042
+ case REOP_ENDCHILD: /* marks the end of a quantifier child */
3043
+ pc = curState[-1].continue_pc;
3044
+ op = (REOp) curState[-1].continue_op;
3045
+
3046
+ if (!result)
3047
+ result = x;
3048
+ continue;
3049
+
3050
+ case REOP_REPEAT:
3051
+ --curState;
3052
+ do {
3053
+ --gData->stateStackTop;
3054
+ if (!result) {
3055
+ /* Failed, see if we have enough children. */
3056
+ if (curState->u.quantifier.min == 0)
3057
+ goto repeatDone;
3058
+ goto break_switch;
3059
+ }
3060
+ if (curState->u.quantifier.min == 0 &&
3061
+ x->cp == gData->cpbegin + curState->index) {
3062
+ /* matched an empty string, that'll get us nowhere */
3063
+ result = NULL;
3064
+ goto break_switch;
3065
+ }
3066
+ if (curState->u.quantifier.min != 0)
3067
+ curState->u.quantifier.min--;
3068
+ if (curState->u.quantifier.max != (uintN) -1)
3069
+ curState->u.quantifier.max--;
3070
+ if (curState->u.quantifier.max == 0)
3071
+ goto repeatDone;
3072
+ nextpc = pc + ARG_LEN;
3073
+ nextop = (REOp) *nextpc;
3074
+ startcp = x->cp;
3075
+ if (REOP_IS_SIMPLE(nextop)) {
3076
+ nextpc++;
3077
+ if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
3078
+ if (curState->u.quantifier.min == 0)
3079
+ goto repeatDone;
3080
+ result = NULL;
3081
+ goto break_switch;
3082
+ }
3083
+ result = x;
3084
+ }
3085
+ curState->index = startcp - gData->cpbegin;
3086
+ PUSH_STATE_STACK(gData);
3087
+ if (curState->u.quantifier.min == 0 &&
3088
+ !PushBackTrackState(gData, REOP_REPEAT,
3089
+ pc, x, startcp,
3090
+ curState->parenSoFar,
3091
+ parenSoFar -
3092
+ curState->parenSoFar)) {
3093
+ goto bad;
3094
+ }
3095
+ } while (*nextpc == REOP_ENDCHILD); /* loop in place while the child was one simple op ending at ENDCHILD */
3096
+ pc = nextpc;
3097
+ op = (REOp) *pc++;
3098
+ parenSoFar = curState->parenSoFar;
3099
+ continue;
3100
+
3101
+ repeatDone:
3102
+ result = x;
3103
+ pc += GET_OFFSET(pc);
3104
+ goto break_switch;
3105
+
3106
+ case REOP_MINIMALSTAR:
3107
+ curState->u.quantifier.min = 0;
3108
+ curState->u.quantifier.max = (uintN)-1;
3109
+ goto minimalquantcommon;
3110
+ case REOP_MINIMALPLUS:
3111
+ curState->u.quantifier.min = 1;
3112
+ curState->u.quantifier.max = (uintN)-1;
3113
+ goto minimalquantcommon;
3114
+ case REOP_MINIMALOPT:
3115
+ curState->u.quantifier.min = 0;
3116
+ curState->u.quantifier.max = 1;
3117
+ goto minimalquantcommon;
3118
+ case REOP_MINIMALQUANT:
3119
+ pc = ReadCompactIndex(pc, &k);
3120
+ curState->u.quantifier.min = k;
3121
+ pc = ReadCompactIndex(pc, &k);
3122
+ /* See REOP_QUANT comments about k - 1. */
3123
+ curState->u.quantifier.max = k - 1;
3124
+ JS_ASSERT(curState->u.quantifier.min
3125
+ <= curState->u.quantifier.max);
3126
+ minimalquantcommon:
3127
+ curState->index = x->cp - gData->cpbegin;
3128
+ curState->parenSoFar = parenSoFar;
3129
+ PUSH_STATE_STACK(gData);
3130
+ if (curState->u.quantifier.min != 0) {
3131
+ curState->continue_op = REOP_MINIMALREPEAT;
3132
+ curState->continue_pc = pc;
3133
+ /* step over <next> */
3134
+ pc += OFFSET_LEN;
3135
+ op = (REOp) *pc++;
3136
+ } else {
3137
+ if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3138
+ pc, x, x->cp, 0, 0)) {
3139
+ goto bad;
3140
+ }
3141
+ --gData->stateStackTop;
3142
+ pc = pc + GET_OFFSET(pc);
3143
+ op = (REOp) *pc++;
3144
+ }
3145
+ continue;
3146
+
3147
+ case REOP_MINIMALREPEAT:
3148
+ --gData->stateStackTop;
3149
+ --curState;
3150
+
3151
+ re_debug("{%d,%d}", curState->u.quantifier.min,
3152
+ curState->u.quantifier.max);
3153
+ #define PREPARE_REPEAT() \
3154
+ JS_BEGIN_MACRO \
3155
+ curState->index = x->cp - gData->cpbegin; \
3156
+ curState->continue_op = REOP_MINIMALREPEAT; \
3157
+ curState->continue_pc = pc; \
3158
+ pc += ARG_LEN; \
3159
+ for (k = curState->parenSoFar; k < parenSoFar; k++) \
3160
+ x->parens[k].index = -1; \
3161
+ PUSH_STATE_STACK(gData); \
3162
+ op = (REOp) *pc++; \
3163
+ JS_ASSERT(op < REOP_LIMIT); \
3164
+ JS_END_MACRO
3165
+
3166
+ if (!result) {
3167
+ re_debug(" - ");
3168
+ /*
3169
+ * Non-greedy failure - try to consume another child.
3170
+ */
3171
+ if (curState->u.quantifier.max == (uintN) -1 ||
3172
+ curState->u.quantifier.max > 0) {
3173
+ PREPARE_REPEAT();
3174
+ continue;
3175
+ }
3176
+ /* Don't need to adjust pc since we're going to pop. */
3177
+ break;
3178
+ }
3179
+ if (curState->u.quantifier.min == 0 &&
3180
+ x->cp == gData->cpbegin + curState->index) {
3181
+ /* Matched an empty string, that'll get us nowhere. */
3182
+ result = NULL;
3183
+ break;
3184
+ }
3185
+ if (curState->u.quantifier.min != 0)
3186
+ curState->u.quantifier.min--;
3187
+ if (curState->u.quantifier.max != (uintN) -1)
3188
+ curState->u.quantifier.max--;
3189
+ if (curState->u.quantifier.min != 0) {
3190
+ PREPARE_REPEAT();
3191
+ continue;
3192
+ }
3193
+ curState->index = x->cp - gData->cpbegin;
3194
+ curState->parenSoFar = parenSoFar;
3195
+ PUSH_STATE_STACK(gData);
3196
+ if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3197
+ pc, x, x->cp,
3198
+ curState->parenSoFar,
3199
+ parenSoFar - curState->parenSoFar)) {
3200
+ goto bad;
3201
+ }
3202
+ --gData->stateStackTop;
3203
+ pc = pc + GET_OFFSET(pc);
3204
+ op = (REOp) *pc++;
3205
+ JS_ASSERT(op < REOP_LIMIT);
3206
+ continue;
3207
+ default:
3208
+ JS_ASSERT(JS_FALSE);
3209
+ result = NULL;
3210
+ }
3211
+ break_switch:;
3212
+ }
3213
+
3214
+ /*
3215
+ * If the match failed and there's a backtrack option, take it.
3216
+ * Otherwise this is a complete and utter failure.
3217
+ */
3218
+ if (!result) {
3219
+ if (gData->cursz == 0) /* nothing saved to backtrack to; gData->ok is left untouched */
3220
+ return NULL;
3221
+ if (!JS_CHECK_OPERATION_LIMIT(gData->cx, JSOW_JUMP)) {
3222
+ gData->ok = JS_FALSE;
3223
+ return NULL;
3224
+ }
3225
+
3226
+ /* Potentially detect explosive regex here. */
3227
+ gData->backTrackCount++;
3228
+ if (gData->backTrackLimit && /* a limit of 0 disables this check */
3229
+ gData->backTrackCount >= gData->backTrackLimit) {
3230
+ JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3231
+ JSMSG_REGEXP_TOO_COMPLEX);
3232
+ gData->ok = JS_FALSE;
3233
+ return NULL;
3234
+ }
3235
+
3236
+ backTrackData = gData->backTrackSP; /* pop the most recent backtrack record */
3237
+ gData->cursz = backTrackData->sz;
3238
+ gData->backTrackSP =
3239
+ (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3240
+ x->cp = backTrackData->cp;
3241
+ pc = backTrackData->backtrack_pc;
3242
+ op = (REOp) backTrackData->backtrack_op;
3243
+ JS_ASSERT(op < REOP_LIMIT);
3244
+ gData->stateStackTop = backTrackData->saveStateStackTop;
3245
+ JS_ASSERT(gData->stateStackTop);
3246
+
3247
+ memcpy(gData->stateStack, backTrackData + 1, /* the saved state-stack copy sits right after the record header */
3248
+ sizeof(REProgState) * backTrackData->saveStateStackTop);
3249
+ curState = &gData->stateStack[gData->stateStackTop - 1];
3250
+
3251
+ if (backTrackData->parenCount) { /* saved captures follow the state-stack copy; put them back */
3252
+ memcpy(&x->parens[backTrackData->parenIndex],
3253
+ (char *)(backTrackData + 1) +
3254
+ sizeof(REProgState) * backTrackData->saveStateStackTop,
3255
+ sizeof(RECapture) * backTrackData->parenCount);
3256
+ parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3257
+ } else {
3258
+ for (k = curState->parenSoFar; k < parenSoFar; k++) /* no captures saved: mark those opened since then as unset (-1) */
3259
+ x->parens[k].index = -1;
3260
+ parenSoFar = curState->parenSoFar;
3261
+ }
3262
+
3263
+ re_debug("\tBT_Pop: %ld,%ld",
3264
+ (unsigned long) backTrackData->parenIndex,
3265
+ (unsigned long) backTrackData->parenCount);
3266
+ continue;
3267
+ }
3268
+ x = result;
3269
+
3270
+ /*
3271
+ * Continue with the expression.
3272
+ */
3273
+ op = (REOp)*pc++;
3274
+ JS_ASSERT(op < REOP_LIMIT);
3275
+ }
3276
+
3277
+ bad: /* failure exit used by the goto sites above */
3278
+ re_debug("\n");
3279
+ return NULL;
3280
+
3281
+ good: /* reached from REOP_END: the whole program matched */
3282
+ re_debug("\n");
3283
+ return x;
3284
+ }
3285
+
3286
+ static REMatchState *
3287
+ MatchRegExp(REGlobalData *gData, REMatchState *x)
3288
+ {
3289
+ REMatchState *result;
3290
+ const jschar *cp = x->cp;
3291
+ const jschar *cp2;
3292
+ uintN j;
3293
+
3294
+ /*
3295
+ * Have to include the position beyond the last character
3296
+ * in order to detect end-of-input/line condition.
3297
+ */
3298
+ for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3299
+ gData->skipped = cp2 - cp;
3300
+ x->cp = cp2;
3301
+ for (j = 0; j < gData->regexp->parenCount; j++)
3302
+ x->parens[j].index = -1;
3303
+ result = ExecuteREBytecode(gData, x);
3304
+ if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3305
+ return result;
3306
+ gData->backTrackSP = gData->backTrackStack;
3307
+ gData->cursz = 0;
3308
+ gData->stateStackTop = 0;
3309
+ cp2 = cp + gData->skipped;
3310
+ }
3311
+ return NULL;
3312
+ }
3313
+
3314
+ #define MIN_BACKTRACK_LIMIT 400000
3315
+
3316
+ static REMatchState *
3317
+ InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3318
+ {
3319
+ REMatchState *result;
3320
+ uintN i;
3321
+
3322
+ gData->backTrackStackSize = INITIAL_BACKTRACK;
3323
+ JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3324
+ &gData->pool,
3325
+ INITIAL_BACKTRACK);
3326
+ if (!gData->backTrackStack)
3327
+ goto bad;
3328
+
3329
+ gData->backTrackSP = gData->backTrackStack;
3330
+ gData->cursz = 0;
3331
+ gData->backTrackCount = 0;
3332
+ gData->backTrackLimit = 0;
3333
+ if (JS_GetOptions(cx) & JSOPTION_RELIMIT) {
3334
+ gData->backTrackLimit = length * length * length; /* O(n^3) */
3335
+ if (gData->backTrackLimit < MIN_BACKTRACK_LIMIT)
3336
+ gData->backTrackLimit = MIN_BACKTRACK_LIMIT;
3337
+ }
3338
+
3339
+ gData->stateStackLimit = INITIAL_STATESTACK;
3340
+ JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3341
+ &gData->pool,
3342
+ sizeof(REProgState) * INITIAL_STATESTACK);
3343
+ if (!gData->stateStack)
3344
+ goto bad;
3345
+
3346
+ gData->stateStackTop = 0;
3347
+ gData->cx = cx;
3348
+ gData->regexp = re;
3349
+ gData->ok = JS_TRUE;
3350
+
3351
+ JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3352
+ &gData->pool,
3353
+ offsetof(REMatchState, parens)
3354
+ + re->parenCount * sizeof(RECapture));
3355
+ if (!result)
3356
+ goto bad;
3357
+
3358
+ for (i = 0; i < re->classCount; i++) {
3359
+ if (!re->classList[i].converted &&
3360
+ !ProcessCharSet(gData, &re->classList[i])) {
3361
+ return NULL;
3362
+ }
3363
+ }
3364
+
3365
+ return result;
3366
+
3367
+ bad:
3368
+ js_ReportOutOfScriptQuota(cx);
3369
+ gData->ok = JS_FALSE;
3370
+ return NULL;
3371
+ }
3372
+
3373
+ JSBool
3374
+ js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3375
+ JSBool test, jsval *rval)
3376
+ {
3377
+ REGlobalData gData;
3378
+ REMatchState *x, *result;
3379
+
3380
+ const jschar *cp, *ep;
3381
+ size_t i, length, start;
3382
+ JSSubString *morepar;
3383
+ JSBool ok;
3384
+ JSRegExpStatics *res;
3385
+ ptrdiff_t matchlen;
3386
+ uintN num, morenum;
3387
+ JSString *parstr, *matchstr;
3388
+ JSObject *obj;
3389
+
3390
+ RECapture *parsub = NULL;
3391
+
3392
+ /*
3393
+ * It's safe to load from cp because JSStrings have a zero at the end,
3394
+ * and we never let cp get beyond cpend.
3395
+ */
3396
+ start = *indexp;
3397
+ JSSTRING_CHARS_AND_LENGTH(str, cp, length);
3398
+ if (start > length)
3399
+ start = length;
3400
+ gData.cpbegin = cp;
3401
+ gData.cpend = cp + length;
3402
+ cp += start;
3403
+ gData.start = start;
3404
+ gData.skipped = 0;
3405
+
3406
+ /*
3407
+ * To avoid multiple allocations in InitMatch(), the arena size parameter
3408
+ * should be at least as big as:
3409
+ * INITIAL_BACKTRACK
3410
+ * + (sizeof(REProgState) * INITIAL_STATESTACK)
3411
+ * + (offsetof(REMatchState, parens) + avgParanSize * sizeof(RECapture))
3412
+ */
3413
+ JS_INIT_ARENA_POOL(&gData.pool, "RegExpPool", 12288, 4,
3414
+ &cx->scriptStackQuota);
3415
+ x = InitMatch(cx, &gData, re, length);
3416
+
3417
+ if (!x) {
3418
+ ok = JS_FALSE;
3419
+ goto out;
3420
+ }
3421
+ x->cp = cp;
3422
+
3423
+ /*
3424
+ * Call the recursive matcher to do the real work. Return null on mismatch
3425
+ * whether testing or not. On match, return an extended Array object.
3426
+ */
3427
+ result = MatchRegExp(&gData, x);
3428
+ ok = gData.ok;
3429
+ if (!ok)
3430
+ goto out;
3431
+ if (!result) {
3432
+ *rval = JSVAL_NULL;
3433
+ goto out;
3434
+ }
3435
+ cp = result->cp;
3436
+ i = cp - gData.cpbegin;
3437
+ *indexp = i;
3438
+ matchlen = i - (start + gData.skipped);
3439
+ ep = cp;
3440
+ cp -= matchlen;
3441
+
3442
+ if (test) {
3443
+ /*
3444
+ * Testing for a match and updating cx->regExpStatics: don't allocate
3445
+ * an array object, do return true.
3446
+ */
3447
+ *rval = JSVAL_TRUE;
3448
+
3449
+ /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3450
+ obj = NULL;
3451
+ } else {
3452
+ /*
3453
+ * The array returned on match has element 0 bound to the matched
3454
+ * string, elements 1 through state.parenCount bound to the paren
3455
+ * matches, an index property telling the length of the left context,
3456
+ * and an input property referring to the input string.
3457
+ */
3458
+ obj = js_NewSlowArrayObject(cx);
3459
+ if (!obj) {
3460
+ ok = JS_FALSE;
3461
+ goto out;
3462
+ }
3463
+ *rval = OBJECT_TO_JSVAL(obj);
3464
+
3465
+ #define DEFVAL(val, id) { \
3466
+ ok = js_DefineProperty(cx, obj, id, val, \
3467
+ JS_PropertyStub, JS_PropertyStub, \
3468
+ JSPROP_ENUMERATE, NULL); \
3469
+ if (!ok) { \
3470
+ cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
3471
+ cx->weakRoots.newborn[GCX_STRING] = NULL; \
3472
+ goto out; \
3473
+ } \
3474
+ }
3475
+
3476
+ matchstr = js_NewStringCopyN(cx, cp, matchlen);
3477
+ if (!matchstr) {
3478
+ cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3479
+ ok = JS_FALSE;
3480
+ goto out;
3481
+ }
3482
+ DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
3483
+ }
3484
+
3485
+ res = &cx->regExpStatics;
3486
+ res->input = str;
3487
+ res->parenCount = re->parenCount;
3488
+ if (re->parenCount == 0) {
3489
+ res->lastParen = js_EmptySubString;
3490
+ } else {
3491
+ for (num = 0; num < re->parenCount; num++) {
3492
+ parsub = &result->parens[num];
3493
+ if (num < 9) {
3494
+ if (parsub->index == -1) {
3495
+ res->parens[num].chars = NULL;
3496
+ res->parens[num].length = 0;
3497
+ } else {
3498
+ res->parens[num].chars = gData.cpbegin + parsub->index;
3499
+ res->parens[num].length = parsub->length;
3500
+ }
3501
+ } else {
3502
+ morenum = num - 9;
3503
+ morepar = res->moreParens;
3504
+ if (!morepar) {
3505
+ res->moreLength = 10;
3506
+ morepar = (JSSubString*)
3507
+ JS_malloc(cx, 10 * sizeof(JSSubString));
3508
+ } else if (morenum >= res->moreLength) {
3509
+ res->moreLength += 10;
3510
+ morepar = (JSSubString*)
3511
+ JS_realloc(cx, morepar,
3512
+ res->moreLength * sizeof(JSSubString));
3513
+ }
3514
+ if (!morepar) {
3515
+ cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3516
+ cx->weakRoots.newborn[GCX_STRING] = NULL;
3517
+ ok = JS_FALSE;
3518
+ goto out;
3519
+ }
3520
+ res->moreParens = morepar;
3521
+ if (parsub->index == -1) {
3522
+ morepar[morenum].chars = NULL;
3523
+ morepar[morenum].length = 0;
3524
+ } else {
3525
+ morepar[morenum].chars = gData.cpbegin + parsub->index;
3526
+ morepar[morenum].length = parsub->length;
3527
+ }
3528
+ }
3529
+ if (test)
3530
+ continue;
3531
+ if (parsub->index == -1) {
3532
+ ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3533
+ JSVAL_VOID, NULL, NULL,
3534
+ JSPROP_ENUMERATE, NULL);
3535
+ } else {
3536
+ parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3537
+ parsub->length);
3538
+ if (!parstr) {
3539
+ cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3540
+ cx->weakRoots.newborn[GCX_STRING] = NULL;
3541
+ ok = JS_FALSE;
3542
+ goto out;
3543
+ }
3544
+ ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3545
+ STRING_TO_JSVAL(parstr), NULL, NULL,
3546
+ JSPROP_ENUMERATE, NULL);
3547
+ }
3548
+ if (!ok) {
3549
+ cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3550
+ cx->weakRoots.newborn[GCX_STRING] = NULL;
3551
+ goto out;
3552
+ }
3553
+ }
3554
+ if (parsub->index == -1) {
3555
+ res->lastParen = js_EmptySubString;
3556
+ } else {
3557
+ res->lastParen.chars = gData.cpbegin + parsub->index;
3558
+ res->lastParen.length = parsub->length;
3559
+ }
3560
+ }
3561
+
3562
+ if (!test) {
3563
+ /*
3564
+ * Define the index and input properties last for better for/in loop
3565
+ * order (so they come after the elements).
3566
+ */
3567
+ DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3568
+ ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
3569
+ DEFVAL(STRING_TO_JSVAL(str),
3570
+ ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
3571
+ }
3572
+
3573
+ #undef DEFVAL
3574
+
3575
+ res->lastMatch.chars = cp;
3576
+ res->lastMatch.length = matchlen;
3577
+
3578
+ /*
3579
+ * For JS1.3 and ECMAv2, emulate Perl5 exactly:
3580
+ *
3581
+ * js1.3 "hi", "hi there" "hihitherehi therebye"
3582
+ */
3583
+ res->leftContext.chars = JSSTRING_CHARS(str);
3584
+ res->leftContext.length = start + gData.skipped;
3585
+ res->rightContext.chars = ep;
3586
+ res->rightContext.length = gData.cpend - ep;
3587
+
3588
+ out:
3589
+ JS_FinishArenaPool(&gData.pool);
3590
+ return ok;
3591
+ }
3592
+
3593
+ /************************************************************************/
3594
+
3595
+ enum regexp_tinyid { /* negative ids of the RegExp instance properties; they fill the second column of regexp_props[] */
3596
+ REGEXP_SOURCE = -1,
3597
+ REGEXP_GLOBAL = -2,
3598
+ REGEXP_IGNORE_CASE = -3,
3599
+ REGEXP_LAST_INDEX = -4, /* the only id regexp_setProperty acts on */
3600
+ REGEXP_MULTILINE = -5,
3601
+ REGEXP_STICKY = -6
3602
+ };
3603
+
3604
+ #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED) /* attributes common to every entry of regexp_props[] */
3605
+ #define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY) /* same attributes plus read-only */
3606
+
3607
+ static JSPropertySpec regexp_props[] = { /* RegExp instance properties: name, tinyid, attributes, then two zeroed fields */
3608
+ {"source", REGEXP_SOURCE, RO_REGEXP_PROP_ATTRS,0,0},
3609
+ {"global", REGEXP_GLOBAL, RO_REGEXP_PROP_ATTRS,0,0},
3610
+ {"ignoreCase", REGEXP_IGNORE_CASE, RO_REGEXP_PROP_ATTRS,0,0},
3611
+ {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0}, /* the one entry without JSPROP_READONLY */
3612
+ {"multiline", REGEXP_MULTILINE, RO_REGEXP_PROP_ATTRS,0,0},
3613
+ {"sticky", REGEXP_STICKY, RO_REGEXP_PROP_ATTRS,0,0},
3614
+ {0,0,0,0,0} /* all-zero terminator entry */
3615
+ };
3616
+
3617
+ static JSBool
3618
+ regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3619
+ {
3620
+ jsint slot;
3621
+ JSRegExp *re;
3622
+
3623
+ if (!JSVAL_IS_INT(id))
3624
+ return JS_TRUE;
3625
+ while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
3626
+ obj = OBJ_GET_PROTO(cx, obj);
3627
+ if (!obj)
3628
+ return JS_TRUE;
3629
+ }
3630
+ slot = JSVAL_TO_INT(id);
3631
+ if (slot == REGEXP_LAST_INDEX)
3632
+ return JS_GetReservedSlot(cx, obj, 0, vp);
3633
+
3634
+ JS_LOCK_OBJ(cx, obj);
3635
+ re = (JSRegExp *) JS_GetPrivate(cx, obj);
3636
+ if (re) {
3637
+ switch (slot) {
3638
+ case REGEXP_SOURCE:
3639
+ *vp = STRING_TO_JSVAL(re->source);
3640
+ break;
3641
+ case REGEXP_GLOBAL:
3642
+ *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3643
+ break;
3644
+ case REGEXP_IGNORE_CASE:
3645
+ *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3646
+ break;
3647
+ case REGEXP_MULTILINE:
3648
+ *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3649
+ break;
3650
+ case REGEXP_STICKY:
3651
+ *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0);
3652
+ break;
3653
+ }
3654
+ }
3655
+ JS_UNLOCK_OBJ(cx, obj);
3656
+ return JS_TRUE;
3657
+ }
3658
+
3659
+ static JSBool
3660
+ regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3661
+ {
3662
+ JSBool ok;
3663
+ jsint slot;
3664
+ jsdouble lastIndex;
3665
+
3666
+ ok = JS_TRUE;
3667
+ if (!JSVAL_IS_INT(id))
3668
+ return ok;
3669
+ while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
3670
+ obj = OBJ_GET_PROTO(cx, obj);
3671
+ if (!obj)
3672
+ return JS_TRUE;
3673
+ }
3674
+ slot = JSVAL_TO_INT(id);
3675
+ if (slot == REGEXP_LAST_INDEX) {
3676
+ if (!JS_ValueToNumber(cx, *vp, &lastIndex))
3677
+ return JS_FALSE;
3678
+ lastIndex = js_DoubleToInteger(lastIndex);
3679
+ ok = JS_NewNumberValue(cx, lastIndex, vp) &&
3680
+ JS_SetReservedSlot(cx, obj, 0, *vp);
3681
+ }
3682
+ return ok;
3683
+ }
3684
+
3685
/*
 * RegExp class static properties and their Perl counterparts:
 *
 *  RegExp.input                $_
 *  RegExp.multiline            $*
 *  RegExp.lastMatch            $&
 *  RegExp.lastParen            $+
 *  RegExp.leftContext          $`
 *  RegExp.rightContext         $'
 *
 * The tiny ids below are negative; regexp_static_props uses the non-negative
 * ids 0..8 for $1..$9.
 */
enum regexp_static_tinyid {
    REGEXP_STATIC_INPUT         = -1,
    REGEXP_STATIC_MULTILINE     = -2,
    REGEXP_STATIC_LAST_MATCH    = -3,
    REGEXP_STATIC_LAST_PAREN    = -4,
    REGEXP_STATIC_LEFT_CONTEXT  = -5,
    REGEXP_STATIC_RIGHT_CONTEXT = -6
};
3703
+
3704
+ JSBool
3705
+ js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3706
+ {
3707
+ JS_ClearRegExpStatics(cx);
3708
+ return js_AddRoot(cx, &res->input, "res->input");
3709
+ }
3710
+
3711
+ void
3712
+ js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3713
+ {
3714
+ if (res->moreParens) {
3715
+ JS_free(cx, res->moreParens);
3716
+ res->moreParens = NULL;
3717
+ }
3718
+ js_RemoveRoot(cx->runtime, &res->input);
3719
+ }
3720
+
3721
+ static JSBool
3722
+ regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3723
+ {
3724
+ jsint slot;
3725
+ JSRegExpStatics *res;
3726
+ JSString *str;
3727
+ JSSubString *sub;
3728
+
3729
+ res = &cx->regExpStatics;
3730
+ if (!JSVAL_IS_INT(id))
3731
+ return JS_TRUE;
3732
+ slot = JSVAL_TO_INT(id);
3733
+ switch (slot) {
3734
+ case REGEXP_STATIC_INPUT:
3735
+ *vp = res->input ? STRING_TO_JSVAL(res->input)
3736
+ : JS_GetEmptyStringValue(cx);
3737
+ return JS_TRUE;
3738
+ case REGEXP_STATIC_MULTILINE:
3739
+ *vp = BOOLEAN_TO_JSVAL(res->multiline);
3740
+ return JS_TRUE;
3741
+ case REGEXP_STATIC_LAST_MATCH:
3742
+ sub = &res->lastMatch;
3743
+ break;
3744
+ case REGEXP_STATIC_LAST_PAREN:
3745
+ sub = &res->lastParen;
3746
+ break;
3747
+ case REGEXP_STATIC_LEFT_CONTEXT:
3748
+ sub = &res->leftContext;
3749
+ break;
3750
+ case REGEXP_STATIC_RIGHT_CONTEXT:
3751
+ sub = &res->rightContext;
3752
+ break;
3753
+ default:
3754
+ sub = REGEXP_PAREN_SUBSTRING(res, slot);
3755
+ break;
3756
+ }
3757
+ str = js_NewStringCopyN(cx, sub->chars, sub->length);
3758
+ if (!str)
3759
+ return JS_FALSE;
3760
+ *vp = STRING_TO_JSVAL(str);
3761
+ return JS_TRUE;
3762
+ }
3763
+
3764
+ static JSBool
3765
+ regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3766
+ {
3767
+ JSRegExpStatics *res;
3768
+
3769
+ if (!JSVAL_IS_INT(id))
3770
+ return JS_TRUE;
3771
+ res = &cx->regExpStatics;
3772
+ /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3773
+ if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3774
+ if (!JSVAL_IS_STRING(*vp) &&
3775
+ !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3776
+ return JS_FALSE;
3777
+ }
3778
+ res->input = JSVAL_TO_STRING(*vp);
3779
+ } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3780
+ if (!JSVAL_IS_BOOLEAN(*vp) &&
3781
+ !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3782
+ return JS_FALSE;
3783
+ }
3784
+ res->multiline = JSVAL_TO_BOOLEAN(*vp);
3785
+ }
3786
+ return JS_TRUE;
3787
+ }
3788
+ #define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
3789
+ #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
3790
+
3791
+ static JSPropertySpec regexp_static_props[] = {
3792
+ {"input",
3793
+ REGEXP_STATIC_INPUT,
3794
+ REGEXP_STATIC_PROP_ATTRS,
3795
+ regexp_static_getProperty, regexp_static_setProperty},
3796
+ {"multiline",
3797
+ REGEXP_STATIC_MULTILINE,
3798
+ REGEXP_STATIC_PROP_ATTRS,
3799
+ regexp_static_getProperty, regexp_static_setProperty},
3800
+ {"lastMatch",
3801
+ REGEXP_STATIC_LAST_MATCH,
3802
+ RO_REGEXP_STATIC_PROP_ATTRS,
3803
+ regexp_static_getProperty, regexp_static_getProperty},
3804
+ {"lastParen",
3805
+ REGEXP_STATIC_LAST_PAREN,
3806
+ RO_REGEXP_STATIC_PROP_ATTRS,
3807
+ regexp_static_getProperty, regexp_static_getProperty},
3808
+ {"leftContext",
3809
+ REGEXP_STATIC_LEFT_CONTEXT,
3810
+ RO_REGEXP_STATIC_PROP_ATTRS,
3811
+ regexp_static_getProperty, regexp_static_getProperty},
3812
+ {"rightContext",
3813
+ REGEXP_STATIC_RIGHT_CONTEXT,
3814
+ RO_REGEXP_STATIC_PROP_ATTRS,
3815
+ regexp_static_getProperty, regexp_static_getProperty},
3816
+
3817
+ /* XXX should have block scope and local $1, etc. */
3818
+ {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS,
3819
+ regexp_static_getProperty, regexp_static_getProperty},
3820
+ {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS,
3821
+ regexp_static_getProperty, regexp_static_getProperty},
3822
+ {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS,
3823
+ regexp_static_getProperty, regexp_static_getProperty},
3824
+ {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS,
3825
+ regexp_static_getProperty, regexp_static_getProperty},
3826
+ {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS,
3827
+ regexp_static_getProperty, regexp_static_getProperty},
3828
+ {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS,
3829
+ regexp_static_getProperty, regexp_static_getProperty},
3830
+ {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS,
3831
+ regexp_static_getProperty, regexp_static_getProperty},
3832
+ {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS,
3833
+ regexp_static_getProperty, regexp_static_getProperty},
3834
+ {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS,
3835
+ regexp_static_getProperty, regexp_static_getProperty},
3836
+
3837
+ {0,0,0,0,0}
3838
+ };
3839
+
3840
+ static void
3841
+ regexp_finalize(JSContext *cx, JSObject *obj)
3842
+ {
3843
+ JSRegExp *re;
3844
+
3845
+ re = (JSRegExp *) JS_GetPrivate(cx, obj);
3846
+ if (!re)
3847
+ return;
3848
+ js_DestroyRegExp(cx, re);
3849
+ }
3850
+
3851
+ /* Forward static prototype. */
3852
+ static JSBool
3853
+ regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3854
+ JSBool test, jsval *rval);
3855
+
3856
+ static JSBool
3857
+ regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3858
+ {
3859
+ return regexp_exec_sub(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv,
3860
+ JS_FALSE, rval);
3861
+ }
3862
+
3863
#if JS_HAS_XDR

#include "jsxdrapi.h"

/*
 * XDR hook for RegExp objects.
 *
 * The serialized form is the source string followed by the flags as a
 * uint32.  When encoding, both are taken from *objp's private JSRegExp and
 * the call fails if there is none.  When decoding, a fresh RegExp object
 * with NULL parent and proto is built from the decoded source and flags,
 * its lastIndex is set to 0, and it is stored in *objp.
 */
static JSBool
regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
{
    JSRegExp *re;
    JSString *source;
    uint32 flagsword;
    JSObject *obj;

    if (xdr->mode == JSXDR_ENCODE) {
        re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
        if (!re)
            return JS_FALSE;
        source = re->source;
        flagsword = (uint32)re->flags;
    }

    if (!JS_XDRString(xdr, &source))
        return JS_FALSE;
    if (!JS_XDRUint32(xdr, &flagsword))
        return JS_FALSE;

    if (xdr->mode != JSXDR_DECODE)
        return JS_TRUE;

    obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL, 0);
    if (!obj)
        return JS_FALSE;
    STOBJ_SET_PARENT(obj, NULL);
    STOBJ_SET_PROTO(obj, NULL);

    re = js_NewRegExp(xdr->cx, NULL, source, (uint8)flagsword, JS_FALSE);
    if (!re)
        return JS_FALSE;

    /* Until the object owns re, a failure must destroy it here. */
    if (!JS_SetPrivate(xdr->cx, obj, re) ||
        !js_SetLastIndex(xdr->cx, obj, 0)) {
        js_DestroyRegExp(xdr->cx, re);
        return JS_FALSE;
    }

    *objp = obj;
    return JS_TRUE;
}

#else   /* !JS_HAS_XDR */

#define regexp_xdrObject NULL

#endif  /* !JS_HAS_XDR */
3910
+
3911
+ static void
3912
+ regexp_trace(JSTracer *trc, JSObject *obj)
3913
+ {
3914
+ JSRegExp *re;
3915
+
3916
+ re = (JSRegExp *) JS_GetPrivate(trc->context, obj);
3917
+ if (re && re->source)
3918
+ JS_CALL_STRING_TRACER(trc, re->source, "source");
3919
+ }
3920
+
3921
+ JSClass js_RegExpClass = {
3922
+ js_RegExp_str,
3923
+ JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
3924
+ JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
3925
+ JS_PropertyStub, JS_PropertyStub,
3926
+ regexp_getProperty, regexp_setProperty,
3927
+ JS_EnumerateStub, JS_ResolveStub,
3928
+ JS_ConvertStub, regexp_finalize,
3929
+ NULL, NULL,
3930
+ regexp_call, NULL,
3931
+ regexp_xdrObject, NULL,
3932
+ JS_CLASS_TRACE(regexp_trace), 0
3933
+ };
3934
+
3935
+ static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3936
+
3937
+ JSBool
3938
+ js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp)
3939
+ {
3940
+ JSRegExp *re;
3941
+ const jschar *source;
3942
+ jschar *chars;
3943
+ size_t length, nflags;
3944
+ uintN flags;
3945
+ JSString *str;
3946
+
3947
+ if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2))
3948
+ return JS_FALSE;
3949
+ JS_LOCK_OBJ(cx, obj);
3950
+ re = (JSRegExp *) JS_GetPrivate(cx, obj);
3951
+ if (!re) {
3952
+ JS_UNLOCK_OBJ(cx, obj);
3953
+ *vp = STRING_TO_JSVAL(cx->runtime->emptyString);
3954
+ return JS_TRUE;
3955
+ }
3956
+
3957
+ JSSTRING_CHARS_AND_LENGTH(re->source, source, length);
3958
+ if (length == 0) {
3959
+ source = empty_regexp_ucstr;
3960
+ length = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1;
3961
+ }
3962
+ length += 2;
3963
+ nflags = 0;
3964
+ for (flags = re->flags; flags != 0; flags &= flags - 1)
3965
+ nflags++;
3966
+ chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
3967
+ if (!chars) {
3968
+ JS_UNLOCK_OBJ(cx, obj);
3969
+ return JS_FALSE;
3970
+ }
3971
+
3972
+ chars[0] = '/';
3973
+ js_strncpy(&chars[1], source, length - 2);
3974
+ chars[length-1] = '/';
3975
+ if (nflags) {
3976
+ if (re->flags & JSREG_GLOB)
3977
+ chars[length++] = 'g';
3978
+ if (re->flags & JSREG_FOLD)
3979
+ chars[length++] = 'i';
3980
+ if (re->flags & JSREG_MULTILINE)
3981
+ chars[length++] = 'm';
3982
+ if (re->flags & JSREG_STICKY)
3983
+ chars[length++] = 'y';
3984
+ }
3985
+ JS_UNLOCK_OBJ(cx, obj);
3986
+ chars[length] = 0;
3987
+
3988
+ str = js_NewString(cx, chars, length);
3989
+ if (!str) {
3990
+ JS_free(cx, chars);
3991
+ return JS_FALSE;
3992
+ }
3993
+ *vp = STRING_TO_JSVAL(str);
3994
+ return JS_TRUE;
3995
+ }
3996
+
3997
+ static JSBool
3998
+ regexp_toString(JSContext *cx, uintN argc, jsval *vp)
3999
+ {
4000
+ JSObject *obj;
4001
+
4002
+ obj = JS_THIS_OBJECT(cx, vp);
4003
+ return obj && js_regexp_toString(cx, obj, vp);
4004
+ }
4005
+
4006
+ static JSBool
4007
+ regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4008
+ jsval *rval)
4009
+ {
4010
+ JSString *opt, *str;
4011
+ JSRegExp *oldre, *re;
4012
+ JSBool ok, ok2;
4013
+ JSObject *obj2;
4014
+ size_t length, nbytes;
4015
+ const jschar *cp, *start, *end;
4016
+ jschar *nstart, *ncp, *tmp;
4017
+
4018
+ if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
4019
+ return JS_FALSE;
4020
+ opt = NULL;
4021
+ if (argc == 0) {
4022
+ str = cx->runtime->emptyString;
4023
+ } else {
4024
+ if (JSVAL_IS_OBJECT(argv[0])) {
4025
+ /*
4026
+ * If we get passed in a RegExp object we construct a new
4027
+ * RegExp that is a duplicate of it by re-compiling the
4028
+ * original source code. ECMA requires that it be an error
4029
+ * here if the flags are specified. (We must use the flags
4030
+ * from the original RegExp also).
4031
+ */
4032
+ obj2 = JSVAL_TO_OBJECT(argv[0]);
4033
+ if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
4034
+ if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
4035
+ JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4036
+ JSMSG_NEWREGEXP_FLAGGED);
4037
+ return JS_FALSE;
4038
+ }
4039
+ JS_LOCK_OBJ(cx, obj2);
4040
+ re = (JSRegExp *) JS_GetPrivate(cx, obj2);
4041
+ if (!re) {
4042
+ JS_UNLOCK_OBJ(cx, obj2);
4043
+ return JS_FALSE;
4044
+ }
4045
+ re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
4046
+ JS_UNLOCK_OBJ(cx, obj2);
4047
+ goto created;
4048
+ }
4049
+ }
4050
+ str = js_ValueToString(cx, argv[0]);
4051
+ if (!str)
4052
+ return JS_FALSE;
4053
+ argv[0] = STRING_TO_JSVAL(str);
4054
+ if (argc > 1) {
4055
+ if (JSVAL_IS_VOID(argv[1])) {
4056
+ opt = NULL;
4057
+ } else {
4058
+ opt = js_ValueToString(cx, argv[1]);
4059
+ if (!opt)
4060
+ return JS_FALSE;
4061
+ argv[1] = STRING_TO_JSVAL(opt);
4062
+ }
4063
+ }
4064
+
4065
+ /* Escape any naked slashes in the regexp source. */
4066
+ JSSTRING_CHARS_AND_LENGTH(str, start, length);
4067
+ end = start + length;
4068
+ nstart = ncp = NULL;
4069
+ for (cp = start; cp < end; cp++) {
4070
+ if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
4071
+ nbytes = (++length + 1) * sizeof(jschar);
4072
+ if (!nstart) {
4073
+ nstart = (jschar *) JS_malloc(cx, nbytes);
4074
+ if (!nstart)
4075
+ return JS_FALSE;
4076
+ ncp = nstart + (cp - start);
4077
+ js_strncpy(nstart, start, cp - start);
4078
+ } else {
4079
+ tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
4080
+ if (!tmp) {
4081
+ JS_free(cx, nstart);
4082
+ return JS_FALSE;
4083
+ }
4084
+ ncp = tmp + (ncp - nstart);
4085
+ nstart = tmp;
4086
+ }
4087
+ *ncp++ = '\\';
4088
+ }
4089
+ if (nstart)
4090
+ *ncp++ = *cp;
4091
+ }
4092
+
4093
+ if (nstart) {
4094
+ /* Don't forget to store the backstop after the new string. */
4095
+ JS_ASSERT((size_t)(ncp - nstart) == length);
4096
+ *ncp = 0;
4097
+ str = js_NewString(cx, nstart, length);
4098
+ if (!str) {
4099
+ JS_free(cx, nstart);
4100
+ return JS_FALSE;
4101
+ }
4102
+ argv[0] = STRING_TO_JSVAL(str);
4103
+ }
4104
+ }
4105
+
4106
+ re = js_NewRegExpOpt(cx, str, opt, JS_FALSE);
4107
+ created:
4108
+ if (!re)
4109
+ return JS_FALSE;
4110
+ JS_LOCK_OBJ(cx, obj);
4111
+ oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
4112
+ ok = JS_SetPrivate(cx, obj, re);
4113
+ ok2 = js_SetLastIndex(cx, obj, 0);
4114
+ JS_UNLOCK_OBJ(cx, obj);
4115
+ if (!ok) {
4116
+ js_DestroyRegExp(cx, re);
4117
+ return JS_FALSE;
4118
+ }
4119
+ if (oldre)
4120
+ js_DestroyRegExp(cx, oldre);
4121
+ *rval = OBJECT_TO_JSVAL(obj);
4122
+ return ok2;
4123
+ }
4124
+
4125
+ static JSBool
4126
+ regexp_compile(JSContext *cx, uintN argc, jsval *vp)
4127
+ {
4128
+ JSObject *obj;
4129
+
4130
+ obj = JS_THIS_OBJECT(cx, vp);
4131
+ return obj && regexp_compile_sub(cx, obj, argc, vp + 2, vp);
4132
+ }
4133
+
4134
+ static JSBool
4135
+ regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4136
+ JSBool test, jsval *rval)
4137
+ {
4138
+ JSBool ok, sticky;
4139
+ JSRegExp *re;
4140
+ jsdouble lastIndex;
4141
+ JSString *str;
4142
+ size_t i;
4143
+
4144
+ ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
4145
+ if (!ok)
4146
+ return JS_FALSE;
4147
+ JS_LOCK_OBJ(cx, obj);
4148
+ re = (JSRegExp *) JS_GetPrivate(cx, obj);
4149
+ if (!re) {
4150
+ JS_UNLOCK_OBJ(cx, obj);
4151
+ return JS_TRUE;
4152
+ }
4153
+
4154
+ /* NB: we must reach out: after this paragraph, in order to drop re. */
4155
+ HOLD_REGEXP(cx, re);
4156
+ sticky = (re->flags & JSREG_STICKY) != 0;
4157
+ if (re->flags & (JSREG_GLOB | JSREG_STICKY)) {
4158
+ ok = js_GetLastIndex(cx, obj, &lastIndex);
4159
+ } else {
4160
+ lastIndex = 0;
4161
+ }
4162
+ JS_UNLOCK_OBJ(cx, obj);
4163
+ if (!ok)
4164
+ goto out;
4165
+
4166
+ /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4167
+ if (argc == 0) {
4168
+ str = cx->regExpStatics.input;
4169
+ if (!str) {
4170
+ const char *bytes = js_GetStringBytes(cx, re->source);
4171
+
4172
+ if (bytes) {
4173
+ JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4174
+ JSMSG_NO_INPUT,
4175
+ bytes,
4176
+ (re->flags & JSREG_GLOB) ? "g" : "",
4177
+ (re->flags & JSREG_FOLD) ? "i" : "",
4178
+ (re->flags & JSREG_MULTILINE) ? "m" : "",
4179
+ (re->flags & JSREG_STICKY) ? "y" : "");
4180
+ }
4181
+ ok = JS_FALSE;
4182
+ goto out;
4183
+ }
4184
+ } else {
4185
+ str = js_ValueToString(cx, argv[0]);
4186
+ if (!str) {
4187
+ ok = JS_FALSE;
4188
+ goto out;
4189
+ }
4190
+ argv[0] = STRING_TO_JSVAL(str);
4191
+ }
4192
+
4193
+ if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4194
+ ok = js_SetLastIndex(cx, obj, 0);
4195
+ *rval = JSVAL_NULL;
4196
+ } else {
4197
+ i = (size_t) lastIndex;
4198
+ ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4199
+ if (ok &&
4200
+ ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) {
4201
+ ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4202
+ }
4203
+ }
4204
+
4205
+ out:
4206
+ DROP_REGEXP(cx, re);
4207
+ return ok;
4208
+ }
4209
+
4210
+ static JSBool
4211
+ regexp_exec(JSContext *cx, uintN argc, jsval *vp)
4212
+ {
4213
+ return regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_FALSE,
4214
+ vp);
4215
+ }
4216
+
4217
+ static JSBool
4218
+ regexp_test(JSContext *cx, uintN argc, jsval *vp)
4219
+ {
4220
+ if (!regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_TRUE, vp))
4221
+ return JS_FALSE;
4222
+ if (*vp != JSVAL_TRUE)
4223
+ *vp = JSVAL_FALSE;
4224
+ return JS_TRUE;
4225
+ }
4226
+
4227
+ static JSFunctionSpec regexp_methods[] = {
4228
+ #if JS_HAS_TOSOURCE
4229
+ JS_FN(js_toSource_str, regexp_toString, 0,0,0),
4230
+ #endif
4231
+ JS_FN(js_toString_str, regexp_toString, 0,0,0),
4232
+ JS_FN("compile", regexp_compile, 0,2,0),
4233
+ JS_FN("exec", regexp_exec, 0,1,0),
4234
+ JS_FN("test", regexp_test, 0,1,0),
4235
+ JS_FS_END
4236
+ };
4237
+
4238
+ static JSBool
4239
+ RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4240
+ {
4241
+ if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
4242
+ /*
4243
+ * If first arg is regexp and no flags are given, just return the arg.
4244
+ * (regexp_compile_sub detects the regexp + flags case and throws a
4245
+ * TypeError.) See 10.15.3.1.
4246
+ */
4247
+ if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4248
+ !JSVAL_IS_PRIMITIVE(argv[0]) &&
4249
+ OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4250
+ *rval = argv[0];
4251
+ return JS_TRUE;
4252
+ }
4253
+
4254
+ /* Otherwise, replace obj with a new RegExp object. */
4255
+ obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4256
+ if (!obj)
4257
+ return JS_FALSE;
4258
+
4259
+ /*
4260
+ * regexp_compile_sub does not use rval to root its temporaries so we
4261
+ * can use it to root obj.
4262
+ */
4263
+ *rval = OBJECT_TO_JSVAL(obj);
4264
+ }
4265
+ return regexp_compile_sub(cx, obj, argc, argv, rval);
4266
+ }
4267
+
4268
+ JSObject *
4269
+ js_InitRegExpClass(JSContext *cx, JSObject *obj)
4270
+ {
4271
+ JSObject *proto, *ctor;
4272
+ jsval rval;
4273
+
4274
+ proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4275
+ regexp_props, regexp_methods,
4276
+ regexp_static_props, NULL);
4277
+
4278
+ if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4279
+ return NULL;
4280
+ if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4281
+ !JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4282
+ !JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4283
+ !JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4284
+ !JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4285
+ !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4286
+ goto bad;
4287
+ }
4288
+
4289
+ /* Give RegExp.prototype private data so it matches the empty string. */
4290
+ if (!regexp_compile_sub(cx, proto, 0, NULL, &rval))
4291
+ goto bad;
4292
+ return proto;
4293
+
4294
+ bad:
4295
+ JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4296
+ return NULL;
4297
+ }
4298
+
4299
+ JSObject *
4300
+ js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4301
+ jschar *chars, size_t length, uintN flags)
4302
+ {
4303
+ JSString *str;
4304
+ JSObject *obj;
4305
+ JSRegExp *re;
4306
+ JSTempValueRooter tvr;
4307
+
4308
+ str = js_NewStringCopyN(cx, chars, length);
4309
+ if (!str)
4310
+ return NULL;
4311
+ JS_PUSH_TEMP_ROOT_STRING(cx, str, &tvr);
4312
+ re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4313
+ if (!re) {
4314
+ JS_POP_TEMP_ROOT(cx, &tvr);
4315
+ return NULL;
4316
+ }
4317
+ obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4318
+ if (!obj || !JS_SetPrivate(cx, obj, re)) {
4319
+ js_DestroyRegExp(cx, re);
4320
+ obj = NULL;
4321
+ }
4322
+ if (obj && !js_SetLastIndex(cx, obj, 0))
4323
+ obj = NULL;
4324
+ JS_POP_TEMP_ROOT(cx, &tvr);
4325
+ return obj;
4326
+ }
4327
+
4328
+ JSObject *
4329
+ js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4330
+ {
4331
+ JSObject *clone;
4332
+ JSRegExp *re;
4333
+
4334
+ JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4335
+ clone = js_NewObject(cx, &js_RegExpClass, NULL, parent, 0);
4336
+ if (!clone)
4337
+ return NULL;
4338
+ re = (JSRegExp *) JS_GetPrivate(cx, obj);
4339
+ if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4340
+ cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4341
+ return NULL;
4342
+ }
4343
+ HOLD_REGEXP(cx, re);
4344
+ return clone;
4345
+ }
4346
+
4347
+ JSBool
4348
+ js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4349
+ {
4350
+ jsval v;
4351
+
4352
+ return JS_GetReservedSlot(cx, obj, 0, &v) &&
4353
+ JS_ValueToNumber(cx, v, lastIndex);
4354
+ }
4355
+
4356
+ JSBool
4357
+ js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4358
+ {
4359
+ jsval v;
4360
+
4361
+ return JS_NewNumberValue(cx, lastIndex, &v) &&
4362
+ JS_SetReservedSlot(cx, obj, 0, v);
4363
+ }
4364
+