rugged 0.28.4 → 0.28.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (350) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rugged/version.rb +1 -1
  3. data/vendor/libgit2/AUTHORS +0 -1
  4. data/vendor/libgit2/CMakeLists.txt +16 -36
  5. data/vendor/libgit2/COPYING +0 -28
  6. data/vendor/libgit2/cmake/Modules/EnableWarnings.cmake +1 -5
  7. data/vendor/libgit2/cmake/Modules/FindCoreFoundation.cmake +2 -2
  8. data/vendor/libgit2/cmake/Modules/FindGSSAPI.cmake +1 -1
  9. data/vendor/libgit2/cmake/Modules/FindSecurity.cmake +2 -2
  10. data/vendor/libgit2/cmake/Modules/FindStatNsec.cmake +0 -6
  11. data/vendor/libgit2/deps/http-parser/http_parser.c +6 -11
  12. data/vendor/libgit2/deps/regex/CMakeLists.txt +2 -0
  13. data/vendor/libgit2/deps/regex/COPYING +502 -0
  14. data/vendor/libgit2/deps/regex/config.h +7 -0
  15. data/vendor/libgit2/deps/regex/regcomp.c +3857 -0
  16. data/vendor/libgit2/deps/regex/regex.c +92 -0
  17. data/vendor/libgit2/deps/regex/regex.h +582 -0
  18. data/vendor/libgit2/deps/regex/regex_internal.c +1744 -0
  19. data/vendor/libgit2/deps/regex/regex_internal.h +819 -0
  20. data/vendor/libgit2/deps/regex/regexec.c +4369 -0
  21. data/vendor/libgit2/deps/zlib/adler32.c +7 -0
  22. data/vendor/libgit2/deps/zlib/crc32.c +7 -0
  23. data/vendor/libgit2/include/git2.h +0 -2
  24. data/vendor/libgit2/include/git2/apply.h +2 -22
  25. data/vendor/libgit2/include/git2/attr.h +12 -19
  26. data/vendor/libgit2/include/git2/blame.h +2 -2
  27. data/vendor/libgit2/include/git2/blob.h +12 -44
  28. data/vendor/libgit2/include/git2/buffer.h +14 -20
  29. data/vendor/libgit2/include/git2/checkout.h +14 -46
  30. data/vendor/libgit2/include/git2/cherrypick.h +3 -3
  31. data/vendor/libgit2/include/git2/clone.h +2 -2
  32. data/vendor/libgit2/include/git2/commit.h +1 -23
  33. data/vendor/libgit2/include/git2/common.h +5 -7
  34. data/vendor/libgit2/include/git2/config.h +12 -12
  35. data/vendor/libgit2/include/git2/deprecated.h +3 -243
  36. data/vendor/libgit2/include/git2/describe.h +4 -4
  37. data/vendor/libgit2/include/git2/diff.h +14 -16
  38. data/vendor/libgit2/include/git2/filter.h +0 -8
  39. data/vendor/libgit2/include/git2/index.h +1 -2
  40. data/vendor/libgit2/include/git2/indexer.h +4 -48
  41. data/vendor/libgit2/include/git2/inttypes.h +309 -0
  42. data/vendor/libgit2/include/git2/merge.h +10 -6
  43. data/vendor/libgit2/include/git2/net.h +5 -0
  44. data/vendor/libgit2/include/git2/object.h +14 -2
  45. data/vendor/libgit2/include/git2/odb.h +2 -3
  46. data/vendor/libgit2/include/git2/odb_backend.h +4 -5
  47. data/vendor/libgit2/include/git2/oid.h +1 -1
  48. data/vendor/libgit2/include/git2/pack.h +1 -12
  49. data/vendor/libgit2/include/git2/proxy.h +3 -5
  50. data/vendor/libgit2/include/git2/rebase.h +2 -46
  51. data/vendor/libgit2/include/git2/refs.h +0 -19
  52. data/vendor/libgit2/include/git2/remote.h +12 -35
  53. data/vendor/libgit2/include/git2/repository.h +2 -24
  54. data/vendor/libgit2/include/git2/revert.h +1 -1
  55. data/vendor/libgit2/include/git2/stash.h +3 -3
  56. data/vendor/libgit2/include/git2/status.h +16 -25
  57. data/vendor/libgit2/include/git2/submodule.h +3 -20
  58. data/vendor/libgit2/include/git2/sys/alloc.h +9 -9
  59. data/vendor/libgit2/include/git2/sys/odb_backend.h +4 -48
  60. data/vendor/libgit2/include/git2/sys/refdb_backend.h +21 -57
  61. data/vendor/libgit2/include/git2/sys/repository.h +1 -5
  62. data/vendor/libgit2/include/git2/sys/time.h +31 -0
  63. data/vendor/libgit2/include/git2/sys/transport.h +2 -2
  64. data/vendor/libgit2/include/git2/tag.h +2 -11
  65. data/vendor/libgit2/include/git2/trace.h +2 -2
  66. data/vendor/libgit2/include/git2/transport.h +340 -11
  67. data/vendor/libgit2/include/git2/tree.h +1 -1
  68. data/vendor/libgit2/include/git2/types.h +89 -4
  69. data/vendor/libgit2/include/git2/version.h +2 -2
  70. data/vendor/libgit2/include/git2/worktree.h +5 -5
  71. data/vendor/libgit2/libgit2.pc.in +13 -0
  72. data/vendor/libgit2/src/CMakeLists.txt +222 -88
  73. data/vendor/libgit2/src/alloc.c +14 -2
  74. data/vendor/libgit2/src/apply.c +30 -60
  75. data/vendor/libgit2/src/attr.c +64 -70
  76. data/vendor/libgit2/src/attr_file.c +96 -189
  77. data/vendor/libgit2/src/attr_file.h +9 -9
  78. data/vendor/libgit2/src/attrcache.c +46 -44
  79. data/vendor/libgit2/src/attrcache.h +1 -2
  80. data/vendor/libgit2/src/blame.c +5 -17
  81. data/vendor/libgit2/src/blame.h +1 -1
  82. data/vendor/libgit2/src/blame_git.c +7 -21
  83. data/vendor/libgit2/src/blob.c +17 -81
  84. data/vendor/libgit2/src/blob.h +2 -2
  85. data/vendor/libgit2/src/branch.c +5 -29
  86. data/vendor/libgit2/src/buffer.c +7 -14
  87. data/vendor/libgit2/src/cache.c +33 -26
  88. data/vendor/libgit2/src/cache.h +1 -1
  89. data/vendor/libgit2/src/cc-compat.h +0 -5
  90. data/vendor/libgit2/src/checkout.c +16 -26
  91. data/vendor/libgit2/src/cherrypick.c +3 -9
  92. data/vendor/libgit2/src/clone.c +7 -29
  93. data/vendor/libgit2/src/clone.h +0 -4
  94. data/vendor/libgit2/src/commit.c +21 -69
  95. data/vendor/libgit2/src/commit.h +0 -6
  96. data/vendor/libgit2/src/commit_list.c +76 -28
  97. data/vendor/libgit2/src/commit_list.h +2 -2
  98. data/vendor/libgit2/src/common.h +75 -3
  99. data/vendor/libgit2/src/config.c +40 -31
  100. data/vendor/libgit2/src/config.h +6 -7
  101. data/vendor/libgit2/src/config_backend.h +0 -12
  102. data/vendor/libgit2/src/config_cache.c +39 -39
  103. data/vendor/libgit2/src/config_entries.c +99 -69
  104. data/vendor/libgit2/src/config_entries.h +0 -1
  105. data/vendor/libgit2/src/config_file.c +380 -337
  106. data/vendor/libgit2/src/config_mem.c +16 -12
  107. data/vendor/libgit2/src/config_parse.c +29 -49
  108. data/vendor/libgit2/src/config_parse.h +12 -13
  109. data/vendor/libgit2/src/crlf.c +14 -14
  110. data/vendor/libgit2/src/describe.c +20 -21
  111. data/vendor/libgit2/src/diff.c +58 -43
  112. data/vendor/libgit2/src/diff.h +1 -2
  113. data/vendor/libgit2/src/diff_driver.c +38 -37
  114. data/vendor/libgit2/src/diff_file.c +7 -9
  115. data/vendor/libgit2/src/diff_file.h +1 -1
  116. data/vendor/libgit2/src/diff_generate.c +85 -135
  117. data/vendor/libgit2/src/diff_generate.h +2 -2
  118. data/vendor/libgit2/src/diff_parse.c +1 -1
  119. data/vendor/libgit2/src/diff_print.c +13 -25
  120. data/vendor/libgit2/src/diff_stats.c +1 -1
  121. data/vendor/libgit2/src/diff_tform.c +4 -4
  122. data/vendor/libgit2/src/errors.c +22 -12
  123. data/vendor/libgit2/src/features.h.in +2 -9
  124. data/vendor/libgit2/src/fetch.c +2 -7
  125. data/vendor/libgit2/src/fetchhead.c +1 -1
  126. data/vendor/libgit2/src/filebuf.c +10 -6
  127. data/vendor/libgit2/src/filebuf.h +2 -2
  128. data/vendor/libgit2/src/{futils.c → fileops.c} +17 -21
  129. data/vendor/libgit2/src/{futils.h → fileops.h} +5 -5
  130. data/vendor/libgit2/src/filter.c +8 -16
  131. data/vendor/libgit2/src/fnmatch.c +248 -0
  132. data/vendor/libgit2/src/fnmatch.h +48 -0
  133. data/vendor/libgit2/src/global.c +40 -12
  134. data/vendor/libgit2/src/global.h +2 -0
  135. data/vendor/libgit2/src/hash.c +0 -61
  136. data/vendor/libgit2/src/hash.h +21 -19
  137. data/vendor/libgit2/src/hash/{sha1/collisiondetect.c → hash_collisiondetect.h} +17 -14
  138. data/vendor/libgit2/src/hash/{sha1/common_crypto.c → hash_common_crypto.h} +19 -15
  139. data/vendor/libgit2/src/hash/{sha1/generic.c → hash_generic.c} +10 -22
  140. data/vendor/libgit2/src/hash/{sha1/generic.h → hash_generic.h} +14 -4
  141. data/vendor/libgit2/src/hash/{sha1/mbedtls.c → hash_mbedtls.c} +7 -15
  142. data/vendor/libgit2/src/hash/{sha1/mbedtls.h → hash_mbedtls.h} +11 -6
  143. data/vendor/libgit2/src/hash/{sha1/openssl.c → hash_openssl.h} +18 -14
  144. data/vendor/libgit2/src/hash/{sha1/win32.c → hash_win32.c} +24 -34
  145. data/vendor/libgit2/src/hash/{sha1/win32.h → hash_win32.h} +19 -6
  146. data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/sha1.c +3 -14
  147. data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/sha1.h +0 -0
  148. data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/ubc_check.c +0 -0
  149. data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/ubc_check.h +0 -0
  150. data/vendor/libgit2/src/hashsig.c +1 -1
  151. data/vendor/libgit2/src/idxmap.c +65 -91
  152. data/vendor/libgit2/src/idxmap.h +15 -151
  153. data/vendor/libgit2/src/ignore.c +38 -32
  154. data/vendor/libgit2/src/index.c +43 -66
  155. data/vendor/libgit2/src/index.h +1 -1
  156. data/vendor/libgit2/src/indexer.c +70 -69
  157. data/vendor/libgit2/src/integer.h +4 -39
  158. data/vendor/libgit2/src/iterator.c +22 -27
  159. data/vendor/libgit2/src/map.h +1 -1
  160. data/vendor/libgit2/src/merge.c +44 -58
  161. data/vendor/libgit2/src/merge_driver.c +4 -4
  162. data/vendor/libgit2/src/merge_file.c +1 -1
  163. data/vendor/libgit2/src/mwindow.c +23 -18
  164. data/vendor/libgit2/src/mwindow.h +4 -4
  165. data/vendor/libgit2/src/netops.c +165 -55
  166. data/vendor/libgit2/src/netops.h +25 -3
  167. data/vendor/libgit2/src/notes.c +2 -2
  168. data/vendor/libgit2/src/object.c +2 -2
  169. data/vendor/libgit2/src/object.h +0 -2
  170. data/vendor/libgit2/src/odb.c +23 -41
  171. data/vendor/libgit2/src/odb.h +2 -3
  172. data/vendor/libgit2/src/odb_loose.c +10 -17
  173. data/vendor/libgit2/src/odb_mempack.c +23 -10
  174. data/vendor/libgit2/src/odb_pack.c +4 -4
  175. data/vendor/libgit2/src/offmap.c +55 -43
  176. data/vendor/libgit2/src/offmap.h +24 -102
  177. data/vendor/libgit2/src/oid.c +1 -6
  178. data/vendor/libgit2/src/oidmap.c +57 -39
  179. data/vendor/libgit2/src/oidmap.h +19 -99
  180. data/vendor/libgit2/src/pack-objects.c +32 -25
  181. data/vendor/libgit2/src/pack-objects.h +1 -1
  182. data/vendor/libgit2/src/pack.c +47 -45
  183. data/vendor/libgit2/src/pack.h +14 -12
  184. data/vendor/libgit2/src/parse.c +0 -10
  185. data/vendor/libgit2/src/parse.h +3 -3
  186. data/vendor/libgit2/src/patch.c +1 -1
  187. data/vendor/libgit2/src/patch_generate.c +2 -2
  188. data/vendor/libgit2/src/patch_parse.c +31 -124
  189. data/vendor/libgit2/src/path.c +6 -43
  190. data/vendor/libgit2/src/path.h +0 -2
  191. data/vendor/libgit2/src/pathspec.c +13 -13
  192. data/vendor/libgit2/src/pool.c +22 -26
  193. data/vendor/libgit2/src/pool.h +7 -7
  194. data/vendor/libgit2/src/posix.c +7 -7
  195. data/vendor/libgit2/src/posix.h +1 -12
  196. data/vendor/libgit2/src/proxy.c +2 -7
  197. data/vendor/libgit2/src/push.c +5 -10
  198. data/vendor/libgit2/src/reader.c +2 -2
  199. data/vendor/libgit2/src/rebase.c +7 -66
  200. data/vendor/libgit2/src/refdb.c +0 -12
  201. data/vendor/libgit2/src/refdb_fs.c +165 -214
  202. data/vendor/libgit2/src/reflog.c +13 -11
  203. data/vendor/libgit2/src/refs.c +18 -24
  204. data/vendor/libgit2/src/refspec.c +16 -9
  205. data/vendor/libgit2/src/remote.c +52 -50
  206. data/vendor/libgit2/src/remote.h +2 -2
  207. data/vendor/libgit2/src/repository.c +100 -115
  208. data/vendor/libgit2/src/repository.h +40 -49
  209. data/vendor/libgit2/src/revert.c +3 -8
  210. data/vendor/libgit2/src/revparse.c +19 -18
  211. data/vendor/libgit2/src/revwalk.c +30 -63
  212. data/vendor/libgit2/src/revwalk.h +0 -20
  213. data/vendor/libgit2/src/settings.c +0 -5
  214. data/vendor/libgit2/src/sortedcache.c +26 -12
  215. data/vendor/libgit2/src/sortedcache.h +1 -1
  216. data/vendor/libgit2/src/stash.c +65 -45
  217. data/vendor/libgit2/src/status.c +9 -15
  218. data/vendor/libgit2/src/{allocators/stdalloc.c → stdalloc.c} +4 -3
  219. data/vendor/libgit2/src/{allocators/stdalloc.h → stdalloc.h} +4 -4
  220. data/vendor/libgit2/src/streams/openssl.c +0 -20
  221. data/vendor/libgit2/src/streams/socket.c +2 -2
  222. data/vendor/libgit2/src/strmap.c +84 -37
  223. data/vendor/libgit2/src/strmap.h +33 -105
  224. data/vendor/libgit2/src/submodule.c +70 -102
  225. data/vendor/libgit2/src/submodule.h +1 -1
  226. data/vendor/libgit2/src/sysdir.c +1 -11
  227. data/vendor/libgit2/src/tag.c +2 -10
  228. data/vendor/libgit2/src/trace.c +1 -1
  229. data/vendor/libgit2/src/trace.h +2 -2
  230. data/vendor/libgit2/src/trailer.c +32 -46
  231. data/vendor/libgit2/src/transaction.c +9 -10
  232. data/vendor/libgit2/src/transports/auth.c +9 -10
  233. data/vendor/libgit2/src/transports/auth.h +4 -11
  234. data/vendor/libgit2/src/transports/auth_negotiate.c +9 -23
  235. data/vendor/libgit2/src/transports/auth_negotiate.h +2 -2
  236. data/vendor/libgit2/src/transports/cred.c +6 -6
  237. data/vendor/libgit2/src/{allocators/win32_crtdbg.h → transports/cred.h} +4 -5
  238. data/vendor/libgit2/src/transports/git.c +16 -11
  239. data/vendor/libgit2/src/transports/http.c +276 -419
  240. data/vendor/libgit2/src/transports/http.h +1 -1
  241. data/vendor/libgit2/src/transports/local.c +9 -9
  242. data/vendor/libgit2/src/transports/smart.c +17 -17
  243. data/vendor/libgit2/src/transports/smart.h +2 -2
  244. data/vendor/libgit2/src/transports/smart_protocol.c +60 -36
  245. data/vendor/libgit2/src/transports/ssh.c +36 -46
  246. data/vendor/libgit2/src/transports/winhttp.c +207 -231
  247. data/vendor/libgit2/src/tree-cache.c +7 -14
  248. data/vendor/libgit2/src/tree.c +24 -10
  249. data/vendor/libgit2/src/unix/map.c +1 -1
  250. data/vendor/libgit2/src/unix/posix.h +11 -1
  251. data/vendor/libgit2/src/userdiff.h +1 -3
  252. data/vendor/libgit2/src/util.c +53 -51
  253. data/vendor/libgit2/src/util.h +21 -16
  254. data/vendor/libgit2/src/win32/map.c +5 -3
  255. data/vendor/libgit2/src/win32/path_w32.c +2 -12
  256. data/vendor/libgit2/src/win32/path_w32.h +29 -0
  257. data/vendor/libgit2/src/win32/posix.h +4 -1
  258. data/vendor/libgit2/src/win32/posix_w32.c +5 -40
  259. data/vendor/libgit2/src/win32/precompiled.h +2 -0
  260. data/vendor/libgit2/src/win32/thread.c +10 -5
  261. data/vendor/libgit2/src/win32/w32_buffer.c +3 -7
  262. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.c +93 -0
  263. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.h +2 -0
  264. data/vendor/libgit2/src/win32/w32_stack.c +9 -4
  265. data/vendor/libgit2/src/win32/w32_stack.h +3 -3
  266. data/vendor/libgit2/src/win32/w32_util.c +0 -31
  267. data/vendor/libgit2/src/win32/w32_util.h +32 -6
  268. data/vendor/libgit2/src/worktree.c +22 -36
  269. data/vendor/libgit2/src/xdiff/xdiffi.c +1 -1
  270. data/vendor/libgit2/src/xdiff/xmerge.c +0 -12
  271. data/vendor/libgit2/src/xdiff/xpatience.c +0 -3
  272. metadata +34 -98
  273. data/vendor/libgit2/cmake/Modules/FindGSSFramework.cmake +0 -28
  274. data/vendor/libgit2/cmake/Modules/FindPCRE.cmake +0 -38
  275. data/vendor/libgit2/cmake/Modules/FindPCRE2.cmake +0 -37
  276. data/vendor/libgit2/cmake/Modules/PkgBuildConfig.cmake +0 -110
  277. data/vendor/libgit2/cmake/Modules/SelectGSSAPI.cmake +0 -53
  278. data/vendor/libgit2/cmake/Modules/SelectHTTPSBackend.cmake +0 -124
  279. data/vendor/libgit2/cmake/Modules/SelectHashes.cmake +0 -66
  280. data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +0 -21
  281. data/vendor/libgit2/deps/ntlmclient/compat.h +0 -33
  282. data/vendor/libgit2/deps/ntlmclient/crypt.h +0 -64
  283. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +0 -120
  284. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.h +0 -18
  285. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +0 -145
  286. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.h +0 -18
  287. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +0 -130
  288. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.h +0 -21
  289. data/vendor/libgit2/deps/ntlmclient/ntlm.c +0 -1420
  290. data/vendor/libgit2/deps/ntlmclient/ntlm.h +0 -174
  291. data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +0 -320
  292. data/vendor/libgit2/deps/ntlmclient/unicode.h +0 -36
  293. data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +0 -445
  294. data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +0 -201
  295. data/vendor/libgit2/deps/ntlmclient/utf8.h +0 -1257
  296. data/vendor/libgit2/deps/ntlmclient/util.c +0 -21
  297. data/vendor/libgit2/deps/ntlmclient/util.h +0 -14
  298. data/vendor/libgit2/deps/pcre/CMakeLists.txt +0 -140
  299. data/vendor/libgit2/deps/pcre/COPYING +0 -5
  300. data/vendor/libgit2/deps/pcre/cmake/COPYING-CMAKE-SCRIPTS +0 -22
  301. data/vendor/libgit2/deps/pcre/cmake/FindEditline.cmake +0 -17
  302. data/vendor/libgit2/deps/pcre/cmake/FindPackageHandleStandardArgs.cmake +0 -58
  303. data/vendor/libgit2/deps/pcre/cmake/FindReadline.cmake +0 -29
  304. data/vendor/libgit2/deps/pcre/config.h.in +0 -57
  305. data/vendor/libgit2/deps/pcre/pcre.h +0 -641
  306. data/vendor/libgit2/deps/pcre/pcre_byte_order.c +0 -319
  307. data/vendor/libgit2/deps/pcre/pcre_chartables.c +0 -198
  308. data/vendor/libgit2/deps/pcre/pcre_compile.c +0 -9800
  309. data/vendor/libgit2/deps/pcre/pcre_config.c +0 -190
  310. data/vendor/libgit2/deps/pcre/pcre_dfa_exec.c +0 -3676
  311. data/vendor/libgit2/deps/pcre/pcre_exec.c +0 -7173
  312. data/vendor/libgit2/deps/pcre/pcre_fullinfo.c +0 -245
  313. data/vendor/libgit2/deps/pcre/pcre_get.c +0 -669
  314. data/vendor/libgit2/deps/pcre/pcre_globals.c +0 -86
  315. data/vendor/libgit2/deps/pcre/pcre_internal.h +0 -2787
  316. data/vendor/libgit2/deps/pcre/pcre_jit_compile.c +0 -11913
  317. data/vendor/libgit2/deps/pcre/pcre_maketables.c +0 -156
  318. data/vendor/libgit2/deps/pcre/pcre_newline.c +0 -210
  319. data/vendor/libgit2/deps/pcre/pcre_ord2utf8.c +0 -94
  320. data/vendor/libgit2/deps/pcre/pcre_printint.c +0 -834
  321. data/vendor/libgit2/deps/pcre/pcre_refcount.c +0 -92
  322. data/vendor/libgit2/deps/pcre/pcre_string_utils.c +0 -211
  323. data/vendor/libgit2/deps/pcre/pcre_study.c +0 -1686
  324. data/vendor/libgit2/deps/pcre/pcre_tables.c +0 -727
  325. data/vendor/libgit2/deps/pcre/pcre_ucd.c +0 -3644
  326. data/vendor/libgit2/deps/pcre/pcre_valid_utf8.c +0 -301
  327. data/vendor/libgit2/deps/pcre/pcre_version.c +0 -98
  328. data/vendor/libgit2/deps/pcre/pcre_xclass.c +0 -268
  329. data/vendor/libgit2/deps/pcre/pcreposix.c +0 -421
  330. data/vendor/libgit2/deps/pcre/pcreposix.h +0 -117
  331. data/vendor/libgit2/deps/pcre/ucp.h +0 -224
  332. data/vendor/libgit2/include/git2/cert.h +0 -135
  333. data/vendor/libgit2/include/git2/cred.h +0 -308
  334. data/vendor/libgit2/include/git2/sys/cred.h +0 -90
  335. data/vendor/libgit2/src/allocators/win32_crtdbg.c +0 -118
  336. data/vendor/libgit2/src/config_snapshot.c +0 -206
  337. data/vendor/libgit2/src/errors.h +0 -81
  338. data/vendor/libgit2/src/hash/sha1.h +0 -38
  339. data/vendor/libgit2/src/hash/sha1/collisiondetect.h +0 -19
  340. data/vendor/libgit2/src/hash/sha1/common_crypto.h +0 -19
  341. data/vendor/libgit2/src/hash/sha1/openssl.h +0 -19
  342. data/vendor/libgit2/src/net.c +0 -184
  343. data/vendor/libgit2/src/net.h +0 -36
  344. data/vendor/libgit2/src/regexp.c +0 -221
  345. data/vendor/libgit2/src/regexp.h +0 -97
  346. data/vendor/libgit2/src/transports/auth_ntlm.c +0 -223
  347. data/vendor/libgit2/src/transports/auth_ntlm.h +0 -35
  348. data/vendor/libgit2/src/wildmatch.c +0 -320
  349. data/vendor/libgit2/src/wildmatch.h +0 -23
  350. data/vendor/libgit2/src/win32/w32_common.h +0 -39
@@ -1,190 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /* PCRE is a library of functions to support regular expressions whose syntax
6
- and semantics are as close as possible to those of the Perl 5 language.
7
-
8
- Written by Philip Hazel
9
- Copyright (c) 1997-2012 University of Cambridge
10
-
11
- -----------------------------------------------------------------------------
12
- Redistribution and use in source and binary forms, with or without
13
- modification, are permitted provided that the following conditions are met:
14
-
15
- * Redistributions of source code must retain the above copyright notice,
16
- this list of conditions and the following disclaimer.
17
-
18
- * Redistributions in binary form must reproduce the above copyright
19
- notice, this list of conditions and the following disclaimer in the
20
- documentation and/or other materials provided with the distribution.
21
-
22
- * Neither the name of the University of Cambridge nor the names of its
23
- contributors may be used to endorse or promote products derived from
24
- this software without specific prior written permission.
25
-
26
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
- POSSIBILITY OF SUCH DAMAGE.
37
- -----------------------------------------------------------------------------
38
- */
39
-
40
-
41
- /* This module contains the external function pcre_config(). */
42
-
43
-
44
- #ifdef HAVE_CONFIG_H
45
- #include "config.h"
46
- #endif
47
-
48
- /* Keep the original link size. */
49
- static int real_link_size = LINK_SIZE;
50
-
51
- #include "pcre_internal.h"
52
-
53
-
54
- /*************************************************
55
- * Return info about what features are configured *
56
- *************************************************/
57
-
58
- /* This function has an extensible interface so that additional items can be
59
- added compatibly.
60
-
61
- Arguments:
62
- what what information is required
63
- where where to put the information
64
-
65
- Returns: 0 if data returned, negative on error
66
- */
67
-
68
- #if defined COMPILE_PCRE8
69
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70
- pcre_config(int what, void *where)
71
- #elif defined COMPILE_PCRE16
72
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73
- pcre16_config(int what, void *where)
74
- #elif defined COMPILE_PCRE32
75
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
76
- pcre32_config(int what, void *where)
77
- #endif
78
- {
79
- switch (what)
80
- {
81
- case PCRE_CONFIG_UTF8:
82
- #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
83
- *((int *)where) = 0;
84
- return PCRE_ERROR_BADOPTION;
85
- #else
86
- #if defined SUPPORT_UTF
87
- *((int *)where) = 1;
88
- #else
89
- *((int *)where) = 0;
90
- #endif
91
- break;
92
- #endif
93
-
94
- case PCRE_CONFIG_UTF16:
95
- #if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
96
- *((int *)where) = 0;
97
- return PCRE_ERROR_BADOPTION;
98
- #else
99
- #if defined SUPPORT_UTF
100
- *((int *)where) = 1;
101
- #else
102
- *((int *)where) = 0;
103
- #endif
104
- break;
105
- #endif
106
-
107
- case PCRE_CONFIG_UTF32:
108
- #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
109
- *((int *)where) = 0;
110
- return PCRE_ERROR_BADOPTION;
111
- #else
112
- #if defined SUPPORT_UTF
113
- *((int *)where) = 1;
114
- #else
115
- *((int *)where) = 0;
116
- #endif
117
- break;
118
- #endif
119
-
120
- case PCRE_CONFIG_UNICODE_PROPERTIES:
121
- #ifdef SUPPORT_UCP
122
- *((int *)where) = 1;
123
- #else
124
- *((int *)where) = 0;
125
- #endif
126
- break;
127
-
128
- case PCRE_CONFIG_JIT:
129
- #ifdef SUPPORT_JIT
130
- *((int *)where) = 1;
131
- #else
132
- *((int *)where) = 0;
133
- #endif
134
- break;
135
-
136
- case PCRE_CONFIG_JITTARGET:
137
- #ifdef SUPPORT_JIT
138
- *((const char **)where) = PRIV(jit_get_target)();
139
- #else
140
- *((const char **)where) = NULL;
141
- #endif
142
- break;
143
-
144
- case PCRE_CONFIG_NEWLINE:
145
- *((int *)where) = NEWLINE;
146
- break;
147
-
148
- case PCRE_CONFIG_BSR:
149
- #ifdef BSR_ANYCRLF
150
- *((int *)where) = 1;
151
- #else
152
- *((int *)where) = 0;
153
- #endif
154
- break;
155
-
156
- case PCRE_CONFIG_LINK_SIZE:
157
- *((int *)where) = real_link_size;
158
- break;
159
-
160
- case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
161
- *((int *)where) = POSIX_MALLOC_THRESHOLD;
162
- break;
163
-
164
- case PCRE_CONFIG_PARENS_LIMIT:
165
- *((unsigned long int *)where) = PARENS_NEST_LIMIT;
166
- break;
167
-
168
- case PCRE_CONFIG_MATCH_LIMIT:
169
- *((unsigned long int *)where) = MATCH_LIMIT;
170
- break;
171
-
172
- case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
173
- *((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
174
- break;
175
-
176
- case PCRE_CONFIG_STACKRECURSE:
177
- #ifdef NO_RECURSE
178
- *((int *)where) = 0;
179
- #else
180
- *((int *)where) = 1;
181
- #endif
182
- break;
183
-
184
- default: return PCRE_ERROR_BADOPTION;
185
- }
186
-
187
- return 0;
188
- }
189
-
190
- /* End of pcre_config.c */
@@ -1,3676 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /* PCRE is a library of functions to support regular expressions whose syntax
6
- and semantics are as close as possible to those of the Perl 5 language (but see
7
- below for why this module is different).
8
-
9
- Written by Philip Hazel
10
- Copyright (c) 1997-2017 University of Cambridge
11
-
12
- -----------------------------------------------------------------------------
13
- Redistribution and use in source and binary forms, with or without
14
- modification, are permitted provided that the following conditions are met:
15
-
16
- * Redistributions of source code must retain the above copyright notice,
17
- this list of conditions and the following disclaimer.
18
-
19
- * Redistributions in binary form must reproduce the above copyright
20
- notice, this list of conditions and the following disclaimer in the
21
- documentation and/or other materials provided with the distribution.
22
-
23
- * Neither the name of the University of Cambridge nor the names of its
24
- contributors may be used to endorse or promote products derived from
25
- this software without specific prior written permission.
26
-
27
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
- POSSIBILITY OF SUCH DAMAGE.
38
- -----------------------------------------------------------------------------
39
- */
40
-
41
- /* This module contains the external function pcre_dfa_exec(), which is an
42
- alternative matching function that uses a sort of DFA algorithm (not a true
43
- FSM). This is NOT Perl-compatible, but it has advantages in certain
44
- applications. */
45
-
46
-
47
- /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
48
- the performance of his patterns greatly. I could not use it as it stood, as it
49
- was not thread safe, and made assumptions about pattern sizes. Also, it caused
50
- test 7 to loop, and test 9 to crash with a segfault.
51
-
52
- The issue is the check for duplicate states, which is done by a simple linear
53
- search up the state list. (Grep for "duplicate" below to find the code.) For
54
- many patterns, there will never be many states active at one time, so a simple
55
- linear search is fine. In patterns that have many active states, it might be a
56
- bottleneck. The suggested code used an indexing scheme to remember which states
57
- had previously been used for each character, and avoided the linear search when
58
- it knew there was no chance of a duplicate. This was implemented when adding
59
- states to the state lists.
60
-
61
- I wrote some thread-safe, not-limited code to try something similar at the time
62
- of checking for duplicates (instead of when adding states), using index vectors
63
- on the stack. It did give a 13% improvement with one specially constructed
64
- pattern for certain subject strings, but on other strings and on many of the
65
- simpler patterns in the test suite it did worse. The major problem, I think,
66
- was the extra time to initialize the index. This had to be done for each call
67
- of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68
- only once - I suspect this was the cause of the problems with the tests.)
69
-
70
- Overall, I concluded that the gains in some cases did not outweigh the losses
71
- in others, so I abandoned this code. */
72
-
73
-
74
-
75
- #ifdef HAVE_CONFIG_H
76
- #include "config.h"
77
- #endif
78
-
79
- #define NLBLOCK md /* Block containing newline information */
80
- #define PSSTART start_subject /* Field containing processed string start */
81
- #define PSEND end_subject /* Field containing processed string end */
82
-
83
- #include "pcre_internal.h"
84
-
85
-
86
- /* For use to indent debugging output */
87
-
88
- #define SP " "
89
-
90
-
91
- /*************************************************
92
- * Code parameters and static tables *
93
- *************************************************/
94
-
95
- /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
96
- into others, under special conditions. A gap of 20 between the blocks should be
97
- enough. The resulting opcodes don't have to be less than 256 because they are
98
- never stored, so we push them well clear of the normal opcodes. */
99
-
100
- #define OP_PROP_EXTRA 300
101
- #define OP_EXTUNI_EXTRA 320
102
- #define OP_ANYNL_EXTRA 340
103
- #define OP_HSPACE_EXTRA 360
104
- #define OP_VSPACE_EXTRA 380
105
-
106
-
107
- /* This table identifies those opcodes that are followed immediately by a
108
- character that is to be tested in some way. This makes it possible to
109
- centralize the loading of these characters. In the case of Type * etc, the
110
- "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
111
- small value. Non-zero values in the table are the offsets from the opcode where
112
- the character is to be found. ***NOTE*** If the start of this table is
113
- modified, the three tables that follow must also be modified. */
114
-
115
- static const pcre_uint8 coptable[] = {
116
- 0, /* End */
117
- 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
118
- 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
119
- 0, 0, 0, /* Any, AllAny, Anybyte */
120
- 0, 0, /* \P, \p */
121
- 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
122
- 0, /* \X */
123
- 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
124
- 1, /* Char */
125
- 1, /* Chari */
126
- 1, /* not */
127
- 1, /* noti */
128
- /* Positive single-char repeats */
129
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
130
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
131
- 1+IMM2_SIZE, /* exact */
132
- 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
133
- 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
134
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
135
- 1+IMM2_SIZE, /* exact I */
136
- 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
137
- /* Negative single-char repeats - only for chars < 256 */
138
- 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
139
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
140
- 1+IMM2_SIZE, /* NOT exact */
141
- 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
142
- 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
143
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
144
- 1+IMM2_SIZE, /* NOT exact I */
145
- 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
146
- /* Positive type repeats */
147
- 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
148
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
149
- 1+IMM2_SIZE, /* Type exact */
150
- 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
151
- /* Character class & ref repeats */
152
- 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
153
- 0, 0, /* CRRANGE, CRMINRANGE */
154
- 0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
155
- 0, /* CLASS */
156
- 0, /* NCLASS */
157
- 0, /* XCLASS - variable length */
158
- 0, /* REF */
159
- 0, /* REFI */
160
- 0, /* DNREF */
161
- 0, /* DNREFI */
162
- 0, /* RECURSE */
163
- 0, /* CALLOUT */
164
- 0, /* Alt */
165
- 0, /* Ket */
166
- 0, /* KetRmax */
167
- 0, /* KetRmin */
168
- 0, /* KetRpos */
169
- 0, /* Reverse */
170
- 0, /* Assert */
171
- 0, /* Assert not */
172
- 0, /* Assert behind */
173
- 0, /* Assert behind not */
174
- 0, 0, /* ONCE, ONCE_NC */
175
- 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
176
- 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
177
- 0, 0, /* CREF, DNCREF */
178
- 0, 0, /* RREF, DNRREF */
179
- 0, /* DEF */
180
- 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
181
- 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
182
- 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
183
- 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
184
- 0, 0 /* CLOSE, SKIPZERO */
185
- };
186
-
187
- /* This table identifies those opcodes that inspect a character. It is used to
188
- remember the fact that a character could have been inspected when the end of
189
- the subject is reached. ***NOTE*** If the start of this table is modified, the
190
- two tables that follow must also be modified. */
191
-
192
- static const pcre_uint8 poptable[] = {
193
- 0, /* End */
194
- 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
195
- 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
196
- 1, 1, 1, /* Any, AllAny, Anybyte */
197
- 1, 1, /* \P, \p */
198
- 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
199
- 1, /* \X */
200
- 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
201
- 1, /* Char */
202
- 1, /* Chari */
203
- 1, /* not */
204
- 1, /* noti */
205
- /* Positive single-char repeats */
206
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
207
- 1, 1, 1, /* upto, minupto, exact */
208
- 1, 1, 1, 1, /* *+, ++, ?+, upto+ */
209
- 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
210
- 1, 1, 1, /* upto I, minupto I, exact I */
211
- 1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */
212
- /* Negative single-char repeats - only for chars < 256 */
213
- 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
214
- 1, 1, 1, /* NOT upto, minupto, exact */
215
- 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
216
- 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
217
- 1, 1, 1, /* NOT upto I, minupto I, exact I */
218
- 1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */
219
- /* Positive type repeats */
220
- 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
221
- 1, 1, 1, /* Type upto, minupto, exact */
222
- 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
223
- /* Character class & ref repeats */
224
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
225
- 1, 1, /* CRRANGE, CRMINRANGE */
226
- 1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
227
- 1, /* CLASS */
228
- 1, /* NCLASS */
229
- 1, /* XCLASS - variable length */
230
- 0, /* REF */
231
- 0, /* REFI */
232
- 0, /* DNREF */
233
- 0, /* DNREFI */
234
- 0, /* RECURSE */
235
- 0, /* CALLOUT */
236
- 0, /* Alt */
237
- 0, /* Ket */
238
- 0, /* KetRmax */
239
- 0, /* KetRmin */
240
- 0, /* KetRpos */
241
- 0, /* Reverse */
242
- 0, /* Assert */
243
- 0, /* Assert not */
244
- 0, /* Assert behind */
245
- 0, /* Assert behind not */
246
- 0, 0, /* ONCE, ONCE_NC */
247
- 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
248
- 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
249
- 0, 0, /* CREF, DNCREF */
250
- 0, 0, /* RREF, DNRREF */
251
- 0, /* DEF */
252
- 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
253
- 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
254
- 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
255
- 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
256
- 0, 0 /* CLOSE, SKIPZERO */
257
- };
258
-
259
- /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
260
- and \w */
261
-
262
- static const pcre_uint8 toptable1[] = {
263
- 0, 0, 0, 0, 0, 0,
264
- ctype_digit, ctype_digit,
265
- ctype_space, ctype_space,
266
- ctype_word, ctype_word,
267
- 0, 0 /* OP_ANY, OP_ALLANY */
268
- };
269
-
270
- static const pcre_uint8 toptable2[] = {
271
- 0, 0, 0, 0, 0, 0,
272
- ctype_digit, 0,
273
- ctype_space, 0,
274
- ctype_word, 0,
275
- 1, 1 /* OP_ANY, OP_ALLANY */
276
- };
277
-
278
-
279
- /* Structure for holding data about a particular state, which is in effect the
280
- current data for an active path through the match tree. It must consist
281
- entirely of ints because the working vector we are passed, and which we put
282
- these structures in, is a vector of ints. */
283
-
284
- typedef struct stateblock {
285
- int offset; /* Offset to opcode */
286
- int count; /* Count for repeats */
287
- int data; /* Some use extra data */
288
- } stateblock;
289
-
290
- #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
291
-
292
-
293
- #ifdef PCRE_DEBUG
294
- /*************************************************
295
- * Print character string *
296
- *************************************************/
297
-
298
- /* Character string printing function for debugging.
299
-
300
- Arguments:
301
- p points to string
302
- length number of bytes
303
- f where to print
304
-
305
- Returns: nothing
306
- */
307
-
308
- static void
309
- pchars(const pcre_uchar *p, int length, FILE *f)
310
- {
311
- pcre_uint32 c;
312
- while (length-- > 0)
313
- {
314
- if (isprint(c = *(p++)))
315
- fprintf(f, "%c", c);
316
- else
317
- fprintf(f, "\\x{%02x}", c);
318
- }
319
- }
320
- #endif
321
-
322
-
323
-
324
- /*************************************************
325
- * Execute a Regular Expression - DFA engine *
326
- *************************************************/
327
-
328
- /* This internal function applies a compiled pattern to a subject string,
329
- starting at a given point, using a DFA engine. This function is called from the
330
- external one, possibly multiple times if the pattern is not anchored. The
331
- function calls itself recursively for some kinds of subpattern.
332
-
333
- Arguments:
334
- md the match_data block with fixed information
335
- this_start_code the opening bracket of this subexpression's code
336
- current_subject where we currently are in the subject string
337
- start_offset start offset in the subject string
338
- offsets vector to contain the matching string offsets
339
- offsetcount size of same
340
- workspace vector of workspace
341
- wscount size of same
342
- rlevel function call recursion level
343
-
344
- Returns: > 0 => number of match offset pairs placed in offsets
345
- = 0 => offsets overflowed; longest matches are present
346
- -1 => failed to match
347
- < -1 => some kind of unexpected problem
348
-
349
- The following macros are used for adding states to the two state vectors (one
350
- for the current character, one for the following character). */
351
-
352
- #define ADD_ACTIVE(x,y) \
353
- if (active_count++ < wscount) \
354
- { \
355
- next_active_state->offset = (x); \
356
- next_active_state->count = (y); \
357
- next_active_state++; \
358
- DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
359
- } \
360
- else return PCRE_ERROR_DFA_WSSIZE
361
-
362
- #define ADD_ACTIVE_DATA(x,y,z) \
363
- if (active_count++ < wscount) \
364
- { \
365
- next_active_state->offset = (x); \
366
- next_active_state->count = (y); \
367
- next_active_state->data = (z); \
368
- next_active_state++; \
369
- DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
370
- } \
371
- else return PCRE_ERROR_DFA_WSSIZE
372
-
373
- #define ADD_NEW(x,y) \
374
- if (new_count++ < wscount) \
375
- { \
376
- next_new_state->offset = (x); \
377
- next_new_state->count = (y); \
378
- next_new_state++; \
379
- DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
380
- } \
381
- else return PCRE_ERROR_DFA_WSSIZE
382
-
383
- #define ADD_NEW_DATA(x,y,z) \
384
- if (new_count++ < wscount) \
385
- { \
386
- next_new_state->offset = (x); \
387
- next_new_state->count = (y); \
388
- next_new_state->data = (z); \
389
- next_new_state++; \
390
- DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
391
- (x), (y), (z), __LINE__)); \
392
- } \
393
- else return PCRE_ERROR_DFA_WSSIZE
394
-
395
- /* And now, here is the code */
396
-
397
- static int
398
- internal_dfa_exec(
399
- dfa_match_data *md,
400
- const pcre_uchar *this_start_code,
401
- const pcre_uchar *current_subject,
402
- int start_offset,
403
- int *offsets,
404
- int offsetcount,
405
- int *workspace,
406
- int wscount,
407
- int rlevel)
408
- {
409
- stateblock *active_states, *new_states, *temp_states;
410
- stateblock *next_active_state, *next_new_state;
411
-
412
- const pcre_uint8 *ctypes, *lcc, *fcc;
413
- const pcre_uchar *ptr;
414
- const pcre_uchar *end_code, *first_op;
415
-
416
- dfa_recursion_info new_recursive;
417
-
418
- int active_count, new_count, match_count;
419
-
420
- /* Some fields in the md block are frequently referenced, so we load them into
421
- independent variables in the hope that this will perform better. */
422
-
423
- const pcre_uchar *start_subject = md->start_subject;
424
- const pcre_uchar *end_subject = md->end_subject;
425
- const pcre_uchar *start_code = md->start_code;
426
-
427
- #ifdef SUPPORT_UTF
428
- BOOL utf = (md->poptions & PCRE_UTF8) != 0;
429
- #else
430
- BOOL utf = FALSE;
431
- #endif
432
-
433
- BOOL reset_could_continue = FALSE;
434
-
435
- rlevel++;
436
- offsetcount &= (-2);
437
-
438
- wscount -= 2;
439
- wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
440
- (2 * INTS_PER_STATEBLOCK);
441
-
442
- DPRINTF(("\n%.*s---------------------\n"
443
- "%.*sCall to internal_dfa_exec f=%d\n",
444
- rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
445
-
446
- ctypes = md->tables + ctypes_offset;
447
- lcc = md->tables + lcc_offset;
448
- fcc = md->tables + fcc_offset;
449
-
450
- match_count = PCRE_ERROR_NOMATCH; /* A negative number */
451
-
452
- active_states = (stateblock *)(workspace + 2);
453
- next_new_state = new_states = active_states + wscount;
454
- new_count = 0;
455
-
456
- first_op = this_start_code + 1 + LINK_SIZE +
457
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
458
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
459
- ? IMM2_SIZE:0);
460
-
461
- /* The first thing in any (sub) pattern is a bracket of some sort. Push all
462
- the alternative states onto the list, and find out where the end is. This
463
- makes it possible to use this function recursively, when we want to stop at a
464
- matching internal ket rather than at the end.
465
-
466
- If the first opcode in the first alternative is OP_REVERSE, we are dealing with
467
- a backward assertion. In that case, we have to find out the maximum amount to
468
- move back, and set up each alternative appropriately. */
469
-
470
- if (*first_op == OP_REVERSE)
471
- {
472
- int max_back = 0;
473
- int gone_back;
474
-
475
- end_code = this_start_code;
476
- do
477
- {
478
- int back = GET(end_code, 2+LINK_SIZE);
479
- if (back > max_back) max_back = back;
480
- end_code += GET(end_code, 1);
481
- }
482
- while (*end_code == OP_ALT);
483
-
484
- /* If we can't go back the amount required for the longest lookbehind
485
- pattern, go back as far as we can; some alternatives may still be viable. */
486
-
487
- #ifdef SUPPORT_UTF
488
- /* In character mode we have to step back character by character */
489
-
490
- if (utf)
491
- {
492
- for (gone_back = 0; gone_back < max_back; gone_back++)
493
- {
494
- if (current_subject <= start_subject) break;
495
- current_subject--;
496
- ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
497
- }
498
- }
499
- else
500
- #endif
501
-
502
- /* In byte-mode we can do this quickly. */
503
-
504
- {
505
- gone_back = (current_subject - max_back < start_subject)?
506
- (int)(current_subject - start_subject) : max_back;
507
- current_subject -= gone_back;
508
- }
509
-
510
- /* Save the earliest consulted character */
511
-
512
- if (current_subject < md->start_used_ptr)
513
- md->start_used_ptr = current_subject;
514
-
515
- /* Now we can process the individual branches. */
516
-
517
- end_code = this_start_code;
518
- do
519
- {
520
- int back = GET(end_code, 2+LINK_SIZE);
521
- if (back <= gone_back)
522
- {
523
- int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
524
- ADD_NEW_DATA(-bstate, 0, gone_back - back);
525
- }
526
- end_code += GET(end_code, 1);
527
- }
528
- while (*end_code == OP_ALT);
529
- }
530
-
531
- /* This is the code for a "normal" subpattern (not a backward assertion). The
532
- start of a whole pattern is always one of these. If we are at the top level,
533
- we may be asked to restart matching from the same point that we reached for a
534
- previous partial match. We still have to scan through the top-level branches to
535
- find the end state. */
536
-
537
- else
538
- {
539
- end_code = this_start_code;
540
-
541
- /* Restarting */
542
-
543
- if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
544
- {
545
- do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
546
- new_count = workspace[1];
547
- if (!workspace[0])
548
- memcpy(new_states, active_states, new_count * sizeof(stateblock));
549
- }
550
-
551
- /* Not restarting */
552
-
553
- else
554
- {
555
- int length = 1 + LINK_SIZE +
556
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
557
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
558
- ? IMM2_SIZE:0);
559
- do
560
- {
561
- ADD_NEW((int)(end_code - start_code + length), 0);
562
- end_code += GET(end_code, 1);
563
- length = 1 + LINK_SIZE;
564
- }
565
- while (*end_code == OP_ALT);
566
- }
567
- }
568
-
569
- workspace[0] = 0; /* Bit indicating which vector is current */
570
-
571
- DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
572
-
573
- /* Loop for scanning the subject */
574
-
575
- ptr = current_subject;
576
- for (;;)
577
- {
578
- int i, j;
579
- int clen, dlen;
580
- pcre_uint32 c, d;
581
- int forced_fail = 0;
582
- BOOL partial_newline = FALSE;
583
- BOOL could_continue = reset_could_continue;
584
- reset_could_continue = FALSE;
585
-
586
- /* Make the new state list into the active state list and empty the
587
- new state list. */
588
-
589
- temp_states = active_states;
590
- active_states = new_states;
591
- new_states = temp_states;
592
- active_count = new_count;
593
- new_count = 0;
594
-
595
- workspace[0] ^= 1; /* Remember for the restarting feature */
596
- workspace[1] = active_count;
597
-
598
- #ifdef PCRE_DEBUG
599
- printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
600
- pchars(ptr, STRLEN_UC(ptr), stdout);
601
- printf("\"\n");
602
-
603
- printf("%.*sActive states: ", rlevel*2-2, SP);
604
- for (i = 0; i < active_count; i++)
605
- printf("%d/%d ", active_states[i].offset, active_states[i].count);
606
- printf("\n");
607
- #endif
608
-
609
- /* Set the pointers for adding new states */
610
-
611
- next_active_state = active_states + active_count;
612
- next_new_state = new_states;
613
-
614
- /* Load the current character from the subject outside the loop, as many
615
- different states may want to look at it, and we assume that at least one
616
- will. */
617
-
618
- if (ptr < end_subject)
619
- {
620
- clen = 1; /* Number of data items in the character */
621
- #ifdef SUPPORT_UTF
622
- GETCHARLENTEST(c, ptr, clen);
623
- #else
624
- c = *ptr;
625
- #endif /* SUPPORT_UTF */
626
- }
627
- else
628
- {
629
- clen = 0; /* This indicates the end of the subject */
630
- c = NOTACHAR; /* This value should never actually be used */
631
- }
632
-
633
- /* Scan up the active states and act on each one. The result of an action
634
- may be to add more states to the currently active list (e.g. on hitting a
635
- parenthesis) or it may be to put states on the new list, for considering
636
- when we move the character pointer on. */
637
-
638
- for (i = 0; i < active_count; i++)
639
- {
640
- stateblock *current_state = active_states + i;
641
- BOOL caseless = FALSE;
642
- const pcre_uchar *code;
643
- int state_offset = current_state->offset;
644
- int codevalue, rrc;
645
- int count;
646
-
647
- #ifdef PCRE_DEBUG
648
- printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
649
- if (clen == 0) printf("EOL\n");
650
- else if (c > 32 && c < 127) printf("'%c'\n", c);
651
- else printf("0x%02x\n", c);
652
- #endif
653
-
654
- /* A negative offset is a special case meaning "hold off going to this
655
- (negated) state until the number of characters in the data field have
656
- been skipped". If the could_continue flag was passed over from a previous
657
- state, arrange for it to be passed on. */
658
-
659
- if (state_offset < 0)
660
- {
661
- if (current_state->data > 0)
662
- {
663
- DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
664
- ADD_NEW_DATA(state_offset, current_state->count,
665
- current_state->data - 1);
666
- if (could_continue) reset_could_continue = TRUE;
667
- continue;
668
- }
669
- else
670
- {
671
- current_state->offset = state_offset = -state_offset;
672
- }
673
- }
674
-
675
- /* Check for a duplicate state with the same count, and skip if found.
676
- See the note at the head of this module about the possibility of improving
677
- performance here. */
678
-
679
- for (j = 0; j < i; j++)
680
- {
681
- if (active_states[j].offset == state_offset &&
682
- active_states[j].count == current_state->count)
683
- {
684
- DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
685
- goto NEXT_ACTIVE_STATE;
686
- }
687
- }
688
-
689
- /* The state offset is the offset to the opcode */
690
-
691
- code = start_code + state_offset;
692
- codevalue = *code;
693
-
694
- /* If this opcode inspects a character, but we are at the end of the
695
- subject, remember the fact for use when testing for a partial match. */
696
-
697
- if (clen == 0 && poptable[codevalue] != 0)
698
- could_continue = TRUE;
699
-
700
- /* If this opcode is followed by an inline character, load it. It is
701
- tempting to test for the presence of a subject character here, but that
702
- is wrong, because sometimes zero repetitions of the subject are
703
- permitted.
704
-
705
- We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
706
- argument that is not a data character - but is always one byte long because
707
- the values are small. We have to take special action to deal with \P, \p,
708
- \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
709
- these ones to new opcodes. */
710
-
711
- if (coptable[codevalue] > 0)
712
- {
713
- dlen = 1;
714
- #ifdef SUPPORT_UTF
715
- if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
716
- #endif /* SUPPORT_UTF */
717
- d = code[coptable[codevalue]];
718
- if (codevalue >= OP_TYPESTAR)
719
- {
720
- switch(d)
721
- {
722
- case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
723
- case OP_NOTPROP:
724
- case OP_PROP: codevalue += OP_PROP_EXTRA; break;
725
- case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
726
- case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
727
- case OP_NOT_HSPACE:
728
- case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
729
- case OP_NOT_VSPACE:
730
- case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
731
- default: break;
732
- }
733
- }
734
- }
735
- else
736
- {
737
- dlen = 0; /* Not strictly necessary, but compilers moan */
738
- d = NOTACHAR; /* if these variables are not set. */
739
- }
740
-
741
-
742
- /* Now process the individual opcodes */
743
-
744
- switch (codevalue)
745
- {
746
- /* ========================================================================== */
747
- /* These cases are never obeyed. This is a fudge that causes a compile-
748
- time error if the vectors coptable or poptable, which are indexed by
749
- opcode, are not the correct length. It seems to be the only way to do
750
- such a check at compile time, as the sizeof() operator does not work
751
- in the C preprocessor. */
752
-
753
- case OP_TABLE_LENGTH:
754
- case OP_TABLE_LENGTH +
755
- ((sizeof(coptable) == OP_TABLE_LENGTH) &&
756
- (sizeof(poptable) == OP_TABLE_LENGTH)):
757
- break;
758
-
759
- /* ========================================================================== */
760
- /* Reached a closing bracket. If not at the end of the pattern, carry
761
- on with the next opcode. For repeating opcodes, also add the repeat
762
- state. Note that KETRPOS will always be encountered at the end of the
763
- subpattern, because the possessive subpattern repeats are always handled
764
- using recursive calls. Thus, it never adds any new states.
765
-
766
- At the end of the (sub)pattern, unless we have an empty string and
767
- PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
768
- start of the subject, save the match data, shifting up all previous
769
- matches so we always have the longest first. */
770
-
771
- case OP_KET:
772
- case OP_KETRMIN:
773
- case OP_KETRMAX:
774
- case OP_KETRPOS:
775
- if (code != end_code)
776
- {
777
- ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
778
- if (codevalue != OP_KET)
779
- {
780
- ADD_ACTIVE(state_offset - GET(code, 1), 0);
781
- }
782
- }
783
- else
784
- {
785
- if (ptr > current_subject ||
786
- ((md->moptions & PCRE_NOTEMPTY) == 0 &&
787
- ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
788
- current_subject > start_subject + md->start_offset)))
789
- {
790
- if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
791
- else if (match_count > 0 && ++match_count * 2 > offsetcount)
792
- match_count = 0;
793
- count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
794
- if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
795
- if (offsetcount >= 2)
796
- {
797
- offsets[0] = (int)(current_subject - start_subject);
798
- offsets[1] = (int)(ptr - start_subject);
799
- DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
800
- offsets[1] - offsets[0], (char *)current_subject));
801
- }
802
- if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
803
- {
804
- DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
805
- "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
806
- match_count, rlevel*2-2, SP));
807
- return match_count;
808
- }
809
- }
810
- }
811
- break;
812
-
813
- /* ========================================================================== */
814
- /* These opcodes add to the current list of states without looking
815
- at the current character. */
816
-
817
- /*-----------------------------------------------------------------*/
818
- case OP_ALT:
819
- do { code += GET(code, 1); } while (*code == OP_ALT);
820
- ADD_ACTIVE((int)(code - start_code), 0);
821
- break;
822
-
823
- /*-----------------------------------------------------------------*/
824
- case OP_BRA:
825
- case OP_SBRA:
826
- do
827
- {
828
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
829
- code += GET(code, 1);
830
- }
831
- while (*code == OP_ALT);
832
- break;
833
-
834
- /*-----------------------------------------------------------------*/
835
- case OP_CBRA:
836
- case OP_SCBRA:
837
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
838
- code += GET(code, 1);
839
- while (*code == OP_ALT)
840
- {
841
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
842
- code += GET(code, 1);
843
- }
844
- break;
845
-
846
- /*-----------------------------------------------------------------*/
847
- case OP_BRAZERO:
848
- case OP_BRAMINZERO:
849
- ADD_ACTIVE(state_offset + 1, 0);
850
- code += 1 + GET(code, 2);
851
- while (*code == OP_ALT) code += GET(code, 1);
852
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
853
- break;
854
-
855
- /*-----------------------------------------------------------------*/
856
- case OP_SKIPZERO:
857
- code += 1 + GET(code, 2);
858
- while (*code == OP_ALT) code += GET(code, 1);
859
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
860
- break;
861
-
862
- /*-----------------------------------------------------------------*/
863
- case OP_CIRC:
864
- if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
865
- { ADD_ACTIVE(state_offset + 1, 0); }
866
- break;
867
-
868
- /*-----------------------------------------------------------------*/
869
- case OP_CIRCM:
870
- if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
871
- (ptr != end_subject && WAS_NEWLINE(ptr)))
872
- { ADD_ACTIVE(state_offset + 1, 0); }
873
- break;
874
-
875
- /*-----------------------------------------------------------------*/
876
- case OP_EOD:
877
- if (ptr >= end_subject)
878
- {
879
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
880
- could_continue = TRUE;
881
- else { ADD_ACTIVE(state_offset + 1, 0); }
882
- }
883
- break;
884
-
885
- /*-----------------------------------------------------------------*/
886
- case OP_SOD:
887
- if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
888
- break;
889
-
890
- /*-----------------------------------------------------------------*/
891
- case OP_SOM:
892
- if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
893
- break;
894
-
895
-
896
- /* ========================================================================== */
897
- /* These opcodes inspect the next subject character, and sometimes
898
- the previous one as well, but do not have an argument. The variable
899
- clen contains the length of the current character and is zero if we are
900
- at the end of the subject. */
901
-
902
- /*-----------------------------------------------------------------*/
903
- case OP_ANY:
904
- if (clen > 0 && !IS_NEWLINE(ptr))
905
- {
906
- if (ptr + 1 >= md->end_subject &&
907
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
908
- NLBLOCK->nltype == NLTYPE_FIXED &&
909
- NLBLOCK->nllen == 2 &&
910
- c == NLBLOCK->nl[0])
911
- {
912
- could_continue = partial_newline = TRUE;
913
- }
914
- else
915
- {
916
- ADD_NEW(state_offset + 1, 0);
917
- }
918
- }
919
- break;
920
-
921
- /*-----------------------------------------------------------------*/
922
- case OP_ALLANY:
923
- if (clen > 0)
924
- { ADD_NEW(state_offset + 1, 0); }
925
- break;
926
-
927
- /*-----------------------------------------------------------------*/
928
- case OP_EODN:
929
- if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
930
- could_continue = TRUE;
931
- else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
932
- { ADD_ACTIVE(state_offset + 1, 0); }
933
- break;
934
-
935
- /*-----------------------------------------------------------------*/
936
- case OP_DOLL:
937
- if ((md->moptions & PCRE_NOTEOL) == 0)
938
- {
939
- if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
940
- could_continue = TRUE;
941
- else if (clen == 0 ||
942
- ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
943
- (ptr == end_subject - md->nllen)
944
- ))
945
- { ADD_ACTIVE(state_offset + 1, 0); }
946
- else if (ptr + 1 >= md->end_subject &&
947
- (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
948
- NLBLOCK->nltype == NLTYPE_FIXED &&
949
- NLBLOCK->nllen == 2 &&
950
- c == NLBLOCK->nl[0])
951
- {
952
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
953
- {
954
- reset_could_continue = TRUE;
955
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
956
- }
957
- else could_continue = partial_newline = TRUE;
958
- }
959
- }
960
- break;
961
-
962
- /*-----------------------------------------------------------------*/
963
- case OP_DOLLM:
964
- if ((md->moptions & PCRE_NOTEOL) == 0)
965
- {
966
- if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
967
- could_continue = TRUE;
968
- else if (clen == 0 ||
969
- ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
970
- { ADD_ACTIVE(state_offset + 1, 0); }
971
- else if (ptr + 1 >= md->end_subject &&
972
- (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
973
- NLBLOCK->nltype == NLTYPE_FIXED &&
974
- NLBLOCK->nllen == 2 &&
975
- c == NLBLOCK->nl[0])
976
- {
977
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
978
- {
979
- reset_could_continue = TRUE;
980
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
981
- }
982
- else could_continue = partial_newline = TRUE;
983
- }
984
- }
985
- else if (IS_NEWLINE(ptr))
986
- { ADD_ACTIVE(state_offset + 1, 0); }
987
- break;
988
-
989
- /*-----------------------------------------------------------------*/
990
-
991
- case OP_DIGIT:
992
- case OP_WHITESPACE:
993
- case OP_WORDCHAR:
994
- if (clen > 0 && c < 256 &&
995
- ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
996
- { ADD_NEW(state_offset + 1, 0); }
997
- break;
998
-
999
- /*-----------------------------------------------------------------*/
1000
- case OP_NOT_DIGIT:
1001
- case OP_NOT_WHITESPACE:
1002
- case OP_NOT_WORDCHAR:
1003
- if (clen > 0 && (c >= 256 ||
1004
- ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
1005
- { ADD_NEW(state_offset + 1, 0); }
1006
- break;
1007
-
1008
- /*-----------------------------------------------------------------*/
1009
- case OP_WORD_BOUNDARY:
1010
- case OP_NOT_WORD_BOUNDARY:
1011
- {
1012
- int left_word, right_word;
1013
-
1014
- if (ptr > start_subject)
1015
- {
1016
- const pcre_uchar *temp = ptr - 1;
1017
- if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1018
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1019
- if (utf) { BACKCHAR(temp); }
1020
- #endif
1021
- GETCHARTEST(d, temp);
1022
- #ifdef SUPPORT_UCP
1023
- if ((md->poptions & PCRE_UCP) != 0)
1024
- {
1025
- if (d == '_') left_word = TRUE; else
1026
- {
1027
- int cat = UCD_CATEGORY(d);
1028
- left_word = (cat == ucp_L || cat == ucp_N);
1029
- }
1030
- }
1031
- else
1032
- #endif
1033
- left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1034
- }
1035
- else left_word = FALSE;
1036
-
1037
- if (clen > 0)
1038
- {
1039
- #ifdef SUPPORT_UCP
1040
- if ((md->poptions & PCRE_UCP) != 0)
1041
- {
1042
- if (c == '_') right_word = TRUE; else
1043
- {
1044
- int cat = UCD_CATEGORY(c);
1045
- right_word = (cat == ucp_L || cat == ucp_N);
1046
- }
1047
- }
1048
- else
1049
- #endif
1050
- right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1051
- }
1052
- else right_word = FALSE;
1053
-
1054
- if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1055
- { ADD_ACTIVE(state_offset + 1, 0); }
1056
- }
1057
- break;
1058
-
1059
-
1060
- /*-----------------------------------------------------------------*/
1061
- /* Check the next character by Unicode property. We will get here only
1062
- if the support is in the binary; otherwise a compile-time error occurs.
1063
- */
1064
-
1065
- #ifdef SUPPORT_UCP
1066
- case OP_PROP:
1067
- case OP_NOTPROP:
1068
- if (clen > 0)
1069
- {
1070
- BOOL OK;
1071
- const pcre_uint32 *cp;
1072
- const ucd_record * prop = GET_UCD(c);
1073
- switch(code[1])
1074
- {
1075
- case PT_ANY:
1076
- OK = TRUE;
1077
- break;
1078
-
1079
- case PT_LAMP:
1080
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1081
- prop->chartype == ucp_Lt;
1082
- break;
1083
-
1084
- case PT_GC:
1085
- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1086
- break;
1087
-
1088
- case PT_PC:
1089
- OK = prop->chartype == code[2];
1090
- break;
1091
-
1092
- case PT_SC:
1093
- OK = prop->script == code[2];
1094
- break;
1095
-
1096
- /* These are specials for combination cases. */
1097
-
1098
- case PT_ALNUM:
1099
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1100
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1101
- break;
1102
-
1103
- /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1104
- which means that Perl space and POSIX space are now identical. PCRE
1105
- was changed at release 8.34. */
1106
-
1107
- case PT_SPACE: /* Perl space */
1108
- case PT_PXSPACE: /* POSIX space */
1109
- switch(c)
1110
- {
1111
- HSPACE_CASES:
1112
- VSPACE_CASES:
1113
- OK = TRUE;
1114
- break;
1115
-
1116
- default:
1117
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1118
- break;
1119
- }
1120
- break;
1121
-
1122
- case PT_WORD:
1123
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1124
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1125
- c == CHAR_UNDERSCORE;
1126
- break;
1127
-
1128
- case PT_CLIST:
1129
- cp = PRIV(ucd_caseless_sets) + code[2];
1130
- for (;;)
1131
- {
1132
- if (c < *cp) { OK = FALSE; break; }
1133
- if (c == *cp++) { OK = TRUE; break; }
1134
- }
1135
- break;
1136
-
1137
- case PT_UCNC:
1138
- OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1139
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1140
- c >= 0xe000;
1141
- break;
1142
-
1143
- /* Should never occur, but keep compilers from grumbling. */
1144
-
1145
- default:
1146
- OK = codevalue != OP_PROP;
1147
- break;
1148
- }
1149
-
1150
- if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
1151
- }
1152
- break;
1153
- #endif
1154
-
1155
-
1156
-
1157
- /* ========================================================================== */
1158
- /* These opcodes likewise inspect the subject character, but have an
1159
- argument that is not a data character. It is one of these opcodes:
1160
- OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
1161
- OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1162
-
1163
- case OP_TYPEPLUS:
1164
- case OP_TYPEMINPLUS:
1165
- case OP_TYPEPOSPLUS:
1166
- count = current_state->count; /* Already matched */
1167
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1168
- if (clen > 0)
1169
- {
1170
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1171
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1172
- NLBLOCK->nltype == NLTYPE_FIXED &&
1173
- NLBLOCK->nllen == 2 &&
1174
- c == NLBLOCK->nl[0])
1175
- {
1176
- could_continue = partial_newline = TRUE;
1177
- }
1178
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1179
- (c < 256 &&
1180
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1181
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1182
- {
1183
- if (count > 0 && codevalue == OP_TYPEPOSPLUS)
1184
- {
1185
- active_count--; /* Remove non-match possibility */
1186
- next_active_state--;
1187
- }
1188
- count++;
1189
- ADD_NEW(state_offset, count);
1190
- }
1191
- }
1192
- break;
1193
-
1194
- /*-----------------------------------------------------------------*/
1195
- case OP_TYPEQUERY:
1196
- case OP_TYPEMINQUERY:
1197
- case OP_TYPEPOSQUERY:
1198
- ADD_ACTIVE(state_offset + 2, 0);
1199
- if (clen > 0)
1200
- {
1201
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1202
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1203
- NLBLOCK->nltype == NLTYPE_FIXED &&
1204
- NLBLOCK->nllen == 2 &&
1205
- c == NLBLOCK->nl[0])
1206
- {
1207
- could_continue = partial_newline = TRUE;
1208
- }
1209
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1210
- (c < 256 &&
1211
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1212
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1213
- {
1214
- if (codevalue == OP_TYPEPOSQUERY)
1215
- {
1216
- active_count--; /* Remove non-match possibility */
1217
- next_active_state--;
1218
- }
1219
- ADD_NEW(state_offset + 2, 0);
1220
- }
1221
- }
1222
- break;
1223
-
1224
- /*-----------------------------------------------------------------*/
1225
- case OP_TYPESTAR:
1226
- case OP_TYPEMINSTAR:
1227
- case OP_TYPEPOSSTAR:
1228
- ADD_ACTIVE(state_offset + 2, 0);
1229
- if (clen > 0)
1230
- {
1231
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1232
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1233
- NLBLOCK->nltype == NLTYPE_FIXED &&
1234
- NLBLOCK->nllen == 2 &&
1235
- c == NLBLOCK->nl[0])
1236
- {
1237
- could_continue = partial_newline = TRUE;
1238
- }
1239
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1240
- (c < 256 &&
1241
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1242
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1243
- {
1244
- if (codevalue == OP_TYPEPOSSTAR)
1245
- {
1246
- active_count--; /* Remove non-match possibility */
1247
- next_active_state--;
1248
- }
1249
- ADD_NEW(state_offset, 0);
1250
- }
1251
- }
1252
- break;
1253
-
1254
- /*-----------------------------------------------------------------*/
1255
- case OP_TYPEEXACT:
1256
- count = current_state->count; /* Number already matched */
1257
- if (clen > 0)
1258
- {
1259
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1260
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1261
- NLBLOCK->nltype == NLTYPE_FIXED &&
1262
- NLBLOCK->nllen == 2 &&
1263
- c == NLBLOCK->nl[0])
1264
- {
1265
- could_continue = partial_newline = TRUE;
1266
- }
1267
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1268
- (c < 256 &&
1269
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1270
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1271
- {
1272
- if (++count >= (int)GET2(code, 1))
1273
- { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1274
- else
1275
- { ADD_NEW(state_offset, count); }
1276
- }
1277
- }
1278
- break;
1279
-
1280
- /*-----------------------------------------------------------------*/
1281
- case OP_TYPEUPTO:
1282
- case OP_TYPEMINUPTO:
1283
- case OP_TYPEPOSUPTO:
1284
- ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1285
- count = current_state->count; /* Number already matched */
1286
- if (clen > 0)
1287
- {
1288
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1289
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1290
- NLBLOCK->nltype == NLTYPE_FIXED &&
1291
- NLBLOCK->nllen == 2 &&
1292
- c == NLBLOCK->nl[0])
1293
- {
1294
- could_continue = partial_newline = TRUE;
1295
- }
1296
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1297
- (c < 256 &&
1298
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1299
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1300
- {
1301
- if (codevalue == OP_TYPEPOSUPTO)
1302
- {
1303
- active_count--; /* Remove non-match possibility */
1304
- next_active_state--;
1305
- }
1306
- if (++count >= (int)GET2(code, 1))
1307
- { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1308
- else
1309
- { ADD_NEW(state_offset, count); }
1310
- }
1311
- }
1312
- break;
1313
-
1314
- /* ========================================================================== */
1315
- /* These are virtual opcodes that are used when something like
1316
- OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1317
- argument. It keeps the code above fast for the other cases. The argument
1318
- is in the d variable. */
1319
-
1320
- #ifdef SUPPORT_UCP
1321
- case OP_PROP_EXTRA + OP_TYPEPLUS:
1322
- case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1323
- case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1324
- count = current_state->count; /* Already matched */
1325
- if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1326
- if (clen > 0)
1327
- {
1328
- BOOL OK;
1329
- const pcre_uint32 *cp;
1330
- const ucd_record * prop = GET_UCD(c);
1331
- switch(code[2])
1332
- {
1333
- case PT_ANY:
1334
- OK = TRUE;
1335
- break;
1336
-
1337
- case PT_LAMP:
1338
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1339
- prop->chartype == ucp_Lt;
1340
- break;
1341
-
1342
- case PT_GC:
1343
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1344
- break;
1345
-
1346
- case PT_PC:
1347
- OK = prop->chartype == code[3];
1348
- break;
1349
-
1350
- case PT_SC:
1351
- OK = prop->script == code[3];
1352
- break;
1353
-
1354
- /* These are specials for combination cases. */
1355
-
1356
- case PT_ALNUM:
1357
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1358
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1359
- break;
1360
-
1361
- /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1362
- which means that Perl space and POSIX space are now identical. PCRE
1363
- was changed at release 8.34. */
1364
-
1365
- case PT_SPACE: /* Perl space */
1366
- case PT_PXSPACE: /* POSIX space */
1367
- switch(c)
1368
- {
1369
- HSPACE_CASES:
1370
- VSPACE_CASES:
1371
- OK = TRUE;
1372
- break;
1373
-
1374
- default:
1375
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1376
- break;
1377
- }
1378
- break;
1379
-
1380
- case PT_WORD:
1381
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1382
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1383
- c == CHAR_UNDERSCORE;
1384
- break;
1385
-
1386
- case PT_CLIST:
1387
- cp = PRIV(ucd_caseless_sets) + code[3];
1388
- for (;;)
1389
- {
1390
- if (c < *cp) { OK = FALSE; break; }
1391
- if (c == *cp++) { OK = TRUE; break; }
1392
- }
1393
- break;
1394
-
1395
- case PT_UCNC:
1396
- OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1397
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1398
- c >= 0xe000;
1399
- break;
1400
-
1401
- /* Should never occur, but keep compilers from grumbling. */
1402
-
1403
- default:
1404
- OK = codevalue != OP_PROP;
1405
- break;
1406
- }
1407
-
1408
- if (OK == (d == OP_PROP))
1409
- {
1410
- if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1411
- {
1412
- active_count--; /* Remove non-match possibility */
1413
- next_active_state--;
1414
- }
1415
- count++;
1416
- ADD_NEW(state_offset, count);
1417
- }
1418
- }
1419
- break;
1420
-
1421
- /*-----------------------------------------------------------------*/
1422
- case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1423
- case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1424
- case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1425
- count = current_state->count; /* Already matched */
1426
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1427
- if (clen > 0)
1428
- {
1429
- int lgb, rgb;
1430
- const pcre_uchar *nptr = ptr + clen;
1431
- int ncount = 0;
1432
- if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1433
- {
1434
- active_count--; /* Remove non-match possibility */
1435
- next_active_state--;
1436
- }
1437
- lgb = UCD_GRAPHBREAK(c);
1438
- while (nptr < end_subject)
1439
- {
1440
- dlen = 1;
1441
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1442
- rgb = UCD_GRAPHBREAK(d);
1443
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1444
- ncount++;
1445
- lgb = rgb;
1446
- nptr += dlen;
1447
- }
1448
- count++;
1449
- ADD_NEW_DATA(-state_offset, count, ncount);
1450
- }
1451
- break;
1452
- #endif
1453
-
1454
- /*-----------------------------------------------------------------*/
1455
- case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1456
- case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1457
- case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1458
- count = current_state->count; /* Already matched */
1459
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1460
- if (clen > 0)
1461
- {
1462
- int ncount = 0;
1463
- switch (c)
1464
- {
1465
- case CHAR_VT:
1466
- case CHAR_FF:
1467
- case CHAR_NEL:
1468
- #ifndef EBCDIC
1469
- case 0x2028:
1470
- case 0x2029:
1471
- #endif /* Not EBCDIC */
1472
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1473
- goto ANYNL01;
1474
-
1475
- case CHAR_CR:
1476
- if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1477
- /* Fall through */
1478
-
1479
- ANYNL01:
1480
- case CHAR_LF:
1481
- if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1482
- {
1483
- active_count--; /* Remove non-match possibility */
1484
- next_active_state--;
1485
- }
1486
- count++;
1487
- ADD_NEW_DATA(-state_offset, count, ncount);
1488
- break;
1489
-
1490
- default:
1491
- break;
1492
- }
1493
- }
1494
- break;
1495
-
1496
- /*-----------------------------------------------------------------*/
1497
- case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1498
- case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1499
- case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1500
- count = current_state->count; /* Already matched */
1501
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1502
- if (clen > 0)
1503
- {
1504
- BOOL OK;
1505
- switch (c)
1506
- {
1507
- VSPACE_CASES:
1508
- OK = TRUE;
1509
- break;
1510
-
1511
- default:
1512
- OK = FALSE;
1513
- break;
1514
- }
1515
-
1516
- if (OK == (d == OP_VSPACE))
1517
- {
1518
- if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1519
- {
1520
- active_count--; /* Remove non-match possibility */
1521
- next_active_state--;
1522
- }
1523
- count++;
1524
- ADD_NEW_DATA(-state_offset, count, 0);
1525
- }
1526
- }
1527
- break;
1528
-
1529
- /*-----------------------------------------------------------------*/
1530
- case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1531
- case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1532
- case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1533
- count = current_state->count; /* Already matched */
1534
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1535
- if (clen > 0)
1536
- {
1537
- BOOL OK;
1538
- switch (c)
1539
- {
1540
- HSPACE_CASES:
1541
- OK = TRUE;
1542
- break;
1543
-
1544
- default:
1545
- OK = FALSE;
1546
- break;
1547
- }
1548
-
1549
- if (OK == (d == OP_HSPACE))
1550
- {
1551
- if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1552
- {
1553
- active_count--; /* Remove non-match possibility */
1554
- next_active_state--;
1555
- }
1556
- count++;
1557
- ADD_NEW_DATA(-state_offset, count, 0);
1558
- }
1559
- }
1560
- break;
1561
-
1562
- /*-----------------------------------------------------------------*/
1563
- #ifdef SUPPORT_UCP
1564
- case OP_PROP_EXTRA + OP_TYPEQUERY:
1565
- case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1566
- case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1567
- count = 4;
1568
- goto QS1;
1569
-
1570
- case OP_PROP_EXTRA + OP_TYPESTAR:
1571
- case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1572
- case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1573
- count = 0;
1574
-
1575
- QS1:
1576
-
1577
- ADD_ACTIVE(state_offset + 4, 0);
1578
- if (clen > 0)
1579
- {
1580
- BOOL OK;
1581
- const pcre_uint32 *cp;
1582
- const ucd_record * prop = GET_UCD(c);
1583
- switch(code[2])
1584
- {
1585
- case PT_ANY:
1586
- OK = TRUE;
1587
- break;
1588
-
1589
- case PT_LAMP:
1590
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1591
- prop->chartype == ucp_Lt;
1592
- break;
1593
-
1594
- case PT_GC:
1595
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1596
- break;
1597
-
1598
- case PT_PC:
1599
- OK = prop->chartype == code[3];
1600
- break;
1601
-
1602
- case PT_SC:
1603
- OK = prop->script == code[3];
1604
- break;
1605
-
1606
- /* These are specials for combination cases. */
1607
-
1608
- case PT_ALNUM:
1609
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1610
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1611
- break;
1612
-
1613
- /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1614
- which means that Perl space and POSIX space are now identical. PCRE
1615
- was changed at release 8.34. */
1616
-
1617
- case PT_SPACE: /* Perl space */
1618
- case PT_PXSPACE: /* POSIX space */
1619
- switch(c)
1620
- {
1621
- HSPACE_CASES:
1622
- VSPACE_CASES:
1623
- OK = TRUE;
1624
- break;
1625
-
1626
- default:
1627
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1628
- break;
1629
- }
1630
- break;
1631
-
1632
- case PT_WORD:
1633
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1634
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1635
- c == CHAR_UNDERSCORE;
1636
- break;
1637
-
1638
- case PT_CLIST:
1639
- cp = PRIV(ucd_caseless_sets) + code[3];
1640
- for (;;)
1641
- {
1642
- if (c < *cp) { OK = FALSE; break; }
1643
- if (c == *cp++) { OK = TRUE; break; }
1644
- }
1645
- break;
1646
-
1647
- case PT_UCNC:
1648
- OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1649
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1650
- c >= 0xe000;
1651
- break;
1652
-
1653
- /* Should never occur, but keep compilers from grumbling. */
1654
-
1655
- default:
1656
- OK = codevalue != OP_PROP;
1657
- break;
1658
- }
1659
-
1660
- if (OK == (d == OP_PROP))
1661
- {
1662
- if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1663
- codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1664
- {
1665
- active_count--; /* Remove non-match possibility */
1666
- next_active_state--;
1667
- }
1668
- ADD_NEW(state_offset + count, 0);
1669
- }
1670
- }
1671
- break;
1672
-
1673
- /*-----------------------------------------------------------------*/
1674
- case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1675
- case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1676
- case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1677
- count = 2;
1678
- goto QS2;
1679
-
1680
- case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1681
- case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1682
- case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1683
- count = 0;
1684
-
1685
- QS2:
1686
-
1687
- ADD_ACTIVE(state_offset + 2, 0);
1688
- if (clen > 0)
1689
- {
1690
- int lgb, rgb;
1691
- const pcre_uchar *nptr = ptr + clen;
1692
- int ncount = 0;
1693
- if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1694
- codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1695
- {
1696
- active_count--; /* Remove non-match possibility */
1697
- next_active_state--;
1698
- }
1699
- lgb = UCD_GRAPHBREAK(c);
1700
- while (nptr < end_subject)
1701
- {
1702
- dlen = 1;
1703
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1704
- rgb = UCD_GRAPHBREAK(d);
1705
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1706
- ncount++;
1707
- lgb = rgb;
1708
- nptr += dlen;
1709
- }
1710
- ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1711
- }
1712
- break;
1713
- #endif
1714
-
1715
- /*-----------------------------------------------------------------*/
1716
- case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1717
- case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1718
- case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1719
- count = 2;
1720
- goto QS3;
1721
-
1722
- case OP_ANYNL_EXTRA + OP_TYPESTAR:
1723
- case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1724
- case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1725
- count = 0;
1726
-
1727
- QS3:
1728
- ADD_ACTIVE(state_offset + 2, 0);
1729
- if (clen > 0)
1730
- {
1731
- int ncount = 0;
1732
- switch (c)
1733
- {
1734
- case CHAR_VT:
1735
- case CHAR_FF:
1736
- case CHAR_NEL:
1737
- #ifndef EBCDIC
1738
- case 0x2028:
1739
- case 0x2029:
1740
- #endif /* Not EBCDIC */
1741
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1742
- goto ANYNL02;
1743
-
1744
- case CHAR_CR:
1745
- if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1746
- /* Fall through */
1747
-
1748
- ANYNL02:
1749
- case CHAR_LF:
1750
- if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1751
- codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1752
- {
1753
- active_count--; /* Remove non-match possibility */
1754
- next_active_state--;
1755
- }
1756
- ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1757
- break;
1758
-
1759
- default:
1760
- break;
1761
- }
1762
- }
1763
- break;
1764
-
1765
- /*-----------------------------------------------------------------*/
1766
- case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1767
- case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1768
- case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1769
- count = 2;
1770
- goto QS4;
1771
-
1772
- case OP_VSPACE_EXTRA + OP_TYPESTAR:
1773
- case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1774
- case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1775
- count = 0;
1776
-
1777
- QS4:
1778
- ADD_ACTIVE(state_offset + 2, 0);
1779
- if (clen > 0)
1780
- {
1781
- BOOL OK;
1782
- switch (c)
1783
- {
1784
- VSPACE_CASES:
1785
- OK = TRUE;
1786
- break;
1787
-
1788
- default:
1789
- OK = FALSE;
1790
- break;
1791
- }
1792
- if (OK == (d == OP_VSPACE))
1793
- {
1794
- if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1795
- codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1796
- {
1797
- active_count--; /* Remove non-match possibility */
1798
- next_active_state--;
1799
- }
1800
- ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1801
- }
1802
- }
1803
- break;
1804
-
1805
- /*-----------------------------------------------------------------*/
1806
- case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1807
- case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1808
- case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1809
- count = 2;
1810
- goto QS5;
1811
-
1812
- case OP_HSPACE_EXTRA + OP_TYPESTAR:
1813
- case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1814
- case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1815
- count = 0;
1816
-
1817
- QS5:
1818
- ADD_ACTIVE(state_offset + 2, 0);
1819
- if (clen > 0)
1820
- {
1821
- BOOL OK;
1822
- switch (c)
1823
- {
1824
- HSPACE_CASES:
1825
- OK = TRUE;
1826
- break;
1827
-
1828
- default:
1829
- OK = FALSE;
1830
- break;
1831
- }
1832
-
1833
- if (OK == (d == OP_HSPACE))
1834
- {
1835
- if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1836
- codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1837
- {
1838
- active_count--; /* Remove non-match possibility */
1839
- next_active_state--;
1840
- }
1841
- ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1842
- }
1843
- }
1844
- break;
1845
-
1846
- /*-----------------------------------------------------------------*/
1847
- #ifdef SUPPORT_UCP
1848
- case OP_PROP_EXTRA + OP_TYPEEXACT:
1849
- case OP_PROP_EXTRA + OP_TYPEUPTO:
1850
- case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1851
- case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1852
- if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1853
- { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1854
- count = current_state->count; /* Number already matched */
1855
- if (clen > 0)
1856
- {
1857
- BOOL OK;
1858
- const pcre_uint32 *cp;
1859
- const ucd_record * prop = GET_UCD(c);
1860
- switch(code[1 + IMM2_SIZE + 1])
1861
- {
1862
- case PT_ANY:
1863
- OK = TRUE;
1864
- break;
1865
-
1866
- case PT_LAMP:
1867
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1868
- prop->chartype == ucp_Lt;
1869
- break;
1870
-
1871
- case PT_GC:
1872
- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1873
- break;
1874
-
1875
- case PT_PC:
1876
- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1877
- break;
1878
-
1879
- case PT_SC:
1880
- OK = prop->script == code[1 + IMM2_SIZE + 2];
1881
- break;
1882
-
1883
- /* These are specials for combination cases. */
1884
-
1885
- case PT_ALNUM:
1886
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1887
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1888
- break;
1889
-
1890
- /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1891
- which means that Perl space and POSIX space are now identical. PCRE
1892
- was changed at release 8.34. */
1893
-
1894
- case PT_SPACE: /* Perl space */
1895
- case PT_PXSPACE: /* POSIX space */
1896
- switch(c)
1897
- {
1898
- HSPACE_CASES:
1899
- VSPACE_CASES:
1900
- OK = TRUE;
1901
- break;
1902
-
1903
- default:
1904
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1905
- break;
1906
- }
1907
- break;
1908
-
1909
- case PT_WORD:
1910
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1911
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1912
- c == CHAR_UNDERSCORE;
1913
- break;
1914
-
1915
- case PT_CLIST:
1916
- cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1917
- for (;;)
1918
- {
1919
- if (c < *cp) { OK = FALSE; break; }
1920
- if (c == *cp++) { OK = TRUE; break; }
1921
- }
1922
- break;
1923
-
1924
- case PT_UCNC:
1925
- OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1926
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1927
- c >= 0xe000;
1928
- break;
1929
-
1930
- /* Should never occur, but keep compilers from grumbling. */
1931
-
1932
- default:
1933
- OK = codevalue != OP_PROP;
1934
- break;
1935
- }
1936
-
1937
- if (OK == (d == OP_PROP))
1938
- {
1939
- if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1940
- {
1941
- active_count--; /* Remove non-match possibility */
1942
- next_active_state--;
1943
- }
1944
- if (++count >= (int)GET2(code, 1))
1945
- { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1946
- else
1947
- { ADD_NEW(state_offset, count); }
1948
- }
1949
- }
1950
- break;
1951
-
1952
- /*-----------------------------------------------------------------*/
1953
- case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1954
- case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1955
- case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1956
- case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1957
- if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1958
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1959
- count = current_state->count; /* Number already matched */
1960
- if (clen > 0)
1961
- {
1962
- int lgb, rgb;
1963
- const pcre_uchar *nptr = ptr + clen;
1964
- int ncount = 0;
1965
- if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1966
- {
1967
- active_count--; /* Remove non-match possibility */
1968
- next_active_state--;
1969
- }
1970
- lgb = UCD_GRAPHBREAK(c);
1971
- while (nptr < end_subject)
1972
- {
1973
- dlen = 1;
1974
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1975
- rgb = UCD_GRAPHBREAK(d);
1976
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1977
- ncount++;
1978
- lgb = rgb;
1979
- nptr += dlen;
1980
- }
1981
- if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1982
- reset_could_continue = TRUE;
1983
- if (++count >= (int)GET2(code, 1))
1984
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1985
- else
1986
- { ADD_NEW_DATA(-state_offset, count, ncount); }
1987
- }
1988
- break;
1989
- #endif
1990
-
1991
- /*-----------------------------------------------------------------*/
1992
- case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1993
- case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1994
- case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1995
- case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1996
- if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1997
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1998
- count = current_state->count; /* Number already matched */
1999
- if (clen > 0)
2000
- {
2001
- int ncount = 0;
2002
- switch (c)
2003
- {
2004
- case CHAR_VT:
2005
- case CHAR_FF:
2006
- case CHAR_NEL:
2007
- #ifndef EBCDIC
2008
- case 0x2028:
2009
- case 0x2029:
2010
- #endif /* Not EBCDIC */
2011
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2012
- goto ANYNL03;
2013
-
2014
- case CHAR_CR:
2015
- if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
2016
- /* Fall through */
2017
-
2018
- ANYNL03:
2019
- case CHAR_LF:
2020
- if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2021
- {
2022
- active_count--; /* Remove non-match possibility */
2023
- next_active_state--;
2024
- }
2025
- if (++count >= (int)GET2(code, 1))
2026
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2027
- else
2028
- { ADD_NEW_DATA(-state_offset, count, ncount); }
2029
- break;
2030
-
2031
- default:
2032
- break;
2033
- }
2034
- }
2035
- break;
2036
-
2037
- /*-----------------------------------------------------------------*/
2038
- case OP_VSPACE_EXTRA + OP_TYPEEXACT:
2039
- case OP_VSPACE_EXTRA + OP_TYPEUPTO:
2040
- case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2041
- case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2042
- if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2043
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2044
- count = current_state->count; /* Number already matched */
2045
- if (clen > 0)
2046
- {
2047
- BOOL OK;
2048
- switch (c)
2049
- {
2050
- VSPACE_CASES:
2051
- OK = TRUE;
2052
- break;
2053
-
2054
- default:
2055
- OK = FALSE;
2056
- }
2057
-
2058
- if (OK == (d == OP_VSPACE))
2059
- {
2060
- if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
2061
- {
2062
- active_count--; /* Remove non-match possibility */
2063
- next_active_state--;
2064
- }
2065
- if (++count >= (int)GET2(code, 1))
2066
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2067
- else
2068
- { ADD_NEW_DATA(-state_offset, count, 0); }
2069
- }
2070
- }
2071
- break;
2072
-
2073
- /*-----------------------------------------------------------------*/
2074
- case OP_HSPACE_EXTRA + OP_TYPEEXACT:
2075
- case OP_HSPACE_EXTRA + OP_TYPEUPTO:
2076
- case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2077
- case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2078
- if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2079
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2080
- count = current_state->count; /* Number already matched */
2081
- if (clen > 0)
2082
- {
2083
- BOOL OK;
2084
- switch (c)
2085
- {
2086
- HSPACE_CASES:
2087
- OK = TRUE;
2088
- break;
2089
-
2090
- default:
2091
- OK = FALSE;
2092
- break;
2093
- }
2094
-
2095
- if (OK == (d == OP_HSPACE))
2096
- {
2097
- if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2098
- {
2099
- active_count--; /* Remove non-match possibility */
2100
- next_active_state--;
2101
- }
2102
- if (++count >= (int)GET2(code, 1))
2103
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2104
- else
2105
- { ADD_NEW_DATA(-state_offset, count, 0); }
2106
- }
2107
- }
2108
- break;
2109
-
2110
- /* ========================================================================== */
2111
- /* These opcodes are followed by a character that is usually compared
2112
- to the current subject character; it is loaded into d. We still get
2113
- here even if there is no subject character, because in some cases zero
2114
- repetitions are permitted. */
2115
-
2116
- /*-----------------------------------------------------------------*/
2117
- case OP_CHAR:
2118
- if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
2119
- break;
2120
-
2121
- /*-----------------------------------------------------------------*/
2122
- case OP_CHARI:
2123
- if (clen == 0) break;
2124
-
2125
- #ifdef SUPPORT_UTF
2126
- if (utf)
2127
- {
2128
- if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2129
- {
2130
- unsigned int othercase;
2131
- if (c < 128)
2132
- othercase = fcc[c];
2133
- else
2134
- /* If we have Unicode property support, we can use it to test the
2135
- other case of the character. */
2136
- #ifdef SUPPORT_UCP
2137
- othercase = UCD_OTHERCASE(c);
2138
- #else
2139
- othercase = NOTACHAR;
2140
- #endif
2141
-
2142
- if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2143
- }
2144
- }
2145
- else
2146
- #endif /* SUPPORT_UTF */
2147
- /* Not UTF mode */
2148
- {
2149
- if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2150
- { ADD_NEW(state_offset + 2, 0); }
2151
- }
2152
- break;
2153
-
2154
-
2155
- #ifdef SUPPORT_UCP
2156
- /*-----------------------------------------------------------------*/
2157
- /* This is a tricky one because it can match more than one character.
2158
- Find out how many characters to skip, and then set up a negative state
2159
- to wait for them to pass before continuing. */
2160
-
2161
- case OP_EXTUNI:
2162
- if (clen > 0)
2163
- {
2164
- int lgb, rgb;
2165
- const pcre_uchar *nptr = ptr + clen;
2166
- int ncount = 0;
2167
- lgb = UCD_GRAPHBREAK(c);
2168
- while (nptr < end_subject)
2169
- {
2170
- dlen = 1;
2171
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2172
- rgb = UCD_GRAPHBREAK(d);
2173
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2174
- ncount++;
2175
- lgb = rgb;
2176
- nptr += dlen;
2177
- }
2178
- if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2179
- reset_could_continue = TRUE;
2180
- ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2181
- }
2182
- break;
2183
- #endif
2184
-
2185
- /*-----------------------------------------------------------------*/
2186
- /* This is a tricky like EXTUNI because it too can match more than one
2187
- character (when CR is followed by LF). In this case, set up a negative
2188
- state to wait for one character to pass before continuing. */
2189
-
2190
- case OP_ANYNL:
2191
- if (clen > 0) switch(c)
2192
- {
2193
- case CHAR_VT:
2194
- case CHAR_FF:
2195
- case CHAR_NEL:
2196
- #ifndef EBCDIC
2197
- case 0x2028:
2198
- case 0x2029:
2199
- #endif /* Not EBCDIC */
2200
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2201
-
2202
- case CHAR_LF:
2203
- ADD_NEW(state_offset + 1, 0);
2204
- break;
2205
-
2206
- case CHAR_CR:
2207
- if (ptr + 1 >= end_subject)
2208
- {
2209
- ADD_NEW(state_offset + 1, 0);
2210
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2211
- reset_could_continue = TRUE;
2212
- }
2213
- else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
2214
- {
2215
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2216
- }
2217
- else
2218
- {
2219
- ADD_NEW(state_offset + 1, 0);
2220
- }
2221
- break;
2222
- }
2223
- break;
2224
-
2225
- /*-----------------------------------------------------------------*/
2226
- case OP_NOT_VSPACE:
2227
- if (clen > 0) switch(c)
2228
- {
2229
- VSPACE_CASES:
2230
- break;
2231
-
2232
- default:
2233
- ADD_NEW(state_offset + 1, 0);
2234
- break;
2235
- }
2236
- break;
2237
-
2238
- /*-----------------------------------------------------------------*/
2239
- case OP_VSPACE:
2240
- if (clen > 0) switch(c)
2241
- {
2242
- VSPACE_CASES:
2243
- ADD_NEW(state_offset + 1, 0);
2244
- break;
2245
-
2246
- default:
2247
- break;
2248
- }
2249
- break;
2250
-
2251
- /*-----------------------------------------------------------------*/
2252
- case OP_NOT_HSPACE:
2253
- if (clen > 0) switch(c)
2254
- {
2255
- HSPACE_CASES:
2256
- break;
2257
-
2258
- default:
2259
- ADD_NEW(state_offset + 1, 0);
2260
- break;
2261
- }
2262
- break;
2263
-
2264
- /*-----------------------------------------------------------------*/
2265
- case OP_HSPACE:
2266
- if (clen > 0) switch(c)
2267
- {
2268
- HSPACE_CASES:
2269
- ADD_NEW(state_offset + 1, 0);
2270
- break;
2271
-
2272
- default:
2273
- break;
2274
- }
2275
- break;
2276
-
2277
- /*-----------------------------------------------------------------*/
2278
- /* Match a negated single character casefully. */
2279
-
2280
- case OP_NOT:
2281
- if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2282
- break;
2283
-
2284
- /*-----------------------------------------------------------------*/
2285
- /* Match a negated single character caselessly. */
2286
-
2287
- case OP_NOTI:
2288
- if (clen > 0)
2289
- {
2290
- pcre_uint32 otherd;
2291
- #ifdef SUPPORT_UTF
2292
- if (utf && d >= 128)
2293
- {
2294
- #ifdef SUPPORT_UCP
2295
- otherd = UCD_OTHERCASE(d);
2296
- #else
2297
- otherd = d;
2298
- #endif /* SUPPORT_UCP */
2299
- }
2300
- else
2301
- #endif /* SUPPORT_UTF */
2302
- otherd = TABLE_GET(d, fcc, d);
2303
- if (c != d && c != otherd)
2304
- { ADD_NEW(state_offset + dlen + 1, 0); }
2305
- }
2306
- break;
2307
-
2308
- /*-----------------------------------------------------------------*/
2309
- case OP_PLUSI:
2310
- case OP_MINPLUSI:
2311
- case OP_POSPLUSI:
2312
- case OP_NOTPLUSI:
2313
- case OP_NOTMINPLUSI:
2314
- case OP_NOTPOSPLUSI:
2315
- caseless = TRUE;
2316
- codevalue -= OP_STARI - OP_STAR;
2317
-
2318
- /* Fall through */
2319
- case OP_PLUS:
2320
- case OP_MINPLUS:
2321
- case OP_POSPLUS:
2322
- case OP_NOTPLUS:
2323
- case OP_NOTMINPLUS:
2324
- case OP_NOTPOSPLUS:
2325
- count = current_state->count; /* Already matched */
2326
- if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2327
- if (clen > 0)
2328
- {
2329
- pcre_uint32 otherd = NOTACHAR;
2330
- if (caseless)
2331
- {
2332
- #ifdef SUPPORT_UTF
2333
- if (utf && d >= 128)
2334
- {
2335
- #ifdef SUPPORT_UCP
2336
- otherd = UCD_OTHERCASE(d);
2337
- #endif /* SUPPORT_UCP */
2338
- }
2339
- else
2340
- #endif /* SUPPORT_UTF */
2341
- otherd = TABLE_GET(d, fcc, d);
2342
- }
2343
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2344
- {
2345
- if (count > 0 &&
2346
- (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
2347
- {
2348
- active_count--; /* Remove non-match possibility */
2349
- next_active_state--;
2350
- }
2351
- count++;
2352
- ADD_NEW(state_offset, count);
2353
- }
2354
- }
2355
- break;
2356
-
2357
- /*-----------------------------------------------------------------*/
2358
- case OP_QUERYI:
2359
- case OP_MINQUERYI:
2360
- case OP_POSQUERYI:
2361
- case OP_NOTQUERYI:
2362
- case OP_NOTMINQUERYI:
2363
- case OP_NOTPOSQUERYI:
2364
- caseless = TRUE;
2365
- codevalue -= OP_STARI - OP_STAR;
2366
- /* Fall through */
2367
- case OP_QUERY:
2368
- case OP_MINQUERY:
2369
- case OP_POSQUERY:
2370
- case OP_NOTQUERY:
2371
- case OP_NOTMINQUERY:
2372
- case OP_NOTPOSQUERY:
2373
- ADD_ACTIVE(state_offset + dlen + 1, 0);
2374
- if (clen > 0)
2375
- {
2376
- pcre_uint32 otherd = NOTACHAR;
2377
- if (caseless)
2378
- {
2379
- #ifdef SUPPORT_UTF
2380
- if (utf && d >= 128)
2381
- {
2382
- #ifdef SUPPORT_UCP
2383
- otherd = UCD_OTHERCASE(d);
2384
- #endif /* SUPPORT_UCP */
2385
- }
2386
- else
2387
- #endif /* SUPPORT_UTF */
2388
- otherd = TABLE_GET(d, fcc, d);
2389
- }
2390
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2391
- {
2392
- if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2393
- {
2394
- active_count--; /* Remove non-match possibility */
2395
- next_active_state--;
2396
- }
2397
- ADD_NEW(state_offset + dlen + 1, 0);
2398
- }
2399
- }
2400
- break;
2401
-
2402
- /*-----------------------------------------------------------------*/
2403
- case OP_STARI:
2404
- case OP_MINSTARI:
2405
- case OP_POSSTARI:
2406
- case OP_NOTSTARI:
2407
- case OP_NOTMINSTARI:
2408
- case OP_NOTPOSSTARI:
2409
- caseless = TRUE;
2410
- codevalue -= OP_STARI - OP_STAR;
2411
- /* Fall through */
2412
- case OP_STAR:
2413
- case OP_MINSTAR:
2414
- case OP_POSSTAR:
2415
- case OP_NOTSTAR:
2416
- case OP_NOTMINSTAR:
2417
- case OP_NOTPOSSTAR:
2418
- ADD_ACTIVE(state_offset + dlen + 1, 0);
2419
- if (clen > 0)
2420
- {
2421
- pcre_uint32 otherd = NOTACHAR;
2422
- if (caseless)
2423
- {
2424
- #ifdef SUPPORT_UTF
2425
- if (utf && d >= 128)
2426
- {
2427
- #ifdef SUPPORT_UCP
2428
- otherd = UCD_OTHERCASE(d);
2429
- #endif /* SUPPORT_UCP */
2430
- }
2431
- else
2432
- #endif /* SUPPORT_UTF */
2433
- otherd = TABLE_GET(d, fcc, d);
2434
- }
2435
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2436
- {
2437
- if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2438
- {
2439
- active_count--; /* Remove non-match possibility */
2440
- next_active_state--;
2441
- }
2442
- ADD_NEW(state_offset, 0);
2443
- }
2444
- }
2445
- break;
2446
-
2447
- /*-----------------------------------------------------------------*/
2448
- case OP_EXACTI:
2449
- case OP_NOTEXACTI:
2450
- caseless = TRUE;
2451
- codevalue -= OP_STARI - OP_STAR;
2452
- /* Fall through */
2453
- case OP_EXACT:
2454
- case OP_NOTEXACT:
2455
- count = current_state->count; /* Number already matched */
2456
- if (clen > 0)
2457
- {
2458
- pcre_uint32 otherd = NOTACHAR;
2459
- if (caseless)
2460
- {
2461
- #ifdef SUPPORT_UTF
2462
- if (utf && d >= 128)
2463
- {
2464
- #ifdef SUPPORT_UCP
2465
- otherd = UCD_OTHERCASE(d);
2466
- #endif /* SUPPORT_UCP */
2467
- }
2468
- else
2469
- #endif /* SUPPORT_UTF */
2470
- otherd = TABLE_GET(d, fcc, d);
2471
- }
2472
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2473
- {
2474
- if (++count >= (int)GET2(code, 1))
2475
- { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2476
- else
2477
- { ADD_NEW(state_offset, count); }
2478
- }
2479
- }
2480
- break;
2481
-
2482
- /*-----------------------------------------------------------------*/
2483
- case OP_UPTOI:
2484
- case OP_MINUPTOI:
2485
- case OP_POSUPTOI:
2486
- case OP_NOTUPTOI:
2487
- case OP_NOTMINUPTOI:
2488
- case OP_NOTPOSUPTOI:
2489
- caseless = TRUE;
2490
- codevalue -= OP_STARI - OP_STAR;
2491
- /* Fall through */
2492
- case OP_UPTO:
2493
- case OP_MINUPTO:
2494
- case OP_POSUPTO:
2495
- case OP_NOTUPTO:
2496
- case OP_NOTMINUPTO:
2497
- case OP_NOTPOSUPTO:
2498
- ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2499
- count = current_state->count; /* Number already matched */
2500
- if (clen > 0)
2501
- {
2502
- pcre_uint32 otherd = NOTACHAR;
2503
- if (caseless)
2504
- {
2505
- #ifdef SUPPORT_UTF
2506
- if (utf && d >= 128)
2507
- {
2508
- #ifdef SUPPORT_UCP
2509
- otherd = UCD_OTHERCASE(d);
2510
- #endif /* SUPPORT_UCP */
2511
- }
2512
- else
2513
- #endif /* SUPPORT_UTF */
2514
- otherd = TABLE_GET(d, fcc, d);
2515
- }
2516
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2517
- {
2518
- if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2519
- {
2520
- active_count--; /* Remove non-match possibility */
2521
- next_active_state--;
2522
- }
2523
- if (++count >= (int)GET2(code, 1))
2524
- { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2525
- else
2526
- { ADD_NEW(state_offset, count); }
2527
- }
2528
- }
2529
- break;
2530
-
2531
-
2532
- /* ========================================================================== */
2533
- /* These are the class-handling opcodes */
2534
-
2535
- case OP_CLASS:
2536
- case OP_NCLASS:
2537
- case OP_XCLASS:
2538
- {
2539
- BOOL isinclass = FALSE;
2540
- int next_state_offset;
2541
- const pcre_uchar *ecode;
2542
-
2543
- /* For a simple class, there is always just a 32-byte table, and we
2544
- can set isinclass from it. */
2545
-
2546
- if (codevalue != OP_XCLASS)
2547
- {
2548
- ecode = code + 1 + (32 / sizeof(pcre_uchar));
2549
- if (clen > 0)
2550
- {
2551
- isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2552
- ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2553
- }
2554
- }
2555
-
2556
- /* An extended class may have a table or a list of single characters,
2557
- ranges, or both, and it may be positive or negative. There's a
2558
- function that sorts all this out. */
2559
-
2560
- else
2561
- {
2562
- ecode = code + GET(code, 1);
2563
- if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2564
- }
2565
-
2566
- /* At this point, isinclass is set for all kinds of class, and ecode
2567
- points to the byte after the end of the class. If there is a
2568
- quantifier, this is where it will be. */
2569
-
2570
- next_state_offset = (int)(ecode - start_code);
2571
-
2572
- switch (*ecode)
2573
- {
2574
- case OP_CRSTAR:
2575
- case OP_CRMINSTAR:
2576
- case OP_CRPOSSTAR:
2577
- ADD_ACTIVE(next_state_offset + 1, 0);
2578
- if (isinclass)
2579
- {
2580
- if (*ecode == OP_CRPOSSTAR)
2581
- {
2582
- active_count--; /* Remove non-match possibility */
2583
- next_active_state--;
2584
- }
2585
- ADD_NEW(state_offset, 0);
2586
- }
2587
- break;
2588
-
2589
- case OP_CRPLUS:
2590
- case OP_CRMINPLUS:
2591
- case OP_CRPOSPLUS:
2592
- count = current_state->count; /* Already matched */
2593
- if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2594
- if (isinclass)
2595
- {
2596
- if (count > 0 && *ecode == OP_CRPOSPLUS)
2597
- {
2598
- active_count--; /* Remove non-match possibility */
2599
- next_active_state--;
2600
- }
2601
- count++;
2602
- ADD_NEW(state_offset, count);
2603
- }
2604
- break;
2605
-
2606
- case OP_CRQUERY:
2607
- case OP_CRMINQUERY:
2608
- case OP_CRPOSQUERY:
2609
- ADD_ACTIVE(next_state_offset + 1, 0);
2610
- if (isinclass)
2611
- {
2612
- if (*ecode == OP_CRPOSQUERY)
2613
- {
2614
- active_count--; /* Remove non-match possibility */
2615
- next_active_state--;
2616
- }
2617
- ADD_NEW(next_state_offset + 1, 0);
2618
- }
2619
- break;
2620
-
2621
- case OP_CRRANGE:
2622
- case OP_CRMINRANGE:
2623
- case OP_CRPOSRANGE:
2624
- count = current_state->count; /* Already matched */
2625
- if (count >= (int)GET2(ecode, 1))
2626
- { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2627
- if (isinclass)
2628
- {
2629
- int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2630
- if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
2631
- {
2632
- active_count--; /* Remove non-match possibility */
2633
- next_active_state--;
2634
- }
2635
- if (++count >= max && max != 0) /* Max 0 => no limit */
2636
- { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2637
- else
2638
- { ADD_NEW(state_offset, count); }
2639
- }
2640
- break;
2641
-
2642
- default:
2643
- if (isinclass) { ADD_NEW(next_state_offset, 0); }
2644
- break;
2645
- }
2646
- }
2647
- break;
2648
-
2649
- /* ========================================================================== */
2650
- /* These are the opcodes for fancy brackets of various kinds. We have
2651
- to use recursion in order to handle them. The "always failing" assertion
2652
- (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2653
- though the other "backtracking verbs" are not supported. */
2654
-
2655
- case OP_FAIL:
2656
- forced_fail++; /* Count FAILs for multiple states */
2657
- break;
2658
-
2659
- case OP_ASSERT:
2660
- case OP_ASSERT_NOT:
2661
- case OP_ASSERTBACK:
2662
- case OP_ASSERTBACK_NOT:
2663
- {
2664
- int rc;
2665
- int local_offsets[2];
2666
- int local_workspace[1000];
2667
- const pcre_uchar *endasscode = code + GET(code, 1);
2668
-
2669
- while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2670
-
2671
- rc = internal_dfa_exec(
2672
- md, /* static match data */
2673
- code, /* this subexpression's code */
2674
- ptr, /* where we currently are */
2675
- (int)(ptr - start_subject), /* start offset */
2676
- local_offsets, /* offset vector */
2677
- sizeof(local_offsets)/sizeof(int), /* size of same */
2678
- local_workspace, /* workspace vector */
2679
- sizeof(local_workspace)/sizeof(int), /* size of same */
2680
- rlevel); /* function recursion level */
2681
-
2682
- if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2683
- if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2684
- { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2685
- }
2686
- break;
2687
-
2688
- /*-----------------------------------------------------------------*/
2689
- case OP_COND:
2690
- case OP_SCOND:
2691
- {
2692
- int local_offsets[1000];
2693
- int local_workspace[1000];
2694
- int codelink = GET(code, 1);
2695
- int condcode;
2696
-
2697
- /* Because of the way auto-callout works during compile, a callout item
2698
- is inserted between OP_COND and an assertion condition. This does not
2699
- happen for the other conditions. */
2700
-
2701
- if (code[LINK_SIZE+1] == OP_CALLOUT)
2702
- {
2703
- rrc = 0;
2704
- if (PUBL(callout) != NULL)
2705
- {
2706
- PUBL(callout_block) cb;
2707
- cb.version = 1; /* Version 1 of the callout block */
2708
- cb.callout_number = code[LINK_SIZE+2];
2709
- cb.offset_vector = offsets;
2710
- #if defined COMPILE_PCRE8
2711
- cb.subject = (PCRE_SPTR)start_subject;
2712
- #elif defined COMPILE_PCRE16
2713
- cb.subject = (PCRE_SPTR16)start_subject;
2714
- #elif defined COMPILE_PCRE32
2715
- cb.subject = (PCRE_SPTR32)start_subject;
2716
- #endif
2717
- cb.subject_length = (int)(end_subject - start_subject);
2718
- cb.start_match = (int)(current_subject - start_subject);
2719
- cb.current_position = (int)(ptr - start_subject);
2720
- cb.pattern_position = GET(code, LINK_SIZE + 3);
2721
- cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2722
- cb.capture_top = 1;
2723
- cb.capture_last = -1;
2724
- cb.callout_data = md->callout_data;
2725
- cb.mark = NULL; /* No (*MARK) support */
2726
- if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
2727
- }
2728
- if (rrc > 0) break; /* Fail this thread */
2729
- code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
2730
- }
2731
-
2732
- condcode = code[LINK_SIZE+1];
2733
-
2734
- /* Back reference conditions and duplicate named recursion conditions
2735
- are not supported */
2736
-
2737
- if (condcode == OP_CREF || condcode == OP_DNCREF ||
2738
- condcode == OP_DNRREF)
2739
- return PCRE_ERROR_DFA_UCOND;
2740
-
2741
- /* The DEFINE condition is always false, and the assertion (?!) is
2742
- converted to OP_FAIL. */
2743
-
2744
- if (condcode == OP_DEF || condcode == OP_FAIL)
2745
- { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2746
-
2747
- /* The only supported version of OP_RREF is for the value RREF_ANY,
2748
- which means "test if in any recursion". We can't test for specifically
2749
- recursed groups. */
2750
-
2751
- else if (condcode == OP_RREF)
2752
- {
2753
- int value = GET2(code, LINK_SIZE + 2);
2754
- if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2755
- if (md->recursive != NULL)
2756
- { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2757
- else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2758
- }
2759
-
2760
- /* Otherwise, the condition is an assertion */
2761
-
2762
- else
2763
- {
2764
- int rc;
2765
- const pcre_uchar *asscode = code + LINK_SIZE + 1;
2766
- const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2767
-
2768
- while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2769
-
2770
- rc = internal_dfa_exec(
2771
- md, /* fixed match data */
2772
- asscode, /* this subexpression's code */
2773
- ptr, /* where we currently are */
2774
- (int)(ptr - start_subject), /* start offset */
2775
- local_offsets, /* offset vector */
2776
- sizeof(local_offsets)/sizeof(int), /* size of same */
2777
- local_workspace, /* workspace vector */
2778
- sizeof(local_workspace)/sizeof(int), /* size of same */
2779
- rlevel); /* function recursion level */
2780
-
2781
- if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2782
- if ((rc >= 0) ==
2783
- (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2784
- { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2785
- else
2786
- { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2787
- }
2788
- }
2789
- break;
2790
-
2791
- /*-----------------------------------------------------------------*/
2792
- case OP_RECURSE:
2793
- {
2794
- dfa_recursion_info *ri;
2795
- int local_offsets[1000];
2796
- int local_workspace[1000];
2797
- const pcre_uchar *callpat = start_code + GET(code, 1);
2798
- int recno = (callpat == md->start_code)? 0 :
2799
- GET2(callpat, 1 + LINK_SIZE);
2800
- int rc;
2801
-
2802
- DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2803
-
2804
- /* Check for repeating a recursion without advancing the subject
2805
- pointer. This should catch convoluted mutual recursions. (Some simple
2806
- cases are caught at compile time.) */
2807
-
2808
- for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2809
- if (recno == ri->group_num && ptr == ri->subject_position)
2810
- return PCRE_ERROR_RECURSELOOP;
2811
-
2812
- /* Remember this recursion and where we started it so as to
2813
- catch infinite loops. */
2814
-
2815
- new_recursive.group_num = recno;
2816
- new_recursive.subject_position = ptr;
2817
- new_recursive.prevrec = md->recursive;
2818
- md->recursive = &new_recursive;
2819
-
2820
- rc = internal_dfa_exec(
2821
- md, /* fixed match data */
2822
- callpat, /* this subexpression's code */
2823
- ptr, /* where we currently are */
2824
- (int)(ptr - start_subject), /* start offset */
2825
- local_offsets, /* offset vector */
2826
- sizeof(local_offsets)/sizeof(int), /* size of same */
2827
- local_workspace, /* workspace vector */
2828
- sizeof(local_workspace)/sizeof(int), /* size of same */
2829
- rlevel); /* function recursion level */
2830
-
2831
- md->recursive = new_recursive.prevrec; /* Done this recursion */
2832
-
2833
- DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2834
- rc));
2835
-
2836
- /* Ran out of internal offsets */
2837
-
2838
- if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
2839
-
2840
- /* For each successful matched substring, set up the next state with a
2841
- count of characters to skip before trying it. Note that the count is in
2842
- characters, not bytes. */
2843
-
2844
- if (rc > 0)
2845
- {
2846
- for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2847
- {
2848
- int charcount = local_offsets[rc+1] - local_offsets[rc];
2849
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2850
- if (utf)
2851
- {
2852
- const pcre_uchar *p = start_subject + local_offsets[rc];
2853
- const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2854
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2855
- }
2856
- #endif
2857
- if (charcount > 0)
2858
- {
2859
- ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
2860
- }
2861
- else
2862
- {
2863
- ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
2864
- }
2865
- }
2866
- }
2867
- else if (rc != PCRE_ERROR_NOMATCH) return rc;
2868
- }
2869
- break;
2870
-
2871
- /*-----------------------------------------------------------------*/
2872
- case OP_BRAPOS:
2873
- case OP_SBRAPOS:
2874
- case OP_CBRAPOS:
2875
- case OP_SCBRAPOS:
2876
- case OP_BRAPOSZERO:
2877
- {
2878
- int charcount, matched_count;
2879
- const pcre_uchar *local_ptr = ptr;
2880
- BOOL allow_zero;
2881
-
2882
- if (codevalue == OP_BRAPOSZERO)
2883
- {
2884
- allow_zero = TRUE;
2885
- codevalue = *(++code); /* Codevalue will be one of above BRAs */
2886
- }
2887
- else allow_zero = FALSE;
2888
-
2889
- /* Loop to match the subpattern as many times as possible as if it were
2890
- a complete pattern. */
2891
-
2892
- for (matched_count = 0;; matched_count++)
2893
- {
2894
- int local_offsets[2];
2895
- int local_workspace[1000];
2896
-
2897
- int rc = internal_dfa_exec(
2898
- md, /* fixed match data */
2899
- code, /* this subexpression's code */
2900
- local_ptr, /* where we currently are */
2901
- (int)(ptr - start_subject), /* start offset */
2902
- local_offsets, /* offset vector */
2903
- sizeof(local_offsets)/sizeof(int), /* size of same */
2904
- local_workspace, /* workspace vector */
2905
- sizeof(local_workspace)/sizeof(int), /* size of same */
2906
- rlevel); /* function recursion level */
2907
-
2908
- /* Failed to match */
2909
-
2910
- if (rc < 0)
2911
- {
2912
- if (rc != PCRE_ERROR_NOMATCH) return rc;
2913
- break;
2914
- }
2915
-
2916
- /* Matched: break the loop if zero characters matched. */
2917
-
2918
- charcount = local_offsets[1] - local_offsets[0];
2919
- if (charcount == 0) break;
2920
- local_ptr += charcount; /* Advance temporary position ptr */
2921
- }
2922
-
2923
- /* At this point we have matched the subpattern matched_count
2924
- times, and local_ptr is pointing to the character after the end of the
2925
- last match. */
2926
-
2927
- if (matched_count > 0 || allow_zero)
2928
- {
2929
- const pcre_uchar *end_subpattern = code;
2930
- int next_state_offset;
2931
-
2932
- do { end_subpattern += GET(end_subpattern, 1); }
2933
- while (*end_subpattern == OP_ALT);
2934
- next_state_offset =
2935
- (int)(end_subpattern - start_code + LINK_SIZE + 1);
2936
-
2937
- /* Optimization: if there are no more active states, and there
2938
- are no new states yet set up, then skip over the subject string
2939
- right here, to save looping. Otherwise, set up the new state to swing
2940
- into action when the end of the matched substring is reached. */
2941
-
2942
- if (i + 1 >= active_count && new_count == 0)
2943
- {
2944
- ptr = local_ptr;
2945
- clen = 0;
2946
- ADD_NEW(next_state_offset, 0);
2947
- }
2948
- else
2949
- {
2950
- const pcre_uchar *p = ptr;
2951
- const pcre_uchar *pp = local_ptr;
2952
- charcount = (int)(pp - p);
2953
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2954
- if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2955
- #endif
2956
- ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2957
- }
2958
- }
2959
- }
2960
- break;
2961
-
2962
- /*-----------------------------------------------------------------*/
2963
- case OP_ONCE:
2964
- case OP_ONCE_NC:
2965
- {
2966
- int local_offsets[2];
2967
- int local_workspace[1000];
2968
-
2969
- int rc = internal_dfa_exec(
2970
- md, /* fixed match data */
2971
- code, /* this subexpression's code */
2972
- ptr, /* where we currently are */
2973
- (int)(ptr - start_subject), /* start offset */
2974
- local_offsets, /* offset vector */
2975
- sizeof(local_offsets)/sizeof(int), /* size of same */
2976
- local_workspace, /* workspace vector */
2977
- sizeof(local_workspace)/sizeof(int), /* size of same */
2978
- rlevel); /* function recursion level */
2979
-
2980
- if (rc >= 0)
2981
- {
2982
- const pcre_uchar *end_subpattern = code;
2983
- int charcount = local_offsets[1] - local_offsets[0];
2984
- int next_state_offset, repeat_state_offset;
2985
-
2986
- do { end_subpattern += GET(end_subpattern, 1); }
2987
- while (*end_subpattern == OP_ALT);
2988
- next_state_offset =
2989
- (int)(end_subpattern - start_code + LINK_SIZE + 1);
2990
-
2991
- /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2992
- arrange for the repeat state also to be added to the relevant list.
2993
- Calculate the offset, or set -1 for no repeat. */
2994
-
2995
- repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2996
- *end_subpattern == OP_KETRMIN)?
2997
- (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2998
-
2999
- /* If we have matched an empty string, add the next state at the
3000
- current character pointer. This is important so that the duplicate
3001
- checking kicks in, which is what breaks infinite loops that match an
3002
- empty string. */
3003
-
3004
- if (charcount == 0)
3005
- {
3006
- ADD_ACTIVE(next_state_offset, 0);
3007
- }
3008
-
3009
- /* Optimization: if there are no more active states, and there
3010
- are no new states yet set up, then skip over the subject string
3011
- right here, to save looping. Otherwise, set up the new state to swing
3012
- into action when the end of the matched substring is reached. */
3013
-
3014
- else if (i + 1 >= active_count && new_count == 0)
3015
- {
3016
- ptr += charcount;
3017
- clen = 0;
3018
- ADD_NEW(next_state_offset, 0);
3019
-
3020
- /* If we are adding a repeat state at the new character position,
3021
- we must fudge things so that it is the only current state.
3022
- Otherwise, it might be a duplicate of one we processed before, and
3023
- that would cause it to be skipped. */
3024
-
3025
- if (repeat_state_offset >= 0)
3026
- {
3027
- next_active_state = active_states;
3028
- active_count = 0;
3029
- i = -1;
3030
- ADD_ACTIVE(repeat_state_offset, 0);
3031
- }
3032
- }
3033
- else
3034
- {
3035
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3036
- if (utf)
3037
- {
3038
- const pcre_uchar *p = start_subject + local_offsets[0];
3039
- const pcre_uchar *pp = start_subject + local_offsets[1];
3040
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3041
- }
3042
- #endif
3043
- ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3044
- if (repeat_state_offset >= 0)
3045
- { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
3046
- }
3047
- }
3048
- else if (rc != PCRE_ERROR_NOMATCH) return rc;
3049
- }
3050
- break;
3051
-
3052
-
3053
- /* ========================================================================== */
3054
- /* Handle callouts */
3055
-
3056
- case OP_CALLOUT:
3057
- rrc = 0;
3058
- if (PUBL(callout) != NULL)
3059
- {
3060
- PUBL(callout_block) cb;
3061
- cb.version = 1; /* Version 1 of the callout block */
3062
- cb.callout_number = code[1];
3063
- cb.offset_vector = offsets;
3064
- #if defined COMPILE_PCRE8
3065
- cb.subject = (PCRE_SPTR)start_subject;
3066
- #elif defined COMPILE_PCRE16
3067
- cb.subject = (PCRE_SPTR16)start_subject;
3068
- #elif defined COMPILE_PCRE32
3069
- cb.subject = (PCRE_SPTR32)start_subject;
3070
- #endif
3071
- cb.subject_length = (int)(end_subject - start_subject);
3072
- cb.start_match = (int)(current_subject - start_subject);
3073
- cb.current_position = (int)(ptr - start_subject);
3074
- cb.pattern_position = GET(code, 2);
3075
- cb.next_item_length = GET(code, 2 + LINK_SIZE);
3076
- cb.capture_top = 1;
3077
- cb.capture_last = -1;
3078
- cb.callout_data = md->callout_data;
3079
- cb.mark = NULL; /* No (*MARK) support */
3080
- if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
3081
- }
3082
- if (rrc == 0)
3083
- { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3084
- break;
3085
-
3086
-
3087
- /* ========================================================================== */
3088
- default: /* Unsupported opcode */
3089
- return PCRE_ERROR_DFA_UITEM;
3090
- }
3091
-
3092
- NEXT_ACTIVE_STATE: continue;
3093
-
3094
- } /* End of loop scanning active states */
3095
-
3096
- /* We have finished the processing at the current subject character. If no
3097
- new states have been set for the next character, we have found all the
3098
- matches that we are going to find. If we are at the top level and partial
3099
- matching has been requested, check for appropriate conditions.
3100
-
3101
- The "forced_fail" variable counts the number of (*F) encountered for the
3102
- character. If it is equal to the original active_count (saved in
3103
- workspace[1]) it means that (*F) was found on every active state. In this
3104
- case we don't want to give a partial match.
3105
-
3106
- The "could_continue" variable is true if a state could have continued but
3107
- for the fact that the end of the subject was reached. */
3108
-
3109
- if (new_count <= 0)
3110
- {
3111
- if (rlevel == 1 && /* Top level, and */
3112
- could_continue && /* Some could go on, and */
3113
- forced_fail != workspace[1] && /* Not all forced fail & */
3114
- ( /* either... */
3115
- (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
3116
- || /* or... */
3117
- ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
3118
- match_count < 0) /* no matches */
3119
- ) && /* And... */
3120
- (
3121
- partial_newline || /* Either partial NL */
3122
- ( /* or ... */
3123
- ptr >= end_subject && /* End of subject and */
3124
- ptr > md->start_used_ptr) /* Inspected non-empty string */
3125
- )
3126
- )
3127
- match_count = PCRE_ERROR_PARTIAL;
3128
- DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3129
- "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3130
- rlevel*2-2, SP));
3131
- break; /* In effect, "return", but see the comment below */
3132
- }
3133
-
3134
- /* One or more states are active for the next character. */
3135
-
3136
- ptr += clen; /* Advance to next subject character */
3137
- } /* Loop to move along the subject string */
3138
-
3139
- /* Control gets here from "break" a few lines above. We do it this way because
3140
- if we use "return" above, we have compiler trouble. Some compilers warn if
3141
- there's nothing here because they think the function doesn't return a value. On
3142
- the other hand, if we put a dummy statement here, some more clever compilers
3143
- complain that it can't be reached. Sigh. */
3144
-
3145
- return match_count;
3146
- }
3147
-
3148
-
3149
-
3150
-
3151
- /*************************************************
3152
- * Execute a Regular Expression - DFA engine *
3153
- *************************************************/
3154
-
3155
- /* This external function applies a compiled re to a subject string using a DFA
3156
- engine. This function calls the internal function multiple times if the pattern
3157
- is not anchored.
3158
-
3159
- Arguments:
3160
- argument_re points to the compiled expression
3161
- extra_data points to extra data or is NULL
3162
- subject points to the subject string
3163
- length length of subject string (may contain binary zeros)
3164
- start_offset where to start in the subject string
3165
- options option bits
3166
- offsets vector of match offsets
3167
- offsetcount size of same
3168
- workspace workspace vector
3169
- wscount size of same
3170
-
3171
- Returns: > 0 => number of match offset pairs placed in offsets
3172
- = 0 => offsets overflowed; longest matches are present
3173
- -1 => failed to match
3174
- < -1 => some kind of unexpected problem
3175
- */
3176
-
3177
- #if defined COMPILE_PCRE8
3178
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3179
- pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3180
- const char *subject, int length, int start_offset, int options, int *offsets,
3181
- int offsetcount, int *workspace, int wscount)
3182
- #elif defined COMPILE_PCRE16
3183
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3184
- pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3185
- PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3186
- int offsetcount, int *workspace, int wscount)
3187
- #elif defined COMPILE_PCRE32
3188
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3189
- pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3190
- PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3191
- int offsetcount, int *workspace, int wscount)
3192
- #endif
3193
- {
3194
- REAL_PCRE *re = (REAL_PCRE *)argument_re;
3195
- dfa_match_data match_block;
3196
- dfa_match_data *md = &match_block;
3197
- BOOL utf, anchored, startline, firstline;
3198
- const pcre_uchar *current_subject, *end_subject;
3199
- const pcre_study_data *study = NULL;
3200
-
3201
- const pcre_uchar *req_char_ptr;
3202
- const pcre_uint8 *start_bits = NULL;
3203
- BOOL has_first_char = FALSE;
3204
- BOOL has_req_char = FALSE;
3205
- pcre_uchar first_char = 0;
3206
- pcre_uchar first_char2 = 0;
3207
- pcre_uchar req_char = 0;
3208
- pcre_uchar req_char2 = 0;
3209
- int newline;
3210
-
3211
- /* Plausibility checks */
3212
-
3213
- if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3214
- if (re == NULL || subject == NULL || workspace == NULL ||
3215
- (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3216
- if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3217
- if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3218
- if (length < 0) return PCRE_ERROR_BADLENGTH;
3219
- if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3220
-
3221
- /* Check that the first field in the block is the magic number. If it is not,
3222
- return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3223
- REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3224
- means that the pattern is likely compiled with different endianness. */
3225
-
3226
- if (re->magic_number != MAGIC_NUMBER)
3227
- return re->magic_number == REVERSED_MAGIC_NUMBER?
3228
- PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3229
- if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3230
-
3231
- /* If restarting after a partial match, do some sanity checks on the contents
3232
- of the workspace. */
3233
-
3234
- if ((options & PCRE_DFA_RESTART) != 0)
3235
- {
3236
- if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3237
- workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3238
- return PCRE_ERROR_DFA_BADRESTART;
3239
- }
3240
-
3241
- /* Set up study, callout, and table data */
3242
-
3243
- md->tables = re->tables;
3244
- md->callout_data = NULL;
3245
-
3246
- if (extra_data != NULL)
3247
- {
3248
- unsigned long int flags = extra_data->flags;
3249
- if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3250
- study = (const pcre_study_data *)extra_data->study_data;
3251
- if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
3252
- if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3253
- return PCRE_ERROR_DFA_UMLIMIT;
3254
- if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3255
- md->callout_data = extra_data->callout_data;
3256
- if ((flags & PCRE_EXTRA_TABLES) != 0)
3257
- md->tables = extra_data->tables;
3258
- }
3259
-
3260
- /* Set some local values */
3261
-
3262
- current_subject = (const pcre_uchar *)subject + start_offset;
3263
- end_subject = (const pcre_uchar *)subject + length;
3264
- req_char_ptr = current_subject - 1;
3265
-
3266
- #ifdef SUPPORT_UTF
3267
- /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3268
- utf = (re->options & PCRE_UTF8) != 0;
3269
- #else
3270
- utf = FALSE;
3271
- #endif
3272
-
3273
- anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
3274
- (re->options & PCRE_ANCHORED) != 0;
3275
-
3276
- /* The remaining fixed data for passing around. */
3277
-
3278
- md->start_code = (const pcre_uchar *)argument_re +
3279
- re->name_table_offset + re->name_count * re->name_entry_size;
3280
- md->start_subject = (const pcre_uchar *)subject;
3281
- md->end_subject = end_subject;
3282
- md->start_offset = start_offset;
3283
- md->moptions = options;
3284
- md->poptions = re->options;
3285
-
3286
- /* If the BSR option is not set at match time, copy what was set
3287
- at compile time. */
3288
-
3289
- if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
3290
- {
3291
- if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
3292
- md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
3293
- #ifdef BSR_ANYCRLF
3294
- else md->moptions |= PCRE_BSR_ANYCRLF;
3295
- #endif
3296
- }
3297
-
3298
- /* Handle different types of newline. The three bits give eight cases. If
3299
- nothing is set at run time, whatever was used at compile time applies. */
3300
-
3301
- switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3302
- PCRE_NEWLINE_BITS)
3303
- {
3304
- case 0: newline = NEWLINE; break; /* Compile-time default */
3305
- case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
3306
- case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
3307
- case PCRE_NEWLINE_CR+
3308
- PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
3309
- case PCRE_NEWLINE_ANY: newline = -1; break;
3310
- case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3311
- default: return PCRE_ERROR_BADNEWLINE;
3312
- }
3313
-
3314
- if (newline == -2)
3315
- {
3316
- md->nltype = NLTYPE_ANYCRLF;
3317
- }
3318
- else if (newline < 0)
3319
- {
3320
- md->nltype = NLTYPE_ANY;
3321
- }
3322
- else
3323
- {
3324
- md->nltype = NLTYPE_FIXED;
3325
- if (newline > 255)
3326
- {
3327
- md->nllen = 2;
3328
- md->nl[0] = (newline >> 8) & 255;
3329
- md->nl[1] = newline & 255;
3330
- }
3331
- else
3332
- {
3333
- md->nllen = 1;
3334
- md->nl[0] = newline;
3335
- }
3336
- }
3337
-
3338
- /* Check a UTF string if required. If it is invalid and the offsets vector has
3339
- at least two slots, the error offset and error code are passed back in it. */
3340
-
3341
- #ifdef SUPPORT_UTF
3342
- if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3343
- {
3344
- int erroroffset;
3345
- int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3346
- if (errorcode != 0)
3347
- {
3348
- if (offsetcount >= 2)
3349
- {
3350
- offsets[0] = erroroffset;
3351
- offsets[1] = errorcode;
3352
- }
3353
- #if defined COMPILE_PCRE8
3354
- return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3355
- PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3356
- #elif defined COMPILE_PCRE16
3357
- return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3358
- PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3359
- #elif defined COMPILE_PCRE32
3360
- return PCRE_ERROR_BADUTF32;
3361
- #endif
3362
- }
3363
- #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3364
- if (start_offset > 0 && start_offset < length &&
3365
- NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3366
- return PCRE_ERROR_BADUTF8_OFFSET;
3367
- #endif
3368
- }
3369
- #endif
3370
-
3371
- /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3372
- is a feature that makes it possible to save compiled regex and re-use them
3373
- in other programs later. */
3374
-
3375
- if (md->tables == NULL) md->tables = PRIV(default_tables);
3376
-
3377
- /* The "must be at the start of a line" flags are used in a loop when finding
3378
- where to start. */
3379
-
3380
- startline = (re->flags & PCRE_STARTLINE) != 0;
3381
- firstline = (re->options & PCRE_FIRSTLINE) != 0;
3382
-
3383
- /* Set up the first character to match, if available. The first_char value is
3384
- never set for an anchored regular expression, but the anchoring may be forced
3385
- at run time, so we have to test for anchoring. The first char may be unset for
3386
- an unanchored pattern, of course. If there's no first char and the pattern was
3387
- studied, there may be a bitmap of possible first characters. */
3388
-
3389
- if (!anchored)
3390
- {
3391
- if ((re->flags & PCRE_FIRSTSET) != 0)
3392
- {
3393
- has_first_char = TRUE;
3394
- first_char = first_char2 = (pcre_uchar)(re->first_char);
3395
- if ((re->flags & PCRE_FCH_CASELESS) != 0)
3396
- {
3397
- first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3398
- #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3399
- if (utf && first_char > 127)
3400
- first_char2 = UCD_OTHERCASE(first_char);
3401
- #endif
3402
- }
3403
- }
3404
- else
3405
- {
3406
- if (!startline && study != NULL &&
3407
- (study->flags & PCRE_STUDY_MAPPED) != 0)
3408
- start_bits = study->start_bits;
3409
- }
3410
- }
3411
-
3412
- /* For anchored or unanchored matches, there may be a "last known required
3413
- character" set. */
3414
-
3415
- if ((re->flags & PCRE_REQCHSET) != 0)
3416
- {
3417
- has_req_char = TRUE;
3418
- req_char = req_char2 = (pcre_uchar)(re->req_char);
3419
- if ((re->flags & PCRE_RCH_CASELESS) != 0)
3420
- {
3421
- req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3422
- #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3423
- if (utf && req_char > 127)
3424
- req_char2 = UCD_OTHERCASE(req_char);
3425
- #endif
3426
- }
3427
- }
3428
-
3429
- /* Call the main matching function, looping for a non-anchored regex after a
3430
- failed match. If not restarting, perform certain optimizations at the start of
3431
- a match. */
3432
-
3433
- for (;;)
3434
- {
3435
- int rc;
3436
-
3437
- if ((options & PCRE_DFA_RESTART) == 0)
3438
- {
3439
- const pcre_uchar *save_end_subject = end_subject;
3440
-
3441
- /* If firstline is TRUE, the start of the match is constrained to the first
3442
- line of a multiline string. Implement this by temporarily adjusting
3443
- end_subject so that we stop scanning at a newline. If the match fails at
3444
- the newline, later code breaks this loop. */
3445
-
3446
- if (firstline)
3447
- {
3448
- PCRE_PUCHAR t = current_subject;
3449
- #ifdef SUPPORT_UTF
3450
- if (utf)
3451
- {
3452
- while (t < md->end_subject && !IS_NEWLINE(t))
3453
- {
3454
- t++;
3455
- ACROSSCHAR(t < end_subject, *t, t++);
3456
- }
3457
- }
3458
- else
3459
- #endif
3460
- while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3461
- end_subject = t;
3462
- }
3463
-
3464
- /* There are some optimizations that avoid running the match if a known
3465
- starting point is not found. However, there is an option that disables
3466
- these, for testing and for ensuring that all callouts do actually occur.
3467
- The option can be set in the regex by (*NO_START_OPT) or passed in
3468
- match-time options. */
3469
-
3470
- if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3471
- {
3472
- /* Advance to a known first pcre_uchar (i.e. data item) */
3473
-
3474
- if (has_first_char)
3475
- {
3476
- if (first_char != first_char2)
3477
- {
3478
- pcre_uchar csc;
3479
- while (current_subject < end_subject &&
3480
- (csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
3481
- current_subject++;
3482
- }
3483
- else
3484
- while (current_subject < end_subject &&
3485
- UCHAR21TEST(current_subject) != first_char)
3486
- current_subject++;
3487
- }
3488
-
3489
- /* Or to just after a linebreak for a multiline match if possible */
3490
-
3491
- else if (startline)
3492
- {
3493
- if (current_subject > md->start_subject + start_offset)
3494
- {
3495
- #ifdef SUPPORT_UTF
3496
- if (utf)
3497
- {
3498
- while (current_subject < end_subject &&
3499
- !WAS_NEWLINE(current_subject))
3500
- {
3501
- current_subject++;
3502
- ACROSSCHAR(current_subject < end_subject, *current_subject,
3503
- current_subject++);
3504
- }
3505
- }
3506
- else
3507
- #endif
3508
- while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3509
- current_subject++;
3510
-
3511
- /* If we have just passed a CR and the newline option is ANY or
3512
- ANYCRLF, and we are now at a LF, advance the match position by one
3513
- more character. */
3514
-
3515
- if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
3516
- (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3517
- current_subject < end_subject &&
3518
- UCHAR21TEST(current_subject) == CHAR_NL)
3519
- current_subject++;
3520
- }
3521
- }
3522
-
3523
- /* Advance to a non-unique first pcre_uchar after study */
3524
-
3525
- else if (start_bits != NULL)
3526
- {
3527
- while (current_subject < end_subject)
3528
- {
3529
- register pcre_uint32 c = UCHAR21TEST(current_subject);
3530
- #ifndef COMPILE_PCRE8
3531
- if (c > 255) c = 255;
3532
- #endif
3533
- if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
3534
- current_subject++;
3535
- }
3536
- }
3537
- }
3538
-
3539
- /* Restore fudged end_subject */
3540
-
3541
- end_subject = save_end_subject;
3542
-
3543
- /* The following two optimizations are disabled for partial matching or if
3544
- disabling is explicitly requested (and of course, by the test above, this
3545
- code is not obeyed when restarting after a partial match). */
3546
-
3547
- if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3548
- (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3549
- {
3550
- /* If the pattern was studied, a minimum subject length may be set. This
3551
- is a lower bound; no actual string of that length may actually match the
3552
- pattern. Although the value is, strictly, in characters, we treat it as
3553
- in pcre_uchar units to avoid spending too much time in this optimization.
3554
- */
3555
-
3556
- if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3557
- (pcre_uint32)(end_subject - current_subject) < study->minlength)
3558
- return PCRE_ERROR_NOMATCH;
3559
-
3560
- /* If req_char is set, we know that that pcre_uchar must appear in the
3561
- subject for the match to succeed. If the first pcre_uchar is set,
3562
- req_char must be later in the subject; otherwise the test starts at the
3563
- match point. This optimization can save a huge amount of work in patterns
3564
- with nested unlimited repeats that aren't going to match. Writing
3565
- separate code for cased/caseless versions makes it go faster, as does
3566
- using an autoincrement and backing off on a match.
3567
-
3568
- HOWEVER: when the subject string is very, very long, searching to its end
3569
- can take a long time, and give bad performance on quite ordinary
3570
- patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3571
- string... so we don't do this when the string is sufficiently long. */
3572
-
3573
- if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3574
- {
3575
- register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3576
-
3577
- /* We don't need to repeat the search if we haven't yet reached the
3578
- place we found it at last time. */
3579
-
3580
- if (p > req_char_ptr)
3581
- {
3582
- if (req_char != req_char2)
3583
- {
3584
- while (p < end_subject)
3585
- {
3586
- register pcre_uint32 pp = UCHAR21INCTEST(p);
3587
- if (pp == req_char || pp == req_char2) { p--; break; }
3588
- }
3589
- }
3590
- else
3591
- {
3592
- while (p < end_subject)
3593
- {
3594
- if (UCHAR21INCTEST(p) == req_char) { p--; break; }
3595
- }
3596
- }
3597
-
3598
- /* If we can't find the required pcre_uchar, break the matching loop,
3599
- which will cause a return of PCRE_ERROR_NOMATCH. */
3600
-
3601
- if (p >= end_subject) break;
3602
-
3603
- /* If we have found the required pcre_uchar, save the point where we
3604
- found it, so that we don't search again next time round the loop if
3605
- the start hasn't passed this point yet. */
3606
-
3607
- req_char_ptr = p;
3608
- }
3609
- }
3610
- }
3611
- } /* End of optimizations that are done when not restarting */
3612
-
3613
- /* OK, now we can do the business */
3614
-
3615
- md->start_used_ptr = current_subject;
3616
- md->recursive = NULL;
3617
-
3618
- rc = internal_dfa_exec(
3619
- md, /* fixed match data */
3620
- md->start_code, /* this subexpression's code */
3621
- current_subject, /* where we currently are */
3622
- start_offset, /* start offset in subject */
3623
- offsets, /* offset vector */
3624
- offsetcount, /* size of same */
3625
- workspace, /* workspace vector */
3626
- wscount, /* size of same */
3627
- 0); /* function recurse level */
3628
-
3629
- /* Anything other than "no match" means we are done, always; otherwise, carry
3630
- on only if not anchored. */
3631
-
3632
- if (rc != PCRE_ERROR_NOMATCH || anchored)
3633
- {
3634
- if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3635
- {
3636
- offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3637
- offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3638
- if (offsetcount > 2)
3639
- offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3640
- }
3641
- return rc;
3642
- }
3643
-
3644
- /* Advance to the next subject character unless we are at the end of a line
3645
- and firstline is set. */
3646
-
3647
- if (firstline && IS_NEWLINE(current_subject)) break;
3648
- current_subject++;
3649
- #ifdef SUPPORT_UTF
3650
- if (utf)
3651
- {
3652
- ACROSSCHAR(current_subject < end_subject, *current_subject,
3653
- current_subject++);
3654
- }
3655
- #endif
3656
- if (current_subject > end_subject) break;
3657
-
3658
- /* If we have just passed a CR and we are now at a LF, and the pattern does
3659
- not contain any explicit matches for \r or \n, and the newline option is CRLF
3660
- or ANY or ANYCRLF, advance the match position by one more character. */
3661
-
3662
- if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
3663
- current_subject < end_subject &&
3664
- UCHAR21TEST(current_subject) == CHAR_NL &&
3665
- (re->flags & PCRE_HASCRORLF) == 0 &&
3666
- (md->nltype == NLTYPE_ANY ||
3667
- md->nltype == NLTYPE_ANYCRLF ||
3668
- md->nllen == 2))
3669
- current_subject++;
3670
-
3671
- } /* "Bumpalong" loop */
3672
-
3673
- return PCRE_ERROR_NOMATCH;
3674
- }
3675
-
3676
- /* End of pcre_dfa_exec.c */