rugged 0.28.4.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (391)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/ext/rugged/extconf.rb +3 -1
  4. data/ext/rugged/rugged.c +35 -31
  5. data/ext/rugged/rugged.h +13 -0
  6. data/ext/rugged/rugged_blob.c +11 -9
  7. data/ext/rugged/rugged_commit.c +17 -15
  8. data/ext/rugged/rugged_config.c +1 -1
  9. data/ext/rugged/rugged_diff.c +4 -26
  10. data/ext/rugged/rugged_index.c +4 -2
  11. data/ext/rugged/rugged_note.c +5 -3
  12. data/ext/rugged/rugged_object.c +57 -10
  13. data/ext/rugged/rugged_rebase.c +3 -1
  14. data/ext/rugged/rugged_remote.c +32 -8
  15. data/ext/rugged/rugged_repo.c +232 -17
  16. data/ext/rugged/rugged_tag.c +8 -6
  17. data/ext/rugged/rugged_tree.c +18 -16
  18. data/lib/rugged/commit.rb +1 -2
  19. data/lib/rugged/repository.rb +5 -6
  20. data/lib/rugged/submodule_collection.rb +4 -4
  21. data/lib/rugged/version.rb +1 -1
  22. data/vendor/libgit2/AUTHORS +1 -0
  23. data/vendor/libgit2/CMakeLists.txt +39 -19
  24. data/vendor/libgit2/COPYING +28 -0
  25. data/vendor/libgit2/cmake/Modules/EnableWarnings.cmake +5 -1
  26. data/vendor/libgit2/cmake/Modules/FindCoreFoundation.cmake +2 -2
  27. data/vendor/libgit2/cmake/Modules/FindGSSAPI.cmake +1 -1
  28. data/vendor/libgit2/cmake/Modules/FindGSSFramework.cmake +28 -0
  29. data/vendor/libgit2/cmake/Modules/FindPCRE.cmake +38 -0
  30. data/vendor/libgit2/cmake/Modules/FindPCRE2.cmake +37 -0
  31. data/vendor/libgit2/cmake/Modules/FindSecurity.cmake +2 -2
  32. data/vendor/libgit2/cmake/Modules/FindStatNsec.cmake +6 -0
  33. data/vendor/libgit2/cmake/Modules/PkgBuildConfig.cmake +77 -0
  34. data/vendor/libgit2/cmake/Modules/SanitizeBool.cmake +20 -0
  35. data/vendor/libgit2/cmake/Modules/SelectGSSAPI.cmake +56 -0
  36. data/vendor/libgit2/cmake/Modules/SelectHTTPSBackend.cmake +127 -0
  37. data/vendor/libgit2/cmake/Modules/SelectHashes.cmake +69 -0
  38. data/vendor/libgit2/deps/http-parser/http_parser.c +11 -6
  39. data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +23 -0
  40. data/vendor/libgit2/deps/ntlmclient/compat.h +55 -0
  41. data/vendor/libgit2/deps/ntlmclient/crypt.h +64 -0
  42. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +120 -0
  43. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.h +18 -0
  44. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +145 -0
  45. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.h +18 -0
  46. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +130 -0
  47. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.h +21 -0
  48. data/vendor/libgit2/deps/ntlmclient/ntlm.c +1422 -0
  49. data/vendor/libgit2/deps/ntlmclient/ntlm.h +174 -0
  50. data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +320 -0
  51. data/vendor/libgit2/deps/ntlmclient/unicode.h +36 -0
  52. data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +445 -0
  53. data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +201 -0
  54. data/vendor/libgit2/deps/ntlmclient/utf8.h +1257 -0
  55. data/vendor/libgit2/deps/ntlmclient/util.c +21 -0
  56. data/vendor/libgit2/deps/ntlmclient/util.h +14 -0
  57. data/vendor/libgit2/deps/pcre/CMakeLists.txt +140 -0
  58. data/vendor/libgit2/deps/pcre/COPYING +5 -0
  59. data/vendor/libgit2/deps/pcre/cmake/COPYING-CMAKE-SCRIPTS +22 -0
  60. data/vendor/libgit2/deps/pcre/cmake/FindEditline.cmake +17 -0
  61. data/vendor/libgit2/deps/pcre/cmake/FindPackageHandleStandardArgs.cmake +58 -0
  62. data/vendor/libgit2/deps/pcre/cmake/FindReadline.cmake +29 -0
  63. data/vendor/libgit2/deps/pcre/config.h.in +57 -0
  64. data/vendor/libgit2/deps/pcre/pcre.h +641 -0
  65. data/vendor/libgit2/deps/pcre/pcre_byte_order.c +319 -0
  66. data/vendor/libgit2/deps/pcre/pcre_chartables.c +198 -0
  67. data/vendor/libgit2/deps/pcre/pcre_compile.c +9800 -0
  68. data/vendor/libgit2/deps/pcre/pcre_config.c +190 -0
  69. data/vendor/libgit2/deps/pcre/pcre_dfa_exec.c +3676 -0
  70. data/vendor/libgit2/deps/pcre/pcre_exec.c +7173 -0
  71. data/vendor/libgit2/deps/pcre/pcre_fullinfo.c +245 -0
  72. data/vendor/libgit2/deps/pcre/pcre_get.c +669 -0
  73. data/vendor/libgit2/deps/pcre/pcre_globals.c +86 -0
  74. data/vendor/libgit2/deps/pcre/pcre_internal.h +2787 -0
  75. data/vendor/libgit2/deps/pcre/pcre_jit_compile.c +11913 -0
  76. data/vendor/libgit2/deps/pcre/pcre_maketables.c +156 -0
  77. data/vendor/libgit2/deps/pcre/pcre_newline.c +210 -0
  78. data/vendor/libgit2/deps/pcre/pcre_ord2utf8.c +94 -0
  79. data/vendor/libgit2/deps/pcre/pcre_printint.c +834 -0
  80. data/vendor/libgit2/deps/pcre/pcre_refcount.c +92 -0
  81. data/vendor/libgit2/deps/pcre/pcre_string_utils.c +211 -0
  82. data/vendor/libgit2/deps/pcre/pcre_study.c +1686 -0
  83. data/vendor/libgit2/deps/pcre/pcre_tables.c +727 -0
  84. data/vendor/libgit2/deps/pcre/pcre_ucd.c +3644 -0
  85. data/vendor/libgit2/deps/pcre/pcre_valid_utf8.c +301 -0
  86. data/vendor/libgit2/deps/pcre/pcre_version.c +98 -0
  87. data/vendor/libgit2/deps/pcre/pcre_xclass.c +268 -0
  88. data/vendor/libgit2/deps/pcre/pcreposix.c +421 -0
  89. data/vendor/libgit2/deps/pcre/pcreposix.h +117 -0
  90. data/vendor/libgit2/deps/pcre/ucp.h +224 -0
  91. data/vendor/libgit2/deps/zlib/adler32.c +0 -7
  92. data/vendor/libgit2/deps/zlib/crc32.c +0 -7
  93. data/vendor/libgit2/include/git2.h +2 -0
  94. data/vendor/libgit2/include/git2/apply.h +22 -2
  95. data/vendor/libgit2/include/git2/attr.h +23 -13
  96. data/vendor/libgit2/include/git2/blame.h +2 -2
  97. data/vendor/libgit2/include/git2/blob.h +44 -12
  98. data/vendor/libgit2/include/git2/branch.h +74 -57
  99. data/vendor/libgit2/include/git2/buffer.h +20 -14
  100. data/vendor/libgit2/include/git2/cert.h +135 -0
  101. data/vendor/libgit2/include/git2/checkout.h +46 -14
  102. data/vendor/libgit2/include/git2/cherrypick.h +3 -3
  103. data/vendor/libgit2/include/git2/clone.h +2 -2
  104. data/vendor/libgit2/include/git2/commit.h +23 -1
  105. data/vendor/libgit2/include/git2/common.h +15 -6
  106. data/vendor/libgit2/include/git2/config.h +12 -12
  107. data/vendor/libgit2/include/git2/cred_helpers.h +4 -42
  108. data/vendor/libgit2/include/git2/credential.h +314 -0
  109. data/vendor/libgit2/include/git2/credential_helpers.h +52 -0
  110. data/vendor/libgit2/include/git2/deprecated.h +321 -3
  111. data/vendor/libgit2/include/git2/describe.h +4 -4
  112. data/vendor/libgit2/include/git2/diff.h +16 -14
  113. data/vendor/libgit2/include/git2/errors.h +4 -2
  114. data/vendor/libgit2/include/git2/filter.h +8 -0
  115. data/vendor/libgit2/include/git2/index.h +2 -1
  116. data/vendor/libgit2/include/git2/indexer.h +48 -4
  117. data/vendor/libgit2/include/git2/merge.h +6 -10
  118. data/vendor/libgit2/include/git2/net.h +0 -5
  119. data/vendor/libgit2/include/git2/object.h +2 -14
  120. data/vendor/libgit2/include/git2/odb.h +3 -2
  121. data/vendor/libgit2/include/git2/odb_backend.h +5 -4
  122. data/vendor/libgit2/include/git2/oid.h +11 -6
  123. data/vendor/libgit2/include/git2/pack.h +12 -1
  124. data/vendor/libgit2/include/git2/proxy.h +6 -4
  125. data/vendor/libgit2/include/git2/rebase.h +46 -2
  126. data/vendor/libgit2/include/git2/refs.h +19 -0
  127. data/vendor/libgit2/include/git2/remote.h +40 -15
  128. data/vendor/libgit2/include/git2/repository.h +29 -6
  129. data/vendor/libgit2/include/git2/revert.h +1 -1
  130. data/vendor/libgit2/include/git2/revwalk.h +7 -3
  131. data/vendor/libgit2/include/git2/stash.h +4 -4
  132. data/vendor/libgit2/include/git2/status.h +25 -16
  133. data/vendor/libgit2/include/git2/submodule.h +20 -3
  134. data/vendor/libgit2/include/git2/sys/alloc.h +9 -9
  135. data/vendor/libgit2/include/git2/sys/cred.h +15 -0
  136. data/vendor/libgit2/include/git2/sys/credential.h +90 -0
  137. data/vendor/libgit2/include/git2/sys/index.h +4 -2
  138. data/vendor/libgit2/include/git2/sys/mempack.h +2 -1
  139. data/vendor/libgit2/include/git2/sys/merge.h +1 -1
  140. data/vendor/libgit2/include/git2/sys/odb_backend.h +48 -4
  141. data/vendor/libgit2/include/git2/sys/refdb_backend.h +164 -21
  142. data/vendor/libgit2/include/git2/sys/repository.h +17 -6
  143. data/vendor/libgit2/include/git2/sys/transport.h +4 -4
  144. data/vendor/libgit2/include/git2/tag.h +11 -2
  145. data/vendor/libgit2/include/git2/trace.h +2 -2
  146. data/vendor/libgit2/include/git2/transport.h +11 -340
  147. data/vendor/libgit2/include/git2/tree.h +5 -3
  148. data/vendor/libgit2/include/git2/types.h +4 -89
  149. data/vendor/libgit2/include/git2/version.h +5 -5
  150. data/vendor/libgit2/include/git2/worktree.h +5 -5
  151. data/vendor/libgit2/src/CMakeLists.txt +99 -236
  152. data/vendor/libgit2/src/alloc.c +2 -14
  153. data/vendor/libgit2/src/{stdalloc.c → allocators/stdalloc.c} +3 -4
  154. data/vendor/libgit2/src/{stdalloc.h → allocators/stdalloc.h} +4 -4
  155. data/vendor/libgit2/src/allocators/win32_crtdbg.c +118 -0
  156. data/vendor/libgit2/src/{transports/cred.h → allocators/win32_crtdbg.h} +5 -4
  157. data/vendor/libgit2/src/apply.c +60 -30
  158. data/vendor/libgit2/src/attr.c +70 -64
  159. data/vendor/libgit2/src/attr_file.c +189 -96
  160. data/vendor/libgit2/src/attr_file.h +9 -9
  161. data/vendor/libgit2/src/attrcache.c +48 -48
  162. data/vendor/libgit2/src/attrcache.h +2 -1
  163. data/vendor/libgit2/src/blame.c +17 -5
  164. data/vendor/libgit2/src/blame.h +1 -1
  165. data/vendor/libgit2/src/blame_git.c +21 -7
  166. data/vendor/libgit2/src/blob.c +81 -17
  167. data/vendor/libgit2/src/blob.h +2 -2
  168. data/vendor/libgit2/src/branch.c +60 -32
  169. data/vendor/libgit2/src/buffer.c +19 -7
  170. data/vendor/libgit2/src/buffer.h +1 -0
  171. data/vendor/libgit2/src/cache.c +33 -36
  172. data/vendor/libgit2/src/cache.h +1 -1
  173. data/vendor/libgit2/src/cc-compat.h +5 -0
  174. data/vendor/libgit2/src/checkout.c +26 -16
  175. data/vendor/libgit2/src/cherrypick.c +9 -3
  176. data/vendor/libgit2/src/clone.c +29 -7
  177. data/vendor/libgit2/src/clone.h +4 -0
  178. data/vendor/libgit2/src/commit.c +70 -22
  179. data/vendor/libgit2/src/commit.h +6 -0
  180. data/vendor/libgit2/src/commit_list.c +28 -76
  181. data/vendor/libgit2/src/commit_list.h +2 -2
  182. data/vendor/libgit2/src/common.h +3 -75
  183. data/vendor/libgit2/src/config.c +31 -40
  184. data/vendor/libgit2/src/config.h +7 -6
  185. data/vendor/libgit2/src/config_backend.h +12 -0
  186. data/vendor/libgit2/src/config_cache.c +39 -39
  187. data/vendor/libgit2/src/config_entries.c +69 -99
  188. data/vendor/libgit2/src/config_entries.h +1 -0
  189. data/vendor/libgit2/src/config_file.c +346 -380
  190. data/vendor/libgit2/src/config_mem.c +12 -16
  191. data/vendor/libgit2/src/config_parse.c +49 -29
  192. data/vendor/libgit2/src/config_parse.h +13 -12
  193. data/vendor/libgit2/src/config_snapshot.c +206 -0
  194. data/vendor/libgit2/src/crlf.c +14 -14
  195. data/vendor/libgit2/src/describe.c +21 -20
  196. data/vendor/libgit2/src/diff.c +43 -58
  197. data/vendor/libgit2/src/diff.h +4 -3
  198. data/vendor/libgit2/src/diff_driver.c +37 -38
  199. data/vendor/libgit2/src/diff_file.c +12 -10
  200. data/vendor/libgit2/src/diff_file.h +2 -2
  201. data/vendor/libgit2/src/diff_generate.c +148 -98
  202. data/vendor/libgit2/src/diff_generate.h +2 -2
  203. data/vendor/libgit2/src/diff_parse.c +1 -1
  204. data/vendor/libgit2/src/diff_print.c +25 -13
  205. data/vendor/libgit2/src/diff_stats.c +1 -1
  206. data/vendor/libgit2/src/diff_tform.c +11 -11
  207. data/vendor/libgit2/src/errors.c +21 -25
  208. data/vendor/libgit2/src/errors.h +81 -0
  209. data/vendor/libgit2/src/features.h.in +9 -2
  210. data/vendor/libgit2/src/fetch.c +7 -2
  211. data/vendor/libgit2/src/fetchhead.c +36 -4
  212. data/vendor/libgit2/src/filebuf.c +6 -10
  213. data/vendor/libgit2/src/filebuf.h +2 -2
  214. data/vendor/libgit2/src/filter.c +16 -8
  215. data/vendor/libgit2/src/{fileops.c → futils.c} +21 -17
  216. data/vendor/libgit2/src/{fileops.h → futils.h} +5 -5
  217. data/vendor/libgit2/src/global.c +12 -40
  218. data/vendor/libgit2/src/global.h +0 -2
  219. data/vendor/libgit2/src/hash.c +61 -0
  220. data/vendor/libgit2/src/hash.h +19 -21
  221. data/vendor/libgit2/src/hash/sha1.h +38 -0
  222. data/vendor/libgit2/src/hash/{hash_collisiondetect.h → sha1/collisiondetect.c} +14 -17
  223. data/vendor/libgit2/src/{sha1_lookup.h → hash/sha1/collisiondetect.h} +8 -8
  224. data/vendor/libgit2/src/hash/{hash_common_crypto.h → sha1/common_crypto.c} +15 -19
  225. data/vendor/libgit2/src/hash/sha1/common_crypto.h +19 -0
  226. data/vendor/libgit2/src/hash/{hash_generic.c → sha1/generic.c} +22 -10
  227. data/vendor/libgit2/src/hash/{hash_generic.h → sha1/generic.h} +4 -14
  228. data/vendor/libgit2/src/hash/{hash_mbedtls.c → sha1/mbedtls.c} +15 -7
  229. data/vendor/libgit2/src/hash/{hash_mbedtls.h → sha1/mbedtls.h} +6 -11
  230. data/vendor/libgit2/src/hash/{hash_openssl.h → sha1/openssl.c} +14 -18
  231. data/vendor/libgit2/src/hash/sha1/openssl.h +19 -0
  232. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.c +14 -3
  233. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.h +0 -0
  234. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.c +0 -0
  235. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.h +0 -0
  236. data/vendor/libgit2/src/hash/{hash_win32.c → sha1/win32.c} +34 -24
  237. data/vendor/libgit2/src/hash/{hash_win32.h → sha1/win32.h} +6 -19
  238. data/vendor/libgit2/src/hashsig.c +1 -1
  239. data/vendor/libgit2/src/idxmap.c +91 -65
  240. data/vendor/libgit2/src/idxmap.h +151 -15
  241. data/vendor/libgit2/src/ignore.c +32 -38
  242. data/vendor/libgit2/src/index.c +105 -83
  243. data/vendor/libgit2/src/index.h +1 -1
  244. data/vendor/libgit2/src/indexer.c +71 -72
  245. data/vendor/libgit2/src/integer.h +39 -4
  246. data/vendor/libgit2/src/iterator.c +40 -35
  247. data/vendor/libgit2/src/iterator.h +8 -8
  248. data/vendor/libgit2/src/map.h +1 -1
  249. data/vendor/libgit2/src/merge.c +78 -51
  250. data/vendor/libgit2/src/merge.h +2 -2
  251. data/vendor/libgit2/src/merge_driver.c +5 -5
  252. data/vendor/libgit2/src/merge_file.c +1 -1
  253. data/vendor/libgit2/src/mwindow.c +18 -23
  254. data/vendor/libgit2/src/mwindow.h +4 -4
  255. data/vendor/libgit2/src/net.c +411 -0
  256. data/vendor/libgit2/src/net.h +57 -0
  257. data/vendor/libgit2/src/netops.c +6 -193
  258. data/vendor/libgit2/src/netops.h +1 -34
  259. data/vendor/libgit2/src/notes.c +8 -5
  260. data/vendor/libgit2/src/object.c +3 -3
  261. data/vendor/libgit2/src/object.h +2 -0
  262. data/vendor/libgit2/src/odb.c +41 -23
  263. data/vendor/libgit2/src/odb.h +3 -2
  264. data/vendor/libgit2/src/odb_loose.c +17 -10
  265. data/vendor/libgit2/src/odb_mempack.c +13 -24
  266. data/vendor/libgit2/src/odb_pack.c +4 -5
  267. data/vendor/libgit2/src/offmap.c +43 -55
  268. data/vendor/libgit2/src/offmap.h +102 -24
  269. data/vendor/libgit2/src/oid.c +19 -8
  270. data/vendor/libgit2/src/oidmap.c +39 -57
  271. data/vendor/libgit2/src/oidmap.h +99 -19
  272. data/vendor/libgit2/src/pack-objects.c +28 -33
  273. data/vendor/libgit2/src/pack-objects.h +1 -1
  274. data/vendor/libgit2/src/pack.c +117 -129
  275. data/vendor/libgit2/src/pack.h +15 -18
  276. data/vendor/libgit2/src/parse.c +10 -0
  277. data/vendor/libgit2/src/parse.h +3 -3
  278. data/vendor/libgit2/src/patch.c +1 -1
  279. data/vendor/libgit2/src/patch_generate.c +2 -2
  280. data/vendor/libgit2/src/patch_parse.c +130 -33
  281. data/vendor/libgit2/src/path.c +43 -6
  282. data/vendor/libgit2/src/path.h +2 -0
  283. data/vendor/libgit2/src/pathspec.c +14 -14
  284. data/vendor/libgit2/src/pool.c +26 -22
  285. data/vendor/libgit2/src/pool.h +7 -7
  286. data/vendor/libgit2/src/posix.c +7 -7
  287. data/vendor/libgit2/src/posix.h +12 -1
  288. data/vendor/libgit2/src/proxy.c +7 -2
  289. data/vendor/libgit2/src/push.c +13 -7
  290. data/vendor/libgit2/src/reader.c +2 -2
  291. data/vendor/libgit2/src/rebase.c +87 -28
  292. data/vendor/libgit2/src/refdb.c +12 -0
  293. data/vendor/libgit2/src/refdb_fs.c +219 -167
  294. data/vendor/libgit2/src/reflog.c +11 -13
  295. data/vendor/libgit2/src/refs.c +39 -23
  296. data/vendor/libgit2/src/refs.h +8 -1
  297. data/vendor/libgit2/src/refspec.c +9 -16
  298. data/vendor/libgit2/src/regexp.c +221 -0
  299. data/vendor/libgit2/src/regexp.h +97 -0
  300. data/vendor/libgit2/src/remote.c +57 -55
  301. data/vendor/libgit2/src/remote.h +2 -2
  302. data/vendor/libgit2/src/repository.c +187 -154
  303. data/vendor/libgit2/src/repository.h +49 -40
  304. data/vendor/libgit2/src/revert.c +8 -3
  305. data/vendor/libgit2/src/revparse.c +18 -19
  306. data/vendor/libgit2/src/revwalk.c +72 -34
  307. data/vendor/libgit2/src/revwalk.h +20 -0
  308. data/vendor/libgit2/src/settings.c +13 -1
  309. data/vendor/libgit2/src/sortedcache.c +12 -26
  310. data/vendor/libgit2/src/sortedcache.h +1 -1
  311. data/vendor/libgit2/src/stash.c +47 -67
  312. data/vendor/libgit2/src/status.c +17 -11
  313. data/vendor/libgit2/src/streams/openssl.c +54 -2
  314. data/vendor/libgit2/src/streams/socket.c +2 -2
  315. data/vendor/libgit2/src/strmap.c +37 -84
  316. data/vendor/libgit2/src/strmap.h +105 -33
  317. data/vendor/libgit2/src/submodule.c +151 -126
  318. data/vendor/libgit2/src/submodule.h +1 -1
  319. data/vendor/libgit2/src/sysdir.c +11 -1
  320. data/vendor/libgit2/src/tag.c +10 -2
  321. data/vendor/libgit2/src/trace.c +1 -1
  322. data/vendor/libgit2/src/trace.h +3 -3
  323. data/vendor/libgit2/src/trailer.c +46 -32
  324. data/vendor/libgit2/src/transaction.c +10 -9
  325. data/vendor/libgit2/src/transports/auth.c +16 -15
  326. data/vendor/libgit2/src/transports/auth.h +18 -11
  327. data/vendor/libgit2/src/transports/auth_negotiate.c +64 -33
  328. data/vendor/libgit2/src/transports/auth_negotiate.h +2 -2
  329. data/vendor/libgit2/src/transports/auth_ntlm.c +223 -0
  330. data/vendor/libgit2/src/transports/auth_ntlm.h +38 -0
  331. data/vendor/libgit2/src/transports/credential.c +476 -0
  332. data/vendor/libgit2/src/transports/{cred_helpers.c → credential_helpers.c} +21 -8
  333. data/vendor/libgit2/src/transports/git.c +11 -16
  334. data/vendor/libgit2/src/transports/http.c +488 -1248
  335. data/vendor/libgit2/src/transports/http.h +4 -1
  336. data/vendor/libgit2/src/transports/httpclient.c +1549 -0
  337. data/vendor/libgit2/src/transports/httpclient.h +190 -0
  338. data/vendor/libgit2/src/transports/local.c +10 -10
  339. data/vendor/libgit2/src/transports/smart.c +19 -19
  340. data/vendor/libgit2/src/transports/smart.h +3 -3
  341. data/vendor/libgit2/src/transports/smart_pkt.c +1 -1
  342. data/vendor/libgit2/src/transports/smart_protocol.c +40 -64
  343. data/vendor/libgit2/src/transports/ssh.c +77 -59
  344. data/vendor/libgit2/src/transports/winhttp.c +272 -242
  345. data/vendor/libgit2/src/tree-cache.c +14 -7
  346. data/vendor/libgit2/src/tree.c +16 -26
  347. data/vendor/libgit2/src/unix/map.c +1 -1
  348. data/vendor/libgit2/src/unix/posix.h +2 -12
  349. data/vendor/libgit2/src/userdiff.h +3 -1
  350. data/vendor/libgit2/src/util.c +51 -53
  351. data/vendor/libgit2/src/util.h +16 -21
  352. data/vendor/libgit2/src/wildmatch.c +320 -0
  353. data/vendor/libgit2/src/wildmatch.h +23 -0
  354. data/vendor/libgit2/src/win32/map.c +3 -5
  355. data/vendor/libgit2/src/win32/path_w32.c +40 -3
  356. data/vendor/libgit2/src/win32/path_w32.h +15 -29
  357. data/vendor/libgit2/src/win32/posix.h +1 -4
  358. data/vendor/libgit2/src/win32/posix_w32.c +47 -5
  359. data/vendor/libgit2/src/win32/precompiled.h +0 -2
  360. data/vendor/libgit2/src/win32/thread.c +5 -10
  361. data/vendor/libgit2/src/win32/w32_buffer.c +7 -3
  362. data/vendor/libgit2/src/win32/w32_common.h +39 -0
  363. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.c +0 -93
  364. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.h +0 -2
  365. data/vendor/libgit2/src/win32/w32_stack.c +4 -9
  366. data/vendor/libgit2/src/win32/w32_stack.h +3 -3
  367. data/vendor/libgit2/src/win32/w32_util.c +31 -0
  368. data/vendor/libgit2/src/win32/w32_util.h +6 -32
  369. data/vendor/libgit2/src/worktree.c +79 -49
  370. data/vendor/libgit2/src/xdiff/xdiffi.c +1 -1
  371. data/vendor/libgit2/src/xdiff/xmerge.c +12 -0
  372. data/vendor/libgit2/src/xdiff/xpatience.c +3 -0
  373. data/vendor/libgit2/src/zstream.c +5 -0
  374. data/vendor/libgit2/src/zstream.h +1 -0
  375. metadata +108 -41
  376. data/vendor/libgit2/deps/regex/CMakeLists.txt +0 -2
  377. data/vendor/libgit2/deps/regex/COPYING +0 -502
  378. data/vendor/libgit2/deps/regex/config.h +0 -7
  379. data/vendor/libgit2/deps/regex/regcomp.c +0 -3857
  380. data/vendor/libgit2/deps/regex/regex.c +0 -92
  381. data/vendor/libgit2/deps/regex/regex.h +0 -582
  382. data/vendor/libgit2/deps/regex/regex_internal.c +0 -1744
  383. data/vendor/libgit2/deps/regex/regex_internal.h +0 -819
  384. data/vendor/libgit2/deps/regex/regexec.c +0 -4369
  385. data/vendor/libgit2/include/git2/inttypes.h +0 -309
  386. data/vendor/libgit2/include/git2/sys/time.h +0 -31
  387. data/vendor/libgit2/libgit2.pc.in +0 -13
  388. data/vendor/libgit2/src/fnmatch.c +0 -248
  389. data/vendor/libgit2/src/fnmatch.h +0 -48
  390. data/vendor/libgit2/src/sha1_lookup.c +0 -35
  391. data/vendor/libgit2/src/transports/cred.c +0 -390
@@ -0,0 +1,190 @@
1
+ /*************************************************
2
+ * Perl-Compatible Regular Expressions *
3
+ *************************************************/
4
+
5
+ /* PCRE is a library of functions to support regular expressions whose syntax
6
+ and semantics are as close as possible to those of the Perl 5 language.
7
+
8
+ Written by Philip Hazel
9
+ Copyright (c) 1997-2012 University of Cambridge
10
+
11
+ -----------------------------------------------------------------------------
12
+ Redistribution and use in source and binary forms, with or without
13
+ modification, are permitted provided that the following conditions are met:
14
+
15
+ * Redistributions of source code must retain the above copyright notice,
16
+ this list of conditions and the following disclaimer.
17
+
18
+ * Redistributions in binary form must reproduce the above copyright
19
+ notice, this list of conditions and the following disclaimer in the
20
+ documentation and/or other materials provided with the distribution.
21
+
22
+ * Neither the name of the University of Cambridge nor the names of its
23
+ contributors may be used to endorse or promote products derived from
24
+ this software without specific prior written permission.
25
+
26
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
+ POSSIBILITY OF SUCH DAMAGE.
37
+ -----------------------------------------------------------------------------
38
+ */
39
+
40
+
41
+ /* This module contains the external function pcre_config(). */
42
+
43
+
44
+ #ifdef HAVE_CONFIG_H
45
+ #include "config.h"
46
+ #endif
47
+
48
+ /* Keep the original link size. */
49
+ static int real_link_size = LINK_SIZE;
50
+
51
+ #include "pcre_internal.h"
52
+
53
+
54
+ /*************************************************
55
+ * Return info about what features are configured *
56
+ *************************************************/
57
+
58
+ /* This function has an extensible interface so that additional items can be
59
+ added compatibly.
60
+
61
+ Arguments:
62
+ what what information is required
63
+ where where to put the information
64
+
65
+ Returns: 0 if data returned, negative on error
66
+ */
67
+
68
+ #if defined COMPILE_PCRE8
69
+ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70
+ pcre_config(int what, void *where)
71
+ #elif defined COMPILE_PCRE16
72
+ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73
+ pcre16_config(int what, void *where)
74
+ #elif defined COMPILE_PCRE32
75
+ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
76
+ pcre32_config(int what, void *where)
77
+ #endif
78
+ {
79
+ switch (what)
80
+ {
81
+ case PCRE_CONFIG_UTF8:
82
+ #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
83
+ *((int *)where) = 0;
84
+ return PCRE_ERROR_BADOPTION;
85
+ #else
86
+ #if defined SUPPORT_UTF
87
+ *((int *)where) = 1;
88
+ #else
89
+ *((int *)where) = 0;
90
+ #endif
91
+ break;
92
+ #endif
93
+
94
+ case PCRE_CONFIG_UTF16:
95
+ #if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
96
+ *((int *)where) = 0;
97
+ return PCRE_ERROR_BADOPTION;
98
+ #else
99
+ #if defined SUPPORT_UTF
100
+ *((int *)where) = 1;
101
+ #else
102
+ *((int *)where) = 0;
103
+ #endif
104
+ break;
105
+ #endif
106
+
107
+ case PCRE_CONFIG_UTF32:
108
+ #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
109
+ *((int *)where) = 0;
110
+ return PCRE_ERROR_BADOPTION;
111
+ #else
112
+ #if defined SUPPORT_UTF
113
+ *((int *)where) = 1;
114
+ #else
115
+ *((int *)where) = 0;
116
+ #endif
117
+ break;
118
+ #endif
119
+
120
+ case PCRE_CONFIG_UNICODE_PROPERTIES:
121
+ #ifdef SUPPORT_UCP
122
+ *((int *)where) = 1;
123
+ #else
124
+ *((int *)where) = 0;
125
+ #endif
126
+ break;
127
+
128
+ case PCRE_CONFIG_JIT:
129
+ #ifdef SUPPORT_JIT
130
+ *((int *)where) = 1;
131
+ #else
132
+ *((int *)where) = 0;
133
+ #endif
134
+ break;
135
+
136
+ case PCRE_CONFIG_JITTARGET:
137
+ #ifdef SUPPORT_JIT
138
+ *((const char **)where) = PRIV(jit_get_target)();
139
+ #else
140
+ *((const char **)where) = NULL;
141
+ #endif
142
+ break;
143
+
144
+ case PCRE_CONFIG_NEWLINE:
145
+ *((int *)where) = NEWLINE;
146
+ break;
147
+
148
+ case PCRE_CONFIG_BSR:
149
+ #ifdef BSR_ANYCRLF
150
+ *((int *)where) = 1;
151
+ #else
152
+ *((int *)where) = 0;
153
+ #endif
154
+ break;
155
+
156
+ case PCRE_CONFIG_LINK_SIZE:
157
+ *((int *)where) = real_link_size;
158
+ break;
159
+
160
+ case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
161
+ *((int *)where) = POSIX_MALLOC_THRESHOLD;
162
+ break;
163
+
164
+ case PCRE_CONFIG_PARENS_LIMIT:
165
+ *((unsigned long int *)where) = PARENS_NEST_LIMIT;
166
+ break;
167
+
168
+ case PCRE_CONFIG_MATCH_LIMIT:
169
+ *((unsigned long int *)where) = MATCH_LIMIT;
170
+ break;
171
+
172
+ case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
173
+ *((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
174
+ break;
175
+
176
+ case PCRE_CONFIG_STACKRECURSE:
177
+ #ifdef NO_RECURSE
178
+ *((int *)where) = 0;
179
+ #else
180
+ *((int *)where) = 1;
181
+ #endif
182
+ break;
183
+
184
+ default: return PCRE_ERROR_BADOPTION;
185
+ }
186
+
187
+ return 0;
188
+ }
189
+
190
+ /* End of pcre_config.c */
@@ -0,0 +1,3676 @@
1
+ /*************************************************
2
+ * Perl-Compatible Regular Expressions *
3
+ *************************************************/
4
+
5
+ /* PCRE is a library of functions to support regular expressions whose syntax
6
+ and semantics are as close as possible to those of the Perl 5 language (but see
7
+ below for why this module is different).
8
+
9
+ Written by Philip Hazel
10
+ Copyright (c) 1997-2017 University of Cambridge
11
+
12
+ -----------------------------------------------------------------------------
13
+ Redistribution and use in source and binary forms, with or without
14
+ modification, are permitted provided that the following conditions are met:
15
+
16
+ * Redistributions of source code must retain the above copyright notice,
17
+ this list of conditions and the following disclaimer.
18
+
19
+ * Redistributions in binary form must reproduce the above copyright
20
+ notice, this list of conditions and the following disclaimer in the
21
+ documentation and/or other materials provided with the distribution.
22
+
23
+ * Neither the name of the University of Cambridge nor the names of its
24
+ contributors may be used to endorse or promote products derived from
25
+ this software without specific prior written permission.
26
+
27
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
+ POSSIBILITY OF SUCH DAMAGE.
38
+ -----------------------------------------------------------------------------
39
+ */
40
+
41
+ /* This module contains the external function pcre_dfa_exec(), which is an
42
+ alternative matching function that uses a sort of DFA algorithm (not a true
43
+ FSM). This is NOT Perl-compatible, but it has advantages in certain
44
+ applications. */
45
+
46
+
47
+ /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
48
+ the performance of his patterns greatly. I could not use it as it stood, as it
49
+ was not thread safe, and made assumptions about pattern sizes. Also, it caused
50
+ test 7 to loop, and test 9 to crash with a segfault.
51
+
52
+ The issue is the check for duplicate states, which is done by a simple linear
53
+ search up the state list. (Grep for "duplicate" below to find the code.) For
54
+ many patterns, there will never be many states active at one time, so a simple
55
+ linear search is fine. In patterns that have many active states, it might be a
56
+ bottleneck. The suggested code used an indexing scheme to remember which states
57
+ had previously been used for each character, and avoided the linear search when
58
+ it knew there was no chance of a duplicate. This was implemented when adding
59
+ states to the state lists.
60
+
61
+ I wrote some thread-safe, not-limited code to try something similar at the time
62
+ of checking for duplicates (instead of when adding states), using index vectors
63
+ on the stack. It did give a 13% improvement with one specially constructed
64
+ pattern for certain subject strings, but on other strings and on many of the
65
+ simpler patterns in the test suite it did worse. The major problem, I think,
66
+ was the extra time to initialize the index. This had to be done for each call
67
+ of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68
+ only once - I suspect this was the cause of the problems with the tests.)
69
+
70
+ Overall, I concluded that the gains in some cases did not outweigh the losses
71
+ in others, so I abandoned this code. */
72
+
73
+
74
+
75
+ #ifdef HAVE_CONFIG_H
76
+ #include "config.h"
77
+ #endif
78
+
79
+ #define NLBLOCK md /* Block containing newline information */
80
+ #define PSSTART start_subject /* Field containing processed string start */
81
+ #define PSEND end_subject /* Field containing processed string end */
82
+
83
+ #include "pcre_internal.h"
84
+
85
+
86
+ /* For use to indent debugging output */
87
+
88
+ #define SP " "
89
+
90
+
91
+ /*************************************************
92
+ * Code parameters and static tables *
93
+ *************************************************/
94
+
95
+ /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
96
+ into others, under special conditions. A gap of 20 between the blocks should be
97
+ enough. The resulting opcodes don't have to be less than 256 because they are
98
+ never stored, so we push them well clear of the normal opcodes. */
99
+
100
+ #define OP_PROP_EXTRA 300 /* added when the repeat's argument is OP_PROP or OP_NOTPROP */
101
+ #define OP_EXTUNI_EXTRA 320 /* added when the repeat's argument is OP_EXTUNI */
102
+ #define OP_ANYNL_EXTRA 340 /* added when the repeat's argument is OP_ANYNL */
103
+ #define OP_HSPACE_EXTRA 360 /* added when the repeat's argument is OP_HSPACE or OP_NOT_HSPACE */
104
+ #define OP_VSPACE_EXTRA 380 /* added when the repeat's argument is OP_VSPACE or OP_NOT_VSPACE */
105
+
106
+
107
+ /* This table identifies those opcodes that are followed immediately by a
108
+ character that is to be tested in some way. This makes it possible to
109
+ centralize the loading of these characters. In the case of Type * etc, the
110
+ "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
111
+ small value. Non-zero values in the table are the offsets from the opcode where
112
+ the character is to be found. ***NOTE*** If the start of this table is
113
+ modified, the three tables that follow must also be modified. */
114
+
115
+ static const pcre_uint8 coptable[] = { /* indexed by opcode; its size is checked against OP_TABLE_LENGTH inside internal_dfa_exec() */
116
+ 0, /* End */
117
+ 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
118
+ 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
119
+ 0, 0, 0, /* Any, AllAny, Anybyte */
120
+ 0, 0, /* \P, \p */
121
+ 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
122
+ 0, /* \X */
123
+ 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
124
+ 1, /* Char */
125
+ 1, /* Chari */
126
+ 1, /* not */
127
+ 1, /* noti */
128
+ /* Positive single-char repeats */
129
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
130
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
131
+ 1+IMM2_SIZE, /* exact */
132
+ 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
133
+ 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
134
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
135
+ 1+IMM2_SIZE, /* exact I */
136
+ 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
137
+ /* Negative single-char repeats - only for chars < 256 */
138
+ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
139
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
140
+ 1+IMM2_SIZE, /* NOT exact */
141
+ 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
142
+ 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
143
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
144
+ 1+IMM2_SIZE, /* NOT exact I */
145
+ 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
146
+ /* Positive type repeats */
147
+ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
148
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
149
+ 1+IMM2_SIZE, /* Type exact */
150
+ 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
151
+ /* Character class & ref repeats */
152
+ 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
153
+ 0, 0, /* CRRANGE, CRMINRANGE */
154
+ 0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
155
+ 0, /* CLASS */
156
+ 0, /* NCLASS */
157
+ 0, /* XCLASS - variable length */
158
+ 0, /* REF */
159
+ 0, /* REFI */
160
+ 0, /* DNREF */
161
+ 0, /* DNREFI */
162
+ 0, /* RECURSE */
163
+ 0, /* CALLOUT */
164
+ 0, /* Alt */
165
+ 0, /* Ket */
166
+ 0, /* KetRmax */
167
+ 0, /* KetRmin */
168
+ 0, /* KetRpos */
169
+ 0, /* Reverse */
170
+ 0, /* Assert */
171
+ 0, /* Assert not */
172
+ 0, /* Assert behind */
173
+ 0, /* Assert behind not */
174
+ 0, 0, /* ONCE, ONCE_NC */
175
+ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
176
+ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
177
+ 0, 0, /* CREF, DNCREF */
178
+ 0, 0, /* RREF, DNRREF */
179
+ 0, /* DEF */
180
+ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
181
+ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
182
+ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
183
+ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
184
+ 0, 0 /* CLOSE, SKIPZERO */
185
+ };
186
+
187
+ /* This table identifies those opcodes that inspect a character. It is used to
188
+ remember the fact that a character could have been inspected when the end of
189
+ the subject is reached. ***NOTE*** If the start of this table is modified, the
190
+ two tables that follow must also be modified. */
191
+
192
+ static const pcre_uint8 poptable[] = { /* indexed by opcode; non-zero means the opcode inspects a subject character */
193
+ 0, /* End */
194
+ 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
195
+ 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
196
+ 1, 1, 1, /* Any, AllAny, Anybyte */
197
+ 1, 1, /* \P, \p */
198
+ 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
199
+ 1, /* \X */
200
+ 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
201
+ 1, /* Char */
202
+ 1, /* Chari */
203
+ 1, /* not */
204
+ 1, /* noti */
205
+ /* Positive single-char repeats */
206
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
207
+ 1, 1, 1, /* upto, minupto, exact */
208
+ 1, 1, 1, 1, /* *+, ++, ?+, upto+ */
209
+ 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
210
+ 1, 1, 1, /* upto I, minupto I, exact I */
211
+ 1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */
212
+ /* Negative single-char repeats - only for chars < 256 */
213
+ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
214
+ 1, 1, 1, /* NOT upto, minupto, exact */
215
+ 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
216
+ 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
217
+ 1, 1, 1, /* NOT upto I, minupto I, exact I */
218
+ 1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */
219
+ /* Positive type repeats */
220
+ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
221
+ 1, 1, 1, /* Type upto, minupto, exact */
222
+ 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
223
+ /* Character class & ref repeats */
224
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
225
+ 1, 1, /* CRRANGE, CRMINRANGE */
226
+ 1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
227
+ 1, /* CLASS */
228
+ 1, /* NCLASS */
229
+ 1, /* XCLASS - variable length */
230
+ 0, /* REF */
231
+ 0, /* REFI */
232
+ 0, /* DNREF */
233
+ 0, /* DNREFI */
234
+ 0, /* RECURSE */
235
+ 0, /* CALLOUT */
236
+ 0, /* Alt */
237
+ 0, /* Ket */
238
+ 0, /* KetRmax */
239
+ 0, /* KetRmin */
240
+ 0, /* KetRpos */
241
+ 0, /* Reverse */
242
+ 0, /* Assert */
243
+ 0, /* Assert not */
244
+ 0, /* Assert behind */
245
+ 0, /* Assert behind not */
246
+ 0, 0, /* ONCE, ONCE_NC */
247
+ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
248
+ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
249
+ 0, 0, /* CREF, DNCREF */
250
+ 0, 0, /* RREF, DNRREF */
251
+ 0, /* DEF */
252
+ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
253
+ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
254
+ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
255
+ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
256
+ 0, 0 /* CLOSE, SKIPZERO */
257
+ };
258
+
259
+ /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
260
+ and \w */
261
+
262
+ static const pcre_uint8 toptable1[] = { /* presumably indexed by opcode, in the same order as coptable - use site not shown here */
263
+ 0, 0, 0, 0, 0, 0, /* the six opcodes that precede \D */
264
+ ctype_digit, ctype_digit, /* \D, \d */
265
+ ctype_space, ctype_space, /* \S, \s */
266
+ ctype_word, ctype_word, /* \W, \w */
267
+ 0, 0 /* OP_ANY, OP_ALLANY */
268
+ };
269
+
270
+ static const pcre_uint8 toptable2[] = { /* companion of toptable1, same indexing; how the two are combined is not shown here */
271
+ 0, 0, 0, 0, 0, 0, /* the six opcodes that precede \D */
272
+ ctype_digit, 0, /* \D, \d: only the negated form carries the ctype bit */
273
+ ctype_space, 0, /* \S, \s */
274
+ ctype_word, 0, /* \W, \w */
275
+ 1, 1 /* OP_ANY, OP_ALLANY */
276
+ };
277
+
278
+
279
+ /* Structure for holding data about a particular state, which is in effect the
280
+ current data for an active path through the match tree. It must consist
281
+ entirely of ints because the working vector we are passed, and which we put
282
+ these structures in, is a vector of ints. */
283
+
284
+ typedef struct stateblock {
285
+ int offset; /* Offset to opcode; a negative value marks a state that is being held off */
286
+ int count; /* Count for repeats */
287
+ int data; /* Some use extra data; written only by the ADD_*_DATA macros */
288
+ } stateblock;
289
+
290
+ #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) /* size of one stateblock counted in ints, the unit of the workspace vector */
291
+
292
+
293
+ #ifdef PCRE_DEBUG
294
+ /*************************************************
295
+ * Print character string *
296
+ *************************************************/
297
+
298
+ /* Character string printing function for debugging. A code unit that is below
299
+ 256 and printable is written as-is; any other one is written as a hex escape.
300
+ Arguments:
301
+ p points to string
302
+ length number of code units (pcre_uchar items) to print
303
+ f where to print
304
+
305
+ Returns: nothing
306
+ */
307
+
308
+ static void
309
+ pchars(const pcre_uchar *p, int length, FILE *f)
310
+ {
311
+ pcre_uint32 c;
312
+ while (length-- > 0)
313
+ {
314
+ if ((c = *(p++)) < 256 && isprint((int)c)) /* isprint() is undefined for values that do not fit in unsigned char */
315
+ fprintf(f, "%c", c);
316
+ else
317
+ fprintf(f, "\\x{%02x}", c);
318
+ }
319
+ }
320
+ #endif
321
+
322
+
323
+
324
+ /*************************************************
325
+ * Execute a Regular Expression - DFA engine *
326
+ *************************************************/
327
+
328
+ /* This internal function applies a compiled pattern to a subject string,
329
+ starting at a given point, using a DFA engine. This function is called from the
330
+ external one, possibly multiple times if the pattern is not anchored. The
331
+ function calls itself recursively for some kinds of subpattern.
332
+
333
+ Arguments:
334
+ md the match_data block with fixed information
335
+ this_start_code the opening bracket of this subexpression's code
336
+ current_subject where we currently are in the subject string
337
+ start_offset start offset in the subject string
338
+ offsets vector to contain the matching string offsets
339
+ offsetcount size of same
340
+ workspace vector of workspace
341
+ wscount size of same
342
+ rlevel function call recursion level
343
+
344
+ Returns: > 0 => number of match offset pairs placed in offsets
345
+ = 0 => offsets overflowed; longest matches are present
346
+ -1 => failed to match
347
+ < -1 => some kind of unexpected problem
348
+
349
+ The following macros are used for adding states to the two state vectors (one
350
+ for the current character, one for the following character). */
351
+
352
+ #define ADD_ACTIVE(x,y) /* append a state (offset x, count y) to the active list; data is left unset */ \
353
+ if (active_count++ < wscount) /* the counter is incremented even when the list is full */ \
354
+ { \
355
+ next_active_state->offset = (x); \
356
+ next_active_state->count = (y); \
357
+ next_active_state++; \
358
+ DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
359
+ } \
360
+ else return PCRE_ERROR_DFA_WSSIZE /* no room left: the enclosing function returns; caller supplies the ';' */
361
+
362
+ #define ADD_ACTIVE_DATA(x,y,z) /* as ADD_ACTIVE, but also stores z in the data field */ \
363
+ if (active_count++ < wscount) \
364
+ { \
365
+ next_active_state->offset = (x); \
366
+ next_active_state->count = (y); \
367
+ next_active_state->data = (z); \
368
+ next_active_state++; \
369
+ DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
370
+ } \
371
+ else return PCRE_ERROR_DFA_WSSIZE /* no room left: the enclosing function returns */
372
+
373
+ #define ADD_NEW(x,y) /* append a state (offset x, count y) to the new list; data is left unset */ \
374
+ if (new_count++ < wscount) /* the counter is incremented even when the list is full */ \
375
+ { \
376
+ next_new_state->offset = (x); \
377
+ next_new_state->count = (y); \
378
+ next_new_state++; \
379
+ DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
380
+ } \
381
+ else return PCRE_ERROR_DFA_WSSIZE /* no room left: the enclosing function returns */
382
+
383
+ #define ADD_NEW_DATA(x,y,z) /* as ADD_NEW, but also stores z in the data field */ \
384
+ if (new_count++ < wscount) \
385
+ { \
386
+ next_new_state->offset = (x); \
387
+ next_new_state->count = (y); \
388
+ next_new_state->data = (z); \
389
+ next_new_state++; \
390
+ DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
391
+ (x), (y), (z), __LINE__)); \
392
+ } \
393
+ else return PCRE_ERROR_DFA_WSSIZE /* no room left: the enclosing function returns */
394
+
395
+ /* And now, here is the code */
396
+
397
+ static int
398
+ internal_dfa_exec(
399
+ dfa_match_data *md,
400
+ const pcre_uchar *this_start_code,
401
+ const pcre_uchar *current_subject,
402
+ int start_offset,
403
+ int *offsets,
404
+ int offsetcount,
405
+ int *workspace,
406
+ int wscount,
407
+ int rlevel)
408
+ {
409
+ stateblock *active_states, *new_states, *temp_states;
410
+ stateblock *next_active_state, *next_new_state;
411
+
412
+ const pcre_uint8 *ctypes, *lcc, *fcc;
413
+ const pcre_uchar *ptr;
414
+ const pcre_uchar *end_code, *first_op;
415
+
416
+ dfa_recursion_info new_recursive;
417
+
418
+ int active_count, new_count, match_count;
419
+
420
+ /* Some fields in the md block are frequently referenced, so we load them into
421
+ independent variables in the hope that this will perform better. */
422
+
423
+ const pcre_uchar *start_subject = md->start_subject;
424
+ const pcre_uchar *end_subject = md->end_subject;
425
+ const pcre_uchar *start_code = md->start_code;
426
+
427
+ #ifdef SUPPORT_UTF
428
+ BOOL utf = (md->poptions & PCRE_UTF8) != 0;
429
+ #else
430
+ BOOL utf = FALSE;
431
+ #endif
432
+
433
+ BOOL reset_could_continue = FALSE;
434
+
435
+ rlevel++;
436
+ offsetcount &= (-2);
437
+
438
+ wscount -= 2;
439
+ wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
440
+ (2 * INTS_PER_STATEBLOCK);
441
+
442
+ DPRINTF(("\n%.*s---------------------\n"
443
+ "%.*sCall to internal_dfa_exec f=%d\n",
444
+ rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
445
+
446
+ ctypes = md->tables + ctypes_offset;
447
+ lcc = md->tables + lcc_offset;
448
+ fcc = md->tables + fcc_offset;
449
+
450
+ match_count = PCRE_ERROR_NOMATCH; /* A negative number */
451
+
452
+ active_states = (stateblock *)(workspace + 2);
453
+ next_new_state = new_states = active_states + wscount;
454
+ new_count = 0;
455
+
456
+ first_op = this_start_code + 1 + LINK_SIZE +
457
+ ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
458
+ *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
459
+ ? IMM2_SIZE:0);
460
+
461
+ /* The first thing in any (sub) pattern is a bracket of some sort. Push all
462
+ the alternative states onto the list, and find out where the end is. This
463
+ makes it possible to use this function recursively, when we want to stop at a
464
+ matching internal ket rather than at the end.
465
+
466
+ If the first opcode in the first alternative is OP_REVERSE, we are dealing with
467
+ a backward assertion. In that case, we have to find out the maximum amount to
468
+ move back, and set up each alternative appropriately. */
469
+
470
+ if (*first_op == OP_REVERSE)
471
+ {
472
+ int max_back = 0;
473
+ int gone_back;
474
+
475
+ end_code = this_start_code;
476
+ do
477
+ {
478
+ int back = GET(end_code, 2+LINK_SIZE);
479
+ if (back > max_back) max_back = back;
480
+ end_code += GET(end_code, 1);
481
+ }
482
+ while (*end_code == OP_ALT);
483
+
484
+ /* If we can't go back the amount required for the longest lookbehind
485
+ pattern, go back as far as we can; some alternatives may still be viable. */
486
+
487
+ #ifdef SUPPORT_UTF
488
+ /* In character mode we have to step back character by character */
489
+
490
+ if (utf)
491
+ {
492
+ for (gone_back = 0; gone_back < max_back; gone_back++)
493
+ {
494
+ if (current_subject <= start_subject) break;
495
+ current_subject--;
496
+ ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
497
+ }
498
+ }
499
+ else
500
+ #endif
501
+
502
+ /* In byte-mode we can do this quickly. */
503
+
504
+ {
505
+ gone_back = (current_subject - max_back < start_subject)?
506
+ (int)(current_subject - start_subject) : max_back;
507
+ current_subject -= gone_back;
508
+ }
509
+
510
+ /* Save the earliest consulted character */
511
+
512
+ if (current_subject < md->start_used_ptr)
513
+ md->start_used_ptr = current_subject;
514
+
515
+ /* Now we can process the individual branches. */
516
+
517
+ end_code = this_start_code;
518
+ do
519
+ {
520
+ int back = GET(end_code, 2+LINK_SIZE);
521
+ if (back <= gone_back)
522
+ {
523
+ int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
524
+ ADD_NEW_DATA(-bstate, 0, gone_back - back);
525
+ }
526
+ end_code += GET(end_code, 1);
527
+ }
528
+ while (*end_code == OP_ALT);
529
+ }
530
+
531
+ /* This is the code for a "normal" subpattern (not a backward assertion). The
532
+ start of a whole pattern is always one of these. If we are at the top level,
533
+ we may be asked to restart matching from the same point that we reached for a
534
+ previous partial match. We still have to scan through the top-level branches to
535
+ find the end state. */
536
+
537
+ else
538
+ {
539
+ end_code = this_start_code;
540
+
541
+ /* Restarting */
542
+
543
+ if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
544
+ {
545
+ do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
546
+ new_count = workspace[1];
547
+ if (!workspace[0])
548
+ memcpy(new_states, active_states, new_count * sizeof(stateblock));
549
+ }
550
+
551
+ /* Not restarting */
552
+
553
+ else
554
+ {
555
+ int length = 1 + LINK_SIZE +
556
+ ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
557
+ *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
558
+ ? IMM2_SIZE:0);
559
+ do
560
+ {
561
+ ADD_NEW((int)(end_code - start_code + length), 0);
562
+ end_code += GET(end_code, 1);
563
+ length = 1 + LINK_SIZE;
564
+ }
565
+ while (*end_code == OP_ALT);
566
+ }
567
+ }
568
+
569
+ workspace[0] = 0; /* Bit indicating which vector is current */
570
+
571
+ DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
572
+
573
+ /* Loop for scanning the subject */
574
+
575
+ ptr = current_subject;
576
+ for (;;)
577
+ {
578
+ int i, j;
579
+ int clen, dlen;
580
+ pcre_uint32 c, d;
581
+ int forced_fail = 0;
582
+ BOOL partial_newline = FALSE;
583
+ BOOL could_continue = reset_could_continue;
584
+ reset_could_continue = FALSE;
585
+
586
+ /* Make the new state list into the active state list and empty the
587
+ new state list. */
588
+
589
+ temp_states = active_states;
590
+ active_states = new_states;
591
+ new_states = temp_states;
592
+ active_count = new_count;
593
+ new_count = 0;
594
+
595
+ workspace[0] ^= 1; /* Remember for the restarting feature */
596
+ workspace[1] = active_count;
597
+
598
+ #ifdef PCRE_DEBUG
599
+ printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
600
+ pchars(ptr, STRLEN_UC(ptr), stdout);
601
+ printf("\"\n");
602
+
603
+ printf("%.*sActive states: ", rlevel*2-2, SP);
604
+ for (i = 0; i < active_count; i++)
605
+ printf("%d/%d ", active_states[i].offset, active_states[i].count);
606
+ printf("\n");
607
+ #endif
608
+
609
+ /* Set the pointers for adding new states */
610
+
611
+ next_active_state = active_states + active_count;
612
+ next_new_state = new_states;
613
+
614
+ /* Load the current character from the subject outside the loop, as many
615
+ different states may want to look at it, and we assume that at least one
616
+ will. */
617
+
618
+ if (ptr < end_subject)
619
+ {
620
+ clen = 1; /* Number of data items in the character */
621
+ #ifdef SUPPORT_UTF
622
+ GETCHARLENTEST(c, ptr, clen);
623
+ #else
624
+ c = *ptr;
625
+ #endif /* SUPPORT_UTF */
626
+ }
627
+ else
628
+ {
629
+ clen = 0; /* This indicates the end of the subject */
630
+ c = NOTACHAR; /* This value should never actually be used */
631
+ }
632
+
633
+ /* Scan up the active states and act on each one. The result of an action
634
+ may be to add more states to the currently active list (e.g. on hitting a
635
+ parenthesis) or it may be to put states on the new list, for considering
636
+ when we move the character pointer on. */
637
+
638
+ for (i = 0; i < active_count; i++)
639
+ {
640
+ stateblock *current_state = active_states + i;
641
+ BOOL caseless = FALSE;
642
+ const pcre_uchar *code;
643
+ int state_offset = current_state->offset;
644
+ int codevalue, rrc;
645
+ int count;
646
+
647
+ #ifdef PCRE_DEBUG
648
+ printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
649
+ if (clen == 0) printf("EOL\n");
650
+ else if (c > 32 && c < 127) printf("'%c'\n", c);
651
+ else printf("0x%02x\n", c);
652
+ #endif
653
+
654
+ /* A negative offset is a special case meaning "hold off going to this
655
+ (negated) state until the number of characters in the data field have
656
+ been skipped". If the could_continue flag was passed over from a previous
657
+ state, arrange for it to be passed on. */
658
+
659
+ if (state_offset < 0)
660
+ {
661
+ if (current_state->data > 0)
662
+ {
663
+ DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
664
+ ADD_NEW_DATA(state_offset, current_state->count,
665
+ current_state->data - 1);
666
+ if (could_continue) reset_could_continue = TRUE;
667
+ continue;
668
+ }
669
+ else
670
+ {
671
+ current_state->offset = state_offset = -state_offset;
672
+ }
673
+ }
674
+
675
+ /* Check for a duplicate state with the same count, and skip if found.
676
+ See the note at the head of this module about the possibility of improving
677
+ performance here. */
678
+
679
+ for (j = 0; j < i; j++)
680
+ {
681
+ if (active_states[j].offset == state_offset &&
682
+ active_states[j].count == current_state->count)
683
+ {
684
+ DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
685
+ goto NEXT_ACTIVE_STATE;
686
+ }
687
+ }
688
+
689
+ /* The state offset is the offset to the opcode */
690
+
691
+ code = start_code + state_offset;
692
+ codevalue = *code;
693
+
694
+ /* If this opcode inspects a character, but we are at the end of the
695
+ subject, remember the fact for use when testing for a partial match. */
696
+
697
+ if (clen == 0 && poptable[codevalue] != 0)
698
+ could_continue = TRUE;
699
+
700
+ /* If this opcode is followed by an inline character, load it. It is
701
+ tempting to test for the presence of a subject character here, but that
702
+ is wrong, because sometimes zero repetitions of the subject are
703
+ permitted.
704
+
705
+ We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
706
+ argument that is not a data character - but is always one byte long because
707
+ the values are small. We have to take special action to deal with \P, \p,
708
+ \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
709
+ these ones to new opcodes. */
710
+
711
+ if (coptable[codevalue] > 0)
712
+ {
713
+ dlen = 1;
714
+ #ifdef SUPPORT_UTF
715
+ if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
716
+ #endif /* SUPPORT_UTF */
717
+ d = code[coptable[codevalue]];
718
+ if (codevalue >= OP_TYPESTAR)
719
+ {
720
+ switch(d)
721
+ {
722
+ case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
723
+ case OP_NOTPROP:
724
+ case OP_PROP: codevalue += OP_PROP_EXTRA; break;
725
+ case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
726
+ case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
727
+ case OP_NOT_HSPACE:
728
+ case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
729
+ case OP_NOT_VSPACE:
730
+ case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
731
+ default: break;
732
+ }
733
+ }
734
+ }
735
+ else
736
+ {
737
+ dlen = 0; /* Not strictly necessary, but compilers moan */
738
+ d = NOTACHAR; /* if these variables are not set. */
739
+ }
740
+
741
+
742
+ /* Now process the individual opcodes */
743
+
744
+ switch (codevalue)
745
+ {
746
+ /* ========================================================================== */
747
+ /* These cases are never obeyed. This is a fudge that causes a compile-
748
+ time error if the vectors coptable or poptable, which are indexed by
749
+ opcode, are not the correct length. It seems to be the only way to do
750
+ such a check at compile time, as the sizeof() operator does not work
751
+ in the C preprocessor. */
752
+
753
+ case OP_TABLE_LENGTH:
754
+ case OP_TABLE_LENGTH +
755
+ ((sizeof(coptable) == OP_TABLE_LENGTH) &&
756
+ (sizeof(poptable) == OP_TABLE_LENGTH)):
757
+ break;
758
+
759
+ /* ========================================================================== */
760
+ /* Reached a closing bracket. If not at the end of the pattern, carry
761
+ on with the next opcode. For repeating opcodes, also add the repeat
762
+ state. Note that KETRPOS will always be encountered at the end of the
763
+ subpattern, because the possessive subpattern repeats are always handled
764
+ using recursive calls. Thus, it never adds any new states.
765
+
766
+ At the end of the (sub)pattern, unless we have an empty string and
767
+ PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
768
+ start of the subject, save the match data, shifting up all previous
769
+ matches so we always have the longest first. */
770
+
771
+ case OP_KET:
772
+ case OP_KETRMIN:
773
+ case OP_KETRMAX:
774
+ case OP_KETRPOS:
775
+ if (code != end_code)
776
+ {
777
+ ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
778
+ if (codevalue != OP_KET)
779
+ {
780
+ ADD_ACTIVE(state_offset - GET(code, 1), 0);
781
+ }
782
+ }
783
+ else
784
+ {
785
+ if (ptr > current_subject ||
786
+ ((md->moptions & PCRE_NOTEMPTY) == 0 &&
787
+ ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
788
+ current_subject > start_subject + md->start_offset)))
789
+ {
790
+ if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
791
+ else if (match_count > 0 && ++match_count * 2 > offsetcount)
792
+ match_count = 0;
793
+ count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
794
+ if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
795
+ if (offsetcount >= 2)
796
+ {
797
+ offsets[0] = (int)(current_subject - start_subject);
798
+ offsets[1] = (int)(ptr - start_subject);
799
+ DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
800
+ offsets[1] - offsets[0], (char *)current_subject));
801
+ }
802
+ if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
803
+ {
804
+ DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
805
+ "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
806
+ match_count, rlevel*2-2, SP));
807
+ return match_count;
808
+ }
809
+ }
810
+ }
811
+ break;
812
+
813
+ /* ========================================================================== */
814
+ /* These opcodes add to the current list of states without looking
815
+ at the current character. */
816
+
817
+ /*-----------------------------------------------------------------*/
818
+ case OP_ALT:
819
+ do { code += GET(code, 1); } while (*code == OP_ALT);
820
+ ADD_ACTIVE((int)(code - start_code), 0);
821
+ break;
822
+
823
+ /*-----------------------------------------------------------------*/
824
+ case OP_BRA:
825
+ case OP_SBRA:
826
+ do
827
+ {
828
+ ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
829
+ code += GET(code, 1);
830
+ }
831
+ while (*code == OP_ALT);
832
+ break;
833
+
834
+ /*-----------------------------------------------------------------*/
835
+ case OP_CBRA:
836
+ case OP_SCBRA:
837
+ ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
838
+ code += GET(code, 1);
839
+ while (*code == OP_ALT)
840
+ {
841
+ ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
842
+ code += GET(code, 1);
843
+ }
844
+ break;
845
+
846
+ /*-----------------------------------------------------------------*/
847
+ case OP_BRAZERO:
848
+ case OP_BRAMINZERO:
849
+ ADD_ACTIVE(state_offset + 1, 0);
850
+ code += 1 + GET(code, 2);
851
+ while (*code == OP_ALT) code += GET(code, 1);
852
+ ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
853
+ break;
854
+
855
+ /*-----------------------------------------------------------------*/
856
+ case OP_SKIPZERO:
857
+ code += 1 + GET(code, 2);
858
+ while (*code == OP_ALT) code += GET(code, 1);
859
+ ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
860
+ break;
861
+
862
+ /*-----------------------------------------------------------------*/
863
+ case OP_CIRC:
864
+ if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
865
+ { ADD_ACTIVE(state_offset + 1, 0); }
866
+ break;
867
+
868
+ /*-----------------------------------------------------------------*/
869
+ case OP_CIRCM:
870
+ if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
871
+ (ptr != end_subject && WAS_NEWLINE(ptr)))
872
+ { ADD_ACTIVE(state_offset + 1, 0); }
873
+ break;
874
+
875
+ /*-----------------------------------------------------------------*/
876
+ case OP_EOD:
877
+ if (ptr >= end_subject)
878
+ {
879
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
880
+ could_continue = TRUE;
881
+ else { ADD_ACTIVE(state_offset + 1, 0); }
882
+ }
883
+ break;
884
+
885
+ /*-----------------------------------------------------------------*/
886
+ case OP_SOD:
887
+ if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
888
+ break;
889
+
890
+ /*-----------------------------------------------------------------*/
891
+ case OP_SOM:
892
+ if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
893
+ break;
894
+
895
+
896
+ /* ========================================================================== */
897
+ /* These opcodes inspect the next subject character, and sometimes
898
+ the previous one as well, but do not have an argument. The variable
899
+ clen contains the length of the current character and is zero if we are
900
+ at the end of the subject. */
901
+
902
+ /*-----------------------------------------------------------------*/
903
+ case OP_ANY:
904
+ if (clen > 0 && !IS_NEWLINE(ptr))
905
+ {
906
+ if (ptr + 1 >= md->end_subject &&
907
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
908
+ NLBLOCK->nltype == NLTYPE_FIXED &&
909
+ NLBLOCK->nllen == 2 &&
910
+ c == NLBLOCK->nl[0])
911
+ {
912
+ could_continue = partial_newline = TRUE;
913
+ }
914
+ else
915
+ {
916
+ ADD_NEW(state_offset + 1, 0);
917
+ }
918
+ }
919
+ break;
920
+
921
+ /*-----------------------------------------------------------------*/
922
+ case OP_ALLANY:
923
+ if (clen > 0)
924
+ { ADD_NEW(state_offset + 1, 0); }
925
+ break;
926
+
927
+ /*-----------------------------------------------------------------*/
928
+ case OP_EODN:
929
+ if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
930
+ could_continue = TRUE;
931
+ else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
932
+ { ADD_ACTIVE(state_offset + 1, 0); }
933
+ break;
934
+
935
+ /*-----------------------------------------------------------------*/
936
+ case OP_DOLL:
937
+ if ((md->moptions & PCRE_NOTEOL) == 0)
938
+ {
939
+ if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
940
+ could_continue = TRUE;
941
+ else if (clen == 0 ||
942
+ ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
943
+ (ptr == end_subject - md->nllen)
944
+ ))
945
+ { ADD_ACTIVE(state_offset + 1, 0); }
946
+ else if (ptr + 1 >= md->end_subject &&
947
+ (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
948
+ NLBLOCK->nltype == NLTYPE_FIXED &&
949
+ NLBLOCK->nllen == 2 &&
950
+ c == NLBLOCK->nl[0])
951
+ {
952
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
953
+ {
954
+ reset_could_continue = TRUE;
955
+ ADD_NEW_DATA(-(state_offset + 1), 0, 1);
956
+ }
957
+ else could_continue = partial_newline = TRUE;
958
+ }
959
+ }
960
+ break;
961
+
962
+ /*-----------------------------------------------------------------*/
963
+ case OP_DOLLM:
964
+ if ((md->moptions & PCRE_NOTEOL) == 0)
965
+ {
966
+ if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
967
+ could_continue = TRUE;
968
+ else if (clen == 0 ||
969
+ ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
970
+ { ADD_ACTIVE(state_offset + 1, 0); }
971
+ else if (ptr + 1 >= md->end_subject &&
972
+ (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
973
+ NLBLOCK->nltype == NLTYPE_FIXED &&
974
+ NLBLOCK->nllen == 2 &&
975
+ c == NLBLOCK->nl[0])
976
+ {
977
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
978
+ {
979
+ reset_could_continue = TRUE;
980
+ ADD_NEW_DATA(-(state_offset + 1), 0, 1);
981
+ }
982
+ else could_continue = partial_newline = TRUE;
983
+ }
984
+ }
985
+ else if (IS_NEWLINE(ptr))
986
+ { ADD_ACTIVE(state_offset + 1, 0); }
987
+ break;
988
+
989
+ /*-----------------------------------------------------------------*/
990
+
991
+ case OP_DIGIT:
992
+ case OP_WHITESPACE:
993
+ case OP_WORDCHAR:
994
+ if (clen > 0 && c < 256 &&
995
+ ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
996
+ { ADD_NEW(state_offset + 1, 0); }
997
+ break;
998
+
999
+ /*-----------------------------------------------------------------*/
1000
+ case OP_NOT_DIGIT:
1001
+ case OP_NOT_WHITESPACE:
1002
+ case OP_NOT_WORDCHAR:
1003
+ if (clen > 0 && (c >= 256 ||
1004
+ ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
1005
+ { ADD_NEW(state_offset + 1, 0); }
1006
+ break;
1007
+
1008
+ /*-----------------------------------------------------------------*/
1009
+ case OP_WORD_BOUNDARY:
1010
+ case OP_NOT_WORD_BOUNDARY:
1011
+ {
1012
+ int left_word, right_word;
1013
+
1014
+ if (ptr > start_subject)
1015
+ {
1016
+ const pcre_uchar *temp = ptr - 1;
1017
+ if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1018
+ #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1019
+ if (utf) { BACKCHAR(temp); }
1020
+ #endif
1021
+ GETCHARTEST(d, temp);
1022
+ #ifdef SUPPORT_UCP
1023
+ if ((md->poptions & PCRE_UCP) != 0)
1024
+ {
1025
+ if (d == '_') left_word = TRUE; else
1026
+ {
1027
+ int cat = UCD_CATEGORY(d);
1028
+ left_word = (cat == ucp_L || cat == ucp_N);
1029
+ }
1030
+ }
1031
+ else
1032
+ #endif
1033
+ left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1034
+ }
1035
+ else left_word = FALSE;
1036
+
1037
+ if (clen > 0)
1038
+ {
1039
+ #ifdef SUPPORT_UCP
1040
+ if ((md->poptions & PCRE_UCP) != 0)
1041
+ {
1042
+ if (c == '_') right_word = TRUE; else
1043
+ {
1044
+ int cat = UCD_CATEGORY(c);
1045
+ right_word = (cat == ucp_L || cat == ucp_N);
1046
+ }
1047
+ }
1048
+ else
1049
+ #endif
1050
+ right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1051
+ }
1052
+ else right_word = FALSE;
1053
+
1054
+ if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1055
+ { ADD_ACTIVE(state_offset + 1, 0); }
1056
+ }
1057
+ break;
1058
+
1059
+
1060
+ /*-----------------------------------------------------------------*/
1061
+ /* Check the next character by Unicode property. We will get here only
1062
+ if the support is in the binary; otherwise a compile-time error occurs.
1063
+ */
1064
+
1065
+ #ifdef SUPPORT_UCP
1066
+ case OP_PROP:
1067
+ case OP_NOTPROP:
1068
+ if (clen > 0)
1069
+ {
1070
+ BOOL OK;
1071
+ const pcre_uint32 *cp;
1072
+ const ucd_record * prop = GET_UCD(c);
1073
+ switch(code[1])
1074
+ {
1075
+ case PT_ANY:
1076
+ OK = TRUE;
1077
+ break;
1078
+
1079
+ case PT_LAMP:
1080
+ OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1081
+ prop->chartype == ucp_Lt;
1082
+ break;
1083
+
1084
+ case PT_GC:
1085
+ OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1086
+ break;
1087
+
1088
+ case PT_PC:
1089
+ OK = prop->chartype == code[2];
1090
+ break;
1091
+
1092
+ case PT_SC:
1093
+ OK = prop->script == code[2];
1094
+ break;
1095
+
1096
+ /* These are specials for combination cases. */
1097
+
1098
+ case PT_ALNUM:
1099
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1100
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1101
+ break;
1102
+
1103
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1104
+ which means that Perl space and POSIX space are now identical. PCRE
1105
+ was changed at release 8.34. */
1106
+
1107
+ case PT_SPACE: /* Perl space */
1108
+ case PT_PXSPACE: /* POSIX space */
1109
+ switch(c)
1110
+ {
1111
+ HSPACE_CASES:
1112
+ VSPACE_CASES:
1113
+ OK = TRUE;
1114
+ break;
1115
+
1116
+ default:
1117
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1118
+ break;
1119
+ }
1120
+ break;
1121
+
1122
+ case PT_WORD:
1123
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1124
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1125
+ c == CHAR_UNDERSCORE;
1126
+ break;
1127
+
1128
+ case PT_CLIST:
1129
+ cp = PRIV(ucd_caseless_sets) + code[2];
1130
+ for (;;)
1131
+ {
1132
+ if (c < *cp) { OK = FALSE; break; }
1133
+ if (c == *cp++) { OK = TRUE; break; }
1134
+ }
1135
+ break;
1136
+
1137
+ case PT_UCNC:
1138
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1139
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1140
+ c >= 0xe000;
1141
+ break;
1142
+
1143
+ /* Should never occur, but keep compilers from grumbling. */
1144
+
1145
+ default:
1146
+ OK = codevalue != OP_PROP;
1147
+ break;
1148
+ }
1149
+
1150
+ if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
1151
+ }
1152
+ break;
1153
+ #endif
1154
+
1155
+
1156
+
1157
+ /* ========================================================================== */
1158
+ /* These opcodes likewise inspect the subject character, but have an
1159
+ argument that is not a data character. It is one of these opcodes:
1160
+ OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
1161
+ OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1162
+
1163
+ case OP_TYPEPLUS:
1164
+ case OP_TYPEMINPLUS:
1165
+ case OP_TYPEPOSPLUS:
1166
+ count = current_state->count; /* Already matched */
1167
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1168
+ if (clen > 0)
1169
+ {
1170
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1171
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1172
+ NLBLOCK->nltype == NLTYPE_FIXED &&
1173
+ NLBLOCK->nllen == 2 &&
1174
+ c == NLBLOCK->nl[0])
1175
+ {
1176
+ could_continue = partial_newline = TRUE;
1177
+ }
1178
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1179
+ (c < 256 &&
1180
+ (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1181
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1182
+ {
1183
+ if (count > 0 && codevalue == OP_TYPEPOSPLUS)
1184
+ {
1185
+ active_count--; /* Remove non-match possibility */
1186
+ next_active_state--;
1187
+ }
1188
+ count++;
1189
+ ADD_NEW(state_offset, count);
1190
+ }
1191
+ }
1192
+ break;
1193
+
1194
+ /*-----------------------------------------------------------------*/
1195
+ case OP_TYPEQUERY:
1196
+ case OP_TYPEMINQUERY:
1197
+ case OP_TYPEPOSQUERY:
1198
+ ADD_ACTIVE(state_offset + 2, 0);
1199
+ if (clen > 0)
1200
+ {
1201
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1202
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1203
+ NLBLOCK->nltype == NLTYPE_FIXED &&
1204
+ NLBLOCK->nllen == 2 &&
1205
+ c == NLBLOCK->nl[0])
1206
+ {
1207
+ could_continue = partial_newline = TRUE;
1208
+ }
1209
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1210
+ (c < 256 &&
1211
+ (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1212
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1213
+ {
1214
+ if (codevalue == OP_TYPEPOSQUERY)
1215
+ {
1216
+ active_count--; /* Remove non-match possibility */
1217
+ next_active_state--;
1218
+ }
1219
+ ADD_NEW(state_offset + 2, 0);
1220
+ }
1221
+ }
1222
+ break;
1223
+
1224
+ /*-----------------------------------------------------------------*/
1225
+ case OP_TYPESTAR:
1226
+ case OP_TYPEMINSTAR:
1227
+ case OP_TYPEPOSSTAR:
1228
+ ADD_ACTIVE(state_offset + 2, 0);
1229
+ if (clen > 0)
1230
+ {
1231
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1232
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1233
+ NLBLOCK->nltype == NLTYPE_FIXED &&
1234
+ NLBLOCK->nllen == 2 &&
1235
+ c == NLBLOCK->nl[0])
1236
+ {
1237
+ could_continue = partial_newline = TRUE;
1238
+ }
1239
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1240
+ (c < 256 &&
1241
+ (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1242
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1243
+ {
1244
+ if (codevalue == OP_TYPEPOSSTAR)
1245
+ {
1246
+ active_count--; /* Remove non-match possibility */
1247
+ next_active_state--;
1248
+ }
1249
+ ADD_NEW(state_offset, 0);
1250
+ }
1251
+ }
1252
+ break;
1253
+
1254
+ /*-----------------------------------------------------------------*/
1255
+ case OP_TYPEEXACT:
1256
+ count = current_state->count; /* Number already matched */
1257
+ if (clen > 0)
1258
+ {
1259
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1260
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1261
+ NLBLOCK->nltype == NLTYPE_FIXED &&
1262
+ NLBLOCK->nllen == 2 &&
1263
+ c == NLBLOCK->nl[0])
1264
+ {
1265
+ could_continue = partial_newline = TRUE;
1266
+ }
1267
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1268
+ (c < 256 &&
1269
+ (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1270
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1271
+ {
1272
+ if (++count >= (int)GET2(code, 1))
1273
+ { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1274
+ else
1275
+ { ADD_NEW(state_offset, count); }
1276
+ }
1277
+ }
1278
+ break;
1279
+
1280
+ /*-----------------------------------------------------------------*/
1281
+ case OP_TYPEUPTO:
1282
+ case OP_TYPEMINUPTO:
1283
+ case OP_TYPEPOSUPTO:
1284
+ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1285
+ count = current_state->count; /* Number already matched */
1286
+ if (clen > 0)
1287
+ {
1288
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1289
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1290
+ NLBLOCK->nltype == NLTYPE_FIXED &&
1291
+ NLBLOCK->nllen == 2 &&
1292
+ c == NLBLOCK->nl[0])
1293
+ {
1294
+ could_continue = partial_newline = TRUE;
1295
+ }
1296
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1297
+ (c < 256 &&
1298
+ (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1299
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1300
+ {
1301
+ if (codevalue == OP_TYPEPOSUPTO)
1302
+ {
1303
+ active_count--; /* Remove non-match possibility */
1304
+ next_active_state--;
1305
+ }
1306
+ if (++count >= (int)GET2(code, 1))
1307
+ { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1308
+ else
1309
+ { ADD_NEW(state_offset, count); }
1310
+ }
1311
+ }
1312
+ break;
1313
+
1314
+ /* ========================================================================== */
1315
+ /* These are virtual opcodes that are used when something like
1316
+ OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1317
+ argument. It keeps the code above fast for the other cases. The argument
1318
+ is in the d variable. */
1319
+
1320
+ #ifdef SUPPORT_UCP
1321
+ case OP_PROP_EXTRA + OP_TYPEPLUS:
1322
+ case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1323
+ case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1324
+ count = current_state->count; /* Already matched */
1325
+ if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1326
+ if (clen > 0)
1327
+ {
1328
+ BOOL OK;
1329
+ const pcre_uint32 *cp;
1330
+ const ucd_record * prop = GET_UCD(c);
1331
+ switch(code[2])
1332
+ {
1333
+ case PT_ANY:
1334
+ OK = TRUE;
1335
+ break;
1336
+
1337
+ case PT_LAMP:
1338
+ OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1339
+ prop->chartype == ucp_Lt;
1340
+ break;
1341
+
1342
+ case PT_GC:
1343
+ OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1344
+ break;
1345
+
1346
+ case PT_PC:
1347
+ OK = prop->chartype == code[3];
1348
+ break;
1349
+
1350
+ case PT_SC:
1351
+ OK = prop->script == code[3];
1352
+ break;
1353
+
1354
+ /* These are specials for combination cases. */
1355
+
1356
+ case PT_ALNUM:
1357
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1358
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1359
+ break;
1360
+
1361
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1362
+ which means that Perl space and POSIX space are now identical. PCRE
1363
+ was changed at release 8.34. */
1364
+
1365
+ case PT_SPACE: /* Perl space */
1366
+ case PT_PXSPACE: /* POSIX space */
1367
+ switch(c)
1368
+ {
1369
+ HSPACE_CASES:
1370
+ VSPACE_CASES:
1371
+ OK = TRUE;
1372
+ break;
1373
+
1374
+ default:
1375
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1376
+ break;
1377
+ }
1378
+ break;
1379
+
1380
+ case PT_WORD:
1381
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1382
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1383
+ c == CHAR_UNDERSCORE;
1384
+ break;
1385
+
1386
+ case PT_CLIST:
1387
+ cp = PRIV(ucd_caseless_sets) + code[3];
1388
+ for (;;)
1389
+ {
1390
+ if (c < *cp) { OK = FALSE; break; }
1391
+ if (c == *cp++) { OK = TRUE; break; }
1392
+ }
1393
+ break;
1394
+
1395
+ case PT_UCNC:
1396
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1397
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1398
+ c >= 0xe000;
1399
+ break;
1400
+
1401
+ /* Should never occur, but keep compilers from grumbling. */
1402
+
1403
+ default:
1404
+ OK = codevalue != OP_PROP;
1405
+ break;
1406
+ }
1407
+
1408
+ if (OK == (d == OP_PROP))
1409
+ {
1410
+ if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1411
+ {
1412
+ active_count--; /* Remove non-match possibility */
1413
+ next_active_state--;
1414
+ }
1415
+ count++;
1416
+ ADD_NEW(state_offset, count);
1417
+ }
1418
+ }
1419
+ break;
1420
+
1421
+ /*-----------------------------------------------------------------*/
1422
+ case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1423
+ case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1424
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1425
+ count = current_state->count; /* Already matched */
1426
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1427
+ if (clen > 0)
1428
+ {
1429
+ int lgb, rgb;
1430
+ const pcre_uchar *nptr = ptr + clen;
1431
+ int ncount = 0;
1432
+ if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1433
+ {
1434
+ active_count--; /* Remove non-match possibility */
1435
+ next_active_state--;
1436
+ }
1437
+ lgb = UCD_GRAPHBREAK(c);
1438
+ while (nptr < end_subject)
1439
+ {
1440
+ dlen = 1;
1441
+ if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1442
+ rgb = UCD_GRAPHBREAK(d);
1443
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1444
+ ncount++;
1445
+ lgb = rgb;
1446
+ nptr += dlen;
1447
+ }
1448
+ count++;
1449
+ ADD_NEW_DATA(-state_offset, count, ncount);
1450
+ }
1451
+ break;
1452
+ #endif
1453
+
1454
+ /*-----------------------------------------------------------------*/
1455
+ case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1456
+ case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1457
+ case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1458
+ count = current_state->count; /* Already matched */
1459
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1460
+ if (clen > 0)
1461
+ {
1462
+ int ncount = 0;
1463
+ switch (c)
1464
+ {
1465
+ case CHAR_VT:
1466
+ case CHAR_FF:
1467
+ case CHAR_NEL:
1468
+ #ifndef EBCDIC
1469
+ case 0x2028:
1470
+ case 0x2029:
1471
+ #endif /* Not EBCDIC */
1472
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1473
+ goto ANYNL01;
1474
+
1475
+ case CHAR_CR:
1476
+ if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1477
+ /* Fall through */
1478
+
1479
+ ANYNL01:
1480
+ case CHAR_LF:
1481
+ if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1482
+ {
1483
+ active_count--; /* Remove non-match possibility */
1484
+ next_active_state--;
1485
+ }
1486
+ count++;
1487
+ ADD_NEW_DATA(-state_offset, count, ncount);
1488
+ break;
1489
+
1490
+ default:
1491
+ break;
1492
+ }
1493
+ }
1494
+ break;
1495
+
1496
+ /*-----------------------------------------------------------------*/
1497
+ case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1498
+ case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1499
+ case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1500
+ count = current_state->count; /* Already matched */
1501
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1502
+ if (clen > 0)
1503
+ {
1504
+ BOOL OK;
1505
+ switch (c)
1506
+ {
1507
+ VSPACE_CASES:
1508
+ OK = TRUE;
1509
+ break;
1510
+
1511
+ default:
1512
+ OK = FALSE;
1513
+ break;
1514
+ }
1515
+
1516
+ if (OK == (d == OP_VSPACE))
1517
+ {
1518
+ if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1519
+ {
1520
+ active_count--; /* Remove non-match possibility */
1521
+ next_active_state--;
1522
+ }
1523
+ count++;
1524
+ ADD_NEW_DATA(-state_offset, count, 0);
1525
+ }
1526
+ }
1527
+ break;
1528
+
1529
+ /*-----------------------------------------------------------------*/
1530
+ case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1531
+ case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1532
+ case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1533
+ count = current_state->count; /* Already matched */
1534
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1535
+ if (clen > 0)
1536
+ {
1537
+ BOOL OK;
1538
+ switch (c)
1539
+ {
1540
+ HSPACE_CASES:
1541
+ OK = TRUE;
1542
+ break;
1543
+
1544
+ default:
1545
+ OK = FALSE;
1546
+ break;
1547
+ }
1548
+
1549
+ if (OK == (d == OP_HSPACE))
1550
+ {
1551
+ if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1552
+ {
1553
+ active_count--; /* Remove non-match possibility */
1554
+ next_active_state--;
1555
+ }
1556
+ count++;
1557
+ ADD_NEW_DATA(-state_offset, count, 0);
1558
+ }
1559
+ }
1560
+ break;
1561
+
1562
+ /*-----------------------------------------------------------------*/
1563
+ #ifdef SUPPORT_UCP
1564
+ case OP_PROP_EXTRA + OP_TYPEQUERY:
1565
+ case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1566
+ case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1567
+ count = 4;
1568
+ goto QS1;
1569
+
1570
+ case OP_PROP_EXTRA + OP_TYPESTAR:
1571
+ case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1572
+ case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1573
+ count = 0;
1574
+
1575
+ QS1:
1576
+
1577
+ ADD_ACTIVE(state_offset + 4, 0);
1578
+ if (clen > 0)
1579
+ {
1580
+ BOOL OK;
1581
+ const pcre_uint32 *cp;
1582
+ const ucd_record * prop = GET_UCD(c);
1583
+ switch(code[2])
1584
+ {
1585
+ case PT_ANY:
1586
+ OK = TRUE;
1587
+ break;
1588
+
1589
+ case PT_LAMP:
1590
+ OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1591
+ prop->chartype == ucp_Lt;
1592
+ break;
1593
+
1594
+ case PT_GC:
1595
+ OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1596
+ break;
1597
+
1598
+ case PT_PC:
1599
+ OK = prop->chartype == code[3];
1600
+ break;
1601
+
1602
+ case PT_SC:
1603
+ OK = prop->script == code[3];
1604
+ break;
1605
+
1606
+ /* These are specials for combination cases. */
1607
+
1608
+ case PT_ALNUM:
1609
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1610
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1611
+ break;
1612
+
1613
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1614
+ which means that Perl space and POSIX space are now identical. PCRE
1615
+ was changed at release 8.34. */
1616
+
1617
+ case PT_SPACE: /* Perl space */
1618
+ case PT_PXSPACE: /* POSIX space */
1619
+ switch(c)
1620
+ {
1621
+ HSPACE_CASES:
1622
+ VSPACE_CASES:
1623
+ OK = TRUE;
1624
+ break;
1625
+
1626
+ default:
1627
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1628
+ break;
1629
+ }
1630
+ break;
1631
+
1632
+ case PT_WORD:
1633
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1634
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1635
+ c == CHAR_UNDERSCORE;
1636
+ break;
1637
+
1638
+ case PT_CLIST:
1639
+ cp = PRIV(ucd_caseless_sets) + code[3];
1640
+ for (;;)
1641
+ {
1642
+ if (c < *cp) { OK = FALSE; break; }
1643
+ if (c == *cp++) { OK = TRUE; break; }
1644
+ }
1645
+ break;
1646
+
1647
+ case PT_UCNC:
1648
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1649
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1650
+ c >= 0xe000;
1651
+ break;
1652
+
1653
+ /* Should never occur, but keep compilers from grumbling. */
1654
+
1655
+ default:
1656
+ OK = codevalue != OP_PROP;
1657
+ break;
1658
+ }
1659
+
1660
+ if (OK == (d == OP_PROP))
1661
+ {
1662
+ if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1663
+ codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1664
+ {
1665
+ active_count--; /* Remove non-match possibility */
1666
+ next_active_state--;
1667
+ }
1668
+ ADD_NEW(state_offset + count, 0);
1669
+ }
1670
+ }
1671
+ break;
1672
+
1673
+ /*-----------------------------------------------------------------*/
1674
+ case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1675
+ case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1676
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1677
+ count = 2;
1678
+ goto QS2;
1679
+
1680
+ case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1681
+ case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1682
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1683
+ count = 0;
1684
+
1685
+ QS2:
1686
+
1687
+ ADD_ACTIVE(state_offset + 2, 0);
1688
+ if (clen > 0)
1689
+ {
1690
+ int lgb, rgb;
1691
+ const pcre_uchar *nptr = ptr + clen;
1692
+ int ncount = 0;
1693
+ if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1694
+ codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1695
+ {
1696
+ active_count--; /* Remove non-match possibility */
1697
+ next_active_state--;
1698
+ }
1699
+ lgb = UCD_GRAPHBREAK(c);
1700
+ while (nptr < end_subject)
1701
+ {
1702
+ dlen = 1;
1703
+ if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1704
+ rgb = UCD_GRAPHBREAK(d);
1705
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1706
+ ncount++;
1707
+ lgb = rgb;
1708
+ nptr += dlen;
1709
+ }
1710
+ ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1711
+ }
1712
+ break;
1713
+ #endif
1714
+
1715
+ /*-----------------------------------------------------------------*/
1716
+ case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1717
+ case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1718
+ case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1719
+ count = 2;
1720
+ goto QS3;
1721
+
1722
+ case OP_ANYNL_EXTRA + OP_TYPESTAR:
1723
+ case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1724
+ case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1725
+ count = 0;
1726
+
1727
+ QS3:
1728
+ ADD_ACTIVE(state_offset + 2, 0);
1729
+ if (clen > 0)
1730
+ {
1731
+ int ncount = 0;
1732
+ switch (c)
1733
+ {
1734
+ case CHAR_VT:
1735
+ case CHAR_FF:
1736
+ case CHAR_NEL:
1737
+ #ifndef EBCDIC
1738
+ case 0x2028:
1739
+ case 0x2029:
1740
+ #endif /* Not EBCDIC */
1741
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1742
+ goto ANYNL02;
1743
+
1744
+ case CHAR_CR:
1745
+ if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1746
+ /* Fall through */
1747
+
1748
+ ANYNL02:
1749
+ case CHAR_LF:
1750
+ if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1751
+ codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1752
+ {
1753
+ active_count--; /* Remove non-match possibility */
1754
+ next_active_state--;
1755
+ }
1756
+ ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1757
+ break;
1758
+
1759
+ default:
1760
+ break;
1761
+ }
1762
+ }
1763
+ break;
1764
+
1765
+ /*-----------------------------------------------------------------*/
1766
+ case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1767
+ case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1768
+ case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1769
+ count = 2;
1770
+ goto QS4;
1771
+
1772
+ case OP_VSPACE_EXTRA + OP_TYPESTAR:
1773
+ case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1774
+ case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1775
+ count = 0;
1776
+
1777
+ QS4:
1778
+ ADD_ACTIVE(state_offset + 2, 0);
1779
+ if (clen > 0)
1780
+ {
1781
+ BOOL OK;
1782
+ switch (c)
1783
+ {
1784
+ VSPACE_CASES:
1785
+ OK = TRUE;
1786
+ break;
1787
+
1788
+ default:
1789
+ OK = FALSE;
1790
+ break;
1791
+ }
1792
+ if (OK == (d == OP_VSPACE))
1793
+ {
1794
+ if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1795
+ codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1796
+ {
1797
+ active_count--; /* Remove non-match possibility */
1798
+ next_active_state--;
1799
+ }
1800
+ ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1801
+ }
1802
+ }
1803
+ break;
1804
+
1805
+ /*-----------------------------------------------------------------*/
1806
+ case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1807
+ case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1808
+ case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1809
+ count = 2;
1810
+ goto QS5;
1811
+
1812
+ case OP_HSPACE_EXTRA + OP_TYPESTAR:
1813
+ case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1814
+ case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1815
+ count = 0;
1816
+
1817
+ QS5:
1818
+ ADD_ACTIVE(state_offset + 2, 0);
1819
+ if (clen > 0)
1820
+ {
1821
+ BOOL OK;
1822
+ switch (c)
1823
+ {
1824
+ HSPACE_CASES:
1825
+ OK = TRUE;
1826
+ break;
1827
+
1828
+ default:
1829
+ OK = FALSE;
1830
+ break;
1831
+ }
1832
+
1833
+ if (OK == (d == OP_HSPACE))
1834
+ {
1835
+ if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1836
+ codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1837
+ {
1838
+ active_count--; /* Remove non-match possibility */
1839
+ next_active_state--;
1840
+ }
1841
+ ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1842
+ }
1843
+ }
1844
+ break;
1845
+
1846
+ /*-----------------------------------------------------------------*/
1847
+ #ifdef SUPPORT_UCP
1848
+ case OP_PROP_EXTRA + OP_TYPEEXACT:
1849
+ case OP_PROP_EXTRA + OP_TYPEUPTO:
1850
+ case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1851
+ case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1852
+ if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1853
+ { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1854
+ count = current_state->count; /* Number already matched */
1855
+ if (clen > 0)
1856
+ {
1857
+ BOOL OK;
1858
+ const pcre_uint32 *cp;
1859
+ const ucd_record * prop = GET_UCD(c);
1860
+ switch(code[1 + IMM2_SIZE + 1])
1861
+ {
1862
+ case PT_ANY:
1863
+ OK = TRUE;
1864
+ break;
1865
+
1866
+ case PT_LAMP:
1867
+ OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1868
+ prop->chartype == ucp_Lt;
1869
+ break;
1870
+
1871
+ case PT_GC:
1872
+ OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1873
+ break;
1874
+
1875
+ case PT_PC:
1876
+ OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1877
+ break;
1878
+
1879
+ case PT_SC:
1880
+ OK = prop->script == code[1 + IMM2_SIZE + 2];
1881
+ break;
1882
+
1883
+ /* These are specials for combination cases. */
1884
+
1885
+ case PT_ALNUM:
1886
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1887
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1888
+ break;
1889
+
1890
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1891
+ which means that Perl space and POSIX space are now identical. PCRE
1892
+ was changed at release 8.34. */
1893
+
1894
+ case PT_SPACE: /* Perl space */
1895
+ case PT_PXSPACE: /* POSIX space */
1896
+ switch(c)
1897
+ {
1898
+ HSPACE_CASES:
1899
+ VSPACE_CASES:
1900
+ OK = TRUE;
1901
+ break;
1902
+
1903
+ default:
1904
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1905
+ break;
1906
+ }
1907
+ break;
1908
+
1909
+ case PT_WORD:
1910
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1911
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1912
+ c == CHAR_UNDERSCORE;
1913
+ break;
1914
+
1915
+ case PT_CLIST:
1916
+ cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1917
+ for (;;)
1918
+ {
1919
+ if (c < *cp) { OK = FALSE; break; }
1920
+ if (c == *cp++) { OK = TRUE; break; }
1921
+ }
1922
+ break;
1923
+
1924
+ case PT_UCNC:
1925
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1926
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1927
+ c >= 0xe000;
1928
+ break;
1929
+
1930
+ /* Should never occur, but keep compilers from grumbling. */
1931
+
1932
+ default:
1933
+ OK = codevalue != OP_PROP;
1934
+ break;
1935
+ }
1936
+
1937
+ if (OK == (d == OP_PROP))
1938
+ {
1939
+ if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1940
+ {
1941
+ active_count--; /* Remove non-match possibility */
1942
+ next_active_state--;
1943
+ }
1944
+ if (++count >= (int)GET2(code, 1))
1945
+ { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1946
+ else
1947
+ { ADD_NEW(state_offset, count); }
1948
+ }
1949
+ }
1950
+ break;
1951
+
1952
+ /*-----------------------------------------------------------------*/
1953
+ case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1954
+ case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1955
+ case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1956
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1957
+ if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1958
+ { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1959
+ count = current_state->count; /* Number already matched */
1960
+ if (clen > 0)
1961
+ {
1962
+ int lgb, rgb;
1963
+ const pcre_uchar *nptr = ptr + clen;
1964
+ int ncount = 0;
1965
+ if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1966
+ {
1967
+ active_count--; /* Remove non-match possibility */
1968
+ next_active_state--;
1969
+ }
1970
+ lgb = UCD_GRAPHBREAK(c);
1971
+ while (nptr < end_subject)
1972
+ {
1973
+ dlen = 1;
1974
+ if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1975
+ rgb = UCD_GRAPHBREAK(d);
1976
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1977
+ ncount++;
1978
+ lgb = rgb;
1979
+ nptr += dlen;
1980
+ }
1981
+ if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1982
+ reset_could_continue = TRUE;
1983
+ if (++count >= (int)GET2(code, 1))
1984
+ { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1985
+ else
1986
+ { ADD_NEW_DATA(-state_offset, count, ncount); }
1987
+ }
1988
+ break;
1989
+ #endif
1990
+
1991
+ /*-----------------------------------------------------------------*/
1992
+ case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1993
+ case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1994
+ case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1995
+ case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1996
+ if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1997
+ { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1998
+ count = current_state->count; /* Number already matched */
1999
+ if (clen > 0)
2000
+ {
2001
+ int ncount = 0;
2002
+ switch (c)
2003
+ {
2004
+ case CHAR_VT:
2005
+ case CHAR_FF:
2006
+ case CHAR_NEL:
2007
+ #ifndef EBCDIC
2008
+ case 0x2028:
2009
+ case 0x2029:
2010
+ #endif /* Not EBCDIC */
2011
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2012
+ goto ANYNL03;
2013
+
2014
+ case CHAR_CR:
2015
+ if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
2016
+ /* Fall through */
2017
+
2018
+ ANYNL03:
2019
+ case CHAR_LF:
2020
+ if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2021
+ {
2022
+ active_count--; /* Remove non-match possibility */
2023
+ next_active_state--;
2024
+ }
2025
+ if (++count >= (int)GET2(code, 1))
2026
+ { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2027
+ else
2028
+ { ADD_NEW_DATA(-state_offset, count, ncount); }
2029
+ break;
2030
+
2031
+ default:
2032
+ break;
2033
+ }
2034
+ }
2035
+ break;
2036
+
2037
+ /*-----------------------------------------------------------------*/
2038
+ case OP_VSPACE_EXTRA + OP_TYPEEXACT:
2039
+ case OP_VSPACE_EXTRA + OP_TYPEUPTO:
2040
+ case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2041
+ case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2042
+ if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2043
+ { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2044
+ count = current_state->count; /* Number already matched */
2045
+ if (clen > 0)
2046
+ {
2047
+ BOOL OK;
2048
+ switch (c)
2049
+ {
2050
+ VSPACE_CASES:
2051
+ OK = TRUE;
2052
+ break;
2053
+
2054
+ default:
2055
+ OK = FALSE;
2056
+ }
2057
+
2058
+ if (OK == (d == OP_VSPACE))
2059
+ {
2060
+ if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
2061
+ {
2062
+ active_count--; /* Remove non-match possibility */
2063
+ next_active_state--;
2064
+ }
2065
+ if (++count >= (int)GET2(code, 1))
2066
+ { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2067
+ else
2068
+ { ADD_NEW_DATA(-state_offset, count, 0); }
2069
+ }
2070
+ }
2071
+ break;
2072
+
2073
+ /*-----------------------------------------------------------------*/
2074
+ case OP_HSPACE_EXTRA + OP_TYPEEXACT:
2075
+ case OP_HSPACE_EXTRA + OP_TYPEUPTO:
2076
+ case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2077
+ case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2078
+ if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2079
+ { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2080
+ count = current_state->count; /* Number already matched */
2081
+ if (clen > 0)
2082
+ {
2083
+ BOOL OK;
2084
+ switch (c)
2085
+ {
2086
+ HSPACE_CASES:
2087
+ OK = TRUE;
2088
+ break;
2089
+
2090
+ default:
2091
+ OK = FALSE;
2092
+ break;
2093
+ }
2094
+
2095
+ if (OK == (d == OP_HSPACE))
2096
+ {
2097
+ if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2098
+ {
2099
+ active_count--; /* Remove non-match possibility */
2100
+ next_active_state--;
2101
+ }
2102
+ if (++count >= (int)GET2(code, 1))
2103
+ { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2104
+ else
2105
+ { ADD_NEW_DATA(-state_offset, count, 0); }
2106
+ }
2107
+ }
2108
+ break;
2109
+
2110
+ /* ========================================================================== */
2111
+ /* These opcodes are followed by a character that is usually compared
2112
+ to the current subject character; it is loaded into d. We still get
2113
+ here even if there is no subject character, because in some cases zero
2114
+ repetitions are permitted. */
2115
+
2116
+ /*-----------------------------------------------------------------*/
2117
+ case OP_CHAR:
2118
+ if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
2119
+ break;
2120
+
2121
+ /*-----------------------------------------------------------------*/
2122
+ case OP_CHARI:
2123
+ if (clen == 0) break;
2124
+
2125
+ #ifdef SUPPORT_UTF
2126
+ if (utf)
2127
+ {
2128
+ if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2129
+ {
2130
+ unsigned int othercase;
2131
+ if (c < 128)
2132
+ othercase = fcc[c];
2133
+ else
2134
+ /* If we have Unicode property support, we can use it to test the
2135
+ other case of the character. */
2136
+ #ifdef SUPPORT_UCP
2137
+ othercase = UCD_OTHERCASE(c);
2138
+ #else
2139
+ othercase = NOTACHAR;
2140
+ #endif
2141
+
2142
+ if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2143
+ }
2144
+ }
2145
+ else
2146
+ #endif /* SUPPORT_UTF */
2147
+ /* Not UTF mode */
2148
+ {
2149
+ if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2150
+ { ADD_NEW(state_offset + 2, 0); }
2151
+ }
2152
+ break;
2153
+
2154
+
2155
+ #ifdef SUPPORT_UCP
2156
+ /*-----------------------------------------------------------------*/
2157
+ /* This is a tricky one because it can match more than one character.
2158
+ Find out how many characters to skip, and then set up a negative state
2159
+ to wait for them to pass before continuing. */
2160
+
2161
+ case OP_EXTUNI:
2162
+ if (clen > 0)
2163
+ {
2164
+ int lgb, rgb;
2165
+ const pcre_uchar *nptr = ptr + clen;
2166
+ int ncount = 0;
2167
+ lgb = UCD_GRAPHBREAK(c);
2168
+ while (nptr < end_subject)
2169
+ {
2170
+ dlen = 1;
2171
+ if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2172
+ rgb = UCD_GRAPHBREAK(d);
2173
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2174
+ ncount++;
2175
+ lgb = rgb;
2176
+ nptr += dlen;
2177
+ }
2178
+ if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2179
+ reset_could_continue = TRUE;
2180
+ ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2181
+ }
2182
+ break;
2183
+ #endif
2184
+
2185
+ /*-----------------------------------------------------------------*/
2186
+ /* This is tricky like EXTUNI because it too can match more than one
2187
+ character (when CR is followed by LF). In this case, set up a negative
2188
+ state to wait for one character to pass before continuing. */
2189
+
2190
+ case OP_ANYNL:
2191
+ if (clen > 0) switch(c)
2192
+ {
2193
+ case CHAR_VT:
2194
+ case CHAR_FF:
2195
+ case CHAR_NEL:
2196
+ #ifndef EBCDIC
2197
+ case 0x2028:
2198
+ case 0x2029:
2199
+ #endif /* Not EBCDIC */
2200
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2201
+
2202
+ case CHAR_LF:
2203
+ ADD_NEW(state_offset + 1, 0);
2204
+ break;
2205
+
2206
+ case CHAR_CR:
2207
+ if (ptr + 1 >= end_subject)
2208
+ {
2209
+ ADD_NEW(state_offset + 1, 0);
2210
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2211
+ reset_could_continue = TRUE;
2212
+ }
2213
+ else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
2214
+ {
2215
+ ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2216
+ }
2217
+ else
2218
+ {
2219
+ ADD_NEW(state_offset + 1, 0);
2220
+ }
2221
+ break;
2222
+ }
2223
+ break;
2224
+
2225
+ /*-----------------------------------------------------------------*/
2226
+ case OP_NOT_VSPACE:
2227
+ if (clen > 0) switch(c)
2228
+ {
2229
+ VSPACE_CASES:
2230
+ break;
2231
+
2232
+ default:
2233
+ ADD_NEW(state_offset + 1, 0);
2234
+ break;
2235
+ }
2236
+ break;
2237
+
2238
+ /*-----------------------------------------------------------------*/
2239
+ case OP_VSPACE:
2240
+ if (clen > 0) switch(c)
2241
+ {
2242
+ VSPACE_CASES:
2243
+ ADD_NEW(state_offset + 1, 0);
2244
+ break;
2245
+
2246
+ default:
2247
+ break;
2248
+ }
2249
+ break;
2250
+
2251
+ /*-----------------------------------------------------------------*/
2252
+ case OP_NOT_HSPACE:
2253
+ if (clen > 0) switch(c)
2254
+ {
2255
+ HSPACE_CASES:
2256
+ break;
2257
+
2258
+ default:
2259
+ ADD_NEW(state_offset + 1, 0);
2260
+ break;
2261
+ }
2262
+ break;
2263
+
2264
+ /*-----------------------------------------------------------------*/
2265
+ case OP_HSPACE:
2266
+ if (clen > 0) switch(c)
2267
+ {
2268
+ HSPACE_CASES:
2269
+ ADD_NEW(state_offset + 1, 0);
2270
+ break;
2271
+
2272
+ default:
2273
+ break;
2274
+ }
2275
+ break;
2276
+
2277
+ /*-----------------------------------------------------------------*/
2278
+ /* Match a negated single character casefully. */
2279
+
2280
+ case OP_NOT:
2281
+ if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2282
+ break;
2283
+
2284
+ /*-----------------------------------------------------------------*/
2285
+ /* Match a negated single character caselessly. */
2286
+
2287
+ case OP_NOTI:
2288
+ if (clen > 0)
2289
+ {
2290
+ pcre_uint32 otherd;
2291
+ #ifdef SUPPORT_UTF
2292
+ if (utf && d >= 128)
2293
+ {
2294
+ #ifdef SUPPORT_UCP
2295
+ otherd = UCD_OTHERCASE(d);
2296
+ #else
2297
+ otherd = d;
2298
+ #endif /* SUPPORT_UCP */
2299
+ }
2300
+ else
2301
+ #endif /* SUPPORT_UTF */
2302
+ otherd = TABLE_GET(d, fcc, d);
2303
+ if (c != d && c != otherd)
2304
+ { ADD_NEW(state_offset + dlen + 1, 0); }
2305
+ }
2306
+ break;
2307
+
2308
+ /*-----------------------------------------------------------------*/
2309
+ case OP_PLUSI:
2310
+ case OP_MINPLUSI:
2311
+ case OP_POSPLUSI:
2312
+ case OP_NOTPLUSI:
2313
+ case OP_NOTMINPLUSI:
2314
+ case OP_NOTPOSPLUSI:
2315
+ caseless = TRUE;
2316
+ codevalue -= OP_STARI - OP_STAR;
2317
+
2318
+ /* Fall through */
2319
+ case OP_PLUS:
2320
+ case OP_MINPLUS:
2321
+ case OP_POSPLUS:
2322
+ case OP_NOTPLUS:
2323
+ case OP_NOTMINPLUS:
2324
+ case OP_NOTPOSPLUS:
2325
+ count = current_state->count; /* Already matched */
2326
+ if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2327
+ if (clen > 0)
2328
+ {
2329
+ pcre_uint32 otherd = NOTACHAR;
2330
+ if (caseless)
2331
+ {
2332
+ #ifdef SUPPORT_UTF
2333
+ if (utf && d >= 128)
2334
+ {
2335
+ #ifdef SUPPORT_UCP
2336
+ otherd = UCD_OTHERCASE(d);
2337
+ #endif /* SUPPORT_UCP */
2338
+ }
2339
+ else
2340
+ #endif /* SUPPORT_UTF */
2341
+ otherd = TABLE_GET(d, fcc, d);
2342
+ }
2343
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2344
+ {
2345
+ if (count > 0 &&
2346
+ (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
2347
+ {
2348
+ active_count--; /* Remove non-match possibility */
2349
+ next_active_state--;
2350
+ }
2351
+ count++;
2352
+ ADD_NEW(state_offset, count);
2353
+ }
2354
+ }
2355
+ break;
2356
+
2357
+ /*-----------------------------------------------------------------*/
2358
+ case OP_QUERYI:
2359
+ case OP_MINQUERYI:
2360
+ case OP_POSQUERYI:
2361
+ case OP_NOTQUERYI:
2362
+ case OP_NOTMINQUERYI:
2363
+ case OP_NOTPOSQUERYI:
2364
+ caseless = TRUE;
2365
+ codevalue -= OP_STARI - OP_STAR;
2366
+ /* Fall through */
2367
+ case OP_QUERY:
2368
+ case OP_MINQUERY:
2369
+ case OP_POSQUERY:
2370
+ case OP_NOTQUERY:
2371
+ case OP_NOTMINQUERY:
2372
+ case OP_NOTPOSQUERY:
2373
+ ADD_ACTIVE(state_offset + dlen + 1, 0);
2374
+ if (clen > 0)
2375
+ {
2376
+ pcre_uint32 otherd = NOTACHAR;
2377
+ if (caseless)
2378
+ {
2379
+ #ifdef SUPPORT_UTF
2380
+ if (utf && d >= 128)
2381
+ {
2382
+ #ifdef SUPPORT_UCP
2383
+ otherd = UCD_OTHERCASE(d);
2384
+ #endif /* SUPPORT_UCP */
2385
+ }
2386
+ else
2387
+ #endif /* SUPPORT_UTF */
2388
+ otherd = TABLE_GET(d, fcc, d);
2389
+ }
2390
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2391
+ {
2392
+ if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2393
+ {
2394
+ active_count--; /* Remove non-match possibility */
2395
+ next_active_state--;
2396
+ }
2397
+ ADD_NEW(state_offset + dlen + 1, 0);
2398
+ }
2399
+ }
2400
+ break;
2401
+
2402
+ /*-----------------------------------------------------------------*/
2403
+ case OP_STARI:
2404
+ case OP_MINSTARI:
2405
+ case OP_POSSTARI:
2406
+ case OP_NOTSTARI:
2407
+ case OP_NOTMINSTARI:
2408
+ case OP_NOTPOSSTARI:
2409
+ caseless = TRUE;
2410
+ codevalue -= OP_STARI - OP_STAR;
2411
+ /* Fall through */
2412
+ case OP_STAR:
2413
+ case OP_MINSTAR:
2414
+ case OP_POSSTAR:
2415
+ case OP_NOTSTAR:
2416
+ case OP_NOTMINSTAR:
2417
+ case OP_NOTPOSSTAR:
2418
+ ADD_ACTIVE(state_offset + dlen + 1, 0);
2419
+ if (clen > 0)
2420
+ {
2421
+ pcre_uint32 otherd = NOTACHAR;
2422
+ if (caseless)
2423
+ {
2424
+ #ifdef SUPPORT_UTF
2425
+ if (utf && d >= 128)
2426
+ {
2427
+ #ifdef SUPPORT_UCP
2428
+ otherd = UCD_OTHERCASE(d);
2429
+ #endif /* SUPPORT_UCP */
2430
+ }
2431
+ else
2432
+ #endif /* SUPPORT_UTF */
2433
+ otherd = TABLE_GET(d, fcc, d);
2434
+ }
2435
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2436
+ {
2437
+ if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2438
+ {
2439
+ active_count--; /* Remove non-match possibility */
2440
+ next_active_state--;
2441
+ }
2442
+ ADD_NEW(state_offset, 0);
2443
+ }
2444
+ }
2445
+ break;
2446
+
2447
+ /*-----------------------------------------------------------------*/
2448
+ case OP_EXACTI:
2449
+ case OP_NOTEXACTI:
2450
+ caseless = TRUE;
2451
+ codevalue -= OP_STARI - OP_STAR;
2452
+ /* Fall through */
2453
+ case OP_EXACT:
2454
+ case OP_NOTEXACT:
2455
+ count = current_state->count; /* Number already matched */
2456
+ if (clen > 0)
2457
+ {
2458
+ pcre_uint32 otherd = NOTACHAR;
2459
+ if (caseless)
2460
+ {
2461
+ #ifdef SUPPORT_UTF
2462
+ if (utf && d >= 128)
2463
+ {
2464
+ #ifdef SUPPORT_UCP
2465
+ otherd = UCD_OTHERCASE(d);
2466
+ #endif /* SUPPORT_UCP */
2467
+ }
2468
+ else
2469
+ #endif /* SUPPORT_UTF */
2470
+ otherd = TABLE_GET(d, fcc, d);
2471
+ }
2472
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2473
+ {
2474
+ if (++count >= (int)GET2(code, 1))
2475
+ { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2476
+ else
2477
+ { ADD_NEW(state_offset, count); }
2478
+ }
2479
+ }
2480
+ break;
2481
+
2482
+ /*-----------------------------------------------------------------*/
2483
+ case OP_UPTOI:
2484
+ case OP_MINUPTOI:
2485
+ case OP_POSUPTOI:
2486
+ case OP_NOTUPTOI:
2487
+ case OP_NOTMINUPTOI:
2488
+ case OP_NOTPOSUPTOI:
2489
+ caseless = TRUE;
2490
+ codevalue -= OP_STARI - OP_STAR;
2491
+ /* Fall through */
2492
+ case OP_UPTO:
2493
+ case OP_MINUPTO:
2494
+ case OP_POSUPTO:
2495
+ case OP_NOTUPTO:
2496
+ case OP_NOTMINUPTO:
2497
+ case OP_NOTPOSUPTO:
2498
+ ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2499
+ count = current_state->count; /* Number already matched */
2500
+ if (clen > 0)
2501
+ {
2502
+ pcre_uint32 otherd = NOTACHAR;
2503
+ if (caseless)
2504
+ {
2505
+ #ifdef SUPPORT_UTF
2506
+ if (utf && d >= 128)
2507
+ {
2508
+ #ifdef SUPPORT_UCP
2509
+ otherd = UCD_OTHERCASE(d);
2510
+ #endif /* SUPPORT_UCP */
2511
+ }
2512
+ else
2513
+ #endif /* SUPPORT_UTF */
2514
+ otherd = TABLE_GET(d, fcc, d);
2515
+ }
2516
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2517
+ {
2518
+ if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2519
+ {
2520
+ active_count--; /* Remove non-match possibility */
2521
+ next_active_state--;
2522
+ }
2523
+ if (++count >= (int)GET2(code, 1))
2524
+ { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2525
+ else
2526
+ { ADD_NEW(state_offset, count); }
2527
+ }
2528
+ }
2529
+ break;
2530
+
2531
+
2532
+ /* ========================================================================== */
2533
+ /* These are the class-handling opcodes */
2534
+
2535
+ case OP_CLASS:
2536
+ case OP_NCLASS:
2537
+ case OP_XCLASS:
2538
+ {
2539
+ BOOL isinclass = FALSE;
2540
+ int next_state_offset;
2541
+ const pcre_uchar *ecode;
2542
+
2543
+ /* For a simple class, there is always just a 32-byte table, and we
2544
+ can set isinclass from it. */
2545
+
2546
+ if (codevalue != OP_XCLASS)
2547
+ {
2548
+ ecode = code + 1 + (32 / sizeof(pcre_uchar));
2549
+ if (clen > 0)
2550
+ {
2551
+ isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2552
+ ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2553
+ }
2554
+ }
2555
+
2556
+ /* An extended class may have a table or a list of single characters,
2557
+ ranges, or both, and it may be positive or negative. There's a
2558
+ function that sorts all this out. */
2559
+
2560
+ else
2561
+ {
2562
+ ecode = code + GET(code, 1);
2563
+ if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2564
+ }
2565
+
2566
+ /* At this point, isinclass is set for all kinds of class, and ecode
2567
+ points to the byte after the end of the class. If there is a
2568
+ quantifier, this is where it will be. */
2569
+
2570
+ next_state_offset = (int)(ecode - start_code);
2571
+
2572
+ switch (*ecode)
2573
+ {
2574
+ case OP_CRSTAR:
2575
+ case OP_CRMINSTAR:
2576
+ case OP_CRPOSSTAR:
2577
+ ADD_ACTIVE(next_state_offset + 1, 0);
2578
+ if (isinclass)
2579
+ {
2580
+ if (*ecode == OP_CRPOSSTAR)
2581
+ {
2582
+ active_count--; /* Remove non-match possibility */
2583
+ next_active_state--;
2584
+ }
2585
+ ADD_NEW(state_offset, 0);
2586
+ }
2587
+ break;
2588
+
2589
+ case OP_CRPLUS:
2590
+ case OP_CRMINPLUS:
2591
+ case OP_CRPOSPLUS:
2592
+ count = current_state->count; /* Already matched */
2593
+ if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2594
+ if (isinclass)
2595
+ {
2596
+ if (count > 0 && *ecode == OP_CRPOSPLUS)
2597
+ {
2598
+ active_count--; /* Remove non-match possibility */
2599
+ next_active_state--;
2600
+ }
2601
+ count++;
2602
+ ADD_NEW(state_offset, count);
2603
+ }
2604
+ break;
2605
+
2606
+ case OP_CRQUERY:
2607
+ case OP_CRMINQUERY:
2608
+ case OP_CRPOSQUERY:
2609
+ ADD_ACTIVE(next_state_offset + 1, 0);
2610
+ if (isinclass)
2611
+ {
2612
+ if (*ecode == OP_CRPOSQUERY)
2613
+ {
2614
+ active_count--; /* Remove non-match possibility */
2615
+ next_active_state--;
2616
+ }
2617
+ ADD_NEW(next_state_offset + 1, 0);
2618
+ }
2619
+ break;
2620
+
2621
+ case OP_CRRANGE:
2622
+ case OP_CRMINRANGE:
2623
+ case OP_CRPOSRANGE:
2624
+ count = current_state->count; /* Already matched */
2625
+ if (count >= (int)GET2(ecode, 1))
2626
+ { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2627
+ if (isinclass)
2628
+ {
2629
+ int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2630
+ if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
2631
+ {
2632
+ active_count--; /* Remove non-match possibility */
2633
+ next_active_state--;
2634
+ }
2635
+ if (++count >= max && max != 0) /* Max 0 => no limit */
2636
+ { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2637
+ else
2638
+ { ADD_NEW(state_offset, count); }
2639
+ }
2640
+ break;
2641
+
2642
+ default:
2643
+ if (isinclass) { ADD_NEW(next_state_offset, 0); }
2644
+ break;
2645
+ }
2646
+ }
2647
+ break;
2648
+
2649
+ /* ========================================================================== */
2650
+ /* These are the opcodes for fancy brackets of various kinds. We have
2651
+ to use recursion in order to handle them. The "always failing" assertion
2652
+ (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2653
+ though the other "backtracking verbs" are not supported. */
2654
+
2655
+ case OP_FAIL:
2656
+ forced_fail++; /* Count FAILs for multiple states */
2657
+ break;
2658
+
2659
+ case OP_ASSERT:
2660
+ case OP_ASSERT_NOT:
2661
+ case OP_ASSERTBACK:
2662
+ case OP_ASSERTBACK_NOT:
2663
+ {
2664
+ int rc;
2665
+ int local_offsets[2];
2666
+ int local_workspace[1000];
2667
+ const pcre_uchar *endasscode = code + GET(code, 1);
2668
+
2669
+ while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2670
+
2671
+ rc = internal_dfa_exec(
2672
+ md, /* static match data */
2673
+ code, /* this subexpression's code */
2674
+ ptr, /* where we currently are */
2675
+ (int)(ptr - start_subject), /* start offset */
2676
+ local_offsets, /* offset vector */
2677
+ sizeof(local_offsets)/sizeof(int), /* size of same */
2678
+ local_workspace, /* workspace vector */
2679
+ sizeof(local_workspace)/sizeof(int), /* size of same */
2680
+ rlevel); /* function recursion level */
2681
+
2682
+ if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2683
+ if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2684
+ { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2685
+ }
2686
+ break;
2687
+
2688
+ /*-----------------------------------------------------------------*/
2689
+ case OP_COND:
2690
+ case OP_SCOND:
2691
+ {
2692
+ int local_offsets[1000];
2693
+ int local_workspace[1000];
2694
+ int codelink = GET(code, 1);
2695
+ int condcode;
2696
+
2697
+ /* Because of the way auto-callout works during compile, a callout item
2698
+ is inserted between OP_COND and an assertion condition. This does not
2699
+ happen for the other conditions. */
2700
+
2701
+ if (code[LINK_SIZE+1] == OP_CALLOUT)
2702
+ {
2703
+ rrc = 0;
2704
+ if (PUBL(callout) != NULL)
2705
+ {
2706
+ PUBL(callout_block) cb;
2707
+ cb.version = 1; /* Version 1 of the callout block */
2708
+ cb.callout_number = code[LINK_SIZE+2];
2709
+ cb.offset_vector = offsets;
2710
+ #if defined COMPILE_PCRE8
2711
+ cb.subject = (PCRE_SPTR)start_subject;
2712
+ #elif defined COMPILE_PCRE16
2713
+ cb.subject = (PCRE_SPTR16)start_subject;
2714
+ #elif defined COMPILE_PCRE32
2715
+ cb.subject = (PCRE_SPTR32)start_subject;
2716
+ #endif
2717
+ cb.subject_length = (int)(end_subject - start_subject);
2718
+ cb.start_match = (int)(current_subject - start_subject);
2719
+ cb.current_position = (int)(ptr - start_subject);
2720
+ cb.pattern_position = GET(code, LINK_SIZE + 3);
2721
+ cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2722
+ cb.capture_top = 1;
2723
+ cb.capture_last = -1;
2724
+ cb.callout_data = md->callout_data;
2725
+ cb.mark = NULL; /* No (*MARK) support */
2726
+ if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
2727
+ }
2728
+ if (rrc > 0) break; /* Fail this thread */
2729
+ code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
2730
+ }
2731
+
2732
+ condcode = code[LINK_SIZE+1];
2733
+
2734
+ /* Back reference conditions and duplicate named recursion conditions
2735
+ are not supported */
2736
+
2737
+ if (condcode == OP_CREF || condcode == OP_DNCREF ||
2738
+ condcode == OP_DNRREF)
2739
+ return PCRE_ERROR_DFA_UCOND;
2740
+
2741
+ /* The DEFINE condition is always false, and the assertion (?!) is
2742
+ converted to OP_FAIL. */
2743
+
2744
+ if (condcode == OP_DEF || condcode == OP_FAIL)
2745
+ { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2746
+
2747
+ /* The only supported version of OP_RREF is for the value RREF_ANY,
2748
+ which means "test if in any recursion". We can't test for specifically
2749
+ recursed groups. */
2750
+
2751
+ else if (condcode == OP_RREF)
2752
+ {
2753
+ int value = GET2(code, LINK_SIZE + 2);
2754
+ if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2755
+ if (md->recursive != NULL)
2756
+ { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2757
+ else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2758
+ }
2759
+
2760
+ /* Otherwise, the condition is an assertion */
2761
+
2762
+ else
2763
+ {
2764
+ int rc;
2765
+ const pcre_uchar *asscode = code + LINK_SIZE + 1;
2766
+ const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2767
+
2768
+ while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2769
+
2770
+ rc = internal_dfa_exec(
2771
+ md, /* fixed match data */
2772
+ asscode, /* this subexpression's code */
2773
+ ptr, /* where we currently are */
2774
+ (int)(ptr - start_subject), /* start offset */
2775
+ local_offsets, /* offset vector */
2776
+ sizeof(local_offsets)/sizeof(int), /* size of same */
2777
+ local_workspace, /* workspace vector */
2778
+ sizeof(local_workspace)/sizeof(int), /* size of same */
2779
+ rlevel); /* function recursion level */
2780
+
2781
+ if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2782
+ if ((rc >= 0) ==
2783
+ (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2784
+ { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2785
+ else
2786
+ { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2787
+ }
2788
+ }
2789
+ break;
2790
+
2791
+ /*-----------------------------------------------------------------*/
2792
+ case OP_RECURSE:
2793
+ {
2794
+ dfa_recursion_info *ri;
2795
+ int local_offsets[1000];
2796
+ int local_workspace[1000];
2797
+ const pcre_uchar *callpat = start_code + GET(code, 1);
2798
+ int recno = (callpat == md->start_code)? 0 :
2799
+ GET2(callpat, 1 + LINK_SIZE);
2800
+ int rc;
2801
+
2802
+ DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2803
+
2804
+ /* Check for repeating a recursion without advancing the subject
2805
+ pointer. This should catch convoluted mutual recursions. (Some simple
2806
+ cases are caught at compile time.) */
2807
+
2808
+ for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2809
+ if (recno == ri->group_num && ptr == ri->subject_position)
2810
+ return PCRE_ERROR_RECURSELOOP;
2811
+
2812
+ /* Remember this recursion and where we started it so as to
2813
+ catch infinite loops. */
2814
+
2815
+ new_recursive.group_num = recno;
2816
+ new_recursive.subject_position = ptr;
2817
+ new_recursive.prevrec = md->recursive;
2818
+ md->recursive = &new_recursive;
2819
+
2820
+ rc = internal_dfa_exec(
2821
+ md, /* fixed match data */
2822
+ callpat, /* this subexpression's code */
2823
+ ptr, /* where we currently are */
2824
+ (int)(ptr - start_subject), /* start offset */
2825
+ local_offsets, /* offset vector */
2826
+ sizeof(local_offsets)/sizeof(int), /* size of same */
2827
+ local_workspace, /* workspace vector */
2828
+ sizeof(local_workspace)/sizeof(int), /* size of same */
2829
+ rlevel); /* function recursion level */
2830
+
2831
+ md->recursive = new_recursive.prevrec; /* Done this recursion */
2832
+
2833
+ DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2834
+ rc));
2835
+
2836
+ /* Ran out of internal offsets */
2837
+
2838
+ if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
2839
+
2840
+ /* For each successful matched substring, set up the next state with a
2841
+ count of characters to skip before trying it. Note that the count is in
2842
+ characters, not bytes. */
2843
+
2844
+ if (rc > 0)
2845
+ {
2846
+ for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2847
+ {
2848
+ int charcount = local_offsets[rc+1] - local_offsets[rc];
2849
+ #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2850
+ if (utf)
2851
+ {
2852
+ const pcre_uchar *p = start_subject + local_offsets[rc];
2853
+ const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2854
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2855
+ }
2856
+ #endif
2857
+ if (charcount > 0)
2858
+ {
2859
+ ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
2860
+ }
2861
+ else
2862
+ {
2863
+ ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
2864
+ }
2865
+ }
2866
+ }
2867
+ else if (rc != PCRE_ERROR_NOMATCH) return rc;
2868
+ }
2869
+ break;
2870
+
2871
+ /*-----------------------------------------------------------------*/
2872
+ case OP_BRAPOS:
2873
+ case OP_SBRAPOS:
2874
+ case OP_CBRAPOS:
2875
+ case OP_SCBRAPOS:
2876
+ case OP_BRAPOSZERO:
2877
+ {
2878
+ int charcount, matched_count;
2879
+ const pcre_uchar *local_ptr = ptr;
2880
+ BOOL allow_zero;
2881
+
2882
+ if (codevalue == OP_BRAPOSZERO)
2883
+ {
2884
+ allow_zero = TRUE;
2885
+ codevalue = *(++code); /* Codevalue will be one of above BRAs */
2886
+ }
2887
+ else allow_zero = FALSE;
2888
+
2889
+ /* Loop to match the subpattern as many times as possible as if it were
2890
+ a complete pattern. */
2891
+
2892
+ for (matched_count = 0;; matched_count++)
2893
+ {
2894
+ int local_offsets[2];
2895
+ int local_workspace[1000];
2896
+
2897
+ int rc = internal_dfa_exec(
2898
+ md, /* fixed match data */
2899
+ code, /* this subexpression's code */
2900
+ local_ptr, /* where we currently are */
2901
+ (int)(ptr - start_subject), /* start offset */
2902
+ local_offsets, /* offset vector */
2903
+ sizeof(local_offsets)/sizeof(int), /* size of same */
2904
+ local_workspace, /* workspace vector */
2905
+ sizeof(local_workspace)/sizeof(int), /* size of same */
2906
+ rlevel); /* function recursion level */
2907
+
2908
+ /* Failed to match */
2909
+
2910
+ if (rc < 0)
2911
+ {
2912
+ if (rc != PCRE_ERROR_NOMATCH) return rc;
2913
+ break;
2914
+ }
2915
+
2916
+ /* Matched: break the loop if zero characters matched. */
2917
+
2918
+ charcount = local_offsets[1] - local_offsets[0];
2919
+ if (charcount == 0) break;
2920
+ local_ptr += charcount; /* Advance temporary position ptr */
2921
+ }
2922
+
2923
+ /* At this point we have matched the subpattern matched_count
2924
+ times, and local_ptr is pointing to the character after the end of the
2925
+ last match. */
2926
+
2927
+ if (matched_count > 0 || allow_zero)
2928
+ {
2929
+ const pcre_uchar *end_subpattern = code;
2930
+ int next_state_offset;
2931
+
2932
+ do { end_subpattern += GET(end_subpattern, 1); }
2933
+ while (*end_subpattern == OP_ALT);
2934
+ next_state_offset =
2935
+ (int)(end_subpattern - start_code + LINK_SIZE + 1);
2936
+
2937
+ /* Optimization: if there are no more active states, and there
2938
+ are no new states yet set up, then skip over the subject string
2939
+ right here, to save looping. Otherwise, set up the new state to swing
2940
+ into action when the end of the matched substring is reached. */
2941
+
2942
+ if (i + 1 >= active_count && new_count == 0)
2943
+ {
2944
+ ptr = local_ptr;
2945
+ clen = 0;
2946
+ ADD_NEW(next_state_offset, 0);
2947
+ }
2948
+ else
2949
+ {
2950
+ const pcre_uchar *p = ptr;
2951
+ const pcre_uchar *pp = local_ptr;
2952
+ charcount = (int)(pp - p);
2953
+ #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2954
+ if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2955
+ #endif
2956
+ ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2957
+ }
2958
+ }
2959
+ }
2960
+ break;
2961
+
2962
+ /*-----------------------------------------------------------------*/
2963
+ case OP_ONCE:
2964
+ case OP_ONCE_NC:
2965
+ {
2966
+ int local_offsets[2];
2967
+ int local_workspace[1000];
2968
+
2969
+ int rc = internal_dfa_exec(
2970
+ md, /* fixed match data */
2971
+ code, /* this subexpression's code */
2972
+ ptr, /* where we currently are */
2973
+ (int)(ptr - start_subject), /* start offset */
2974
+ local_offsets, /* offset vector */
2975
+ sizeof(local_offsets)/sizeof(int), /* size of same */
2976
+ local_workspace, /* workspace vector */
2977
+ sizeof(local_workspace)/sizeof(int), /* size of same */
2978
+ rlevel); /* function recursion level */
2979
+
2980
+ if (rc >= 0)
2981
+ {
2982
+ const pcre_uchar *end_subpattern = code;
2983
+ int charcount = local_offsets[1] - local_offsets[0];
2984
+ int next_state_offset, repeat_state_offset;
2985
+
2986
+ do { end_subpattern += GET(end_subpattern, 1); }
2987
+ while (*end_subpattern == OP_ALT);
2988
+ next_state_offset =
2989
+ (int)(end_subpattern - start_code + LINK_SIZE + 1);
2990
+
2991
+ /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2992
+ arrange for the repeat state also to be added to the relevant list.
2993
+ Calculate the offset, or set -1 for no repeat. */
2994
+
2995
+ repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2996
+ *end_subpattern == OP_KETRMIN)?
2997
+ (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2998
+
2999
+ /* If we have matched an empty string, add the next state at the
3000
+ current character pointer. This is important so that the duplicate
3001
+ checking kicks in, which is what breaks infinite loops that match an
3002
+ empty string. */
3003
+
3004
+ if (charcount == 0)
3005
+ {
3006
+ ADD_ACTIVE(next_state_offset, 0);
3007
+ }
3008
+
3009
+ /* Optimization: if there are no more active states, and there
3010
+ are no new states yet set up, then skip over the subject string
3011
+ right here, to save looping. Otherwise, set up the new state to swing
3012
+ into action when the end of the matched substring is reached. */
3013
+
3014
+ else if (i + 1 >= active_count && new_count == 0)
3015
+ {
3016
+ ptr += charcount;
3017
+ clen = 0;
3018
+ ADD_NEW(next_state_offset, 0);
3019
+
3020
+ /* If we are adding a repeat state at the new character position,
3021
+ we must fudge things so that it is the only current state.
3022
+ Otherwise, it might be a duplicate of one we processed before, and
3023
+ that would cause it to be skipped. */
3024
+
3025
+ if (repeat_state_offset >= 0)
3026
+ {
3027
+ next_active_state = active_states;
3028
+ active_count = 0;
3029
+ i = -1;
3030
+ ADD_ACTIVE(repeat_state_offset, 0);
3031
+ }
3032
+ }
3033
+ else
3034
+ {
3035
+ #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3036
+ if (utf)
3037
+ {
3038
+ const pcre_uchar *p = start_subject + local_offsets[0];
3039
+ const pcre_uchar *pp = start_subject + local_offsets[1];
3040
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3041
+ }
3042
+ #endif
3043
+ ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3044
+ if (repeat_state_offset >= 0)
3045
+ { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
3046
+ }
3047
+ }
3048
+ else if (rc != PCRE_ERROR_NOMATCH) return rc;
3049
+ }
3050
+ break;
3051
+
3052
+
3053
+ /* ========================================================================== */
3054
+ /* Handle callouts */
3055
+
3056
+ case OP_CALLOUT:
3057
+ rrc = 0;
3058
+ if (PUBL(callout) != NULL)
3059
+ {
3060
+ PUBL(callout_block) cb;
3061
+ cb.version = 1; /* Version 1 of the callout block */
3062
+ cb.callout_number = code[1];
3063
+ cb.offset_vector = offsets;
3064
+ #if defined COMPILE_PCRE8
3065
+ cb.subject = (PCRE_SPTR)start_subject;
3066
+ #elif defined COMPILE_PCRE16
3067
+ cb.subject = (PCRE_SPTR16)start_subject;
3068
+ #elif defined COMPILE_PCRE32
3069
+ cb.subject = (PCRE_SPTR32)start_subject;
3070
+ #endif
3071
+ cb.subject_length = (int)(end_subject - start_subject);
3072
+ cb.start_match = (int)(current_subject - start_subject);
3073
+ cb.current_position = (int)(ptr - start_subject);
3074
+ cb.pattern_position = GET(code, 2);
3075
+ cb.next_item_length = GET(code, 2 + LINK_SIZE);
3076
+ cb.capture_top = 1;
3077
+ cb.capture_last = -1;
3078
+ cb.callout_data = md->callout_data;
3079
+ cb.mark = NULL; /* No (*MARK) support */
3080
+ if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
3081
+ }
3082
+ if (rrc == 0)
3083
+ { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3084
+ break;
3085
+
3086
+
3087
+ /* ========================================================================== */
3088
+ default: /* Unsupported opcode */
3089
+ return PCRE_ERROR_DFA_UITEM;
3090
+ }
3091
+
3092
+ NEXT_ACTIVE_STATE: continue;
3093
+
3094
+ } /* End of loop scanning active states */
3095
+
3096
+ /* We have finished the processing at the current subject character. If no
3097
+ new states have been set for the next character, we have found all the
3098
+ matches that we are going to find. If we are at the top level and partial
3099
+ matching has been requested, check for appropriate conditions.
3100
+
3101
+ The "forced_fail" variable counts the number of (*F) encountered for the
3102
+ character. If it is equal to the original active_count (saved in
3103
+ workspace[1]) it means that (*F) was found on every active state. In this
3104
+ case we don't want to give a partial match.
3105
+
3106
+ The "could_continue" variable is true if a state could have continued but
3107
+ for the fact that the end of the subject was reached. */
3108
+
3109
+ if (new_count <= 0)
3110
+ {
3111
+ if (rlevel == 1 && /* Top level, and */
3112
+ could_continue && /* Some could go on, and */
3113
+ forced_fail != workspace[1] && /* Not all forced fail & */
3114
+ ( /* either... */
3115
+ (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
3116
+ || /* or... */
3117
+ ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
3118
+ match_count < 0) /* no matches */
3119
+ ) && /* And... */
3120
+ (
3121
+ partial_newline || /* Either partial NL */
3122
+ ( /* or ... */
3123
+ ptr >= end_subject && /* End of subject and */
3124
+ ptr > md->start_used_ptr) /* Inspected non-empty string */
3125
+ )
3126
+ )
3127
+ match_count = PCRE_ERROR_PARTIAL;
3128
+ DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3129
+ "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3130
+ rlevel*2-2, SP));
3131
+ break; /* In effect, "return", but see the comment below */
3132
+ }
3133
+
3134
+ /* One or more states are active for the next character. */
3135
+
3136
+ ptr += clen; /* Advance to next subject character */
3137
+ } /* Loop to move along the subject string */
3138
+
3139
+ /* Control gets here from "break" a few lines above. We do it this way because
3140
+ if we use "return" above, we have compiler trouble. Some compilers warn if
3141
+ there's nothing here because they think the function doesn't return a value. On
3142
+ the other hand, if we put a dummy statement here, some more clever compilers
3143
+ complain that it can't be reached. Sigh. */
3144
+
3145
+ return match_count;
3146
+ }
3147
+
3148
+
3149
+
3150
+
3151
+ /*************************************************
3152
+ * Execute a Regular Expression - DFA engine *
3153
+ *************************************************/
3154
+
3155
+ /* This external function applies a compiled re to a subject string using a DFA
3156
+ engine. This function calls the internal function multiple times if the pattern
3157
+ is not anchored.
3158
+
3159
+ Arguments:
3160
+ argument_re points to the compiled expression
3161
+ extra_data points to extra data or is NULL
3162
+ subject points to the subject string
3163
+ length length of subject string (may contain binary zeros)
3164
+ start_offset where to start in the subject string
3165
+ options option bits
3166
+ offsets vector of match offsets
3167
+ offsetcount size of same
3168
+ workspace workspace vector
3169
+ wscount size of same
3170
+
3171
+ Returns: > 0 => number of match offset pairs placed in offsets
3172
+ = 0 => offsets overflowed; longest matches are present
3173
+ -1 => failed to match
3174
+ < -1 => some kind of unexpected problem
3175
+ */
3176
+
3177
+ #if defined COMPILE_PCRE8
3178
+ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3179
+ pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3180
+ const char *subject, int length, int start_offset, int options, int *offsets,
3181
+ int offsetcount, int *workspace, int wscount)
3182
+ #elif defined COMPILE_PCRE16
3183
+ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3184
+ pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3185
+ PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3186
+ int offsetcount, int *workspace, int wscount)
3187
+ #elif defined COMPILE_PCRE32
3188
+ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3189
+ pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3190
+ PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3191
+ int offsetcount, int *workspace, int wscount)
3192
+ #endif
3193
+ {
3194
+ REAL_PCRE *re = (REAL_PCRE *)argument_re;
3195
+ dfa_match_data match_block;
3196
+ dfa_match_data *md = &match_block;
3197
+ BOOL utf, anchored, startline, firstline;
3198
+ const pcre_uchar *current_subject, *end_subject;
3199
+ const pcre_study_data *study = NULL;
3200
+
3201
+ const pcre_uchar *req_char_ptr;
3202
+ const pcre_uint8 *start_bits = NULL;
3203
+ BOOL has_first_char = FALSE;
3204
+ BOOL has_req_char = FALSE;
3205
+ pcre_uchar first_char = 0;
3206
+ pcre_uchar first_char2 = 0;
3207
+ pcre_uchar req_char = 0;
3208
+ pcre_uchar req_char2 = 0;
3209
+ int newline;
3210
+
3211
+ /* Plausibility checks on the caller-supplied arguments */
3212
+
3213
+ if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3214
+ if (re == NULL || subject == NULL || workspace == NULL ||
3215
+ (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3216
+ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3217
+ if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3218
+ if (length < 0) return PCRE_ERROR_BADLENGTH;
3219
+ if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3220
+
3221
+ /* Check that the first field in the block is the magic number. If it is not,
3222
+ return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3223
+ REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3224
+ means that the pattern is likely compiled with different endianness. */
3225
+
3226
+ if (re->magic_number != MAGIC_NUMBER)
3227
+ return re->magic_number == REVERSED_MAGIC_NUMBER?
3228
+ PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3229
+ if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3230
+
3231
+ /* If restarting after a partial match, do some sanity checks on the contents
3232
+ of the workspace. */
3233
+
3234
+ if ((options & PCRE_DFA_RESTART) != 0)
3235
+ {
3236
+ if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3237
+ workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3238
+ return PCRE_ERROR_DFA_BADRESTART;
3239
+ }
3240
+
3241
+ /* Set up study, callout, and table data */
3242
+
3243
+ md->tables = re->tables;
3244
+ md->callout_data = NULL;
3245
+
3246
+ if (extra_data != NULL)
3247
+ {
3248
+ unsigned long int flags = extra_data->flags;
3249
+ if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3250
+ study = (const pcre_study_data *)extra_data->study_data;
3251
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
3252
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3253
+ return PCRE_ERROR_DFA_UMLIMIT;
3254
+ if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3255
+ md->callout_data = extra_data->callout_data;
3256
+ if ((flags & PCRE_EXTRA_TABLES) != 0)
3257
+ md->tables = extra_data->tables;
3258
+ }
3259
+
3260
+ /* Set some local values */
3261
+
3262
+ current_subject = (const pcre_uchar *)subject + start_offset;
3263
+ end_subject = (const pcre_uchar *)subject + length;
3264
+ req_char_ptr = current_subject - 1; /* so the first req_char search below always runs */
3265
+
3266
+ #ifdef SUPPORT_UTF
3267
+ /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3268
+ utf = (re->options & PCRE_UTF8) != 0;
3269
+ #else
3270
+ utf = FALSE;
3271
+ #endif
3272
+ /* A restart run, like an anchored pattern or option, is tried at one start point only. */
3273
+ anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
3274
+ (re->options & PCRE_ANCHORED) != 0;
3275
+
3276
+ /* The remaining fixed data for passing around. */
3277
+
3278
+ md->start_code = (const pcre_uchar *)argument_re +
3279
+ re->name_table_offset + re->name_count * re->name_entry_size;
3280
+ md->start_subject = (const pcre_uchar *)subject;
3281
+ md->end_subject = end_subject;
3282
+ md->start_offset = start_offset;
3283
+ md->moptions = options;
3284
+ md->poptions = re->options;
3285
+
3286
+ /* If the BSR option is not set at match time, copy what was set
3287
+ at compile time. */
3288
+
3289
+ if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
3290
+ {
3291
+ if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
3292
+ md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
3293
+ #ifdef BSR_ANYCRLF
3294
+ else md->moptions |= PCRE_BSR_ANYCRLF;
3295
+ #endif
3296
+ }
3297
+
3298
+ /* Handle different types of newline. The three bits give eight cases. If
3299
+ nothing is set at run time, whatever was used at compile time applies. */
3300
+
3301
+ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3302
+ PCRE_NEWLINE_BITS)
3303
+ {
3304
+ case 0: newline = NEWLINE; break; /* Compile-time default */
3305
+ case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
3306
+ case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
3307
+ case PCRE_NEWLINE_CR+
3308
+ PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
3309
+ case PCRE_NEWLINE_ANY: newline = -1; break;
3310
+ case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3311
+ default: return PCRE_ERROR_BADNEWLINE;
3312
+ }
3313
+
3314
+ if (newline == -2)
3315
+ {
3316
+ md->nltype = NLTYPE_ANYCRLF;
3317
+ }
3318
+ else if (newline < 0)
3319
+ {
3320
+ md->nltype = NLTYPE_ANY;
3321
+ }
3322
+ else
3323
+ {
3324
+ md->nltype = NLTYPE_FIXED;
3325
+ if (newline > 255)
3326
+ {
3327
+ md->nllen = 2;
3328
+ md->nl[0] = (newline >> 8) & 255;
3329
+ md->nl[1] = newline & 255;
3330
+ }
3331
+ else
3332
+ {
3333
+ md->nllen = 1;
3334
+ md->nl[0] = newline;
3335
+ }
3336
+ }
3337
+
3338
+ /* Check a UTF string if required. For an invalid string, the error offset and
3339
+ error code are passed back in offsets[0] and offsets[1] when offsetcount >= 2. */
3340
+
3341
+ #ifdef SUPPORT_UTF
3342
+ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3343
+ {
3344
+ int erroroffset;
3345
+ int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3346
+ if (errorcode != 0)
3347
+ {
3348
+ if (offsetcount >= 2)
3349
+ {
3350
+ offsets[0] = erroroffset;
3351
+ offsets[1] = errorcode;
3352
+ }
3353
+ #if defined COMPILE_PCRE8
3354
+ return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3355
+ PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3356
+ #elif defined COMPILE_PCRE16
3357
+ return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3358
+ PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3359
+ #elif defined COMPILE_PCRE32
3360
+ return PCRE_ERROR_BADUTF32;
3361
+ #endif
3362
+ }
3363
+ #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3364
+ if (start_offset > 0 && start_offset < length &&
3365
+ NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3366
+ return PCRE_ERROR_BADUTF8_OFFSET;
3367
+ #endif
3368
+ }
3369
+ #endif
3370
+
3371
+ /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3372
+ is a feature that makes it possible to save compiled regex and re-use them
3373
+ in other programs later. */
3374
+
3375
+ if (md->tables == NULL) md->tables = PRIV(default_tables);
3376
+
3377
+ /* The "must be at the start of a line" flags are used in a loop when finding
3378
+ where to start. */
3379
+
3380
+ startline = (re->flags & PCRE_STARTLINE) != 0;
3381
+ firstline = (re->options & PCRE_FIRSTLINE) != 0;
3382
+
3383
+ /* Set up the first character to match, if available. The first_char value is
3384
+ never set for an anchored regular expression, but the anchoring may be forced
3385
+ at run time, so we have to test for anchoring. The first char may be unset for
3386
+ an unanchored pattern, of course. If there's no first char and the pattern was
3387
+ studied, there may be a bitmap of possible first characters. */
3388
+
3389
+ if (!anchored)
3390
+ {
3391
+ if ((re->flags & PCRE_FIRSTSET) != 0)
3392
+ {
3393
+ has_first_char = TRUE;
3394
+ first_char = first_char2 = (pcre_uchar)(re->first_char);
3395
+ if ((re->flags & PCRE_FCH_CASELESS) != 0)
3396
+ {
3397
+ first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3398
+ #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3399
+ if (utf && first_char > 127)
3400
+ first_char2 = UCD_OTHERCASE(first_char);
3401
+ #endif
3402
+ }
3403
+ }
3404
+ else
3405
+ {
3406
+ if (!startline && study != NULL &&
3407
+ (study->flags & PCRE_STUDY_MAPPED) != 0)
3408
+ start_bits = study->start_bits;
3409
+ }
3410
+ }
3411
+
3412
+ /* For anchored or unanchored matches, there may be a "last known required
3413
+ character" set. */
3414
+
3415
+ if ((re->flags & PCRE_REQCHSET) != 0)
3416
+ {
3417
+ has_req_char = TRUE;
3418
+ req_char = req_char2 = (pcre_uchar)(re->req_char);
3419
+ if ((re->flags & PCRE_RCH_CASELESS) != 0)
3420
+ {
3421
+ req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3422
+ #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3423
+ if (utf && req_char > 127)
3424
+ req_char2 = UCD_OTHERCASE(req_char);
3425
+ #endif
3426
+ }
3427
+ }
3428
+
3429
+ /* Call the main matching function, looping for a non-anchored regex after a
3430
+ failed match. If not restarting, perform certain optimizations at the start of
3431
+ a match. */
3432
+
3433
+ for (;;)
3434
+ {
3435
+ int rc;
3436
+
3437
+ if ((options & PCRE_DFA_RESTART) == 0)
3438
+ {
3439
+ const pcre_uchar *save_end_subject = end_subject;
3440
+
3441
+ /* If firstline is TRUE, the start of the match is constrained to the first
3442
+ line of a multiline string. Implement this by temporarily adjusting
3443
+ end_subject so that we stop scanning at a newline. If the match fails at
3444
+ the newline, later code breaks this loop. */
3445
+
3446
+ if (firstline)
3447
+ {
3448
+ PCRE_PUCHAR t = current_subject;
3449
+ #ifdef SUPPORT_UTF
3450
+ if (utf)
3451
+ {
3452
+ while (t < md->end_subject && !IS_NEWLINE(t))
3453
+ {
3454
+ t++;
3455
+ ACROSSCHAR(t < end_subject, *t, t++);
3456
+ }
3457
+ }
3458
+ else
3459
+ #endif
3460
+ while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3461
+ end_subject = t;
3462
+ }
3463
+
3464
+ /* There are some optimizations that avoid running the match if a known
3465
+ starting point is not found. However, there is an option that disables
3466
+ these, for testing and for ensuring that all callouts do actually occur.
3467
+ The option can be set in the regex by (*NO_START_OPT) or passed in
3468
+ match-time options. */
3469
+
3470
+ if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3471
+ {
3472
+ /* Advance to a known first pcre_uchar (i.e. data item) */
3473
+
3474
+ if (has_first_char)
3475
+ {
3476
+ if (first_char != first_char2)
3477
+ {
3478
+ pcre_uchar csc;
3479
+ while (current_subject < end_subject &&
3480
+ (csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
3481
+ current_subject++;
3482
+ }
3483
+ else
3484
+ while (current_subject < end_subject &&
3485
+ UCHAR21TEST(current_subject) != first_char)
3486
+ current_subject++;
3487
+ }
3488
+
3489
+ /* Or to just after a linebreak for a multiline match if possible */
3490
+
3491
+ else if (startline)
3492
+ {
3493
+ if (current_subject > md->start_subject + start_offset)
3494
+ {
3495
+ #ifdef SUPPORT_UTF
3496
+ if (utf)
3497
+ {
3498
+ while (current_subject < end_subject &&
3499
+ !WAS_NEWLINE(current_subject))
3500
+ {
3501
+ current_subject++;
3502
+ ACROSSCHAR(current_subject < end_subject, *current_subject,
3503
+ current_subject++);
3504
+ }
3505
+ }
3506
+ else
3507
+ #endif
3508
+ while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3509
+ current_subject++;
3510
+
3511
+ /* If we have just passed a CR and the newline option is ANY or
3512
+ ANYCRLF, and we are now at a LF, advance the match position by one
3513
+ more character. */
3514
+
3515
+ if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
3516
+ (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3517
+ current_subject < end_subject &&
3518
+ UCHAR21TEST(current_subject) == CHAR_NL)
3519
+ current_subject++;
3520
+ }
3521
+ }
3522
+
3523
+ /* Advance to a non-unique first pcre_uchar after study */
3524
+
3525
+ else if (start_bits != NULL)
3526
+ {
3527
+ while (current_subject < end_subject)
3528
+ {
3529
+ register pcre_uint32 c = UCHAR21TEST(current_subject);
3530
+ #ifndef COMPILE_PCRE8
3531
+ if (c > 255) c = 255; /* keeps the start_bits index c/8 within 0..31 */
3532
+ #endif
3533
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
3534
+ current_subject++;
3535
+ }
3536
+ }
3537
+ }
3538
+
3539
+ /* Restore fudged end_subject */
3540
+
3541
+ end_subject = save_end_subject;
3542
+
3543
+ /* The following two optimizations are disabled for partial matching or if
3544
+ disabling is explicitly requested (and of course, by the test above, this
3545
+ code is not obeyed when restarting after a partial match). */
3546
+
3547
+ if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3548
+ (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3549
+ {
3550
+ /* If the pattern was studied, a minimum subject length may be set. This
3551
+ is a lower bound; no actual string of that length may actually match the
3552
+ pattern. Although the value is, strictly, in characters, we treat it as
3553
+ in pcre_uchar units to avoid spending too much time in this optimization.
3554
+ */
3555
+
3556
+ if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3557
+ (pcre_uint32)(end_subject - current_subject) < study->minlength)
3558
+ return PCRE_ERROR_NOMATCH;
3559
+
3560
+ /* If req_char is set, we know that that pcre_uchar must appear in the
3561
+ subject for the match to succeed. If the first pcre_uchar is set,
3562
+ req_char must be later in the subject; otherwise the test starts at the
3563
+ match point. This optimization can save a huge amount of work in patterns
3564
+ with nested unlimited repeats that aren't going to match. Writing
3565
+ separate code for cased/caseless versions makes it go faster, as does
3566
+ using an autoincrement and backing off on a match.
3567
+
3568
+ HOWEVER: when the subject string is very, very long, searching to its end
3569
+ can take a long time, and give bad performance on quite ordinary
3570
+ patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3571
+ string... so we don't do this when the string is sufficiently long. */
3572
+
3573
+ if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3574
+ {
3575
+ register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3576
+
3577
+ /* We don't need to repeat the search if we haven't yet reached the
3578
+ place we found it at last time. */
3579
+
3580
+ if (p > req_char_ptr)
3581
+ {
3582
+ if (req_char != req_char2)
3583
+ {
3584
+ while (p < end_subject)
3585
+ {
3586
+ register pcre_uint32 pp = UCHAR21INCTEST(p);
3587
+ if (pp == req_char || pp == req_char2) { p--; break; }
3588
+ }
3589
+ }
3590
+ else
3591
+ {
3592
+ while (p < end_subject)
3593
+ {
3594
+ if (UCHAR21INCTEST(p) == req_char) { p--; break; }
3595
+ }
3596
+ }
3597
+
3598
+ /* If we can't find the required pcre_uchar, break the matching loop,
3599
+ which will cause a return of PCRE_ERROR_NOMATCH. */
3600
+
3601
+ if (p >= end_subject) break;
3602
+
3603
+ /* If we have found the required pcre_uchar, save the point where we
3604
+ found it, so that we don't search again next time round the loop if
3605
+ the start hasn't passed this point yet. */
3606
+
3607
+ req_char_ptr = p;
3608
+ }
3609
+ }
3610
+ }
3611
+ } /* End of optimizations that are done when not restarting */
3612
+
3613
+ /* OK, now we can do the business */
3614
+
3615
+ md->start_used_ptr = current_subject;
3616
+ md->recursive = NULL;
3617
+
3618
+ rc = internal_dfa_exec(
3619
+ md, /* fixed match data */
3620
+ md->start_code, /* this subexpression's code */
3621
+ current_subject, /* where we currently are */
3622
+ start_offset, /* start offset in subject */
3623
+ offsets, /* offset vector */
3624
+ offsetcount, /* size of same */
3625
+ workspace, /* workspace vector */
3626
+ wscount, /* size of same */
3627
+ 0); /* function recurse level */
3628
+
3629
+ /* Anything other than "no match" means we are done, always; otherwise, carry
3630
+ on only if not anchored. */
3631
+
3632
+ if (rc != PCRE_ERROR_NOMATCH || anchored)
3633
+ {
3634
+ if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3635
+ {
3636
+ offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3637
+ offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject); /* end of subject */
3638
+ if (offsetcount > 2)
3639
+ offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject); /* start point of this attempt */
3640
+ }
3641
+ return rc;
3642
+ }
3643
+
3644
+ /* Advance to the next subject character unless we are at the end of a line
3645
+ and firstline is set. */
3646
+
3647
+ if (firstline && IS_NEWLINE(current_subject)) break;
3648
+ current_subject++;
3649
+ #ifdef SUPPORT_UTF
3650
+ if (utf)
3651
+ {
3652
+ ACROSSCHAR(current_subject < end_subject, *current_subject,
3653
+ current_subject++);
3654
+ }
3655
+ #endif
3656
+ if (current_subject > end_subject) break;
3657
+
3658
+ /* If we have just passed a CR and we are now at a LF, and the pattern does
3659
+ not contain any explicit matches for \r or \n, and the newline option is CRLF
3660
+ or ANY or ANYCRLF, advance the match position by one more character. */
3661
+
3662
+ if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
3663
+ current_subject < end_subject &&
3664
+ UCHAR21TEST(current_subject) == CHAR_NL &&
3665
+ (re->flags & PCRE_HASCRORLF) == 0 &&
3666
+ (md->nltype == NLTYPE_ANY ||
3667
+ md->nltype == NLTYPE_ANYCRLF ||
3668
+ md->nllen == 2))
3669
+ current_subject++;
3670
+
3671
+ } /* "Bumpalong" loop */
3672
+
3673
+ return PCRE_ERROR_NOMATCH;
3674
+ }
3675
+
3676
+ /* End of pcre_dfa_exec.c */