rugged 0.28.4 → 0.28.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rugged/version.rb +1 -1
- data/vendor/libgit2/AUTHORS +0 -1
- data/vendor/libgit2/CMakeLists.txt +16 -36
- data/vendor/libgit2/COPYING +0 -28
- data/vendor/libgit2/cmake/Modules/EnableWarnings.cmake +1 -5
- data/vendor/libgit2/cmake/Modules/FindCoreFoundation.cmake +2 -2
- data/vendor/libgit2/cmake/Modules/FindGSSAPI.cmake +1 -1
- data/vendor/libgit2/cmake/Modules/FindSecurity.cmake +2 -2
- data/vendor/libgit2/cmake/Modules/FindStatNsec.cmake +0 -6
- data/vendor/libgit2/deps/http-parser/http_parser.c +6 -11
- data/vendor/libgit2/deps/regex/CMakeLists.txt +2 -0
- data/vendor/libgit2/deps/regex/COPYING +502 -0
- data/vendor/libgit2/deps/regex/config.h +7 -0
- data/vendor/libgit2/deps/regex/regcomp.c +3857 -0
- data/vendor/libgit2/deps/regex/regex.c +92 -0
- data/vendor/libgit2/deps/regex/regex.h +582 -0
- data/vendor/libgit2/deps/regex/regex_internal.c +1744 -0
- data/vendor/libgit2/deps/regex/regex_internal.h +819 -0
- data/vendor/libgit2/deps/regex/regexec.c +4369 -0
- data/vendor/libgit2/deps/zlib/adler32.c +7 -0
- data/vendor/libgit2/deps/zlib/crc32.c +7 -0
- data/vendor/libgit2/include/git2.h +0 -2
- data/vendor/libgit2/include/git2/apply.h +2 -22
- data/vendor/libgit2/include/git2/attr.h +12 -19
- data/vendor/libgit2/include/git2/blame.h +2 -2
- data/vendor/libgit2/include/git2/blob.h +12 -44
- data/vendor/libgit2/include/git2/buffer.h +14 -20
- data/vendor/libgit2/include/git2/checkout.h +14 -46
- data/vendor/libgit2/include/git2/cherrypick.h +3 -3
- data/vendor/libgit2/include/git2/clone.h +2 -2
- data/vendor/libgit2/include/git2/commit.h +1 -23
- data/vendor/libgit2/include/git2/common.h +5 -7
- data/vendor/libgit2/include/git2/config.h +12 -12
- data/vendor/libgit2/include/git2/deprecated.h +3 -243
- data/vendor/libgit2/include/git2/describe.h +4 -4
- data/vendor/libgit2/include/git2/diff.h +14 -16
- data/vendor/libgit2/include/git2/filter.h +0 -8
- data/vendor/libgit2/include/git2/index.h +1 -2
- data/vendor/libgit2/include/git2/indexer.h +4 -48
- data/vendor/libgit2/include/git2/inttypes.h +309 -0
- data/vendor/libgit2/include/git2/merge.h +10 -6
- data/vendor/libgit2/include/git2/net.h +5 -0
- data/vendor/libgit2/include/git2/object.h +14 -2
- data/vendor/libgit2/include/git2/odb.h +2 -3
- data/vendor/libgit2/include/git2/odb_backend.h +4 -5
- data/vendor/libgit2/include/git2/oid.h +1 -1
- data/vendor/libgit2/include/git2/pack.h +1 -12
- data/vendor/libgit2/include/git2/proxy.h +3 -5
- data/vendor/libgit2/include/git2/rebase.h +2 -46
- data/vendor/libgit2/include/git2/refs.h +0 -19
- data/vendor/libgit2/include/git2/remote.h +12 -35
- data/vendor/libgit2/include/git2/repository.h +2 -24
- data/vendor/libgit2/include/git2/revert.h +1 -1
- data/vendor/libgit2/include/git2/stash.h +3 -3
- data/vendor/libgit2/include/git2/status.h +16 -25
- data/vendor/libgit2/include/git2/submodule.h +3 -20
- data/vendor/libgit2/include/git2/sys/alloc.h +9 -9
- data/vendor/libgit2/include/git2/sys/odb_backend.h +4 -48
- data/vendor/libgit2/include/git2/sys/refdb_backend.h +21 -57
- data/vendor/libgit2/include/git2/sys/repository.h +1 -5
- data/vendor/libgit2/include/git2/sys/time.h +31 -0
- data/vendor/libgit2/include/git2/sys/transport.h +2 -2
- data/vendor/libgit2/include/git2/tag.h +2 -11
- data/vendor/libgit2/include/git2/trace.h +2 -2
- data/vendor/libgit2/include/git2/transport.h +340 -11
- data/vendor/libgit2/include/git2/tree.h +1 -1
- data/vendor/libgit2/include/git2/types.h +89 -4
- data/vendor/libgit2/include/git2/version.h +2 -2
- data/vendor/libgit2/include/git2/worktree.h +5 -5
- data/vendor/libgit2/libgit2.pc.in +13 -0
- data/vendor/libgit2/src/CMakeLists.txt +222 -88
- data/vendor/libgit2/src/alloc.c +14 -2
- data/vendor/libgit2/src/apply.c +30 -60
- data/vendor/libgit2/src/attr.c +64 -70
- data/vendor/libgit2/src/attr_file.c +96 -189
- data/vendor/libgit2/src/attr_file.h +9 -9
- data/vendor/libgit2/src/attrcache.c +46 -44
- data/vendor/libgit2/src/attrcache.h +1 -2
- data/vendor/libgit2/src/blame.c +5 -17
- data/vendor/libgit2/src/blame.h +1 -1
- data/vendor/libgit2/src/blame_git.c +7 -21
- data/vendor/libgit2/src/blob.c +17 -81
- data/vendor/libgit2/src/blob.h +2 -2
- data/vendor/libgit2/src/branch.c +5 -29
- data/vendor/libgit2/src/buffer.c +7 -14
- data/vendor/libgit2/src/cache.c +33 -26
- data/vendor/libgit2/src/cache.h +1 -1
- data/vendor/libgit2/src/cc-compat.h +0 -5
- data/vendor/libgit2/src/checkout.c +16 -26
- data/vendor/libgit2/src/cherrypick.c +3 -9
- data/vendor/libgit2/src/clone.c +7 -29
- data/vendor/libgit2/src/clone.h +0 -4
- data/vendor/libgit2/src/commit.c +21 -69
- data/vendor/libgit2/src/commit.h +0 -6
- data/vendor/libgit2/src/commit_list.c +76 -28
- data/vendor/libgit2/src/commit_list.h +2 -2
- data/vendor/libgit2/src/common.h +75 -3
- data/vendor/libgit2/src/config.c +40 -31
- data/vendor/libgit2/src/config.h +6 -7
- data/vendor/libgit2/src/config_backend.h +0 -12
- data/vendor/libgit2/src/config_cache.c +39 -39
- data/vendor/libgit2/src/config_entries.c +99 -69
- data/vendor/libgit2/src/config_entries.h +0 -1
- data/vendor/libgit2/src/config_file.c +380 -337
- data/vendor/libgit2/src/config_mem.c +16 -12
- data/vendor/libgit2/src/config_parse.c +29 -49
- data/vendor/libgit2/src/config_parse.h +12 -13
- data/vendor/libgit2/src/crlf.c +14 -14
- data/vendor/libgit2/src/describe.c +20 -21
- data/vendor/libgit2/src/diff.c +58 -43
- data/vendor/libgit2/src/diff.h +1 -2
- data/vendor/libgit2/src/diff_driver.c +38 -37
- data/vendor/libgit2/src/diff_file.c +7 -9
- data/vendor/libgit2/src/diff_file.h +1 -1
- data/vendor/libgit2/src/diff_generate.c +85 -135
- data/vendor/libgit2/src/diff_generate.h +2 -2
- data/vendor/libgit2/src/diff_parse.c +1 -1
- data/vendor/libgit2/src/diff_print.c +13 -25
- data/vendor/libgit2/src/diff_stats.c +1 -1
- data/vendor/libgit2/src/diff_tform.c +4 -4
- data/vendor/libgit2/src/errors.c +22 -12
- data/vendor/libgit2/src/features.h.in +2 -9
- data/vendor/libgit2/src/fetch.c +2 -7
- data/vendor/libgit2/src/fetchhead.c +1 -1
- data/vendor/libgit2/src/filebuf.c +10 -6
- data/vendor/libgit2/src/filebuf.h +2 -2
- data/vendor/libgit2/src/{futils.c → fileops.c} +17 -21
- data/vendor/libgit2/src/{futils.h → fileops.h} +5 -5
- data/vendor/libgit2/src/filter.c +8 -16
- data/vendor/libgit2/src/fnmatch.c +248 -0
- data/vendor/libgit2/src/fnmatch.h +48 -0
- data/vendor/libgit2/src/global.c +40 -12
- data/vendor/libgit2/src/global.h +2 -0
- data/vendor/libgit2/src/hash.c +0 -61
- data/vendor/libgit2/src/hash.h +21 -19
- data/vendor/libgit2/src/hash/{sha1/collisiondetect.c → hash_collisiondetect.h} +17 -14
- data/vendor/libgit2/src/hash/{sha1/common_crypto.c → hash_common_crypto.h} +19 -15
- data/vendor/libgit2/src/hash/{sha1/generic.c → hash_generic.c} +10 -22
- data/vendor/libgit2/src/hash/{sha1/generic.h → hash_generic.h} +14 -4
- data/vendor/libgit2/src/hash/{sha1/mbedtls.c → hash_mbedtls.c} +7 -15
- data/vendor/libgit2/src/hash/{sha1/mbedtls.h → hash_mbedtls.h} +11 -6
- data/vendor/libgit2/src/hash/{sha1/openssl.c → hash_openssl.h} +18 -14
- data/vendor/libgit2/src/hash/{sha1/win32.c → hash_win32.c} +24 -34
- data/vendor/libgit2/src/hash/{sha1/win32.h → hash_win32.h} +19 -6
- data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/sha1.c +3 -14
- data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/sha1.h +0 -0
- data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/ubc_check.c +0 -0
- data/vendor/libgit2/src/hash/{sha1/sha1dc → sha1dc}/ubc_check.h +0 -0
- data/vendor/libgit2/src/hashsig.c +1 -1
- data/vendor/libgit2/src/idxmap.c +65 -91
- data/vendor/libgit2/src/idxmap.h +15 -151
- data/vendor/libgit2/src/ignore.c +38 -32
- data/vendor/libgit2/src/index.c +43 -66
- data/vendor/libgit2/src/index.h +1 -1
- data/vendor/libgit2/src/indexer.c +70 -69
- data/vendor/libgit2/src/integer.h +4 -39
- data/vendor/libgit2/src/iterator.c +22 -27
- data/vendor/libgit2/src/map.h +1 -1
- data/vendor/libgit2/src/merge.c +44 -58
- data/vendor/libgit2/src/merge_driver.c +4 -4
- data/vendor/libgit2/src/merge_file.c +1 -1
- data/vendor/libgit2/src/mwindow.c +23 -18
- data/vendor/libgit2/src/mwindow.h +4 -4
- data/vendor/libgit2/src/netops.c +165 -55
- data/vendor/libgit2/src/netops.h +25 -3
- data/vendor/libgit2/src/notes.c +2 -2
- data/vendor/libgit2/src/object.c +2 -2
- data/vendor/libgit2/src/object.h +0 -2
- data/vendor/libgit2/src/odb.c +23 -41
- data/vendor/libgit2/src/odb.h +2 -3
- data/vendor/libgit2/src/odb_loose.c +10 -17
- data/vendor/libgit2/src/odb_mempack.c +23 -10
- data/vendor/libgit2/src/odb_pack.c +4 -4
- data/vendor/libgit2/src/offmap.c +55 -43
- data/vendor/libgit2/src/offmap.h +24 -102
- data/vendor/libgit2/src/oid.c +1 -6
- data/vendor/libgit2/src/oidmap.c +57 -39
- data/vendor/libgit2/src/oidmap.h +19 -99
- data/vendor/libgit2/src/pack-objects.c +32 -25
- data/vendor/libgit2/src/pack-objects.h +1 -1
- data/vendor/libgit2/src/pack.c +47 -45
- data/vendor/libgit2/src/pack.h +14 -12
- data/vendor/libgit2/src/parse.c +0 -10
- data/vendor/libgit2/src/parse.h +3 -3
- data/vendor/libgit2/src/patch.c +1 -1
- data/vendor/libgit2/src/patch_generate.c +2 -2
- data/vendor/libgit2/src/patch_parse.c +31 -124
- data/vendor/libgit2/src/path.c +6 -43
- data/vendor/libgit2/src/path.h +0 -2
- data/vendor/libgit2/src/pathspec.c +13 -13
- data/vendor/libgit2/src/pool.c +22 -26
- data/vendor/libgit2/src/pool.h +7 -7
- data/vendor/libgit2/src/posix.c +7 -7
- data/vendor/libgit2/src/posix.h +1 -12
- data/vendor/libgit2/src/proxy.c +2 -7
- data/vendor/libgit2/src/push.c +5 -10
- data/vendor/libgit2/src/reader.c +2 -2
- data/vendor/libgit2/src/rebase.c +7 -66
- data/vendor/libgit2/src/refdb.c +0 -12
- data/vendor/libgit2/src/refdb_fs.c +165 -214
- data/vendor/libgit2/src/reflog.c +13 -11
- data/vendor/libgit2/src/refs.c +18 -24
- data/vendor/libgit2/src/refspec.c +16 -9
- data/vendor/libgit2/src/remote.c +52 -50
- data/vendor/libgit2/src/remote.h +2 -2
- data/vendor/libgit2/src/repository.c +100 -115
- data/vendor/libgit2/src/repository.h +40 -49
- data/vendor/libgit2/src/revert.c +3 -8
- data/vendor/libgit2/src/revparse.c +19 -18
- data/vendor/libgit2/src/revwalk.c +30 -63
- data/vendor/libgit2/src/revwalk.h +0 -20
- data/vendor/libgit2/src/settings.c +0 -5
- data/vendor/libgit2/src/sortedcache.c +26 -12
- data/vendor/libgit2/src/sortedcache.h +1 -1
- data/vendor/libgit2/src/stash.c +65 -45
- data/vendor/libgit2/src/status.c +9 -15
- data/vendor/libgit2/src/{allocators/stdalloc.c → stdalloc.c} +4 -3
- data/vendor/libgit2/src/{allocators/stdalloc.h → stdalloc.h} +4 -4
- data/vendor/libgit2/src/streams/openssl.c +0 -20
- data/vendor/libgit2/src/streams/socket.c +2 -2
- data/vendor/libgit2/src/strmap.c +84 -37
- data/vendor/libgit2/src/strmap.h +33 -105
- data/vendor/libgit2/src/submodule.c +70 -102
- data/vendor/libgit2/src/submodule.h +1 -1
- data/vendor/libgit2/src/sysdir.c +1 -11
- data/vendor/libgit2/src/tag.c +2 -10
- data/vendor/libgit2/src/trace.c +1 -1
- data/vendor/libgit2/src/trace.h +2 -2
- data/vendor/libgit2/src/trailer.c +32 -46
- data/vendor/libgit2/src/transaction.c +9 -10
- data/vendor/libgit2/src/transports/auth.c +9 -10
- data/vendor/libgit2/src/transports/auth.h +4 -11
- data/vendor/libgit2/src/transports/auth_negotiate.c +9 -23
- data/vendor/libgit2/src/transports/auth_negotiate.h +2 -2
- data/vendor/libgit2/src/transports/cred.c +6 -6
- data/vendor/libgit2/src/{allocators/win32_crtdbg.h → transports/cred.h} +4 -5
- data/vendor/libgit2/src/transports/git.c +16 -11
- data/vendor/libgit2/src/transports/http.c +276 -419
- data/vendor/libgit2/src/transports/http.h +1 -1
- data/vendor/libgit2/src/transports/local.c +9 -9
- data/vendor/libgit2/src/transports/smart.c +17 -17
- data/vendor/libgit2/src/transports/smart.h +2 -2
- data/vendor/libgit2/src/transports/smart_protocol.c +60 -36
- data/vendor/libgit2/src/transports/ssh.c +36 -46
- data/vendor/libgit2/src/transports/winhttp.c +207 -231
- data/vendor/libgit2/src/tree-cache.c +7 -14
- data/vendor/libgit2/src/tree.c +24 -10
- data/vendor/libgit2/src/unix/map.c +1 -1
- data/vendor/libgit2/src/unix/posix.h +11 -1
- data/vendor/libgit2/src/userdiff.h +1 -3
- data/vendor/libgit2/src/util.c +53 -51
- data/vendor/libgit2/src/util.h +21 -16
- data/vendor/libgit2/src/win32/map.c +5 -3
- data/vendor/libgit2/src/win32/path_w32.c +2 -12
- data/vendor/libgit2/src/win32/path_w32.h +29 -0
- data/vendor/libgit2/src/win32/posix.h +4 -1
- data/vendor/libgit2/src/win32/posix_w32.c +5 -40
- data/vendor/libgit2/src/win32/precompiled.h +2 -0
- data/vendor/libgit2/src/win32/thread.c +10 -5
- data/vendor/libgit2/src/win32/w32_buffer.c +3 -7
- data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.c +93 -0
- data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.h +2 -0
- data/vendor/libgit2/src/win32/w32_stack.c +9 -4
- data/vendor/libgit2/src/win32/w32_stack.h +3 -3
- data/vendor/libgit2/src/win32/w32_util.c +0 -31
- data/vendor/libgit2/src/win32/w32_util.h +32 -6
- data/vendor/libgit2/src/worktree.c +22 -36
- data/vendor/libgit2/src/xdiff/xdiffi.c +1 -1
- data/vendor/libgit2/src/xdiff/xmerge.c +0 -12
- data/vendor/libgit2/src/xdiff/xpatience.c +0 -3
- metadata +34 -98
- data/vendor/libgit2/cmake/Modules/FindGSSFramework.cmake +0 -28
- data/vendor/libgit2/cmake/Modules/FindPCRE.cmake +0 -38
- data/vendor/libgit2/cmake/Modules/FindPCRE2.cmake +0 -37
- data/vendor/libgit2/cmake/Modules/PkgBuildConfig.cmake +0 -110
- data/vendor/libgit2/cmake/Modules/SelectGSSAPI.cmake +0 -53
- data/vendor/libgit2/cmake/Modules/SelectHTTPSBackend.cmake +0 -124
- data/vendor/libgit2/cmake/Modules/SelectHashes.cmake +0 -66
- data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +0 -21
- data/vendor/libgit2/deps/ntlmclient/compat.h +0 -33
- data/vendor/libgit2/deps/ntlmclient/crypt.h +0 -64
- data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +0 -120
- data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.h +0 -18
- data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +0 -145
- data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.h +0 -18
- data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +0 -130
- data/vendor/libgit2/deps/ntlmclient/crypt_openssl.h +0 -21
- data/vendor/libgit2/deps/ntlmclient/ntlm.c +0 -1420
- data/vendor/libgit2/deps/ntlmclient/ntlm.h +0 -174
- data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +0 -320
- data/vendor/libgit2/deps/ntlmclient/unicode.h +0 -36
- data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +0 -445
- data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +0 -201
- data/vendor/libgit2/deps/ntlmclient/utf8.h +0 -1257
- data/vendor/libgit2/deps/ntlmclient/util.c +0 -21
- data/vendor/libgit2/deps/ntlmclient/util.h +0 -14
- data/vendor/libgit2/deps/pcre/CMakeLists.txt +0 -140
- data/vendor/libgit2/deps/pcre/COPYING +0 -5
- data/vendor/libgit2/deps/pcre/cmake/COPYING-CMAKE-SCRIPTS +0 -22
- data/vendor/libgit2/deps/pcre/cmake/FindEditline.cmake +0 -17
- data/vendor/libgit2/deps/pcre/cmake/FindPackageHandleStandardArgs.cmake +0 -58
- data/vendor/libgit2/deps/pcre/cmake/FindReadline.cmake +0 -29
- data/vendor/libgit2/deps/pcre/config.h.in +0 -57
- data/vendor/libgit2/deps/pcre/pcre.h +0 -641
- data/vendor/libgit2/deps/pcre/pcre_byte_order.c +0 -319
- data/vendor/libgit2/deps/pcre/pcre_chartables.c +0 -198
- data/vendor/libgit2/deps/pcre/pcre_compile.c +0 -9800
- data/vendor/libgit2/deps/pcre/pcre_config.c +0 -190
- data/vendor/libgit2/deps/pcre/pcre_dfa_exec.c +0 -3676
- data/vendor/libgit2/deps/pcre/pcre_exec.c +0 -7173
- data/vendor/libgit2/deps/pcre/pcre_fullinfo.c +0 -245
- data/vendor/libgit2/deps/pcre/pcre_get.c +0 -669
- data/vendor/libgit2/deps/pcre/pcre_globals.c +0 -86
- data/vendor/libgit2/deps/pcre/pcre_internal.h +0 -2787
- data/vendor/libgit2/deps/pcre/pcre_jit_compile.c +0 -11913
- data/vendor/libgit2/deps/pcre/pcre_maketables.c +0 -156
- data/vendor/libgit2/deps/pcre/pcre_newline.c +0 -210
- data/vendor/libgit2/deps/pcre/pcre_ord2utf8.c +0 -94
- data/vendor/libgit2/deps/pcre/pcre_printint.c +0 -834
- data/vendor/libgit2/deps/pcre/pcre_refcount.c +0 -92
- data/vendor/libgit2/deps/pcre/pcre_string_utils.c +0 -211
- data/vendor/libgit2/deps/pcre/pcre_study.c +0 -1686
- data/vendor/libgit2/deps/pcre/pcre_tables.c +0 -727
- data/vendor/libgit2/deps/pcre/pcre_ucd.c +0 -3644
- data/vendor/libgit2/deps/pcre/pcre_valid_utf8.c +0 -301
- data/vendor/libgit2/deps/pcre/pcre_version.c +0 -98
- data/vendor/libgit2/deps/pcre/pcre_xclass.c +0 -268
- data/vendor/libgit2/deps/pcre/pcreposix.c +0 -421
- data/vendor/libgit2/deps/pcre/pcreposix.h +0 -117
- data/vendor/libgit2/deps/pcre/ucp.h +0 -224
- data/vendor/libgit2/include/git2/cert.h +0 -135
- data/vendor/libgit2/include/git2/cred.h +0 -308
- data/vendor/libgit2/include/git2/sys/cred.h +0 -90
- data/vendor/libgit2/src/allocators/win32_crtdbg.c +0 -118
- data/vendor/libgit2/src/config_snapshot.c +0 -206
- data/vendor/libgit2/src/errors.h +0 -81
- data/vendor/libgit2/src/hash/sha1.h +0 -38
- data/vendor/libgit2/src/hash/sha1/collisiondetect.h +0 -19
- data/vendor/libgit2/src/hash/sha1/common_crypto.h +0 -19
- data/vendor/libgit2/src/hash/sha1/openssl.h +0 -19
- data/vendor/libgit2/src/net.c +0 -184
- data/vendor/libgit2/src/net.h +0 -36
- data/vendor/libgit2/src/regexp.c +0 -221
- data/vendor/libgit2/src/regexp.h +0 -97
- data/vendor/libgit2/src/transports/auth_ntlm.c +0 -223
- data/vendor/libgit2/src/transports/auth_ntlm.h +0 -35
- data/vendor/libgit2/src/wildmatch.c +0 -320
- data/vendor/libgit2/src/wildmatch.h +0 -23
- data/vendor/libgit2/src/win32/w32_common.h +0 -39
@@ -1,190 +0,0 @@
|
|
1
|
-
/*************************************************
|
2
|
-
* Perl-Compatible Regular Expressions *
|
3
|
-
*************************************************/
|
4
|
-
|
5
|
-
/* PCRE is a library of functions to support regular expressions whose syntax
|
6
|
-
and semantics are as close as possible to those of the Perl 5 language.
|
7
|
-
|
8
|
-
Written by Philip Hazel
|
9
|
-
Copyright (c) 1997-2012 University of Cambridge
|
10
|
-
|
11
|
-
-----------------------------------------------------------------------------
|
12
|
-
Redistribution and use in source and binary forms, with or without
|
13
|
-
modification, are permitted provided that the following conditions are met:
|
14
|
-
|
15
|
-
* Redistributions of source code must retain the above copyright notice,
|
16
|
-
this list of conditions and the following disclaimer.
|
17
|
-
|
18
|
-
* Redistributions in binary form must reproduce the above copyright
|
19
|
-
notice, this list of conditions and the following disclaimer in the
|
20
|
-
documentation and/or other materials provided with the distribution.
|
21
|
-
|
22
|
-
* Neither the name of the University of Cambridge nor the names of its
|
23
|
-
contributors may be used to endorse or promote products derived from
|
24
|
-
this software without specific prior written permission.
|
25
|
-
|
26
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
27
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
28
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
29
|
-
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
30
|
-
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
31
|
-
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
32
|
-
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
33
|
-
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
34
|
-
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
35
|
-
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
36
|
-
POSSIBILITY OF SUCH DAMAGE.
|
37
|
-
-----------------------------------------------------------------------------
|
38
|
-
*/
|
39
|
-
|
40
|
-
|
41
|
-
/* This module contains the external function pcre_config(). */
|
42
|
-
|
43
|
-
|
44
|
-
#ifdef HAVE_CONFIG_H
|
45
|
-
#include "config.h"
|
46
|
-
#endif
|
47
|
-
|
48
|
-
/* Keep the original link size. */
|
49
|
-
static int real_link_size = LINK_SIZE;
|
50
|
-
|
51
|
-
#include "pcre_internal.h"
|
52
|
-
|
53
|
-
|
54
|
-
/*************************************************
|
55
|
-
* Return info about what features are configured *
|
56
|
-
*************************************************/
|
57
|
-
|
58
|
-
/* This function has an extensible interface so that additional items can be
|
59
|
-
added compatibly.
|
60
|
-
|
61
|
-
Arguments:
|
62
|
-
what what information is required
|
63
|
-
where where to put the information
|
64
|
-
|
65
|
-
Returns: 0 if data returned, negative on error
|
66
|
-
*/
|
67
|
-
|
68
|
-
#if defined COMPILE_PCRE8
|
69
|
-
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
70
|
-
pcre_config(int what, void *where)
|
71
|
-
#elif defined COMPILE_PCRE16
|
72
|
-
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
73
|
-
pcre16_config(int what, void *where)
|
74
|
-
#elif defined COMPILE_PCRE32
|
75
|
-
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
76
|
-
pcre32_config(int what, void *where)
|
77
|
-
#endif
|
78
|
-
{
|
79
|
-
switch (what)
|
80
|
-
{
|
81
|
-
case PCRE_CONFIG_UTF8:
|
82
|
-
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
|
83
|
-
*((int *)where) = 0;
|
84
|
-
return PCRE_ERROR_BADOPTION;
|
85
|
-
#else
|
86
|
-
#if defined SUPPORT_UTF
|
87
|
-
*((int *)where) = 1;
|
88
|
-
#else
|
89
|
-
*((int *)where) = 0;
|
90
|
-
#endif
|
91
|
-
break;
|
92
|
-
#endif
|
93
|
-
|
94
|
-
case PCRE_CONFIG_UTF16:
|
95
|
-
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
|
96
|
-
*((int *)where) = 0;
|
97
|
-
return PCRE_ERROR_BADOPTION;
|
98
|
-
#else
|
99
|
-
#if defined SUPPORT_UTF
|
100
|
-
*((int *)where) = 1;
|
101
|
-
#else
|
102
|
-
*((int *)where) = 0;
|
103
|
-
#endif
|
104
|
-
break;
|
105
|
-
#endif
|
106
|
-
|
107
|
-
case PCRE_CONFIG_UTF32:
|
108
|
-
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
|
109
|
-
*((int *)where) = 0;
|
110
|
-
return PCRE_ERROR_BADOPTION;
|
111
|
-
#else
|
112
|
-
#if defined SUPPORT_UTF
|
113
|
-
*((int *)where) = 1;
|
114
|
-
#else
|
115
|
-
*((int *)where) = 0;
|
116
|
-
#endif
|
117
|
-
break;
|
118
|
-
#endif
|
119
|
-
|
120
|
-
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
121
|
-
#ifdef SUPPORT_UCP
|
122
|
-
*((int *)where) = 1;
|
123
|
-
#else
|
124
|
-
*((int *)where) = 0;
|
125
|
-
#endif
|
126
|
-
break;
|
127
|
-
|
128
|
-
case PCRE_CONFIG_JIT:
|
129
|
-
#ifdef SUPPORT_JIT
|
130
|
-
*((int *)where) = 1;
|
131
|
-
#else
|
132
|
-
*((int *)where) = 0;
|
133
|
-
#endif
|
134
|
-
break;
|
135
|
-
|
136
|
-
case PCRE_CONFIG_JITTARGET:
|
137
|
-
#ifdef SUPPORT_JIT
|
138
|
-
*((const char **)where) = PRIV(jit_get_target)();
|
139
|
-
#else
|
140
|
-
*((const char **)where) = NULL;
|
141
|
-
#endif
|
142
|
-
break;
|
143
|
-
|
144
|
-
case PCRE_CONFIG_NEWLINE:
|
145
|
-
*((int *)where) = NEWLINE;
|
146
|
-
break;
|
147
|
-
|
148
|
-
case PCRE_CONFIG_BSR:
|
149
|
-
#ifdef BSR_ANYCRLF
|
150
|
-
*((int *)where) = 1;
|
151
|
-
#else
|
152
|
-
*((int *)where) = 0;
|
153
|
-
#endif
|
154
|
-
break;
|
155
|
-
|
156
|
-
case PCRE_CONFIG_LINK_SIZE:
|
157
|
-
*((int *)where) = real_link_size;
|
158
|
-
break;
|
159
|
-
|
160
|
-
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
161
|
-
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
162
|
-
break;
|
163
|
-
|
164
|
-
case PCRE_CONFIG_PARENS_LIMIT:
|
165
|
-
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
|
166
|
-
break;
|
167
|
-
|
168
|
-
case PCRE_CONFIG_MATCH_LIMIT:
|
169
|
-
*((unsigned long int *)where) = MATCH_LIMIT;
|
170
|
-
break;
|
171
|
-
|
172
|
-
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
173
|
-
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
|
174
|
-
break;
|
175
|
-
|
176
|
-
case PCRE_CONFIG_STACKRECURSE:
|
177
|
-
#ifdef NO_RECURSE
|
178
|
-
*((int *)where) = 0;
|
179
|
-
#else
|
180
|
-
*((int *)where) = 1;
|
181
|
-
#endif
|
182
|
-
break;
|
183
|
-
|
184
|
-
default: return PCRE_ERROR_BADOPTION;
|
185
|
-
}
|
186
|
-
|
187
|
-
return 0;
|
188
|
-
}
|
189
|
-
|
190
|
-
/* End of pcre_config.c */
|
@@ -1,3676 +0,0 @@
|
|
1
|
-
/*************************************************
|
2
|
-
* Perl-Compatible Regular Expressions *
|
3
|
-
*************************************************/
|
4
|
-
|
5
|
-
/* PCRE is a library of functions to support regular expressions whose syntax
|
6
|
-
and semantics are as close as possible to those of the Perl 5 language (but see
|
7
|
-
below for why this module is different).
|
8
|
-
|
9
|
-
Written by Philip Hazel
|
10
|
-
Copyright (c) 1997-2017 University of Cambridge
|
11
|
-
|
12
|
-
-----------------------------------------------------------------------------
|
13
|
-
Redistribution and use in source and binary forms, with or without
|
14
|
-
modification, are permitted provided that the following conditions are met:
|
15
|
-
|
16
|
-
* Redistributions of source code must retain the above copyright notice,
|
17
|
-
this list of conditions and the following disclaimer.
|
18
|
-
|
19
|
-
* Redistributions in binary form must reproduce the above copyright
|
20
|
-
notice, this list of conditions and the following disclaimer in the
|
21
|
-
documentation and/or other materials provided with the distribution.
|
22
|
-
|
23
|
-
* Neither the name of the University of Cambridge nor the names of its
|
24
|
-
contributors may be used to endorse or promote products derived from
|
25
|
-
this software without specific prior written permission.
|
26
|
-
|
27
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
28
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
29
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
30
|
-
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
31
|
-
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
32
|
-
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
33
|
-
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
34
|
-
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
35
|
-
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
36
|
-
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
37
|
-
POSSIBILITY OF SUCH DAMAGE.
|
38
|
-
-----------------------------------------------------------------------------
|
39
|
-
*/
|
40
|
-
|
41
|
-
/* This module contains the external function pcre_dfa_exec(), which is an
|
42
|
-
alternative matching function that uses a sort of DFA algorithm (not a true
|
43
|
-
FSM). This is NOT Perl-compatible, but it has advantages in certain
|
44
|
-
applications. */
|
45
|
-
|
46
|
-
|
47
|
-
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
|
48
|
-
the performance of his patterns greatly. I could not use it as it stood, as it
|
49
|
-
was not thread safe, and made assumptions about pattern sizes. Also, it caused
|
50
|
-
test 7 to loop, and test 9 to crash with a segfault.
|
51
|
-
|
52
|
-
The issue is the check for duplicate states, which is done by a simple linear
|
53
|
-
search up the state list. (Grep for "duplicate" below to find the code.) For
|
54
|
-
many patterns, there will never be many states active at one time, so a simple
|
55
|
-
linear search is fine. In patterns that have many active states, it might be a
|
56
|
-
bottleneck. The suggested code used an indexing scheme to remember which states
|
57
|
-
had previously been used for each character, and avoided the linear search when
|
58
|
-
it knew there was no chance of a duplicate. This was implemented when adding
|
59
|
-
states to the state lists.
|
60
|
-
|
61
|
-
I wrote some thread-safe, not-limited code to try something similar at the time
|
62
|
-
of checking for duplicates (instead of when adding states), using index vectors
|
63
|
-
on the stack. It did give a 13% improvement with one specially constructed
|
64
|
-
pattern for certain subject strings, but on other strings and on many of the
|
65
|
-
simpler patterns in the test suite it did worse. The major problem, I think,
|
66
|
-
was the extra time to initialize the index. This had to be done for each call
|
67
|
-
of internal_dfa_exec(). (The supplied patch used a static vector, initialized
|
68
|
-
only once - I suspect this was the cause of the problems with the tests.)
|
69
|
-
|
70
|
-
Overall, I concluded that the gains in some cases did not outweigh the losses
|
71
|
-
in others, so I abandoned this code. */
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
#ifdef HAVE_CONFIG_H
|
76
|
-
#include "config.h"
|
77
|
-
#endif
|
78
|
-
|
79
|
-
#define NLBLOCK md /* Block containing newline information */
|
80
|
-
#define PSSTART start_subject /* Field containing processed string start */
|
81
|
-
#define PSEND end_subject /* Field containing processed string end */
|
82
|
-
|
83
|
-
#include "pcre_internal.h"
|
84
|
-
|
85
|
-
|
86
|
-
/* For use to indent debugging output */
|
87
|
-
|
88
|
-
#define SP " "
|
89
|
-
|
90
|
-
|
91
|
-
/*************************************************
|
92
|
-
* Code parameters and static tables *
|
93
|
-
*************************************************/
|
94
|
-
|
95
|
-
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
96
|
-
into others, under special conditions. A gap of 20 between the blocks should be
|
97
|
-
enough. The resulting opcodes don't have to be less than 256 because they are
|
98
|
-
never stored, so we push them well clear of the normal opcodes. */
|
99
|
-
|
100
|
-
#define OP_PROP_EXTRA 300
|
101
|
-
#define OP_EXTUNI_EXTRA 320
|
102
|
-
#define OP_ANYNL_EXTRA 340
|
103
|
-
#define OP_HSPACE_EXTRA 360
|
104
|
-
#define OP_VSPACE_EXTRA 380
|
105
|
-
|
106
|
-
|
107
|
-
/* This table identifies those opcodes that are followed immediately by a
|
108
|
-
character that is to be tested in some way. This makes it possible to
|
109
|
-
centralize the loading of these characters. In the case of Type * etc, the
|
110
|
-
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
111
|
-
small value. Non-zero values in the table are the offsets from the opcode where
|
112
|
-
the character is to be found. ***NOTE*** If the start of this table is
|
113
|
-
modified, the three tables that follow must also be modified. */
|
114
|
-
|
115
|
-
static const pcre_uint8 coptable[] = { /* Indexed by opcode: offset from the opcode to its inline character, 0 if there is none */
|
116
|
-
0, /* End */
|
117
|
-
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
118
|
-
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
119
|
-
0, 0, 0, /* Any, AllAny, Anybyte */
|
120
|
-
0, 0, /* \P, \p */
|
121
|
-
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
122
|
-
0, /* \X */
|
123
|
-
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
124
|
-
1, /* Char */
|
125
|
-
1, /* Chari */
|
126
|
-
1, /* Not */
|
127
|
-
1, /* Noti */
|
128
|
-
/* Positive single-char repeats */
|
129
|
-
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
130
|
-
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
|
131
|
-
1+IMM2_SIZE, /* exact */
|
132
|
-
1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
|
133
|
-
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
|
134
|
-
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
|
135
|
-
1+IMM2_SIZE, /* exact I */
|
136
|
-
1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
|
137
|
-
/* Negative single-char repeats - only for chars < 256 */
|
138
|
-
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
|
139
|
-
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
|
140
|
-
1+IMM2_SIZE, /* NOT exact */
|
141
|
-
1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
|
142
|
-
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
|
143
|
-
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
|
144
|
-
1+IMM2_SIZE, /* NOT exact I */
|
145
|
-
1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
|
146
|
-
/* Positive type repeats */
|
147
|
-
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
|
148
|
-
1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
|
149
|
-
1+IMM2_SIZE, /* Type exact */
|
150
|
-
1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
|
151
|
-
/* Character class & ref repeats */
|
152
|
-
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
|
153
|
-
0, 0, /* CRRANGE, CRMINRANGE */
|
154
|
-
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
155
|
-
0, /* CLASS */
|
156
|
-
0, /* NCLASS */
|
157
|
-
0, /* XCLASS - variable length */
|
158
|
-
0, /* REF */
|
159
|
-
0, /* REFI */
|
160
|
-
0, /* DNREF */
|
161
|
-
0, /* DNREFI */
|
162
|
-
0, /* RECURSE */
|
163
|
-
0, /* CALLOUT */
|
164
|
-
0, /* Alt */
|
165
|
-
0, /* Ket */
|
166
|
-
0, /* KetRmax */
|
167
|
-
0, /* KetRmin */
|
168
|
-
0, /* KetRpos */
|
169
|
-
0, /* Reverse */
|
170
|
-
0, /* Assert */
|
171
|
-
0, /* Assert not */
|
172
|
-
0, /* Assert behind */
|
173
|
-
0, /* Assert behind not */
|
174
|
-
0, 0, /* ONCE, ONCE_NC */
|
175
|
-
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
176
|
-
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
177
|
-
0, 0, /* CREF, DNCREF */
|
178
|
-
0, 0, /* RREF, DNRREF */
|
179
|
-
0, /* DEF */
|
180
|
-
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
181
|
-
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
182
|
-
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
183
|
-
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
184
|
-
0, 0 /* CLOSE, SKIPZERO */
|
185
|
-
};
|
186
|
-
|
187
|
-
/* This table identifies those opcodes that inspect a character. It is used to
|
188
|
-
remember the fact that a character could have been inspected when the end of
|
189
|
-
the subject is reached. ***NOTE*** If the start of this table is modified, the
|
190
|
-
two tables that follow must also be modified. */
|
191
|
-
|
192
|
-
static const pcre_uint8 poptable[] = { /* Indexed by opcode: non-zero if the opcode inspects a subject character */
|
193
|
-
0, /* End */
|
194
|
-
0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
|
195
|
-
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
|
196
|
-
1, 1, 1, /* Any, AllAny, Anybyte */
|
197
|
-
1, 1, /* \P, \p */
|
198
|
-
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
|
199
|
-
1, /* \X */
|
200
|
-
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
201
|
-
1, /* Char */
|
202
|
-
1, /* Chari */
|
203
|
-
1, /* Not */
|
204
|
-
1, /* Noti */
|
205
|
-
/* Positive single-char repeats */
|
206
|
-
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
207
|
-
1, 1, 1, /* upto, minupto, exact */
|
208
|
-
1, 1, 1, 1, /* *+, ++, ?+, upto+ */
|
209
|
-
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
|
210
|
-
1, 1, 1, /* upto I, minupto I, exact I */
|
211
|
-
1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */
|
212
|
-
/* Negative single-char repeats - only for chars < 256 */
|
213
|
-
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
|
214
|
-
1, 1, 1, /* NOT upto, minupto, exact */
|
215
|
-
1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
|
216
|
-
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
|
217
|
-
1, 1, 1, /* NOT upto I, minupto I, exact I */
|
218
|
-
1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */
|
219
|
-
/* Positive type repeats */
|
220
|
-
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
|
221
|
-
1, 1, 1, /* Type upto, minupto, exact */
|
222
|
-
1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
|
223
|
-
/* Character class & ref repeats */
|
224
|
-
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
225
|
-
1, 1, /* CRRANGE, CRMINRANGE */
|
226
|
-
1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
227
|
-
1, /* CLASS */
|
228
|
-
1, /* NCLASS */
|
229
|
-
1, /* XCLASS - variable length */
|
230
|
-
0, /* REF */
|
231
|
-
0, /* REFI */
|
232
|
-
0, /* DNREF */
|
233
|
-
0, /* DNREFI */
|
234
|
-
0, /* RECURSE */
|
235
|
-
0, /* CALLOUT */
|
236
|
-
0, /* Alt */
|
237
|
-
0, /* Ket */
|
238
|
-
0, /* KetRmax */
|
239
|
-
0, /* KetRmin */
|
240
|
-
0, /* KetRpos */
|
241
|
-
0, /* Reverse */
|
242
|
-
0, /* Assert */
|
243
|
-
0, /* Assert not */
|
244
|
-
0, /* Assert behind */
|
245
|
-
0, /* Assert behind not */
|
246
|
-
0, 0, /* ONCE, ONCE_NC */
|
247
|
-
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
248
|
-
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
249
|
-
0, 0, /* CREF, DNCREF */
|
250
|
-
0, 0, /* RREF, DNRREF */
|
251
|
-
0, /* DEF */
|
252
|
-
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
253
|
-
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
254
|
-
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
255
|
-
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
256
|
-
0, 0 /* CLOSE, SKIPZERO */
|
257
|
-
};
|
258
|
-
|
259
|
-
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
260
|
-
and \w */
|
261
|
-
|
262
|
-
static const pcre_uint8 toptable1[] = { /* Indexed by opcode: mask that is ANDed with ctypes[c] */
|
263
|
-
0, 0, 0, 0, 0, 0, /* End, \A, \G, \K, \B, \b (same order as coptable) */
|
264
|
-
ctype_digit, ctype_digit, /* \D, \d */
|
265
|
-
ctype_space, ctype_space, /* \S, \s */
|
266
|
-
ctype_word, ctype_word, /* \W, \w */
|
267
|
-
0, 0 /* OP_ANY, OP_ALLANY */
|
268
|
-
};
|
269
|
-
|
270
|
-
static const pcre_uint8 toptable2[] = { /* Indexed by opcode: value XORed with (ctypes[c] & toptable1[op]); a non-zero result is a match */
|
271
|
-
0, 0, 0, 0, 0, 0, /* End, \A, \G, \K, \B, \b (same order as coptable) */
|
272
|
-
ctype_digit, 0, /* \D, \d - the XOR inverts the test for the negated type */
|
273
|
-
ctype_space, 0, /* \S, \s */
|
274
|
-
ctype_word, 0, /* \W, \w */
|
275
|
-
1, 1 /* OP_ANY, OP_ALLANY - mask is 0, so the result is always non-zero */
|
276
|
-
};
|
277
|
-
|
278
|
-
|
279
|
-
/* Structure for holding data about a particular state, which is in effect the
|
280
|
-
current data for an active path through the match tree. It must consist
|
281
|
-
entirely of ints because the working vector we are passed, and which we put
|
282
|
-
these structures in, is a vector of ints. */
|
283
|
-
|
284
|
-
typedef struct stateblock {
|
285
|
-
int offset; /* Offset of the opcode from start_code; negative means the (negated) state is held off while characters are skipped */
|
286
|
-
int count; /* Count for repeats */
|
287
|
-
int data; /* Extra data used by some states, e.g. the number of characters still to skip for a held-off state */
|
288
|
-
} stateblock;
|
289
|
-
|
290
|
-
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) /* Size of one stateblock, counted in ints */
|
291
|
-
|
292
|
-
|
293
|
-
#ifdef PCRE_DEBUG
|
294
|
-
/*************************************************
|
295
|
-
* Print character string *
|
296
|
-
*************************************************/
|
297
|
-
|
298
|
-
/* Character string printing function for debugging.
|
299
|
-
|
300
|
-
Arguments:
|
301
|
-
p points to string
|
302
|
-
length number of bytes
|
303
|
-
f where to print
|
304
|
-
|
305
|
-
Returns: nothing
|
306
|
-
*/
|
307
|
-
|
308
|
-
static void
|
309
|
-
pchars(const pcre_uchar *p, int length, FILE *f)
|
310
|
-
{
|
311
|
-
pcre_uint32 c;
|
312
|
-
while (length-- > 0)
|
313
|
-
{
|
314
|
-
if (isprint(c = *(p++)))
|
315
|
-
fprintf(f, "%c", c);
|
316
|
-
else
|
317
|
-
fprintf(f, "\\x{%02x}", c);
|
318
|
-
}
|
319
|
-
}
|
320
|
-
#endif
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
/*************************************************
|
325
|
-
* Execute a Regular Expression - DFA engine *
|
326
|
-
*************************************************/
|
327
|
-
|
328
|
-
/* This internal function applies a compiled pattern to a subject string,
|
329
|
-
starting at a given point, using a DFA engine. This function is called from the
|
330
|
-
external one, possibly multiple times if the pattern is not anchored. The
|
331
|
-
function calls itself recursively for some kinds of subpattern.
|
332
|
-
|
333
|
-
Arguments:
|
334
|
-
md the match_data block with fixed information
|
335
|
-
this_start_code the opening bracket of this subexpression's code
|
336
|
-
current_subject where we currently are in the subject string
|
337
|
-
start_offset start offset in the subject string
|
338
|
-
offsets vector to contain the matching string offsets
|
339
|
-
offsetcount size of same
|
340
|
-
workspace vector of workspace
|
341
|
-
wscount size of same
|
342
|
-
rlevel function call recursion level
|
343
|
-
|
344
|
-
Returns: > 0 => number of match offset pairs placed in offsets
|
345
|
-
= 0 => offsets overflowed; longest matches are present
|
346
|
-
-1 => failed to match
|
347
|
-
< -1 => some kind of unexpected problem
|
348
|
-
|
349
|
-
The following macros are used for adding states to the two state vectors (one
|
350
|
-
for the current character, one for the following character). */
|
351
|
-
|
352
|
-
/* ADD_ACTIVE(x,y): store offset x and count y in the slot that
next_active_state points at, then advance that pointer. If active_count has
already reached wscount, the enclosing function returns PCRE_ERROR_DFA_WSSIZE
instead. active_count is incremented on both paths, and x and y each appear
twice in the expansion (the second time inside DPRINTF). */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count = (y); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  while (0)
|
361
|
-
|
362
|
-
/* ADD_ACTIVE_DATA(x,y,z): as ADD_ACTIVE, but also stores z in the data field
of the slot. If active_count has already reached wscount, the enclosing
function returns PCRE_ERROR_DFA_WSSIZE instead. */

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count = (y); \
    next_active_state->data = (z); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  while (0)
|
372
|
-
|
373
|
-
/* ADD_NEW(x,y): store offset x and count y in the slot that next_new_state
points at, then advance that pointer. If new_count has already reached
wscount, the enclosing function returns PCRE_ERROR_DFA_WSSIZE instead.
new_count is incremented on both paths. */

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count = (y); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  while (0)
|
382
|
-
|
383
|
-
/* ADD_NEW_DATA(x,y,z): as ADD_NEW, but also stores z in the data field of the
slot. If new_count has already reached wscount, the enclosing function returns
PCRE_ERROR_DFA_WSSIZE instead. The debug output includes the source line of
the expansion. */

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count = (y); \
    next_new_state->data = (z); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
      (x), (y), (z), __LINE__)); \
    } \
  while (0)
|
394
|
-
|
395
|
-
/* And now, here is the code */
|
396
|
-
|
397
|
-
static int
|
398
|
-
internal_dfa_exec(
|
399
|
-
dfa_match_data *md,
|
400
|
-
const pcre_uchar *this_start_code,
|
401
|
-
const pcre_uchar *current_subject,
|
402
|
-
int start_offset,
|
403
|
-
int *offsets,
|
404
|
-
int offsetcount,
|
405
|
-
int *workspace,
|
406
|
-
int wscount,
|
407
|
-
int rlevel)
|
408
|
-
{
|
409
|
-
stateblock *active_states, *new_states, *temp_states;
|
410
|
-
stateblock *next_active_state, *next_new_state;
|
411
|
-
|
412
|
-
const pcre_uint8 *ctypes, *lcc, *fcc;
|
413
|
-
const pcre_uchar *ptr;
|
414
|
-
const pcre_uchar *end_code, *first_op;
|
415
|
-
|
416
|
-
dfa_recursion_info new_recursive;
|
417
|
-
|
418
|
-
int active_count, new_count, match_count;
|
419
|
-
|
420
|
-
/* Some fields in the md block are frequently referenced, so we load them into
|
421
|
-
independent variables in the hope that this will perform better. */
|
422
|
-
|
423
|
-
const pcre_uchar *start_subject = md->start_subject;
|
424
|
-
const pcre_uchar *end_subject = md->end_subject;
|
425
|
-
const pcre_uchar *start_code = md->start_code;
|
426
|
-
|
427
|
-
#ifdef SUPPORT_UTF
|
428
|
-
BOOL utf = (md->poptions & PCRE_UTF8) != 0;
|
429
|
-
#else
|
430
|
-
BOOL utf = FALSE;
|
431
|
-
#endif
|
432
|
-
|
433
|
-
BOOL reset_could_continue = FALSE;
|
434
|
-
|
435
|
-
rlevel++;
|
436
|
-
offsetcount &= (-2);
|
437
|
-
|
438
|
-
wscount -= 2;
|
439
|
-
wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
|
440
|
-
(2 * INTS_PER_STATEBLOCK);
|
441
|
-
|
442
|
-
DPRINTF(("\n%.*s---------------------\n"
|
443
|
-
"%.*sCall to internal_dfa_exec f=%d\n",
|
444
|
-
rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
|
445
|
-
|
446
|
-
ctypes = md->tables + ctypes_offset;
|
447
|
-
lcc = md->tables + lcc_offset;
|
448
|
-
fcc = md->tables + fcc_offset;
|
449
|
-
|
450
|
-
match_count = PCRE_ERROR_NOMATCH; /* A negative number */
|
451
|
-
|
452
|
-
active_states = (stateblock *)(workspace + 2);
|
453
|
-
next_new_state = new_states = active_states + wscount;
|
454
|
-
new_count = 0;
|
455
|
-
|
456
|
-
first_op = this_start_code + 1 + LINK_SIZE +
|
457
|
-
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
|
458
|
-
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
|
459
|
-
? IMM2_SIZE:0);
|
460
|
-
|
461
|
-
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
|
462
|
-
the alternative states onto the list, and find out where the end is. This
|
463
|
-
makes it possible to use this function recursively, when we want to stop at a
|
464
|
-
matching internal ket rather than at the end.
|
465
|
-
|
466
|
-
If the first opcode in the first alternative is OP_REVERSE, we are dealing with
|
467
|
-
a backward assertion. In that case, we have to find out the maximum amount to
|
468
|
-
move back, and set up each alternative appropriately. */
|
469
|
-
|
470
|
-
if (*first_op == OP_REVERSE)
|
471
|
-
{
|
472
|
-
int max_back = 0;
|
473
|
-
int gone_back;
|
474
|
-
|
475
|
-
end_code = this_start_code;
|
476
|
-
do
|
477
|
-
{
|
478
|
-
int back = GET(end_code, 2+LINK_SIZE);
|
479
|
-
if (back > max_back) max_back = back;
|
480
|
-
end_code += GET(end_code, 1);
|
481
|
-
}
|
482
|
-
while (*end_code == OP_ALT);
|
483
|
-
|
484
|
-
/* If we can't go back the amount required for the longest lookbehind
|
485
|
-
pattern, go back as far as we can; some alternatives may still be viable. */
|
486
|
-
|
487
|
-
#ifdef SUPPORT_UTF
|
488
|
-
/* In character mode we have to step back character by character */
|
489
|
-
|
490
|
-
if (utf)
|
491
|
-
{
|
492
|
-
for (gone_back = 0; gone_back < max_back; gone_back++)
|
493
|
-
{
|
494
|
-
if (current_subject <= start_subject) break;
|
495
|
-
current_subject--;
|
496
|
-
ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
|
497
|
-
}
|
498
|
-
}
|
499
|
-
else
|
500
|
-
#endif
|
501
|
-
|
502
|
-
/* In byte-mode we can do this quickly. */
|
503
|
-
|
504
|
-
{
|
505
|
-
gone_back = (current_subject - max_back < start_subject)?
|
506
|
-
(int)(current_subject - start_subject) : max_back;
|
507
|
-
current_subject -= gone_back;
|
508
|
-
}
|
509
|
-
|
510
|
-
/* Save the earliest consulted character */
|
511
|
-
|
512
|
-
if (current_subject < md->start_used_ptr)
|
513
|
-
md->start_used_ptr = current_subject;
|
514
|
-
|
515
|
-
/* Now we can process the individual branches. */
|
516
|
-
|
517
|
-
end_code = this_start_code;
|
518
|
-
do
|
519
|
-
{
|
520
|
-
int back = GET(end_code, 2+LINK_SIZE);
|
521
|
-
if (back <= gone_back)
|
522
|
-
{
|
523
|
-
int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
|
524
|
-
ADD_NEW_DATA(-bstate, 0, gone_back - back);
|
525
|
-
}
|
526
|
-
end_code += GET(end_code, 1);
|
527
|
-
}
|
528
|
-
while (*end_code == OP_ALT);
|
529
|
-
}
|
530
|
-
|
531
|
-
/* This is the code for a "normal" subpattern (not a backward assertion). The
|
532
|
-
start of a whole pattern is always one of these. If we are at the top level,
|
533
|
-
we may be asked to restart matching from the same point that we reached for a
|
534
|
-
previous partial match. We still have to scan through the top-level branches to
|
535
|
-
find the end state. */
|
536
|
-
|
537
|
-
else
|
538
|
-
{
|
539
|
-
end_code = this_start_code;
|
540
|
-
|
541
|
-
/* Restarting */
|
542
|
-
|
543
|
-
if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
|
544
|
-
{
|
545
|
-
do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
|
546
|
-
new_count = workspace[1];
|
547
|
-
if (!workspace[0])
|
548
|
-
memcpy(new_states, active_states, new_count * sizeof(stateblock));
|
549
|
-
}
|
550
|
-
|
551
|
-
/* Not restarting */
|
552
|
-
|
553
|
-
else
|
554
|
-
{
|
555
|
-
int length = 1 + LINK_SIZE +
|
556
|
-
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
|
557
|
-
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
|
558
|
-
? IMM2_SIZE:0);
|
559
|
-
do
|
560
|
-
{
|
561
|
-
ADD_NEW((int)(end_code - start_code + length), 0);
|
562
|
-
end_code += GET(end_code, 1);
|
563
|
-
length = 1 + LINK_SIZE;
|
564
|
-
}
|
565
|
-
while (*end_code == OP_ALT);
|
566
|
-
}
|
567
|
-
}
|
568
|
-
|
569
|
-
workspace[0] = 0; /* Bit indicating which vector is current */
|
570
|
-
|
571
|
-
DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
|
572
|
-
|
573
|
-
/* Loop for scanning the subject */
|
574
|
-
|
575
|
-
ptr = current_subject;
|
576
|
-
for (;;)
|
577
|
-
{
|
578
|
-
int i, j;
|
579
|
-
int clen, dlen;
|
580
|
-
pcre_uint32 c, d;
|
581
|
-
int forced_fail = 0;
|
582
|
-
BOOL partial_newline = FALSE;
|
583
|
-
BOOL could_continue = reset_could_continue;
|
584
|
-
reset_could_continue = FALSE;
|
585
|
-
|
586
|
-
/* Make the new state list into the active state list and empty the
|
587
|
-
new state list. */
|
588
|
-
|
589
|
-
temp_states = active_states;
|
590
|
-
active_states = new_states;
|
591
|
-
new_states = temp_states;
|
592
|
-
active_count = new_count;
|
593
|
-
new_count = 0;
|
594
|
-
|
595
|
-
workspace[0] ^= 1; /* Remember for the restarting feature */
|
596
|
-
workspace[1] = active_count;
|
597
|
-
|
598
|
-
#ifdef PCRE_DEBUG
|
599
|
-
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
|
600
|
-
pchars(ptr, STRLEN_UC(ptr), stdout);
|
601
|
-
printf("\"\n");
|
602
|
-
|
603
|
-
printf("%.*sActive states: ", rlevel*2-2, SP);
|
604
|
-
for (i = 0; i < active_count; i++)
|
605
|
-
printf("%d/%d ", active_states[i].offset, active_states[i].count);
|
606
|
-
printf("\n");
|
607
|
-
#endif
|
608
|
-
|
609
|
-
/* Set the pointers for adding new states */
|
610
|
-
|
611
|
-
next_active_state = active_states + active_count;
|
612
|
-
next_new_state = new_states;
|
613
|
-
|
614
|
-
/* Load the current character from the subject outside the loop, as many
|
615
|
-
different states may want to look at it, and we assume that at least one
|
616
|
-
will. */
|
617
|
-
|
618
|
-
if (ptr < end_subject)
|
619
|
-
{
|
620
|
-
clen = 1; /* Number of data items in the character */
|
621
|
-
#ifdef SUPPORT_UTF
|
622
|
-
GETCHARLENTEST(c, ptr, clen);
|
623
|
-
#else
|
624
|
-
c = *ptr;
|
625
|
-
#endif /* SUPPORT_UTF */
|
626
|
-
}
|
627
|
-
else
|
628
|
-
{
|
629
|
-
clen = 0; /* This indicates the end of the subject */
|
630
|
-
c = NOTACHAR; /* This value should never actually be used */
|
631
|
-
}
|
632
|
-
|
633
|
-
/* Scan up the active states and act on each one. The result of an action
|
634
|
-
may be to add more states to the currently active list (e.g. on hitting a
|
635
|
-
parenthesis) or it may be to put states on the new list, for considering
|
636
|
-
when we move the character pointer on. */
|
637
|
-
|
638
|
-
for (i = 0; i < active_count; i++)
|
639
|
-
{
|
640
|
-
stateblock *current_state = active_states + i;
|
641
|
-
BOOL caseless = FALSE;
|
642
|
-
const pcre_uchar *code;
|
643
|
-
int state_offset = current_state->offset;
|
644
|
-
int codevalue, rrc;
|
645
|
-
int count;
|
646
|
-
|
647
|
-
#ifdef PCRE_DEBUG
|
648
|
-
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
649
|
-
if (clen == 0) printf("EOL\n");
|
650
|
-
else if (c > 32 && c < 127) printf("'%c'\n", c);
|
651
|
-
else printf("0x%02x\n", c);
|
652
|
-
#endif
|
653
|
-
|
654
|
-
/* A negative offset is a special case meaning "hold off going to this
|
655
|
-
(negated) state until the number of characters in the data field have
|
656
|
-
been skipped". If the could_continue flag was passed over from a previous
|
657
|
-
state, arrange for it to be passed on. */
|
658
|
-
|
659
|
-
if (state_offset < 0)
|
660
|
-
{
|
661
|
-
if (current_state->data > 0)
|
662
|
-
{
|
663
|
-
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
|
664
|
-
ADD_NEW_DATA(state_offset, current_state->count,
|
665
|
-
current_state->data - 1);
|
666
|
-
if (could_continue) reset_could_continue = TRUE;
|
667
|
-
continue;
|
668
|
-
}
|
669
|
-
else
|
670
|
-
{
|
671
|
-
current_state->offset = state_offset = -state_offset;
|
672
|
-
}
|
673
|
-
}
|
674
|
-
|
675
|
-
/* Check for a duplicate state with the same count, and skip if found.
|
676
|
-
See the note at the head of this module about the possibility of improving
|
677
|
-
performance here. */
|
678
|
-
|
679
|
-
for (j = 0; j < i; j++)
|
680
|
-
{
|
681
|
-
if (active_states[j].offset == state_offset &&
|
682
|
-
active_states[j].count == current_state->count)
|
683
|
-
{
|
684
|
-
DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
|
685
|
-
goto NEXT_ACTIVE_STATE;
|
686
|
-
}
|
687
|
-
}
|
688
|
-
|
689
|
-
/* The state offset is the offset to the opcode */
|
690
|
-
|
691
|
-
code = start_code + state_offset;
|
692
|
-
codevalue = *code;
|
693
|
-
|
694
|
-
/* If this opcode inspects a character, but we are at the end of the
|
695
|
-
subject, remember the fact for use when testing for a partial match. */
|
696
|
-
|
697
|
-
if (clen == 0 && poptable[codevalue] != 0)
|
698
|
-
could_continue = TRUE;
|
699
|
-
|
700
|
-
/* If this opcode is followed by an inline character, load it. It is
|
701
|
-
tempting to test for the presence of a subject character here, but that
|
702
|
-
is wrong, because sometimes zero repetitions of the subject are
|
703
|
-
permitted.
|
704
|
-
|
705
|
-
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
706
|
-
argument that is not a data character - but is always one byte long because
|
707
|
-
the values are small. We have to take special action to deal with \P, \p,
|
708
|
-
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
|
709
|
-
these ones to new opcodes. */
|
710
|
-
|
711
|
-
if (coptable[codevalue] > 0)
|
712
|
-
{
|
713
|
-
dlen = 1;
|
714
|
-
#ifdef SUPPORT_UTF
|
715
|
-
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
716
|
-
#endif /* SUPPORT_UTF */
|
717
|
-
d = code[coptable[codevalue]];
|
718
|
-
if (codevalue >= OP_TYPESTAR)
|
719
|
-
{
|
720
|
-
switch(d)
|
721
|
-
{
|
722
|
-
case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
|
723
|
-
case OP_NOTPROP:
|
724
|
-
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
725
|
-
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
726
|
-
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
727
|
-
case OP_NOT_HSPACE:
|
728
|
-
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
729
|
-
case OP_NOT_VSPACE:
|
730
|
-
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
731
|
-
default: break;
|
732
|
-
}
|
733
|
-
}
|
734
|
-
}
|
735
|
-
else
|
736
|
-
{
|
737
|
-
dlen = 0; /* Not strictly necessary, but compilers moan */
|
738
|
-
d = NOTACHAR; /* if these variables are not set. */
|
739
|
-
}
|
740
|
-
|
741
|
-
|
742
|
-
/* Now process the individual opcodes */
|
743
|
-
|
744
|
-
switch (codevalue)
|
745
|
-
{
|
746
|
-
/* ========================================================================== */
|
747
|
-
/* These cases are never obeyed. This is a fudge that causes a compile-
|
748
|
-
time error if the vectors coptable or poptable, which are indexed by
|
749
|
-
opcode, are not the correct length. It seems to be the only way to do
|
750
|
-
such a check at compile time, as the sizeof() operator does not work
|
751
|
-
in the C preprocessor. */
|
752
|
-
|
753
|
-
case OP_TABLE_LENGTH:
|
754
|
-
case OP_TABLE_LENGTH +
|
755
|
-
((sizeof(coptable) == OP_TABLE_LENGTH) &&
|
756
|
-
(sizeof(poptable) == OP_TABLE_LENGTH)):
|
757
|
-
break;
|
758
|
-
|
759
|
-
/* ========================================================================== */
|
760
|
-
/* Reached a closing bracket. If not at the end of the pattern, carry
|
761
|
-
on with the next opcode. For repeating opcodes, also add the repeat
|
762
|
-
state. Note that KETRPOS will always be encountered at the end of the
|
763
|
-
subpattern, because the possessive subpattern repeats are always handled
|
764
|
-
using recursive calls. Thus, it never adds any new states.
|
765
|
-
|
766
|
-
At the end of the (sub)pattern, unless we have an empty string and
|
767
|
-
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
|
768
|
-
start of the subject, save the match data, shifting up all previous
|
769
|
-
matches so we always have the longest first. */
|
770
|
-
|
771
|
-
case OP_KET:
|
772
|
-
case OP_KETRMIN:
|
773
|
-
case OP_KETRMAX:
|
774
|
-
case OP_KETRPOS:
|
775
|
-
if (code != end_code)
|
776
|
-
{
|
777
|
-
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
|
778
|
-
if (codevalue != OP_KET)
|
779
|
-
{
|
780
|
-
ADD_ACTIVE(state_offset - GET(code, 1), 0);
|
781
|
-
}
|
782
|
-
}
|
783
|
-
else
|
784
|
-
{
|
785
|
-
if (ptr > current_subject ||
|
786
|
-
((md->moptions & PCRE_NOTEMPTY) == 0 &&
|
787
|
-
((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
|
788
|
-
current_subject > start_subject + md->start_offset)))
|
789
|
-
{
|
790
|
-
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
|
791
|
-
else if (match_count > 0 && ++match_count * 2 > offsetcount)
|
792
|
-
match_count = 0;
|
793
|
-
count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
|
794
|
-
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
|
795
|
-
if (offsetcount >= 2)
|
796
|
-
{
|
797
|
-
offsets[0] = (int)(current_subject - start_subject);
|
798
|
-
offsets[1] = (int)(ptr - start_subject);
|
799
|
-
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
|
800
|
-
offsets[1] - offsets[0], (char *)current_subject));
|
801
|
-
}
|
802
|
-
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
|
803
|
-
{
|
804
|
-
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
|
805
|
-
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
|
806
|
-
match_count, rlevel*2-2, SP));
|
807
|
-
return match_count;
|
808
|
-
}
|
809
|
-
}
|
810
|
-
}
|
811
|
-
break;
|
812
|
-
|
813
|
-
/* ========================================================================== */
|
814
|
-
/* These opcodes add to the current list of states without looking
|
815
|
-
at the current character. */
|
816
|
-
|
817
|
-
/*-----------------------------------------------------------------*/
|
818
|
-
case OP_ALT:
|
819
|
-
do { code += GET(code, 1); } while (*code == OP_ALT);
|
820
|
-
ADD_ACTIVE((int)(code - start_code), 0);
|
821
|
-
break;
|
822
|
-
|
823
|
-
/*-----------------------------------------------------------------*/
|
824
|
-
case OP_BRA:
|
825
|
-
case OP_SBRA:
|
826
|
-
do
|
827
|
-
{
|
828
|
-
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
829
|
-
code += GET(code, 1);
|
830
|
-
}
|
831
|
-
while (*code == OP_ALT);
|
832
|
-
break;
|
833
|
-
|
834
|
-
/*-----------------------------------------------------------------*/
|
835
|
-
case OP_CBRA:
|
836
|
-
case OP_SCBRA:
|
837
|
-
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
|
838
|
-
code += GET(code, 1);
|
839
|
-
while (*code == OP_ALT)
|
840
|
-
{
|
841
|
-
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
842
|
-
code += GET(code, 1);
|
843
|
-
}
|
844
|
-
break;
|
845
|
-
|
846
|
-
/*-----------------------------------------------------------------*/
|
847
|
-
case OP_BRAZERO:
|
848
|
-
case OP_BRAMINZERO:
|
849
|
-
ADD_ACTIVE(state_offset + 1, 0);
|
850
|
-
code += 1 + GET(code, 2);
|
851
|
-
while (*code == OP_ALT) code += GET(code, 1);
|
852
|
-
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
853
|
-
break;
|
854
|
-
|
855
|
-
/*-----------------------------------------------------------------*/
|
856
|
-
case OP_SKIPZERO:
|
857
|
-
code += 1 + GET(code, 2);
|
858
|
-
while (*code == OP_ALT) code += GET(code, 1);
|
859
|
-
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
860
|
-
break;
|
861
|
-
|
862
|
-
/*-----------------------------------------------------------------*/
|
863
|
-
case OP_CIRC:
|
864
|
-
if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
|
865
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
866
|
-
break;
|
867
|
-
|
868
|
-
/*-----------------------------------------------------------------*/
|
869
|
-
case OP_CIRCM:
|
870
|
-
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
|
871
|
-
(ptr != end_subject && WAS_NEWLINE(ptr)))
|
872
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
873
|
-
break;
|
874
|
-
|
875
|
-
/*-----------------------------------------------------------------*/
|
876
|
-
case OP_EOD:
|
877
|
-
if (ptr >= end_subject)
|
878
|
-
{
|
879
|
-
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
|
880
|
-
could_continue = TRUE;
|
881
|
-
else { ADD_ACTIVE(state_offset + 1, 0); }
|
882
|
-
}
|
883
|
-
break;
|
884
|
-
|
885
|
-
/*-----------------------------------------------------------------*/
|
886
|
-
case OP_SOD:
|
887
|
-
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
|
888
|
-
break;
|
889
|
-
|
890
|
-
/*-----------------------------------------------------------------*/
|
891
|
-
case OP_SOM:
|
892
|
-
if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
|
893
|
-
break;
|
894
|
-
|
895
|
-
|
896
|
-
/* ========================================================================== */
|
897
|
-
/* These opcodes inspect the next subject character, and sometimes
|
898
|
-
the previous one as well, but do not have an argument. The variable
|
899
|
-
clen contains the length of the current character and is zero if we are
|
900
|
-
at the end of the subject. */
|
901
|
-
|
902
|
-
/*-----------------------------------------------------------------*/
|
903
|
-
case OP_ANY:
|
904
|
-
if (clen > 0 && !IS_NEWLINE(ptr))
|
905
|
-
{
|
906
|
-
if (ptr + 1 >= md->end_subject &&
|
907
|
-
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
|
908
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
909
|
-
NLBLOCK->nllen == 2 &&
|
910
|
-
c == NLBLOCK->nl[0])
|
911
|
-
{
|
912
|
-
could_continue = partial_newline = TRUE;
|
913
|
-
}
|
914
|
-
else
|
915
|
-
{
|
916
|
-
ADD_NEW(state_offset + 1, 0);
|
917
|
-
}
|
918
|
-
}
|
919
|
-
break;
|
920
|
-
|
921
|
-
/*-----------------------------------------------------------------*/
|
922
|
-
case OP_ALLANY:
|
923
|
-
if (clen > 0)
|
924
|
-
{ ADD_NEW(state_offset + 1, 0); }
|
925
|
-
break;
|
926
|
-
|
927
|
-
/*-----------------------------------------------------------------*/
|
928
|
-
case OP_EODN:
|
929
|
-
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
|
930
|
-
could_continue = TRUE;
|
931
|
-
else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
|
932
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
933
|
-
break;
|
934
|
-
|
935
|
-
/*-----------------------------------------------------------------*/
|
936
|
-
case OP_DOLL:
|
937
|
-
if ((md->moptions & PCRE_NOTEOL) == 0)
|
938
|
-
{
|
939
|
-
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
|
940
|
-
could_continue = TRUE;
|
941
|
-
else if (clen == 0 ||
|
942
|
-
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
|
943
|
-
(ptr == end_subject - md->nllen)
|
944
|
-
))
|
945
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
946
|
-
else if (ptr + 1 >= md->end_subject &&
|
947
|
-
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
|
948
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
949
|
-
NLBLOCK->nllen == 2 &&
|
950
|
-
c == NLBLOCK->nl[0])
|
951
|
-
{
|
952
|
-
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
|
953
|
-
{
|
954
|
-
reset_could_continue = TRUE;
|
955
|
-
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
956
|
-
}
|
957
|
-
else could_continue = partial_newline = TRUE;
|
958
|
-
}
|
959
|
-
}
|
960
|
-
break;
|
961
|
-
|
962
|
-
/*-----------------------------------------------------------------*/
|
963
|
-
case OP_DOLLM:
|
964
|
-
if ((md->moptions & PCRE_NOTEOL) == 0)
|
965
|
-
{
|
966
|
-
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
|
967
|
-
could_continue = TRUE;
|
968
|
-
else if (clen == 0 ||
|
969
|
-
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
|
970
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
971
|
-
else if (ptr + 1 >= md->end_subject &&
|
972
|
-
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
|
973
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
974
|
-
NLBLOCK->nllen == 2 &&
|
975
|
-
c == NLBLOCK->nl[0])
|
976
|
-
{
|
977
|
-
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
|
978
|
-
{
|
979
|
-
reset_could_continue = TRUE;
|
980
|
-
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
981
|
-
}
|
982
|
-
else could_continue = partial_newline = TRUE;
|
983
|
-
}
|
984
|
-
}
|
985
|
-
else if (IS_NEWLINE(ptr))
|
986
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
987
|
-
break;
|
988
|
-
|
989
|
-
/*-----------------------------------------------------------------*/
|
990
|
-
|
991
|
-
case OP_DIGIT:
|
992
|
-
case OP_WHITESPACE:
|
993
|
-
case OP_WORDCHAR:
|
994
|
-
if (clen > 0 && c < 256 &&
|
995
|
-
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
|
996
|
-
{ ADD_NEW(state_offset + 1, 0); }
|
997
|
-
break;
|
998
|
-
|
999
|
-
/*-----------------------------------------------------------------*/
|
1000
|
-
case OP_NOT_DIGIT:
|
1001
|
-
case OP_NOT_WHITESPACE:
|
1002
|
-
case OP_NOT_WORDCHAR:
|
1003
|
-
if (clen > 0 && (c >= 256 ||
|
1004
|
-
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
|
1005
|
-
{ ADD_NEW(state_offset + 1, 0); }
|
1006
|
-
break;
|
1007
|
-
|
1008
|
-
/*-----------------------------------------------------------------*/
|
1009
|
-
case OP_WORD_BOUNDARY:
|
1010
|
-
case OP_NOT_WORD_BOUNDARY:
|
1011
|
-
{
|
1012
|
-
int left_word, right_word;
|
1013
|
-
|
1014
|
-
if (ptr > start_subject)
|
1015
|
-
{
|
1016
|
-
const pcre_uchar *temp = ptr - 1;
|
1017
|
-
if (temp < md->start_used_ptr) md->start_used_ptr = temp;
|
1018
|
-
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
|
1019
|
-
if (utf) { BACKCHAR(temp); }
|
1020
|
-
#endif
|
1021
|
-
GETCHARTEST(d, temp);
|
1022
|
-
#ifdef SUPPORT_UCP
|
1023
|
-
if ((md->poptions & PCRE_UCP) != 0)
|
1024
|
-
{
|
1025
|
-
if (d == '_') left_word = TRUE; else
|
1026
|
-
{
|
1027
|
-
int cat = UCD_CATEGORY(d);
|
1028
|
-
left_word = (cat == ucp_L || cat == ucp_N);
|
1029
|
-
}
|
1030
|
-
}
|
1031
|
-
else
|
1032
|
-
#endif
|
1033
|
-
left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
|
1034
|
-
}
|
1035
|
-
else left_word = FALSE;
|
1036
|
-
|
1037
|
-
if (clen > 0)
|
1038
|
-
{
|
1039
|
-
#ifdef SUPPORT_UCP
|
1040
|
-
if ((md->poptions & PCRE_UCP) != 0)
|
1041
|
-
{
|
1042
|
-
if (c == '_') right_word = TRUE; else
|
1043
|
-
{
|
1044
|
-
int cat = UCD_CATEGORY(c);
|
1045
|
-
right_word = (cat == ucp_L || cat == ucp_N);
|
1046
|
-
}
|
1047
|
-
}
|
1048
|
-
else
|
1049
|
-
#endif
|
1050
|
-
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
|
1051
|
-
}
|
1052
|
-
else right_word = FALSE;
|
1053
|
-
|
1054
|
-
if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
|
1055
|
-
{ ADD_ACTIVE(state_offset + 1, 0); }
|
1056
|
-
}
|
1057
|
-
break;
|
1058
|
-
|
1059
|
-
|
1060
|
-
/*-----------------------------------------------------------------*/
|
1061
|
-
/* Check the next character by Unicode property. We will get here only
|
1062
|
-
if the support is in the binary; otherwise a compile-time error occurs.
|
1063
|
-
*/
|
1064
|
-
|
1065
|
-
#ifdef SUPPORT_UCP
|
1066
|
-
case OP_PROP:
|
1067
|
-
case OP_NOTPROP:
|
1068
|
-
if (clen > 0)
|
1069
|
-
{
|
1070
|
-
BOOL OK;
|
1071
|
-
const pcre_uint32 *cp;
|
1072
|
-
const ucd_record * prop = GET_UCD(c);
|
1073
|
-
switch(code[1])
|
1074
|
-
{
|
1075
|
-
case PT_ANY:
|
1076
|
-
OK = TRUE;
|
1077
|
-
break;
|
1078
|
-
|
1079
|
-
case PT_LAMP:
|
1080
|
-
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
1081
|
-
prop->chartype == ucp_Lt;
|
1082
|
-
break;
|
1083
|
-
|
1084
|
-
case PT_GC:
|
1085
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
|
1086
|
-
break;
|
1087
|
-
|
1088
|
-
case PT_PC:
|
1089
|
-
OK = prop->chartype == code[2];
|
1090
|
-
break;
|
1091
|
-
|
1092
|
-
case PT_SC:
|
1093
|
-
OK = prop->script == code[2];
|
1094
|
-
break;
|
1095
|
-
|
1096
|
-
/* These are specials for combination cases. */
|
1097
|
-
|
1098
|
-
case PT_ALNUM:
|
1099
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1100
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
1101
|
-
break;
|
1102
|
-
|
1103
|
-
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
1104
|
-
which means that Perl space and POSIX space are now identical. PCRE
|
1105
|
-
was changed at release 8.34. */
|
1106
|
-
|
1107
|
-
case PT_SPACE: /* Perl space */
|
1108
|
-
case PT_PXSPACE: /* POSIX space */
|
1109
|
-
switch(c)
|
1110
|
-
{
|
1111
|
-
HSPACE_CASES:
|
1112
|
-
VSPACE_CASES:
|
1113
|
-
OK = TRUE;
|
1114
|
-
break;
|
1115
|
-
|
1116
|
-
default:
|
1117
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
1118
|
-
break;
|
1119
|
-
}
|
1120
|
-
break;
|
1121
|
-
|
1122
|
-
case PT_WORD:
|
1123
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1124
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
1125
|
-
c == CHAR_UNDERSCORE;
|
1126
|
-
break;
|
1127
|
-
|
1128
|
-
case PT_CLIST:
|
1129
|
-
cp = PRIV(ucd_caseless_sets) + code[2];
|
1130
|
-
for (;;)
|
1131
|
-
{
|
1132
|
-
if (c < *cp) { OK = FALSE; break; }
|
1133
|
-
if (c == *cp++) { OK = TRUE; break; }
|
1134
|
-
}
|
1135
|
-
break;
|
1136
|
-
|
1137
|
-
case PT_UCNC:
|
1138
|
-
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
1139
|
-
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
1140
|
-
c >= 0xe000;
|
1141
|
-
break;
|
1142
|
-
|
1143
|
-
/* Should never occur, but keep compilers from grumbling. */
|
1144
|
-
|
1145
|
-
default:
|
1146
|
-
OK = codevalue != OP_PROP;
|
1147
|
-
break;
|
1148
|
-
}
|
1149
|
-
|
1150
|
-
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
|
1151
|
-
}
|
1152
|
-
break;
|
1153
|
-
#endif
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
/* ========================================================================== */
|
1158
|
-
/* These opcodes likewise inspect the subject character, but have an
|
1159
|
-
argument that is not a data character. It is one of these opcodes:
|
1160
|
-
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
|
1161
|
-
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
|
1162
|
-
|
1163
|
-
case OP_TYPEPLUS:
|
1164
|
-
case OP_TYPEMINPLUS:
|
1165
|
-
case OP_TYPEPOSPLUS:
|
1166
|
-
count = current_state->count; /* Already matched */
|
1167
|
-
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
1168
|
-
if (clen > 0)
|
1169
|
-
{
|
1170
|
-
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
|
1171
|
-
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
|
1172
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
1173
|
-
NLBLOCK->nllen == 2 &&
|
1174
|
-
c == NLBLOCK->nl[0])
|
1175
|
-
{
|
1176
|
-
could_continue = partial_newline = TRUE;
|
1177
|
-
}
|
1178
|
-
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
1179
|
-
(c < 256 &&
|
1180
|
-
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
1181
|
-
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
1182
|
-
{
|
1183
|
-
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
1184
|
-
{
|
1185
|
-
active_count--; /* Remove non-match possibility */
|
1186
|
-
next_active_state--;
|
1187
|
-
}
|
1188
|
-
count++;
|
1189
|
-
ADD_NEW(state_offset, count);
|
1190
|
-
}
|
1191
|
-
}
|
1192
|
-
break;
|
1193
|
-
|
1194
|
-
/*-----------------------------------------------------------------*/
|
1195
|
-
case OP_TYPEQUERY:
|
1196
|
-
case OP_TYPEMINQUERY:
|
1197
|
-
case OP_TYPEPOSQUERY:
|
1198
|
-
ADD_ACTIVE(state_offset + 2, 0);
|
1199
|
-
if (clen > 0)
|
1200
|
-
{
|
1201
|
-
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
|
1202
|
-
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
|
1203
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
1204
|
-
NLBLOCK->nllen == 2 &&
|
1205
|
-
c == NLBLOCK->nl[0])
|
1206
|
-
{
|
1207
|
-
could_continue = partial_newline = TRUE;
|
1208
|
-
}
|
1209
|
-
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
1210
|
-
(c < 256 &&
|
1211
|
-
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
1212
|
-
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
1213
|
-
{
|
1214
|
-
if (codevalue == OP_TYPEPOSQUERY)
|
1215
|
-
{
|
1216
|
-
active_count--; /* Remove non-match possibility */
|
1217
|
-
next_active_state--;
|
1218
|
-
}
|
1219
|
-
ADD_NEW(state_offset + 2, 0);
|
1220
|
-
}
|
1221
|
-
}
|
1222
|
-
break;
|
1223
|
-
|
1224
|
-
/*-----------------------------------------------------------------*/
|
1225
|
-
case OP_TYPESTAR:
|
1226
|
-
case OP_TYPEMINSTAR:
|
1227
|
-
case OP_TYPEPOSSTAR:
|
1228
|
-
ADD_ACTIVE(state_offset + 2, 0);
|
1229
|
-
if (clen > 0)
|
1230
|
-
{
|
1231
|
-
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
|
1232
|
-
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
|
1233
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
1234
|
-
NLBLOCK->nllen == 2 &&
|
1235
|
-
c == NLBLOCK->nl[0])
|
1236
|
-
{
|
1237
|
-
could_continue = partial_newline = TRUE;
|
1238
|
-
}
|
1239
|
-
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
1240
|
-
(c < 256 &&
|
1241
|
-
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
1242
|
-
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
1243
|
-
{
|
1244
|
-
if (codevalue == OP_TYPEPOSSTAR)
|
1245
|
-
{
|
1246
|
-
active_count--; /* Remove non-match possibility */
|
1247
|
-
next_active_state--;
|
1248
|
-
}
|
1249
|
-
ADD_NEW(state_offset, 0);
|
1250
|
-
}
|
1251
|
-
}
|
1252
|
-
break;
|
1253
|
-
|
1254
|
-
/*-----------------------------------------------------------------*/
|
1255
|
-
case OP_TYPEEXACT:
|
1256
|
-
count = current_state->count; /* Number already matched */
|
1257
|
-
if (clen > 0)
|
1258
|
-
{
|
1259
|
-
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
|
1260
|
-
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
|
1261
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
1262
|
-
NLBLOCK->nllen == 2 &&
|
1263
|
-
c == NLBLOCK->nl[0])
|
1264
|
-
{
|
1265
|
-
could_continue = partial_newline = TRUE;
|
1266
|
-
}
|
1267
|
-
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
1268
|
-
(c < 256 &&
|
1269
|
-
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
1270
|
-
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
1271
|
-
{
|
1272
|
-
if (++count >= (int)GET2(code, 1))
|
1273
|
-
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
|
1274
|
-
else
|
1275
|
-
{ ADD_NEW(state_offset, count); }
|
1276
|
-
}
|
1277
|
-
}
|
1278
|
-
break;
|
1279
|
-
|
1280
|
-
/*-----------------------------------------------------------------*/
|
1281
|
-
case OP_TYPEUPTO:
|
1282
|
-
case OP_TYPEMINUPTO:
|
1283
|
-
case OP_TYPEPOSUPTO:
|
1284
|
-
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
|
1285
|
-
count = current_state->count; /* Number already matched */
|
1286
|
-
if (clen > 0)
|
1287
|
-
{
|
1288
|
-
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
|
1289
|
-
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
|
1290
|
-
NLBLOCK->nltype == NLTYPE_FIXED &&
|
1291
|
-
NLBLOCK->nllen == 2 &&
|
1292
|
-
c == NLBLOCK->nl[0])
|
1293
|
-
{
|
1294
|
-
could_continue = partial_newline = TRUE;
|
1295
|
-
}
|
1296
|
-
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
1297
|
-
(c < 256 &&
|
1298
|
-
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
1299
|
-
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
1300
|
-
{
|
1301
|
-
if (codevalue == OP_TYPEPOSUPTO)
|
1302
|
-
{
|
1303
|
-
active_count--; /* Remove non-match possibility */
|
1304
|
-
next_active_state--;
|
1305
|
-
}
|
1306
|
-
if (++count >= (int)GET2(code, 1))
|
1307
|
-
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
|
1308
|
-
else
|
1309
|
-
{ ADD_NEW(state_offset, count); }
|
1310
|
-
}
|
1311
|
-
}
|
1312
|
-
break;
|
1313
|
-
|
1314
|
-
/* ========================================================================== */
|
1315
|
-
/* These are virtual opcodes that are used when something like
|
1316
|
-
OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
|
1317
|
-
argument. It keeps the code above fast for the other cases. The argument
|
1318
|
-
is in the d variable. */
|
1319
|
-
|
1320
|
-
#ifdef SUPPORT_UCP
|
1321
|
-
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
1322
|
-
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
1323
|
-
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
1324
|
-
count = current_state->count; /* Already matched */
|
1325
|
-
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
|
1326
|
-
if (clen > 0)
|
1327
|
-
{
|
1328
|
-
BOOL OK;
|
1329
|
-
const pcre_uint32 *cp;
|
1330
|
-
const ucd_record * prop = GET_UCD(c);
|
1331
|
-
switch(code[2])
|
1332
|
-
{
|
1333
|
-
case PT_ANY:
|
1334
|
-
OK = TRUE;
|
1335
|
-
break;
|
1336
|
-
|
1337
|
-
case PT_LAMP:
|
1338
|
-
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
1339
|
-
prop->chartype == ucp_Lt;
|
1340
|
-
break;
|
1341
|
-
|
1342
|
-
case PT_GC:
|
1343
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
1344
|
-
break;
|
1345
|
-
|
1346
|
-
case PT_PC:
|
1347
|
-
OK = prop->chartype == code[3];
|
1348
|
-
break;
|
1349
|
-
|
1350
|
-
case PT_SC:
|
1351
|
-
OK = prop->script == code[3];
|
1352
|
-
break;
|
1353
|
-
|
1354
|
-
/* These are specials for combination cases. */
|
1355
|
-
|
1356
|
-
case PT_ALNUM:
|
1357
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1358
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
1359
|
-
break;
|
1360
|
-
|
1361
|
-
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
1362
|
-
which means that Perl space and POSIX space are now identical. PCRE
|
1363
|
-
was changed at release 8.34. */
|
1364
|
-
|
1365
|
-
case PT_SPACE: /* Perl space */
|
1366
|
-
case PT_PXSPACE: /* POSIX space */
|
1367
|
-
switch(c)
|
1368
|
-
{
|
1369
|
-
HSPACE_CASES:
|
1370
|
-
VSPACE_CASES:
|
1371
|
-
OK = TRUE;
|
1372
|
-
break;
|
1373
|
-
|
1374
|
-
default:
|
1375
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
1376
|
-
break;
|
1377
|
-
}
|
1378
|
-
break;
|
1379
|
-
|
1380
|
-
case PT_WORD:
|
1381
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1382
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
1383
|
-
c == CHAR_UNDERSCORE;
|
1384
|
-
break;
|
1385
|
-
|
1386
|
-
case PT_CLIST:
|
1387
|
-
cp = PRIV(ucd_caseless_sets) + code[3];
|
1388
|
-
for (;;)
|
1389
|
-
{
|
1390
|
-
if (c < *cp) { OK = FALSE; break; }
|
1391
|
-
if (c == *cp++) { OK = TRUE; break; }
|
1392
|
-
}
|
1393
|
-
break;
|
1394
|
-
|
1395
|
-
case PT_UCNC:
|
1396
|
-
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
1397
|
-
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
1398
|
-
c >= 0xe000;
|
1399
|
-
break;
|
1400
|
-
|
1401
|
-
/* Should never occur, but keep compilers from grumbling. */
|
1402
|
-
|
1403
|
-
default:
|
1404
|
-
OK = codevalue != OP_PROP;
|
1405
|
-
break;
|
1406
|
-
}
|
1407
|
-
|
1408
|
-
if (OK == (d == OP_PROP))
|
1409
|
-
{
|
1410
|
-
if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
|
1411
|
-
{
|
1412
|
-
active_count--; /* Remove non-match possibility */
|
1413
|
-
next_active_state--;
|
1414
|
-
}
|
1415
|
-
count++;
|
1416
|
-
ADD_NEW(state_offset, count);
|
1417
|
-
}
|
1418
|
-
}
|
1419
|
-
break;
|
1420
|
-
|
1421
|
-
/*-----------------------------------------------------------------*/
|
1422
|
-
case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
|
1423
|
-
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
|
1424
|
-
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
|
1425
|
-
count = current_state->count; /* Already matched */
|
1426
|
-
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
1427
|
-
if (clen > 0)
|
1428
|
-
{
|
1429
|
-
int lgb, rgb;
|
1430
|
-
const pcre_uchar *nptr = ptr + clen;
|
1431
|
-
int ncount = 0;
|
1432
|
-
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
1433
|
-
{
|
1434
|
-
active_count--; /* Remove non-match possibility */
|
1435
|
-
next_active_state--;
|
1436
|
-
}
|
1437
|
-
lgb = UCD_GRAPHBREAK(c);
|
1438
|
-
while (nptr < end_subject)
|
1439
|
-
{
|
1440
|
-
dlen = 1;
|
1441
|
-
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
1442
|
-
rgb = UCD_GRAPHBREAK(d);
|
1443
|
-
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
1444
|
-
ncount++;
|
1445
|
-
lgb = rgb;
|
1446
|
-
nptr += dlen;
|
1447
|
-
}
|
1448
|
-
count++;
|
1449
|
-
ADD_NEW_DATA(-state_offset, count, ncount);
|
1450
|
-
}
|
1451
|
-
break;
|
1452
|
-
#endif
|
1453
|
-
|
1454
|
-
/*-----------------------------------------------------------------*/
|
1455
|
-
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
1456
|
-
case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
|
1457
|
-
case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
|
1458
|
-
count = current_state->count; /* Already matched */
|
1459
|
-
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
1460
|
-
if (clen > 0)
|
1461
|
-
{
|
1462
|
-
int ncount = 0;
|
1463
|
-
switch (c)
|
1464
|
-
{
|
1465
|
-
case CHAR_VT:
|
1466
|
-
case CHAR_FF:
|
1467
|
-
case CHAR_NEL:
|
1468
|
-
#ifndef EBCDIC
|
1469
|
-
case 0x2028:
|
1470
|
-
case 0x2029:
|
1471
|
-
#endif /* Not EBCDIC */
|
1472
|
-
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
1473
|
-
goto ANYNL01;
|
1474
|
-
|
1475
|
-
case CHAR_CR:
|
1476
|
-
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
1477
|
-
/* Fall through */
|
1478
|
-
|
1479
|
-
ANYNL01:
|
1480
|
-
case CHAR_LF:
|
1481
|
-
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
|
1482
|
-
{
|
1483
|
-
active_count--; /* Remove non-match possibility */
|
1484
|
-
next_active_state--;
|
1485
|
-
}
|
1486
|
-
count++;
|
1487
|
-
ADD_NEW_DATA(-state_offset, count, ncount);
|
1488
|
-
break;
|
1489
|
-
|
1490
|
-
default:
|
1491
|
-
break;
|
1492
|
-
}
|
1493
|
-
}
|
1494
|
-
break;
|
1495
|
-
|
1496
|
-
/*-----------------------------------------------------------------*/
|
1497
|
-
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
1498
|
-
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
1499
|
-
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
1500
|
-
count = current_state->count; /* Already matched */
|
1501
|
-
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
1502
|
-
if (clen > 0)
|
1503
|
-
{
|
1504
|
-
BOOL OK;
|
1505
|
-
switch (c)
|
1506
|
-
{
|
1507
|
-
VSPACE_CASES:
|
1508
|
-
OK = TRUE;
|
1509
|
-
break;
|
1510
|
-
|
1511
|
-
default:
|
1512
|
-
OK = FALSE;
|
1513
|
-
break;
|
1514
|
-
}
|
1515
|
-
|
1516
|
-
if (OK == (d == OP_VSPACE))
|
1517
|
-
{
|
1518
|
-
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
1519
|
-
{
|
1520
|
-
active_count--; /* Remove non-match possibility */
|
1521
|
-
next_active_state--;
|
1522
|
-
}
|
1523
|
-
count++;
|
1524
|
-
ADD_NEW_DATA(-state_offset, count, 0);
|
1525
|
-
}
|
1526
|
-
}
|
1527
|
-
break;
|
1528
|
-
|
1529
|
-
/*-----------------------------------------------------------------*/
|
1530
|
-
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
1531
|
-
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
1532
|
-
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
1533
|
-
count = current_state->count; /* Already matched */
|
1534
|
-
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
1535
|
-
if (clen > 0)
|
1536
|
-
{
|
1537
|
-
BOOL OK;
|
1538
|
-
switch (c)
|
1539
|
-
{
|
1540
|
-
HSPACE_CASES:
|
1541
|
-
OK = TRUE;
|
1542
|
-
break;
|
1543
|
-
|
1544
|
-
default:
|
1545
|
-
OK = FALSE;
|
1546
|
-
break;
|
1547
|
-
}
|
1548
|
-
|
1549
|
-
if (OK == (d == OP_HSPACE))
|
1550
|
-
{
|
1551
|
-
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
1552
|
-
{
|
1553
|
-
active_count--; /* Remove non-match possibility */
|
1554
|
-
next_active_state--;
|
1555
|
-
}
|
1556
|
-
count++;
|
1557
|
-
ADD_NEW_DATA(-state_offset, count, 0);
|
1558
|
-
}
|
1559
|
-
}
|
1560
|
-
break;
|
1561
|
-
|
1562
|
-
/*-----------------------------------------------------------------*/
|
1563
|
-
#ifdef SUPPORT_UCP
|
1564
|
-
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
1565
|
-
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
1566
|
-
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
1567
|
-
count = 4;
|
1568
|
-
goto QS1;
|
1569
|
-
|
1570
|
-
case OP_PROP_EXTRA + OP_TYPESTAR:
|
1571
|
-
case OP_PROP_EXTRA + OP_TYPEMINSTAR:
|
1572
|
-
case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
|
1573
|
-
count = 0;
|
1574
|
-
|
1575
|
-
QS1:
|
1576
|
-
|
1577
|
-
ADD_ACTIVE(state_offset + 4, 0);
|
1578
|
-
if (clen > 0)
|
1579
|
-
{
|
1580
|
-
BOOL OK;
|
1581
|
-
const pcre_uint32 *cp;
|
1582
|
-
const ucd_record * prop = GET_UCD(c);
|
1583
|
-
switch(code[2])
|
1584
|
-
{
|
1585
|
-
case PT_ANY:
|
1586
|
-
OK = TRUE;
|
1587
|
-
break;
|
1588
|
-
|
1589
|
-
case PT_LAMP:
|
1590
|
-
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
1591
|
-
prop->chartype == ucp_Lt;
|
1592
|
-
break;
|
1593
|
-
|
1594
|
-
case PT_GC:
|
1595
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
1596
|
-
break;
|
1597
|
-
|
1598
|
-
case PT_PC:
|
1599
|
-
OK = prop->chartype == code[3];
|
1600
|
-
break;
|
1601
|
-
|
1602
|
-
case PT_SC:
|
1603
|
-
OK = prop->script == code[3];
|
1604
|
-
break;
|
1605
|
-
|
1606
|
-
/* These are specials for combination cases. */
|
1607
|
-
|
1608
|
-
case PT_ALNUM:
|
1609
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1610
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
1611
|
-
break;
|
1612
|
-
|
1613
|
-
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
1614
|
-
which means that Perl space and POSIX space are now identical. PCRE
|
1615
|
-
was changed at release 8.34. */
|
1616
|
-
|
1617
|
-
case PT_SPACE: /* Perl space */
|
1618
|
-
case PT_PXSPACE: /* POSIX space */
|
1619
|
-
switch(c)
|
1620
|
-
{
|
1621
|
-
HSPACE_CASES:
|
1622
|
-
VSPACE_CASES:
|
1623
|
-
OK = TRUE;
|
1624
|
-
break;
|
1625
|
-
|
1626
|
-
default:
|
1627
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
1628
|
-
break;
|
1629
|
-
}
|
1630
|
-
break;
|
1631
|
-
|
1632
|
-
case PT_WORD:
|
1633
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1634
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
1635
|
-
c == CHAR_UNDERSCORE;
|
1636
|
-
break;
|
1637
|
-
|
1638
|
-
case PT_CLIST:
|
1639
|
-
cp = PRIV(ucd_caseless_sets) + code[3];
|
1640
|
-
for (;;)
|
1641
|
-
{
|
1642
|
-
if (c < *cp) { OK = FALSE; break; }
|
1643
|
-
if (c == *cp++) { OK = TRUE; break; }
|
1644
|
-
}
|
1645
|
-
break;
|
1646
|
-
|
1647
|
-
case PT_UCNC:
|
1648
|
-
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
1649
|
-
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
1650
|
-
c >= 0xe000;
|
1651
|
-
break;
|
1652
|
-
|
1653
|
-
/* Should never occur, but keep compilers from grumbling. */
|
1654
|
-
|
1655
|
-
default:
|
1656
|
-
OK = codevalue != OP_PROP;
|
1657
|
-
break;
|
1658
|
-
}
|
1659
|
-
|
1660
|
-
if (OK == (d == OP_PROP))
|
1661
|
-
{
|
1662
|
-
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
|
1663
|
-
codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
|
1664
|
-
{
|
1665
|
-
active_count--; /* Remove non-match possibility */
|
1666
|
-
next_active_state--;
|
1667
|
-
}
|
1668
|
-
ADD_NEW(state_offset + count, 0);
|
1669
|
-
}
|
1670
|
-
}
|
1671
|
-
break;
|
1672
|
-
|
1673
|
-
/*-----------------------------------------------------------------*/
|
1674
|
-
case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
|
1675
|
-
case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
|
1676
|
-
case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
|
1677
|
-
count = 2;
|
1678
|
-
goto QS2;
|
1679
|
-
|
1680
|
-
case OP_EXTUNI_EXTRA + OP_TYPESTAR:
|
1681
|
-
case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
|
1682
|
-
case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
|
1683
|
-
count = 0;
|
1684
|
-
|
1685
|
-
QS2:
|
1686
|
-
|
1687
|
-
ADD_ACTIVE(state_offset + 2, 0);
|
1688
|
-
if (clen > 0)
|
1689
|
-
{
|
1690
|
-
int lgb, rgb;
|
1691
|
-
const pcre_uchar *nptr = ptr + clen;
|
1692
|
-
int ncount = 0;
|
1693
|
-
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
1694
|
-
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
1695
|
-
{
|
1696
|
-
active_count--; /* Remove non-match possibility */
|
1697
|
-
next_active_state--;
|
1698
|
-
}
|
1699
|
-
lgb = UCD_GRAPHBREAK(c);
|
1700
|
-
while (nptr < end_subject)
|
1701
|
-
{
|
1702
|
-
dlen = 1;
|
1703
|
-
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
1704
|
-
rgb = UCD_GRAPHBREAK(d);
|
1705
|
-
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
1706
|
-
ncount++;
|
1707
|
-
lgb = rgb;
|
1708
|
-
nptr += dlen;
|
1709
|
-
}
|
1710
|
-
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
1711
|
-
}
|
1712
|
-
break;
|
1713
|
-
#endif
|
1714
|
-
|
1715
|
-
/*-----------------------------------------------------------------*/
|
1716
|
-
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
1717
|
-
case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
|
1718
|
-
case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
|
1719
|
-
count = 2;
|
1720
|
-
goto QS3;
|
1721
|
-
|
1722
|
-
case OP_ANYNL_EXTRA + OP_TYPESTAR:
|
1723
|
-
case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
|
1724
|
-
case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
|
1725
|
-
count = 0;
|
1726
|
-
|
1727
|
-
QS3:
|
1728
|
-
ADD_ACTIVE(state_offset + 2, 0);
|
1729
|
-
if (clen > 0)
|
1730
|
-
{
|
1731
|
-
int ncount = 0;
|
1732
|
-
switch (c)
|
1733
|
-
{
|
1734
|
-
case CHAR_VT:
|
1735
|
-
case CHAR_FF:
|
1736
|
-
case CHAR_NEL:
|
1737
|
-
#ifndef EBCDIC
|
1738
|
-
case 0x2028:
|
1739
|
-
case 0x2029:
|
1740
|
-
#endif /* Not EBCDIC */
|
1741
|
-
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
1742
|
-
goto ANYNL02;
|
1743
|
-
|
1744
|
-
case CHAR_CR:
|
1745
|
-
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
1746
|
-
/* Fall through */
|
1747
|
-
|
1748
|
-
ANYNL02:
|
1749
|
-
case CHAR_LF:
|
1750
|
-
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
|
1751
|
-
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
|
1752
|
-
{
|
1753
|
-
active_count--; /* Remove non-match possibility */
|
1754
|
-
next_active_state--;
|
1755
|
-
}
|
1756
|
-
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
|
1757
|
-
break;
|
1758
|
-
|
1759
|
-
default:
|
1760
|
-
break;
|
1761
|
-
}
|
1762
|
-
}
|
1763
|
-
break;
|
1764
|
-
|
1765
|
-
/*-----------------------------------------------------------------*/
|
1766
|
-
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
1767
|
-
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
1768
|
-
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
1769
|
-
count = 2;
|
1770
|
-
goto QS4;
|
1771
|
-
|
1772
|
-
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
1773
|
-
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
1774
|
-
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
1775
|
-
count = 0;
|
1776
|
-
|
1777
|
-
QS4:
|
1778
|
-
ADD_ACTIVE(state_offset + 2, 0);
|
1779
|
-
if (clen > 0)
|
1780
|
-
{
|
1781
|
-
BOOL OK;
|
1782
|
-
switch (c)
|
1783
|
-
{
|
1784
|
-
VSPACE_CASES:
|
1785
|
-
OK = TRUE;
|
1786
|
-
break;
|
1787
|
-
|
1788
|
-
default:
|
1789
|
-
OK = FALSE;
|
1790
|
-
break;
|
1791
|
-
}
|
1792
|
-
if (OK == (d == OP_VSPACE))
|
1793
|
-
{
|
1794
|
-
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
1795
|
-
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
1796
|
-
{
|
1797
|
-
active_count--; /* Remove non-match possibility */
|
1798
|
-
next_active_state--;
|
1799
|
-
}
|
1800
|
-
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
1801
|
-
}
|
1802
|
-
}
|
1803
|
-
break;
|
1804
|
-
|
1805
|
-
/*-----------------------------------------------------------------*/
|
1806
|
-
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
1807
|
-
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
1808
|
-
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
1809
|
-
count = 2;
|
1810
|
-
goto QS5;
|
1811
|
-
|
1812
|
-
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
1813
|
-
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
1814
|
-
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
1815
|
-
count = 0;
|
1816
|
-
|
1817
|
-
QS5:
|
1818
|
-
ADD_ACTIVE(state_offset + 2, 0);
|
1819
|
-
if (clen > 0)
|
1820
|
-
{
|
1821
|
-
BOOL OK;
|
1822
|
-
switch (c)
|
1823
|
-
{
|
1824
|
-
HSPACE_CASES:
|
1825
|
-
OK = TRUE;
|
1826
|
-
break;
|
1827
|
-
|
1828
|
-
default:
|
1829
|
-
OK = FALSE;
|
1830
|
-
break;
|
1831
|
-
}
|
1832
|
-
|
1833
|
-
if (OK == (d == OP_HSPACE))
|
1834
|
-
{
|
1835
|
-
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
1836
|
-
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
1837
|
-
{
|
1838
|
-
active_count--; /* Remove non-match possibility */
|
1839
|
-
next_active_state--;
|
1840
|
-
}
|
1841
|
-
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
1842
|
-
}
|
1843
|
-
}
|
1844
|
-
break;
|
1845
|
-
|
1846
|
-
/*-----------------------------------------------------------------*/
|
1847
|
-
#ifdef SUPPORT_UCP
|
1848
|
-
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
1849
|
-
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
1850
|
-
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
1851
|
-
case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
|
1852
|
-
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
|
1853
|
-
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
1854
|
-
count = current_state->count; /* Number already matched */
|
1855
|
-
if (clen > 0)
|
1856
|
-
{
|
1857
|
-
BOOL OK;
|
1858
|
-
const pcre_uint32 *cp;
|
1859
|
-
const ucd_record * prop = GET_UCD(c);
|
1860
|
-
switch(code[1 + IMM2_SIZE + 1])
|
1861
|
-
{
|
1862
|
-
case PT_ANY:
|
1863
|
-
OK = TRUE;
|
1864
|
-
break;
|
1865
|
-
|
1866
|
-
case PT_LAMP:
|
1867
|
-
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
1868
|
-
prop->chartype == ucp_Lt;
|
1869
|
-
break;
|
1870
|
-
|
1871
|
-
case PT_GC:
|
1872
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
|
1873
|
-
break;
|
1874
|
-
|
1875
|
-
case PT_PC:
|
1876
|
-
OK = prop->chartype == code[1 + IMM2_SIZE + 2];
|
1877
|
-
break;
|
1878
|
-
|
1879
|
-
case PT_SC:
|
1880
|
-
OK = prop->script == code[1 + IMM2_SIZE + 2];
|
1881
|
-
break;
|
1882
|
-
|
1883
|
-
/* These are specials for combination cases. */
|
1884
|
-
|
1885
|
-
case PT_ALNUM:
|
1886
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1887
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
1888
|
-
break;
|
1889
|
-
|
1890
|
-
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
1891
|
-
which means that Perl space and POSIX space are now identical. PCRE
|
1892
|
-
was changed at release 8.34. */
|
1893
|
-
|
1894
|
-
case PT_SPACE: /* Perl space */
|
1895
|
-
case PT_PXSPACE: /* POSIX space */
|
1896
|
-
switch(c)
|
1897
|
-
{
|
1898
|
-
HSPACE_CASES:
|
1899
|
-
VSPACE_CASES:
|
1900
|
-
OK = TRUE;
|
1901
|
-
break;
|
1902
|
-
|
1903
|
-
default:
|
1904
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
1905
|
-
break;
|
1906
|
-
}
|
1907
|
-
break;
|
1908
|
-
|
1909
|
-
case PT_WORD:
|
1910
|
-
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
1911
|
-
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
1912
|
-
c == CHAR_UNDERSCORE;
|
1913
|
-
break;
|
1914
|
-
|
1915
|
-
case PT_CLIST:
|
1916
|
-
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
|
1917
|
-
for (;;)
|
1918
|
-
{
|
1919
|
-
if (c < *cp) { OK = FALSE; break; }
|
1920
|
-
if (c == *cp++) { OK = TRUE; break; }
|
1921
|
-
}
|
1922
|
-
break;
|
1923
|
-
|
1924
|
-
case PT_UCNC:
|
1925
|
-
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
1926
|
-
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
1927
|
-
c >= 0xe000;
|
1928
|
-
break;
|
1929
|
-
|
1930
|
-
/* Should never occur, but keep compilers from grumbling. */
|
1931
|
-
|
1932
|
-
default:
|
1933
|
-
OK = codevalue != OP_PROP;
|
1934
|
-
break;
|
1935
|
-
}
|
1936
|
-
|
1937
|
-
if (OK == (d == OP_PROP))
|
1938
|
-
{
|
1939
|
-
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
|
1940
|
-
{
|
1941
|
-
active_count--; /* Remove non-match possibility */
|
1942
|
-
next_active_state--;
|
1943
|
-
}
|
1944
|
-
if (++count >= (int)GET2(code, 1))
|
1945
|
-
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
1946
|
-
else
|
1947
|
-
{ ADD_NEW(state_offset, count); }
|
1948
|
-
}
|
1949
|
-
}
|
1950
|
-
break;
|
1951
|
-
|
1952
|
-
/*-----------------------------------------------------------------*/
|
1953
|
-
case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
|
1954
|
-
case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
|
1955
|
-
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
|
1956
|
-
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
|
1957
|
-
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
1958
|
-
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
1959
|
-
count = current_state->count; /* Number already matched */
|
1960
|
-
if (clen > 0)
|
1961
|
-
{
|
1962
|
-
int lgb, rgb;
|
1963
|
-
const pcre_uchar *nptr = ptr + clen;
|
1964
|
-
int ncount = 0;
|
1965
|
-
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
1966
|
-
{
|
1967
|
-
active_count--; /* Remove non-match possibility */
|
1968
|
-
next_active_state--;
|
1969
|
-
}
|
1970
|
-
lgb = UCD_GRAPHBREAK(c);
|
1971
|
-
while (nptr < end_subject)
|
1972
|
-
{
|
1973
|
-
dlen = 1;
|
1974
|
-
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
1975
|
-
rgb = UCD_GRAPHBREAK(d);
|
1976
|
-
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
1977
|
-
ncount++;
|
1978
|
-
lgb = rgb;
|
1979
|
-
nptr += dlen;
|
1980
|
-
}
|
1981
|
-
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
|
1982
|
-
reset_could_continue = TRUE;
|
1983
|
-
if (++count >= (int)GET2(code, 1))
|
1984
|
-
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
1985
|
-
else
|
1986
|
-
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
1987
|
-
}
|
1988
|
-
break;
|
1989
|
-
#endif
|
1990
|
-
|
1991
|
-
/*-----------------------------------------------------------------*/
|
1992
|
-
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
1993
|
-
case OP_ANYNL_EXTRA + OP_TYPEUPTO:
|
1994
|
-
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
|
1995
|
-
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
|
1996
|
-
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
|
1997
|
-
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
1998
|
-
count = current_state->count; /* Number already matched */
|
1999
|
-
if (clen > 0)
|
2000
|
-
{
|
2001
|
-
int ncount = 0;
|
2002
|
-
switch (c)
|
2003
|
-
{
|
2004
|
-
case CHAR_VT:
|
2005
|
-
case CHAR_FF:
|
2006
|
-
case CHAR_NEL:
|
2007
|
-
#ifndef EBCDIC
|
2008
|
-
case 0x2028:
|
2009
|
-
case 0x2029:
|
2010
|
-
#endif /* Not EBCDIC */
|
2011
|
-
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
2012
|
-
goto ANYNL03;
|
2013
|
-
|
2014
|
-
case CHAR_CR:
|
2015
|
-
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
2016
|
-
/* Fall through */
|
2017
|
-
|
2018
|
-
ANYNL03:
|
2019
|
-
case CHAR_LF:
|
2020
|
-
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
|
2021
|
-
{
|
2022
|
-
active_count--; /* Remove non-match possibility */
|
2023
|
-
next_active_state--;
|
2024
|
-
}
|
2025
|
-
if (++count >= (int)GET2(code, 1))
|
2026
|
-
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
2027
|
-
else
|
2028
|
-
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
2029
|
-
break;
|
2030
|
-
|
2031
|
-
default:
|
2032
|
-
break;
|
2033
|
-
}
|
2034
|
-
}
|
2035
|
-
break;
|
2036
|
-
|
2037
|
-
/*-----------------------------------------------------------------*/
|
2038
|
-
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
2039
|
-
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
2040
|
-
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
2041
|
-
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
2042
|
-
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
2043
|
-
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
2044
|
-
count = current_state->count; /* Number already matched */
|
2045
|
-
if (clen > 0)
|
2046
|
-
{
|
2047
|
-
BOOL OK;
|
2048
|
-
switch (c)
|
2049
|
-
{
|
2050
|
-
VSPACE_CASES:
|
2051
|
-
OK = TRUE;
|
2052
|
-
break;
|
2053
|
-
|
2054
|
-
default:
|
2055
|
-
OK = FALSE;
|
2056
|
-
}
|
2057
|
-
|
2058
|
-
if (OK == (d == OP_VSPACE))
|
2059
|
-
{
|
2060
|
-
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
2061
|
-
{
|
2062
|
-
active_count--; /* Remove non-match possibility */
|
2063
|
-
next_active_state--;
|
2064
|
-
}
|
2065
|
-
if (++count >= (int)GET2(code, 1))
|
2066
|
-
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
2067
|
-
else
|
2068
|
-
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
2069
|
-
}
|
2070
|
-
}
|
2071
|
-
break;
|
2072
|
-
|
2073
|
-
/*-----------------------------------------------------------------*/
|
2074
|
-
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
2075
|
-
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
2076
|
-
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
2077
|
-
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
2078
|
-
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
2079
|
-
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
2080
|
-
count = current_state->count; /* Number already matched */
|
2081
|
-
if (clen > 0)
|
2082
|
-
{
|
2083
|
-
BOOL OK;
|
2084
|
-
switch (c)
|
2085
|
-
{
|
2086
|
-
HSPACE_CASES:
|
2087
|
-
OK = TRUE;
|
2088
|
-
break;
|
2089
|
-
|
2090
|
-
default:
|
2091
|
-
OK = FALSE;
|
2092
|
-
break;
|
2093
|
-
}
|
2094
|
-
|
2095
|
-
if (OK == (d == OP_HSPACE))
|
2096
|
-
{
|
2097
|
-
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
2098
|
-
{
|
2099
|
-
active_count--; /* Remove non-match possibility */
|
2100
|
-
next_active_state--;
|
2101
|
-
}
|
2102
|
-
if (++count >= (int)GET2(code, 1))
|
2103
|
-
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
2104
|
-
else
|
2105
|
-
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
2106
|
-
}
|
2107
|
-
}
|
2108
|
-
break;
|
2109
|
-
|
2110
|
-
/* ========================================================================== */
|
2111
|
-
/* These opcodes are followed by a character that is usually compared
|
2112
|
-
to the current subject character; it is loaded into d. We still get
|
2113
|
-
here even if there is no subject character, because in some cases zero
|
2114
|
-
repetitions are permitted. */
|
2115
|
-
|
2116
|
-
/*-----------------------------------------------------------------*/
|
2117
|
-
case OP_CHAR:
|
2118
|
-
if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
2119
|
-
break;
|
2120
|
-
|
2121
|
-
/*-----------------------------------------------------------------*/
|
2122
|
-
case OP_CHARI:
|
2123
|
-
if (clen == 0) break;
|
2124
|
-
|
2125
|
-
#ifdef SUPPORT_UTF
|
2126
|
-
if (utf)
|
2127
|
-
{
|
2128
|
-
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
2129
|
-
{
|
2130
|
-
unsigned int othercase;
|
2131
|
-
if (c < 128)
|
2132
|
-
othercase = fcc[c];
|
2133
|
-
else
|
2134
|
-
/* If we have Unicode property support, we can use it to test the
|
2135
|
-
other case of the character. */
|
2136
|
-
#ifdef SUPPORT_UCP
|
2137
|
-
othercase = UCD_OTHERCASE(c);
|
2138
|
-
#else
|
2139
|
-
othercase = NOTACHAR;
|
2140
|
-
#endif
|
2141
|
-
|
2142
|
-
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
|
2143
|
-
}
|
2144
|
-
}
|
2145
|
-
else
|
2146
|
-
#endif /* SUPPORT_UTF */
|
2147
|
-
/* Not UTF mode */
|
2148
|
-
{
|
2149
|
-
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
2150
|
-
{ ADD_NEW(state_offset + 2, 0); }
|
2151
|
-
}
|
2152
|
-
break;
|
2153
|
-
|
2154
|
-
|
2155
|
-
#ifdef SUPPORT_UCP
|
2156
|
-
/*-----------------------------------------------------------------*/
|
2157
|
-
/* This is a tricky one because it can match more than one character.
|
2158
|
-
Find out how many characters to skip, and then set up a negative state
|
2159
|
-
to wait for them to pass before continuing. */
|
2160
|
-
|
2161
|
-
case OP_EXTUNI:
|
2162
|
-
if (clen > 0)
|
2163
|
-
{
|
2164
|
-
int lgb, rgb;
|
2165
|
-
const pcre_uchar *nptr = ptr + clen;
|
2166
|
-
int ncount = 0;
|
2167
|
-
lgb = UCD_GRAPHBREAK(c);
|
2168
|
-
while (nptr < end_subject)
|
2169
|
-
{
|
2170
|
-
dlen = 1;
|
2171
|
-
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
|
2172
|
-
rgb = UCD_GRAPHBREAK(d);
|
2173
|
-
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
2174
|
-
ncount++;
|
2175
|
-
lgb = rgb;
|
2176
|
-
nptr += dlen;
|
2177
|
-
}
|
2178
|
-
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
|
2179
|
-
reset_could_continue = TRUE;
|
2180
|
-
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
2181
|
-
}
|
2182
|
-
break;
|
2183
|
-
#endif
|
2184
|
-
|
2185
|
-
/*-----------------------------------------------------------------*/
|
2186
|
-
/* This is a tricky one like EXTUNI because it too can match more than one
|
2187
|
-
character (when CR is followed by LF). In this case, set up a negative
|
2188
|
-
state to wait for one character to pass before continuing. */
|
2189
|
-
|
2190
|
-
case OP_ANYNL:
|
2191
|
-
if (clen > 0) switch(c)
|
2192
|
-
{
|
2193
|
-
case CHAR_VT:
|
2194
|
-
case CHAR_FF:
|
2195
|
-
case CHAR_NEL:
|
2196
|
-
#ifndef EBCDIC
|
2197
|
-
case 0x2028:
|
2198
|
-
case 0x2029:
|
2199
|
-
#endif /* Not EBCDIC */
|
2200
|
-
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
|
2201
|
-
|
2202
|
-
case CHAR_LF:
|
2203
|
-
ADD_NEW(state_offset + 1, 0);
|
2204
|
-
break;
|
2205
|
-
|
2206
|
-
case CHAR_CR:
|
2207
|
-
if (ptr + 1 >= end_subject)
|
2208
|
-
{
|
2209
|
-
ADD_NEW(state_offset + 1, 0);
|
2210
|
-
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
|
2211
|
-
reset_could_continue = TRUE;
|
2212
|
-
}
|
2213
|
-
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
|
2214
|
-
{
|
2215
|
-
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
2216
|
-
}
|
2217
|
-
else
|
2218
|
-
{
|
2219
|
-
ADD_NEW(state_offset + 1, 0);
|
2220
|
-
}
|
2221
|
-
break;
|
2222
|
-
}
|
2223
|
-
break;
|
2224
|
-
|
2225
|
-
/*-----------------------------------------------------------------*/
|
2226
|
-
case OP_NOT_VSPACE:
|
2227
|
-
if (clen > 0) switch(c)
|
2228
|
-
{
|
2229
|
-
VSPACE_CASES:
|
2230
|
-
break;
|
2231
|
-
|
2232
|
-
default:
|
2233
|
-
ADD_NEW(state_offset + 1, 0);
|
2234
|
-
break;
|
2235
|
-
}
|
2236
|
-
break;
|
2237
|
-
|
2238
|
-
/*-----------------------------------------------------------------*/
|
2239
|
-
case OP_VSPACE:
|
2240
|
-
if (clen > 0) switch(c)
|
2241
|
-
{
|
2242
|
-
VSPACE_CASES:
|
2243
|
-
ADD_NEW(state_offset + 1, 0);
|
2244
|
-
break;
|
2245
|
-
|
2246
|
-
default:
|
2247
|
-
break;
|
2248
|
-
}
|
2249
|
-
break;
|
2250
|
-
|
2251
|
-
/*-----------------------------------------------------------------*/
|
2252
|
-
case OP_NOT_HSPACE:
|
2253
|
-
if (clen > 0) switch(c)
|
2254
|
-
{
|
2255
|
-
HSPACE_CASES:
|
2256
|
-
break;
|
2257
|
-
|
2258
|
-
default:
|
2259
|
-
ADD_NEW(state_offset + 1, 0);
|
2260
|
-
break;
|
2261
|
-
}
|
2262
|
-
break;
|
2263
|
-
|
2264
|
-
/*-----------------------------------------------------------------*/
|
2265
|
-
case OP_HSPACE:
|
2266
|
-
if (clen > 0) switch(c)
|
2267
|
-
{
|
2268
|
-
HSPACE_CASES:
|
2269
|
-
ADD_NEW(state_offset + 1, 0);
|
2270
|
-
break;
|
2271
|
-
|
2272
|
-
default:
|
2273
|
-
break;
|
2274
|
-
}
|
2275
|
-
break;
|
2276
|
-
|
2277
|
-
/*-----------------------------------------------------------------*/
|
2278
|
-
/* Match a negated single character casefully. */
|
2279
|
-
|
2280
|
-
case OP_NOT:
|
2281
|
-
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
2282
|
-
break;
|
2283
|
-
|
2284
|
-
/*-----------------------------------------------------------------*/
|
2285
|
-
/* Match a negated single character caselessly. */
|
2286
|
-
|
2287
|
-
case OP_NOTI:
|
2288
|
-
if (clen > 0)
|
2289
|
-
{
|
2290
|
-
pcre_uint32 otherd;
|
2291
|
-
#ifdef SUPPORT_UTF
|
2292
|
-
if (utf && d >= 128)
|
2293
|
-
{
|
2294
|
-
#ifdef SUPPORT_UCP
|
2295
|
-
otherd = UCD_OTHERCASE(d);
|
2296
|
-
#else
|
2297
|
-
otherd = d;
|
2298
|
-
#endif /* SUPPORT_UCP */
|
2299
|
-
}
|
2300
|
-
else
|
2301
|
-
#endif /* SUPPORT_UTF */
|
2302
|
-
otherd = TABLE_GET(d, fcc, d);
|
2303
|
-
if (c != d && c != otherd)
|
2304
|
-
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
2305
|
-
}
|
2306
|
-
break;
|
2307
|
-
|
2308
|
-
/*-----------------------------------------------------------------*/
|
2309
|
-
case OP_PLUSI:
|
2310
|
-
case OP_MINPLUSI:
|
2311
|
-
case OP_POSPLUSI:
|
2312
|
-
case OP_NOTPLUSI:
|
2313
|
-
case OP_NOTMINPLUSI:
|
2314
|
-
case OP_NOTPOSPLUSI:
|
2315
|
-
caseless = TRUE;
|
2316
|
-
codevalue -= OP_STARI - OP_STAR;
|
2317
|
-
|
2318
|
-
/* Fall through */
|
2319
|
-
case OP_PLUS:
|
2320
|
-
case OP_MINPLUS:
|
2321
|
-
case OP_POSPLUS:
|
2322
|
-
case OP_NOTPLUS:
|
2323
|
-
case OP_NOTMINPLUS:
|
2324
|
-
case OP_NOTPOSPLUS:
|
2325
|
-
count = current_state->count; /* Already matched */
|
2326
|
-
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
|
2327
|
-
if (clen > 0)
|
2328
|
-
{
|
2329
|
-
pcre_uint32 otherd = NOTACHAR;
|
2330
|
-
if (caseless)
|
2331
|
-
{
|
2332
|
-
#ifdef SUPPORT_UTF
|
2333
|
-
if (utf && d >= 128)
|
2334
|
-
{
|
2335
|
-
#ifdef SUPPORT_UCP
|
2336
|
-
otherd = UCD_OTHERCASE(d);
|
2337
|
-
#endif /* SUPPORT_UCP */
|
2338
|
-
}
|
2339
|
-
else
|
2340
|
-
#endif /* SUPPORT_UTF */
|
2341
|
-
otherd = TABLE_GET(d, fcc, d);
|
2342
|
-
}
|
2343
|
-
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
2344
|
-
{
|
2345
|
-
if (count > 0 &&
|
2346
|
-
(codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
|
2347
|
-
{
|
2348
|
-
active_count--; /* Remove non-match possibility */
|
2349
|
-
next_active_state--;
|
2350
|
-
}
|
2351
|
-
count++;
|
2352
|
-
ADD_NEW(state_offset, count);
|
2353
|
-
}
|
2354
|
-
}
|
2355
|
-
break;
|
2356
|
-
|
2357
|
-
/*-----------------------------------------------------------------*/
|
2358
|
-
case OP_QUERYI:
|
2359
|
-
case OP_MINQUERYI:
|
2360
|
-
case OP_POSQUERYI:
|
2361
|
-
case OP_NOTQUERYI:
|
2362
|
-
case OP_NOTMINQUERYI:
|
2363
|
-
case OP_NOTPOSQUERYI:
|
2364
|
-
caseless = TRUE;
|
2365
|
-
codevalue -= OP_STARI - OP_STAR;
|
2366
|
-
/* Fall through */
|
2367
|
-
case OP_QUERY:
|
2368
|
-
case OP_MINQUERY:
|
2369
|
-
case OP_POSQUERY:
|
2370
|
-
case OP_NOTQUERY:
|
2371
|
-
case OP_NOTMINQUERY:
|
2372
|
-
case OP_NOTPOSQUERY:
|
2373
|
-
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
2374
|
-
if (clen > 0)
|
2375
|
-
{
|
2376
|
-
pcre_uint32 otherd = NOTACHAR;
|
2377
|
-
if (caseless)
|
2378
|
-
{
|
2379
|
-
#ifdef SUPPORT_UTF
|
2380
|
-
if (utf && d >= 128)
|
2381
|
-
{
|
2382
|
-
#ifdef SUPPORT_UCP
|
2383
|
-
otherd = UCD_OTHERCASE(d);
|
2384
|
-
#endif /* SUPPORT_UCP */
|
2385
|
-
}
|
2386
|
-
else
|
2387
|
-
#endif /* SUPPORT_UTF */
|
2388
|
-
otherd = TABLE_GET(d, fcc, d);
|
2389
|
-
}
|
2390
|
-
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
2391
|
-
{
|
2392
|
-
if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
|
2393
|
-
{
|
2394
|
-
active_count--; /* Remove non-match possibility */
|
2395
|
-
next_active_state--;
|
2396
|
-
}
|
2397
|
-
ADD_NEW(state_offset + dlen + 1, 0);
|
2398
|
-
}
|
2399
|
-
}
|
2400
|
-
break;
|
2401
|
-
|
2402
|
-
/*-----------------------------------------------------------------*/
|
2403
|
-
case OP_STARI:
|
2404
|
-
case OP_MINSTARI:
|
2405
|
-
case OP_POSSTARI:
|
2406
|
-
case OP_NOTSTARI:
|
2407
|
-
case OP_NOTMINSTARI:
|
2408
|
-
case OP_NOTPOSSTARI:
|
2409
|
-
caseless = TRUE;
|
2410
|
-
codevalue -= OP_STARI - OP_STAR;
|
2411
|
-
/* Fall through */
|
2412
|
-
case OP_STAR:
|
2413
|
-
case OP_MINSTAR:
|
2414
|
-
case OP_POSSTAR:
|
2415
|
-
case OP_NOTSTAR:
|
2416
|
-
case OP_NOTMINSTAR:
|
2417
|
-
case OP_NOTPOSSTAR:
|
2418
|
-
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
2419
|
-
if (clen > 0)
|
2420
|
-
{
|
2421
|
-
pcre_uint32 otherd = NOTACHAR;
|
2422
|
-
if (caseless)
|
2423
|
-
{
|
2424
|
-
#ifdef SUPPORT_UTF
|
2425
|
-
if (utf && d >= 128)
|
2426
|
-
{
|
2427
|
-
#ifdef SUPPORT_UCP
|
2428
|
-
otherd = UCD_OTHERCASE(d);
|
2429
|
-
#endif /* SUPPORT_UCP */
|
2430
|
-
}
|
2431
|
-
else
|
2432
|
-
#endif /* SUPPORT_UTF */
|
2433
|
-
otherd = TABLE_GET(d, fcc, d);
|
2434
|
-
}
|
2435
|
-
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
2436
|
-
{
|
2437
|
-
if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
|
2438
|
-
{
|
2439
|
-
active_count--; /* Remove non-match possibility */
|
2440
|
-
next_active_state--;
|
2441
|
-
}
|
2442
|
-
ADD_NEW(state_offset, 0);
|
2443
|
-
}
|
2444
|
-
}
|
2445
|
-
break;
|
2446
|
-
|
2447
|
-
/*-----------------------------------------------------------------*/
|
2448
|
-
case OP_EXACTI:
|
2449
|
-
case OP_NOTEXACTI:
|
2450
|
-
caseless = TRUE;
|
2451
|
-
codevalue -= OP_STARI - OP_STAR;
|
2452
|
-
/* Fall through */
|
2453
|
-
case OP_EXACT:
|
2454
|
-
case OP_NOTEXACT:
|
2455
|
-
count = current_state->count; /* Number already matched */
|
2456
|
-
if (clen > 0)
|
2457
|
-
{
|
2458
|
-
pcre_uint32 otherd = NOTACHAR;
|
2459
|
-
if (caseless)
|
2460
|
-
{
|
2461
|
-
#ifdef SUPPORT_UTF
|
2462
|
-
if (utf && d >= 128)
|
2463
|
-
{
|
2464
|
-
#ifdef SUPPORT_UCP
|
2465
|
-
otherd = UCD_OTHERCASE(d);
|
2466
|
-
#endif /* SUPPORT_UCP */
|
2467
|
-
}
|
2468
|
-
else
|
2469
|
-
#endif /* SUPPORT_UTF */
|
2470
|
-
otherd = TABLE_GET(d, fcc, d);
|
2471
|
-
}
|
2472
|
-
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
2473
|
-
{
|
2474
|
-
if (++count >= (int)GET2(code, 1))
|
2475
|
-
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
2476
|
-
else
|
2477
|
-
{ ADD_NEW(state_offset, count); }
|
2478
|
-
}
|
2479
|
-
}
|
2480
|
-
break;
|
2481
|
-
|
2482
|
-
/*-----------------------------------------------------------------*/
|
2483
|
-
case OP_UPTOI:
|
2484
|
-
case OP_MINUPTOI:
|
2485
|
-
case OP_POSUPTOI:
|
2486
|
-
case OP_NOTUPTOI:
|
2487
|
-
case OP_NOTMINUPTOI:
|
2488
|
-
case OP_NOTPOSUPTOI:
|
2489
|
-
caseless = TRUE;
|
2490
|
-
codevalue -= OP_STARI - OP_STAR;
|
2491
|
-
/* Fall through */
|
2492
|
-
case OP_UPTO:
|
2493
|
-
case OP_MINUPTO:
|
2494
|
-
case OP_POSUPTO:
|
2495
|
-
case OP_NOTUPTO:
|
2496
|
-
case OP_NOTMINUPTO:
|
2497
|
-
case OP_NOTPOSUPTO:
|
2498
|
-
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
|
2499
|
-
count = current_state->count; /* Number already matched */
|
2500
|
-
if (clen > 0)
|
2501
|
-
{
|
2502
|
-
pcre_uint32 otherd = NOTACHAR;
|
2503
|
-
if (caseless)
|
2504
|
-
{
|
2505
|
-
#ifdef SUPPORT_UTF
|
2506
|
-
if (utf && d >= 128)
|
2507
|
-
{
|
2508
|
-
#ifdef SUPPORT_UCP
|
2509
|
-
otherd = UCD_OTHERCASE(d);
|
2510
|
-
#endif /* SUPPORT_UCP */
|
2511
|
-
}
|
2512
|
-
else
|
2513
|
-
#endif /* SUPPORT_UTF */
|
2514
|
-
otherd = TABLE_GET(d, fcc, d);
|
2515
|
-
}
|
2516
|
-
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
2517
|
-
{
|
2518
|
-
if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
|
2519
|
-
{
|
2520
|
-
active_count--; /* Remove non-match possibility */
|
2521
|
-
next_active_state--;
|
2522
|
-
}
|
2523
|
-
if (++count >= (int)GET2(code, 1))
|
2524
|
-
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
2525
|
-
else
|
2526
|
-
{ ADD_NEW(state_offset, count); }
|
2527
|
-
}
|
2528
|
-
}
|
2529
|
-
break;
|
2530
|
-
|
2531
|
-
|
2532
|
-
/* ========================================================================== */
|
2533
|
-
/* These are the class-handling opcodes */
|
2534
|
-
|
2535
|
-
case OP_CLASS:
|
2536
|
-
case OP_NCLASS:
|
2537
|
-
case OP_XCLASS:
|
2538
|
-
{
|
2539
|
-
BOOL isinclass = FALSE;
|
2540
|
-
int next_state_offset;
|
2541
|
-
const pcre_uchar *ecode;
|
2542
|
-
|
2543
|
-
/* For a simple class, there is always just a 32-byte table, and we
|
2544
|
-
can set isinclass from it. */
|
2545
|
-
|
2546
|
-
if (codevalue != OP_XCLASS)
|
2547
|
-
{
|
2548
|
-
ecode = code + 1 + (32 / sizeof(pcre_uchar));
|
2549
|
-
if (clen > 0)
|
2550
|
-
{
|
2551
|
-
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
|
2552
|
-
((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
|
2553
|
-
}
|
2554
|
-
}
|
2555
|
-
|
2556
|
-
/* An extended class may have a table or a list of single characters,
|
2557
|
-
ranges, or both, and it may be positive or negative. There's a
|
2558
|
-
function that sorts all this out. */
|
2559
|
-
|
2560
|
-
else
|
2561
|
-
{
|
2562
|
-
ecode = code + GET(code, 1);
|
2563
|
-
if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
|
2564
|
-
}
|
2565
|
-
|
2566
|
-
/* At this point, isinclass is set for all kinds of class, and ecode
|
2567
|
-
points to the byte after the end of the class. If there is a
|
2568
|
-
quantifier, this is where it will be. */
|
2569
|
-
|
2570
|
-
next_state_offset = (int)(ecode - start_code);
|
2571
|
-
|
2572
|
-
switch (*ecode)
|
2573
|
-
{
|
2574
|
-
case OP_CRSTAR:
|
2575
|
-
case OP_CRMINSTAR:
|
2576
|
-
case OP_CRPOSSTAR:
|
2577
|
-
ADD_ACTIVE(next_state_offset + 1, 0);
|
2578
|
-
if (isinclass)
|
2579
|
-
{
|
2580
|
-
if (*ecode == OP_CRPOSSTAR)
|
2581
|
-
{
|
2582
|
-
active_count--; /* Remove non-match possibility */
|
2583
|
-
next_active_state--;
|
2584
|
-
}
|
2585
|
-
ADD_NEW(state_offset, 0);
|
2586
|
-
}
|
2587
|
-
break;
|
2588
|
-
|
2589
|
-
case OP_CRPLUS:
|
2590
|
-
case OP_CRMINPLUS:
|
2591
|
-
case OP_CRPOSPLUS:
|
2592
|
-
count = current_state->count; /* Already matched */
|
2593
|
-
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
2594
|
-
if (isinclass)
|
2595
|
-
{
|
2596
|
-
if (count > 0 && *ecode == OP_CRPOSPLUS)
|
2597
|
-
{
|
2598
|
-
active_count--; /* Remove non-match possibility */
|
2599
|
-
next_active_state--;
|
2600
|
-
}
|
2601
|
-
count++;
|
2602
|
-
ADD_NEW(state_offset, count);
|
2603
|
-
}
|
2604
|
-
break;
|
2605
|
-
|
2606
|
-
case OP_CRQUERY:
|
2607
|
-
case OP_CRMINQUERY:
|
2608
|
-
case OP_CRPOSQUERY:
|
2609
|
-
ADD_ACTIVE(next_state_offset + 1, 0);
|
2610
|
-
if (isinclass)
|
2611
|
-
{
|
2612
|
-
if (*ecode == OP_CRPOSQUERY)
|
2613
|
-
{
|
2614
|
-
active_count--; /* Remove non-match possibility */
|
2615
|
-
next_active_state--;
|
2616
|
-
}
|
2617
|
-
ADD_NEW(next_state_offset + 1, 0);
|
2618
|
-
}
|
2619
|
-
break;
|
2620
|
-
|
2621
|
-
case OP_CRRANGE:
|
2622
|
-
case OP_CRMINRANGE:
|
2623
|
-
case OP_CRPOSRANGE:
|
2624
|
-
count = current_state->count; /* Already matched */
|
2625
|
-
if (count >= (int)GET2(ecode, 1))
|
2626
|
-
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
2627
|
-
if (isinclass)
|
2628
|
-
{
|
2629
|
-
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
2630
|
-
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
|
2631
|
-
{
|
2632
|
-
active_count--; /* Remove non-match possibility */
|
2633
|
-
next_active_state--;
|
2634
|
-
}
|
2635
|
-
if (++count >= max && max != 0) /* Max 0 => no limit */
|
2636
|
-
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
2637
|
-
else
|
2638
|
-
{ ADD_NEW(state_offset, count); }
|
2639
|
-
}
|
2640
|
-
break;
|
2641
|
-
|
2642
|
-
default:
|
2643
|
-
if (isinclass) { ADD_NEW(next_state_offset, 0); }
|
2644
|
-
break;
|
2645
|
-
}
|
2646
|
-
}
|
2647
|
-
break;
|
2648
|
-
|
2649
|
-
/* ========================================================================== */
|
2650
|
-
/* These are the opcodes for fancy brackets of various kinds. We have
|
2651
|
-
to use recursion in order to handle them. The "always failing" assertion
|
2652
|
-
(?!) is optimised to OP_FAIL when compiling, so we have to support that,
|
2653
|
-
though the other "backtracking verbs" are not supported. */
|
2654
|
-
|
2655
|
-
case OP_FAIL:
|
2656
|
-
forced_fail++; /* Count FAILs for multiple states */
|
2657
|
-
break;
|
2658
|
-
|
2659
|
-
case OP_ASSERT:
|
2660
|
-
case OP_ASSERT_NOT:
|
2661
|
-
case OP_ASSERTBACK:
|
2662
|
-
case OP_ASSERTBACK_NOT:
|
2663
|
-
{
|
2664
|
-
int rc;
|
2665
|
-
int local_offsets[2];
|
2666
|
-
int local_workspace[1000];
|
2667
|
-
const pcre_uchar *endasscode = code + GET(code, 1);
|
2668
|
-
|
2669
|
-
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
2670
|
-
|
2671
|
-
rc = internal_dfa_exec(
|
2672
|
-
md, /* static match data */
|
2673
|
-
code, /* this subexpression's code */
|
2674
|
-
ptr, /* where we currently are */
|
2675
|
-
(int)(ptr - start_subject), /* start offset */
|
2676
|
-
local_offsets, /* offset vector */
|
2677
|
-
sizeof(local_offsets)/sizeof(int), /* size of same */
|
2678
|
-
local_workspace, /* workspace vector */
|
2679
|
-
sizeof(local_workspace)/sizeof(int), /* size of same */
|
2680
|
-
rlevel); /* function recursion level */
|
2681
|
-
|
2682
|
-
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
|
2683
|
-
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
|
2684
|
-
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
2685
|
-
}
|
2686
|
-
break;
|
2687
|
-
|
2688
|
-
/*-----------------------------------------------------------------*/
|
2689
|
-
case OP_COND:
|
2690
|
-
case OP_SCOND:
|
2691
|
-
{
|
2692
|
-
int local_offsets[1000];
|
2693
|
-
int local_workspace[1000];
|
2694
|
-
int codelink = GET(code, 1);
|
2695
|
-
int condcode;
|
2696
|
-
|
2697
|
-
/* Because of the way auto-callout works during compile, a callout item
|
2698
|
-
is inserted between OP_COND and an assertion condition. This does not
|
2699
|
-
happen for the other conditions. */
|
2700
|
-
|
2701
|
-
if (code[LINK_SIZE+1] == OP_CALLOUT)
|
2702
|
-
{
|
2703
|
-
rrc = 0;
|
2704
|
-
if (PUBL(callout) != NULL)
|
2705
|
-
{
|
2706
|
-
PUBL(callout_block) cb;
|
2707
|
-
cb.version = 1; /* Version 1 of the callout block */
|
2708
|
-
cb.callout_number = code[LINK_SIZE+2];
|
2709
|
-
cb.offset_vector = offsets;
|
2710
|
-
#if defined COMPILE_PCRE8
|
2711
|
-
cb.subject = (PCRE_SPTR)start_subject;
|
2712
|
-
#elif defined COMPILE_PCRE16
|
2713
|
-
cb.subject = (PCRE_SPTR16)start_subject;
|
2714
|
-
#elif defined COMPILE_PCRE32
|
2715
|
-
cb.subject = (PCRE_SPTR32)start_subject;
|
2716
|
-
#endif
|
2717
|
-
cb.subject_length = (int)(end_subject - start_subject);
|
2718
|
-
cb.start_match = (int)(current_subject - start_subject);
|
2719
|
-
cb.current_position = (int)(ptr - start_subject);
|
2720
|
-
cb.pattern_position = GET(code, LINK_SIZE + 3);
|
2721
|
-
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
|
2722
|
-
cb.capture_top = 1;
|
2723
|
-
cb.capture_last = -1;
|
2724
|
-
cb.callout_data = md->callout_data;
|
2725
|
-
cb.mark = NULL; /* No (*MARK) support */
|
2726
|
-
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
|
2727
|
-
}
|
2728
|
-
if (rrc > 0) break; /* Fail this thread */
|
2729
|
-
code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
|
2730
|
-
}
|
2731
|
-
|
2732
|
-
condcode = code[LINK_SIZE+1];
|
2733
|
-
|
2734
|
-
/* Back reference conditions and duplicate named recursion conditions
|
2735
|
-
are not supported */
|
2736
|
-
|
2737
|
-
if (condcode == OP_CREF || condcode == OP_DNCREF ||
|
2738
|
-
condcode == OP_DNRREF)
|
2739
|
-
return PCRE_ERROR_DFA_UCOND;
|
2740
|
-
|
2741
|
-
/* The DEFINE condition is always false, and the assertion (?!) is
|
2742
|
-
converted to OP_FAIL. */
|
2743
|
-
|
2744
|
-
if (condcode == OP_DEF || condcode == OP_FAIL)
|
2745
|
-
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
2746
|
-
|
2747
|
-
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
2748
|
-
which means "test if in any recursion". We can't test for specifically
|
2749
|
-
recursed groups. */
|
2750
|
-
|
2751
|
-
else if (condcode == OP_RREF)
|
2752
|
-
{
|
2753
|
-
int value = GET2(code, LINK_SIZE + 2);
|
2754
|
-
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
|
2755
|
-
if (md->recursive != NULL)
|
2756
|
-
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
|
2757
|
-
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
2758
|
-
}
|
2759
|
-
|
2760
|
-
/* Otherwise, the condition is an assertion */
|
2761
|
-
|
2762
|
-
else
|
2763
|
-
{
|
2764
|
-
int rc;
|
2765
|
-
const pcre_uchar *asscode = code + LINK_SIZE + 1;
|
2766
|
-
const pcre_uchar *endasscode = asscode + GET(asscode, 1);
|
2767
|
-
|
2768
|
-
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
2769
|
-
|
2770
|
-
rc = internal_dfa_exec(
|
2771
|
-
md, /* fixed match data */
|
2772
|
-
asscode, /* this subexpression's code */
|
2773
|
-
ptr, /* where we currently are */
|
2774
|
-
(int)(ptr - start_subject), /* start offset */
|
2775
|
-
local_offsets, /* offset vector */
|
2776
|
-
sizeof(local_offsets)/sizeof(int), /* size of same */
|
2777
|
-
local_workspace, /* workspace vector */
|
2778
|
-
sizeof(local_workspace)/sizeof(int), /* size of same */
|
2779
|
-
rlevel); /* function recursion level */
|
2780
|
-
|
2781
|
-
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
|
2782
|
-
if ((rc >= 0) ==
|
2783
|
-
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
|
2784
|
-
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
2785
|
-
else
|
2786
|
-
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
2787
|
-
}
|
2788
|
-
}
|
2789
|
-
break;
|
2790
|
-
|
2791
|
-
/*-----------------------------------------------------------------*/
|
2792
|
-
case OP_RECURSE:
|
2793
|
-
{
|
2794
|
-
dfa_recursion_info *ri;
|
2795
|
-
int local_offsets[1000];
|
2796
|
-
int local_workspace[1000];
|
2797
|
-
const pcre_uchar *callpat = start_code + GET(code, 1);
|
2798
|
-
int recno = (callpat == md->start_code)? 0 :
|
2799
|
-
GET2(callpat, 1 + LINK_SIZE);
|
2800
|
-
int rc;
|
2801
|
-
|
2802
|
-
DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
|
2803
|
-
|
2804
|
-
/* Check for repeating a recursion without advancing the subject
|
2805
|
-
pointer. This should catch convoluted mutual recursions. (Some simple
|
2806
|
-
cases are caught at compile time.) */
|
2807
|
-
|
2808
|
-
for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
|
2809
|
-
if (recno == ri->group_num && ptr == ri->subject_position)
|
2810
|
-
return PCRE_ERROR_RECURSELOOP;
|
2811
|
-
|
2812
|
-
/* Remember this recursion and where we started it so as to
|
2813
|
-
catch infinite loops. */
|
2814
|
-
|
2815
|
-
new_recursive.group_num = recno;
|
2816
|
-
new_recursive.subject_position = ptr;
|
2817
|
-
new_recursive.prevrec = md->recursive;
|
2818
|
-
md->recursive = &new_recursive;
|
2819
|
-
|
2820
|
-
rc = internal_dfa_exec(
|
2821
|
-
md, /* fixed match data */
|
2822
|
-
callpat, /* this subexpression's code */
|
2823
|
-
ptr, /* where we currently are */
|
2824
|
-
(int)(ptr - start_subject), /* start offset */
|
2825
|
-
local_offsets, /* offset vector */
|
2826
|
-
sizeof(local_offsets)/sizeof(int), /* size of same */
|
2827
|
-
local_workspace, /* workspace vector */
|
2828
|
-
sizeof(local_workspace)/sizeof(int), /* size of same */
|
2829
|
-
rlevel); /* function recursion level */
|
2830
|
-
|
2831
|
-
md->recursive = new_recursive.prevrec; /* Done this recursion */
|
2832
|
-
|
2833
|
-
DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
|
2834
|
-
rc));
|
2835
|
-
|
2836
|
-
/* Ran out of internal offsets */
|
2837
|
-
|
2838
|
-
if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
|
2839
|
-
|
2840
|
-
/* For each successful matched substring, set up the next state with a
|
2841
|
-
count of characters to skip before trying it. Note that the count is in
|
2842
|
-
characters, not bytes. */
|
2843
|
-
|
2844
|
-
if (rc > 0)
|
2845
|
-
{
|
2846
|
-
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
2847
|
-
{
|
2848
|
-
int charcount = local_offsets[rc+1] - local_offsets[rc];
|
2849
|
-
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
|
2850
|
-
if (utf)
|
2851
|
-
{
|
2852
|
-
const pcre_uchar *p = start_subject + local_offsets[rc];
|
2853
|
-
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
|
2854
|
-
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
2855
|
-
}
|
2856
|
-
#endif
|
2857
|
-
if (charcount > 0)
|
2858
|
-
{
|
2859
|
-
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
|
2860
|
-
}
|
2861
|
-
else
|
2862
|
-
{
|
2863
|
-
ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
|
2864
|
-
}
|
2865
|
-
}
|
2866
|
-
}
|
2867
|
-
else if (rc != PCRE_ERROR_NOMATCH) return rc;
|
2868
|
-
}
|
2869
|
-
break;
|
2870
|
-
|
2871
|
-
/*-----------------------------------------------------------------*/
|
2872
|
-
case OP_BRAPOS:
|
2873
|
-
case OP_SBRAPOS:
|
2874
|
-
case OP_CBRAPOS:
|
2875
|
-
case OP_SCBRAPOS:
|
2876
|
-
case OP_BRAPOSZERO:
|
2877
|
-
{
|
2878
|
-
int charcount, matched_count;
|
2879
|
-
const pcre_uchar *local_ptr = ptr;
|
2880
|
-
BOOL allow_zero;
|
2881
|
-
|
2882
|
-
if (codevalue == OP_BRAPOSZERO)
|
2883
|
-
{
|
2884
|
-
allow_zero = TRUE;
|
2885
|
-
codevalue = *(++code); /* Codevalue will be one of above BRAs */
|
2886
|
-
}
|
2887
|
-
else allow_zero = FALSE;
|
2888
|
-
|
2889
|
-
/* Loop to match the subpattern as many times as possible as if it were
|
2890
|
-
a complete pattern. */
|
2891
|
-
|
2892
|
-
for (matched_count = 0;; matched_count++)
|
2893
|
-
{
|
2894
|
-
int local_offsets[2];
|
2895
|
-
int local_workspace[1000];
|
2896
|
-
|
2897
|
-
int rc = internal_dfa_exec(
|
2898
|
-
md, /* fixed match data */
|
2899
|
-
code, /* this subexpression's code */
|
2900
|
-
local_ptr, /* where we currently are */
|
2901
|
-
(int)(ptr - start_subject), /* start offset */
|
2902
|
-
local_offsets, /* offset vector */
|
2903
|
-
sizeof(local_offsets)/sizeof(int), /* size of same */
|
2904
|
-
local_workspace, /* workspace vector */
|
2905
|
-
sizeof(local_workspace)/sizeof(int), /* size of same */
|
2906
|
-
rlevel); /* function recursion level */
|
2907
|
-
|
2908
|
-
/* Failed to match */
|
2909
|
-
|
2910
|
-
if (rc < 0)
|
2911
|
-
{
|
2912
|
-
if (rc != PCRE_ERROR_NOMATCH) return rc;
|
2913
|
-
break;
|
2914
|
-
}
|
2915
|
-
|
2916
|
-
/* Matched: break the loop if zero characters matched. */
|
2917
|
-
|
2918
|
-
charcount = local_offsets[1] - local_offsets[0];
|
2919
|
-
if (charcount == 0) break;
|
2920
|
-
local_ptr += charcount; /* Advance temporary position ptr */
|
2921
|
-
}
|
2922
|
-
|
2923
|
-
/* At this point we have matched the subpattern matched_count
|
2924
|
-
times, and local_ptr is pointing to the character after the end of the
|
2925
|
-
last match. */
|
2926
|
-
|
2927
|
-
if (matched_count > 0 || allow_zero)
|
2928
|
-
{
|
2929
|
-
const pcre_uchar *end_subpattern = code;
|
2930
|
-
int next_state_offset;
|
2931
|
-
|
2932
|
-
do { end_subpattern += GET(end_subpattern, 1); }
|
2933
|
-
while (*end_subpattern == OP_ALT);
|
2934
|
-
next_state_offset =
|
2935
|
-
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
2936
|
-
|
2937
|
-
/* Optimization: if there are no more active states, and there
|
2938
|
-
are no new states yet set up, then skip over the subject string
|
2939
|
-
right here, to save looping. Otherwise, set up the new state to swing
|
2940
|
-
into action when the end of the matched substring is reached. */
|
2941
|
-
|
2942
|
-
if (i + 1 >= active_count && new_count == 0)
|
2943
|
-
{
|
2944
|
-
ptr = local_ptr;
|
2945
|
-
clen = 0;
|
2946
|
-
ADD_NEW(next_state_offset, 0);
|
2947
|
-
}
|
2948
|
-
else
|
2949
|
-
{
|
2950
|
-
const pcre_uchar *p = ptr;
|
2951
|
-
const pcre_uchar *pp = local_ptr;
|
2952
|
-
charcount = (int)(pp - p);
|
2953
|
-
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
|
2954
|
-
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
2955
|
-
#endif
|
2956
|
-
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
2957
|
-
}
|
2958
|
-
}
|
2959
|
-
}
|
2960
|
-
break;
|
2961
|
-
|
2962
|
-
/*-----------------------------------------------------------------*/
|
2963
|
-
case OP_ONCE:
|
2964
|
-
case OP_ONCE_NC:
|
2965
|
-
{
|
2966
|
-
int local_offsets[2];
|
2967
|
-
int local_workspace[1000];
|
2968
|
-
|
2969
|
-
int rc = internal_dfa_exec(
|
2970
|
-
md, /* fixed match data */
|
2971
|
-
code, /* this subexpression's code */
|
2972
|
-
ptr, /* where we currently are */
|
2973
|
-
(int)(ptr - start_subject), /* start offset */
|
2974
|
-
local_offsets, /* offset vector */
|
2975
|
-
sizeof(local_offsets)/sizeof(int), /* size of same */
|
2976
|
-
local_workspace, /* workspace vector */
|
2977
|
-
sizeof(local_workspace)/sizeof(int), /* size of same */
|
2978
|
-
rlevel); /* function recursion level */
|
2979
|
-
|
2980
|
-
if (rc >= 0)
|
2981
|
-
{
|
2982
|
-
const pcre_uchar *end_subpattern = code;
|
2983
|
-
int charcount = local_offsets[1] - local_offsets[0];
|
2984
|
-
int next_state_offset, repeat_state_offset;
|
2985
|
-
|
2986
|
-
do { end_subpattern += GET(end_subpattern, 1); }
|
2987
|
-
while (*end_subpattern == OP_ALT);
|
2988
|
-
next_state_offset =
|
2989
|
-
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
2990
|
-
|
2991
|
-
/* If the end of this subpattern is KETRMAX or KETRMIN, we must
|
2992
|
-
arrange for the repeat state also to be added to the relevant list.
|
2993
|
-
Calculate the offset, or set -1 for no repeat. */
|
2994
|
-
|
2995
|
-
repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
|
2996
|
-
*end_subpattern == OP_KETRMIN)?
|
2997
|
-
(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
|
2998
|
-
|
2999
|
-
/* If we have matched an empty string, add the next state at the
|
3000
|
-
current character pointer. This is important so that the duplicate
|
3001
|
-
checking kicks in, which is what breaks infinite loops that match an
|
3002
|
-
empty string. */
|
3003
|
-
|
3004
|
-
if (charcount == 0)
|
3005
|
-
{
|
3006
|
-
ADD_ACTIVE(next_state_offset, 0);
|
3007
|
-
}
|
3008
|
-
|
3009
|
-
/* Optimization: if there are no more active states, and there
|
3010
|
-
are no new states yet set up, then skip over the subject string
|
3011
|
-
right here, to save looping. Otherwise, set up the new state to swing
|
3012
|
-
into action when the end of the matched substring is reached. */
|
3013
|
-
|
3014
|
-
else if (i + 1 >= active_count && new_count == 0)
|
3015
|
-
{
|
3016
|
-
ptr += charcount;
|
3017
|
-
clen = 0;
|
3018
|
-
ADD_NEW(next_state_offset, 0);
|
3019
|
-
|
3020
|
-
/* If we are adding a repeat state at the new character position,
|
3021
|
-
we must fudge things so that it is the only current state.
|
3022
|
-
Otherwise, it might be a duplicate of one we processed before, and
|
3023
|
-
that would cause it to be skipped. */
|
3024
|
-
|
3025
|
-
if (repeat_state_offset >= 0)
|
3026
|
-
{
|
3027
|
-
next_active_state = active_states;
|
3028
|
-
active_count = 0;
|
3029
|
-
i = -1;
|
3030
|
-
ADD_ACTIVE(repeat_state_offset, 0);
|
3031
|
-
}
|
3032
|
-
}
|
3033
|
-
else
|
3034
|
-
{
|
3035
|
-
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
|
3036
|
-
if (utf)
|
3037
|
-
{
|
3038
|
-
const pcre_uchar *p = start_subject + local_offsets[0];
|
3039
|
-
const pcre_uchar *pp = start_subject + local_offsets[1];
|
3040
|
-
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
3041
|
-
}
|
3042
|
-
#endif
|
3043
|
-
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
3044
|
-
if (repeat_state_offset >= 0)
|
3045
|
-
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
|
3046
|
-
}
|
3047
|
-
}
|
3048
|
-
else if (rc != PCRE_ERROR_NOMATCH) return rc;
|
3049
|
-
}
|
3050
|
-
break;
|
3051
|
-
|
3052
|
-
|
3053
|
-
/* ========================================================================== */
|
3054
|
-
/* Handle callouts */
|
3055
|
-
|
3056
|
-
case OP_CALLOUT:
|
3057
|
-
rrc = 0;
|
3058
|
-
if (PUBL(callout) != NULL)
|
3059
|
-
{
|
3060
|
-
PUBL(callout_block) cb;
|
3061
|
-
cb.version = 1; /* Version 1 of the callout block */
|
3062
|
-
cb.callout_number = code[1];
|
3063
|
-
cb.offset_vector = offsets;
|
3064
|
-
#if defined COMPILE_PCRE8
|
3065
|
-
cb.subject = (PCRE_SPTR)start_subject;
|
3066
|
-
#elif defined COMPILE_PCRE16
|
3067
|
-
cb.subject = (PCRE_SPTR16)start_subject;
|
3068
|
-
#elif defined COMPILE_PCRE32
|
3069
|
-
cb.subject = (PCRE_SPTR32)start_subject;
|
3070
|
-
#endif
|
3071
|
-
cb.subject_length = (int)(end_subject - start_subject);
|
3072
|
-
cb.start_match = (int)(current_subject - start_subject);
|
3073
|
-
cb.current_position = (int)(ptr - start_subject);
|
3074
|
-
cb.pattern_position = GET(code, 2);
|
3075
|
-
cb.next_item_length = GET(code, 2 + LINK_SIZE);
|
3076
|
-
cb.capture_top = 1;
|
3077
|
-
cb.capture_last = -1;
|
3078
|
-
cb.callout_data = md->callout_data;
|
3079
|
-
cb.mark = NULL; /* No (*MARK) support */
|
3080
|
-
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
|
3081
|
-
}
|
3082
|
-
if (rrc == 0)
|
3083
|
-
{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
|
3084
|
-
break;
|
3085
|
-
|
3086
|
-
|
3087
|
-
/* ========================================================================== */
|
3088
|
-
default: /* Unsupported opcode */
|
3089
|
-
return PCRE_ERROR_DFA_UITEM;
|
3090
|
-
}
|
3091
|
-
|
3092
|
-
NEXT_ACTIVE_STATE: continue;
|
3093
|
-
|
3094
|
-
} /* End of loop scanning active states */
|
3095
|
-
|
3096
|
-
/* We have finished the processing at the current subject character. If no
|
3097
|
-
new states have been set for the next character, we have found all the
|
3098
|
-
matches that we are going to find. If we are at the top level and partial
|
3099
|
-
matching has been requested, check for appropriate conditions.
|
3100
|
-
|
3101
|
-
The "forced_fail" variable counts the number of (*F) encountered for the
|
3102
|
-
character. If it is equal to the original active_count (saved in
|
3103
|
-
workspace[1]) it means that (*F) was found on every active state. In this
|
3104
|
-
case we don't want to give a partial match.
|
3105
|
-
|
3106
|
-
The "could_continue" variable is true if a state could have continued but
|
3107
|
-
for the fact that the end of the subject was reached. */
|
3108
|
-
|
3109
|
-
if (new_count <= 0)
|
3110
|
-
{
|
3111
|
-
if (rlevel == 1 && /* Top level, and */
|
3112
|
-
could_continue && /* Some could go on, and */
|
3113
|
-
forced_fail != workspace[1] && /* Not all forced fail & */
|
3114
|
-
( /* either... */
|
3115
|
-
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
|
3116
|
-
|| /* or... */
|
3117
|
-
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
|
3118
|
-
match_count < 0) /* no matches */
|
3119
|
-
) && /* And... */
|
3120
|
-
(
|
3121
|
-
partial_newline || /* Either partial NL */
|
3122
|
-
( /* or ... */
|
3123
|
-
ptr >= end_subject && /* End of subject and */
|
3124
|
-
ptr > md->start_used_ptr) /* Inspected non-empty string */
|
3125
|
-
)
|
3126
|
-
)
|
3127
|
-
match_count = PCRE_ERROR_PARTIAL;
|
3128
|
-
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
|
3129
|
-
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
|
3130
|
-
rlevel*2-2, SP));
|
3131
|
-
break; /* In effect, "return", but see the comment below */
|
3132
|
-
}
|
3133
|
-
|
3134
|
-
/* One or more states are active for the next character. */
|
3135
|
-
|
3136
|
-
ptr += clen; /* Advance to next subject character */
|
3137
|
-
} /* Loop to move along the subject string */
|
3138
|
-
|
3139
|
-
/* Control gets here from "break" a few lines above. We do it this way because
|
3140
|
-
if we use "return" above, we have compiler trouble. Some compilers warn if
|
3141
|
-
there's nothing here because they think the function doesn't return a value. On
|
3142
|
-
the other hand, if we put a dummy statement here, some more clever compilers
|
3143
|
-
complain that it can't be reached. Sigh. */
|
3144
|
-
|
3145
|
-
return match_count;
|
3146
|
-
}
|
3147
|
-
|
3148
|
-
|
3149
|
-
|
3150
|
-
|
3151
|
-
/*************************************************
|
3152
|
-
* Execute a Regular Expression - DFA engine *
|
3153
|
-
*************************************************/
|
3154
|
-
|
3155
|
-
/* This external function applies a compiled re to a subject string using a DFA
|
3156
|
-
engine. This function calls the internal function multiple times if the pattern
|
3157
|
-
is not anchored.
|
3158
|
-
|
3159
|
-
Arguments:
|
3160
|
-
argument_re points to the compiled expression
|
3161
|
-
extra_data points to extra data or is NULL
|
3162
|
-
subject points to the subject string
|
3163
|
-
length length of subject string (may contain binary zeros)
|
3164
|
-
start_offset where to start in the subject string
|
3165
|
-
options option bits
|
3166
|
-
offsets vector of match offsets
|
3167
|
-
offsetcount size of same
|
3168
|
-
workspace workspace vector
|
3169
|
-
wscount size of same
|
3170
|
-
|
3171
|
-
Returns: > 0 => number of match offset pairs placed in offsets
|
3172
|
-
= 0 => offsets overflowed; longest matches are present
|
3173
|
-
-1 => failed to match
|
3174
|
-
< -1 => some kind of unexpected problem
|
3175
|
-
*/
|
3176
|
-
|
3177
|
-
#if defined COMPILE_PCRE8
|
3178
|
-
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
3179
|
-
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
3180
|
-
const char *subject, int length, int start_offset, int options, int *offsets,
|
3181
|
-
int offsetcount, int *workspace, int wscount)
|
3182
|
-
#elif defined COMPILE_PCRE16
|
3183
|
-
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
3184
|
-
pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
|
3185
|
-
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
|
3186
|
-
int offsetcount, int *workspace, int wscount)
|
3187
|
-
#elif defined COMPILE_PCRE32
|
3188
|
-
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
3189
|
-
pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
|
3190
|
-
PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
|
3191
|
-
int offsetcount, int *workspace, int wscount)
|
3192
|
-
#endif
|
3193
|
-
{
|
3194
|
-
REAL_PCRE *re = (REAL_PCRE *)argument_re;
|
3195
|
-
dfa_match_data match_block;
|
3196
|
-
dfa_match_data *md = &match_block;
|
3197
|
-
BOOL utf, anchored, startline, firstline;
|
3198
|
-
const pcre_uchar *current_subject, *end_subject;
|
3199
|
-
const pcre_study_data *study = NULL;
|
3200
|
-
|
3201
|
-
const pcre_uchar *req_char_ptr;
|
3202
|
-
const pcre_uint8 *start_bits = NULL;
|
3203
|
-
BOOL has_first_char = FALSE;
|
3204
|
-
BOOL has_req_char = FALSE;
|
3205
|
-
pcre_uchar first_char = 0;
|
3206
|
-
pcre_uchar first_char2 = 0;
|
3207
|
-
pcre_uchar req_char = 0;
|
3208
|
-
pcre_uchar req_char2 = 0;
|
3209
|
-
int newline;
|
3210
|
-
|
3211
|
-
/* Plausibility checks */
|
3212
|
-
|
3213
|
-
if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
|
3214
|
-
if (re == NULL || subject == NULL || workspace == NULL ||
|
3215
|
-
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
|
3216
|
-
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
|
3217
|
-
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
|
3218
|
-
if (length < 0) return PCRE_ERROR_BADLENGTH;
|
3219
|
-
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
|
3220
|
-
|
3221
|
-
/* Check that the first field in the block is the magic number. If it is not,
|
3222
|
-
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
|
3223
|
-
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
|
3224
|
-
means that the pattern is likely compiled with different endianness. */
|
3225
|
-
|
3226
|
-
if (re->magic_number != MAGIC_NUMBER)
|
3227
|
-
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
3228
|
-
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
|
3229
|
-
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
3230
|
-
|
3231
|
-
/* If restarting after a partial match, do some sanity checks on the contents
|
3232
|
-
of the workspace. */
|
3233
|
-
|
3234
|
-
if ((options & PCRE_DFA_RESTART) != 0)
|
3235
|
-
{
|
3236
|
-
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
|
3237
|
-
workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
|
3238
|
-
return PCRE_ERROR_DFA_BADRESTART;
|
3239
|
-
}
|
3240
|
-
|
3241
|
-
/* Set up study, callout, and table data */
|
3242
|
-
|
3243
|
-
md->tables = re->tables;
|
3244
|
-
md->callout_data = NULL;
|
3245
|
-
|
3246
|
-
if (extra_data != NULL)
|
3247
|
-
{
|
3248
|
-
unsigned long int flags = extra_data->flags;
|
3249
|
-
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
3250
|
-
study = (const pcre_study_data *)extra_data->study_data;
|
3251
|
-
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
|
3252
|
-
if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
|
3253
|
-
return PCRE_ERROR_DFA_UMLIMIT;
|
3254
|
-
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
|
3255
|
-
md->callout_data = extra_data->callout_data;
|
3256
|
-
if ((flags & PCRE_EXTRA_TABLES) != 0)
|
3257
|
-
md->tables = extra_data->tables;
|
3258
|
-
}
|
3259
|
-
|
3260
|
-
/* Set some local values */
|
3261
|
-
|
3262
|
-
current_subject = (const pcre_uchar *)subject + start_offset;
|
3263
|
-
end_subject = (const pcre_uchar *)subject + length;
|
3264
|
-
req_char_ptr = current_subject - 1;
|
3265
|
-
|
3266
|
-
#ifdef SUPPORT_UTF
|
3267
|
-
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
|
3268
|
-
utf = (re->options & PCRE_UTF8) != 0;
|
3269
|
-
#else
|
3270
|
-
utf = FALSE;
|
3271
|
-
#endif
|
3272
|
-
|
3273
|
-
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
|
3274
|
-
(re->options & PCRE_ANCHORED) != 0;
|
3275
|
-
|
3276
|
-
/* The remaining fixed data for passing around. */
|
3277
|
-
|
3278
|
-
md->start_code = (const pcre_uchar *)argument_re +
|
3279
|
-
re->name_table_offset + re->name_count * re->name_entry_size;
|
3280
|
-
md->start_subject = (const pcre_uchar *)subject;
|
3281
|
-
md->end_subject = end_subject;
|
3282
|
-
md->start_offset = start_offset;
|
3283
|
-
md->moptions = options;
|
3284
|
-
md->poptions = re->options;
|
3285
|
-
|
3286
|
-
/* If the BSR option is not set at match time, copy what was set
|
3287
|
-
at compile time. */
|
3288
|
-
|
3289
|
-
if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
|
3290
|
-
{
|
3291
|
-
if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
|
3292
|
-
md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
|
3293
|
-
#ifdef BSR_ANYCRLF
|
3294
|
-
else md->moptions |= PCRE_BSR_ANYCRLF;
|
3295
|
-
#endif
|
3296
|
-
}
|
3297
|
-
|
3298
|
-
/* Handle different types of newline. The three bits give eight cases. If
|
3299
|
-
nothing is set at run time, whatever was used at compile time applies. */
|
3300
|
-
|
3301
|
-
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
|
3302
|
-
PCRE_NEWLINE_BITS)
|
3303
|
-
{
|
3304
|
-
case 0: newline = NEWLINE; break; /* Compile-time default */
|
3305
|
-
case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
|
3306
|
-
case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
|
3307
|
-
case PCRE_NEWLINE_CR+
|
3308
|
-
PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
|
3309
|
-
case PCRE_NEWLINE_ANY: newline = -1; break;
|
3310
|
-
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
3311
|
-
default: return PCRE_ERROR_BADNEWLINE;
|
3312
|
-
}
|
3313
|
-
|
3314
|
-
if (newline == -2)
|
3315
|
-
{
|
3316
|
-
md->nltype = NLTYPE_ANYCRLF;
|
3317
|
-
}
|
3318
|
-
else if (newline < 0)
|
3319
|
-
{
|
3320
|
-
md->nltype = NLTYPE_ANY;
|
3321
|
-
}
|
3322
|
-
else
|
3323
|
-
{
|
3324
|
-
md->nltype = NLTYPE_FIXED;
|
3325
|
-
if (newline > 255)
|
3326
|
-
{
|
3327
|
-
md->nllen = 2;
|
3328
|
-
md->nl[0] = (newline >> 8) & 255;
|
3329
|
-
md->nl[1] = newline & 255;
|
3330
|
-
}
|
3331
|
-
else
|
3332
|
-
{
|
3333
|
-
md->nllen = 1;
|
3334
|
-
md->nl[0] = newline;
|
3335
|
-
}
|
3336
|
-
}
|
3337
|
-
|
3338
|
-
/* Check a UTF string if required. On failure, the error offset and the reason
|
3339
|
-
code are passed back in offsets[0] and offsets[1] if offsetcount is >= 2. */
|
3340
|
-
|
3341
|
-
#ifdef SUPPORT_UTF
|
3342
|
-
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
|
3343
|
-
{
|
3344
|
-
int erroroffset;
|
3345
|
-
int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
|
3346
|
-
if (errorcode != 0)
|
3347
|
-
{
|
3348
|
-
if (offsetcount >= 2)
|
3349
|
-
{
|
3350
|
-
offsets[0] = erroroffset;
|
3351
|
-
offsets[1] = errorcode;
|
3352
|
-
}
|
3353
|
-
#if defined COMPILE_PCRE8
|
3354
|
-
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
|
3355
|
-
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
|
3356
|
-
#elif defined COMPILE_PCRE16
|
3357
|
-
return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
|
3358
|
-
PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
|
3359
|
-
#elif defined COMPILE_PCRE32
|
3360
|
-
return PCRE_ERROR_BADUTF32;
|
3361
|
-
#endif
|
3362
|
-
}
|
3363
|
-
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
|
3364
|
-
if (start_offset > 0 && start_offset < length &&
|
3365
|
-
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
|
3366
|
-
return PCRE_ERROR_BADUTF8_OFFSET;
|
3367
|
-
#endif
|
3368
|
-
}
|
3369
|
-
#endif
|
3370
|
-
|
3371
|
-
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
|
3372
|
-
is a feature that makes it possible to save compiled regex and re-use them
|
3373
|
-
in other programs later. */
|
3374
|
-
|
3375
|
-
if (md->tables == NULL) md->tables = PRIV(default_tables);
|
3376
|
-
|
3377
|
-
/* The "must be at the start of a line" flags are used in a loop when finding
|
3378
|
-
where to start. */
|
3379
|
-
|
3380
|
-
startline = (re->flags & PCRE_STARTLINE) != 0;
|
3381
|
-
firstline = (re->options & PCRE_FIRSTLINE) != 0;
|
3382
|
-
|
3383
|
-
/* Set up the first character to match, if available. The first_char value is
|
3384
|
-
never set for an anchored regular expression, but the anchoring may be forced
|
3385
|
-
at run time, so we have to test for anchoring. The first char may be unset for
|
3386
|
-
an unanchored pattern, of course. If there's no first char and the pattern was
|
3387
|
-
studied, there may be a bitmap of possible first characters. */
|
3388
|
-
|
3389
|
-
if (!anchored)
|
3390
|
-
{
|
3391
|
-
if ((re->flags & PCRE_FIRSTSET) != 0)
|
3392
|
-
{
|
3393
|
-
has_first_char = TRUE;
|
3394
|
-
first_char = first_char2 = (pcre_uchar)(re->first_char);
|
3395
|
-
if ((re->flags & PCRE_FCH_CASELESS) != 0)
|
3396
|
-
{
|
3397
|
-
first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
|
3398
|
-
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
|
3399
|
-
if (utf && first_char > 127)
|
3400
|
-
first_char2 = UCD_OTHERCASE(first_char);
|
3401
|
-
#endif
|
3402
|
-
}
|
3403
|
-
}
|
3404
|
-
else
|
3405
|
-
{
|
3406
|
-
if (!startline && study != NULL &&
|
3407
|
-
(study->flags & PCRE_STUDY_MAPPED) != 0)
|
3408
|
-
start_bits = study->start_bits;
|
3409
|
-
}
|
3410
|
-
}
|
3411
|
-
|
3412
|
-
/* For anchored or unanchored matches, there may be a "last known required
|
3413
|
-
character" set. */
|
3414
|
-
|
3415
|
-
if ((re->flags & PCRE_REQCHSET) != 0)
|
3416
|
-
{
|
3417
|
-
has_req_char = TRUE;
|
3418
|
-
req_char = req_char2 = (pcre_uchar)(re->req_char);
|
3419
|
-
if ((re->flags & PCRE_RCH_CASELESS) != 0)
|
3420
|
-
{
|
3421
|
-
req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
|
3422
|
-
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
|
3423
|
-
if (utf && req_char > 127)
|
3424
|
-
req_char2 = UCD_OTHERCASE(req_char);
|
3425
|
-
#endif
|
3426
|
-
}
|
3427
|
-
}
|
3428
|
-
|
3429
|
-
/* Call the main matching function, looping for a non-anchored regex after a
|
3430
|
-
failed match. If not restarting, perform certain optimizations at the start of
|
3431
|
-
a match. */
|
3432
|
-
|
3433
|
-
for (;;)
|
3434
|
-
{
|
3435
|
-
int rc;
|
3436
|
-
|
3437
|
-
if ((options & PCRE_DFA_RESTART) == 0)
|
3438
|
-
{
|
3439
|
-
const pcre_uchar *save_end_subject = end_subject;
|
3440
|
-
|
3441
|
-
/* If firstline is TRUE, the start of the match is constrained to the first
|
3442
|
-
line of a multiline string. Implement this by temporarily adjusting
|
3443
|
-
end_subject so that we stop scanning at a newline. If the match fails at
|
3444
|
-
the newline, later code breaks this loop. */
|
3445
|
-
|
3446
|
-
if (firstline)
|
3447
|
-
{
|
3448
|
-
PCRE_PUCHAR t = current_subject;
|
3449
|
-
#ifdef SUPPORT_UTF
|
3450
|
-
if (utf)
|
3451
|
-
{
|
3452
|
-
while (t < md->end_subject && !IS_NEWLINE(t))
|
3453
|
-
{
|
3454
|
-
t++;
|
3455
|
-
ACROSSCHAR(t < end_subject, *t, t++);
|
3456
|
-
}
|
3457
|
-
}
|
3458
|
-
else
|
3459
|
-
#endif
|
3460
|
-
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
|
3461
|
-
end_subject = t;
|
3462
|
-
}
|
3463
|
-
|
3464
|
-
/* There are some optimizations that avoid running the match if a known
|
3465
|
-
starting point is not found. However, there is an option that disables
|
3466
|
-
these, for testing and for ensuring that all callouts do actually occur.
|
3467
|
-
The option can be set in the regex by (*NO_START_OPT) or passed in
|
3468
|
-
match-time options. */
|
3469
|
-
|
3470
|
-
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
|
3471
|
-
{
|
3472
|
-
/* Advance to a known first pcre_uchar (i.e. data item) */
|
3473
|
-
|
3474
|
-
if (has_first_char)
|
3475
|
-
{
|
3476
|
-
if (first_char != first_char2)
|
3477
|
-
{
|
3478
|
-
pcre_uchar csc;
|
3479
|
-
while (current_subject < end_subject &&
|
3480
|
-
(csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
|
3481
|
-
current_subject++;
|
3482
|
-
}
|
3483
|
-
else
|
3484
|
-
while (current_subject < end_subject &&
|
3485
|
-
UCHAR21TEST(current_subject) != first_char)
|
3486
|
-
current_subject++;
|
3487
|
-
}
|
3488
|
-
|
3489
|
-
/* Or to just after a linebreak for a multiline match if possible */
|
3490
|
-
|
3491
|
-
else if (startline)
|
3492
|
-
{
|
3493
|
-
if (current_subject > md->start_subject + start_offset)
|
3494
|
-
{
|
3495
|
-
#ifdef SUPPORT_UTF
|
3496
|
-
if (utf)
|
3497
|
-
{
|
3498
|
-
while (current_subject < end_subject &&
|
3499
|
-
!WAS_NEWLINE(current_subject))
|
3500
|
-
{
|
3501
|
-
current_subject++;
|
3502
|
-
ACROSSCHAR(current_subject < end_subject, *current_subject,
|
3503
|
-
current_subject++);
|
3504
|
-
}
|
3505
|
-
}
|
3506
|
-
else
|
3507
|
-
#endif
|
3508
|
-
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
|
3509
|
-
current_subject++;
|
3510
|
-
|
3511
|
-
/* If we have just passed a CR and the newline option is ANY or
|
3512
|
-
ANYCRLF, and we are now at a LF, advance the match position by one
|
3513
|
-
more character. */
|
3514
|
-
|
3515
|
-
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
|
3516
|
-
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
|
3517
|
-
current_subject < end_subject &&
|
3518
|
-
UCHAR21TEST(current_subject) == CHAR_NL)
|
3519
|
-
current_subject++;
|
3520
|
-
}
|
3521
|
-
}
|
3522
|
-
|
3523
|
-
/* Advance to a non-unique first pcre_uchar after study */
|
3524
|
-
|
3525
|
-
else if (start_bits != NULL)
|
3526
|
-
{
|
3527
|
-
while (current_subject < end_subject)
|
3528
|
-
{
|
3529
|
-
register pcre_uint32 c = UCHAR21TEST(current_subject);
|
3530
|
-
#ifndef COMPILE_PCRE8
|
3531
|
-
if (c > 255) c = 255;
|
3532
|
-
#endif
|
3533
|
-
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
|
3534
|
-
current_subject++;
|
3535
|
-
}
|
3536
|
-
}
|
3537
|
-
}
|
3538
|
-
|
3539
|
-
/* Restore fudged end_subject */
|
3540
|
-
|
3541
|
-
end_subject = save_end_subject;
|
3542
|
-
|
3543
|
-
/* The following two optimizations are disabled for partial matching or if
|
3544
|
-
disabling is explicitly requested (and of course, by the test above, this
|
3545
|
-
code is not obeyed when restarting after a partial match). */
|
3546
|
-
|
3547
|
-
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
|
3548
|
-
(options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
|
3549
|
-
{
|
3550
|
-
/* If the pattern was studied, a minimum subject length may be set. This
|
3551
|
-
is a lower bound; it may be that no string of that length actually matches the
|
3552
|
-
pattern. Although the value is, strictly, in characters, we treat it as
|
3553
|
-
in pcre_uchar units to avoid spending too much time in this optimization.
|
3554
|
-
*/
|
3555
|
-
|
3556
|
-
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
|
3557
|
-
(pcre_uint32)(end_subject - current_subject) < study->minlength)
|
3558
|
-
return PCRE_ERROR_NOMATCH;
|
3559
|
-
|
3560
|
-
/* If req_char is set, we know that that pcre_uchar must appear in the
|
3561
|
-
subject for the match to succeed. If the first pcre_uchar is set,
|
3562
|
-
req_char must be later in the subject; otherwise the test starts at the
|
3563
|
-
match point. This optimization can save a huge amount of work in patterns
|
3564
|
-
with nested unlimited repeats that aren't going to match. Writing
|
3565
|
-
separate code for cased/caseless versions makes it go faster, as does
|
3566
|
-
using an autoincrement and backing off on a match.
|
3567
|
-
|
3568
|
-
HOWEVER: when the subject string is very, very long, searching to its end
|
3569
|
-
can take a long time, and give bad performance on quite ordinary
|
3570
|
-
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
|
3571
|
-
string... so we don't do this when the string is sufficiently long. */
|
3572
|
-
|
3573
|
-
if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
|
3574
|
-
{
|
3575
|
-
register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
|
3576
|
-
|
3577
|
-
/* We don't need to repeat the search if we haven't yet reached the
|
3578
|
-
place we found it at last time. */
|
3579
|
-
|
3580
|
-
if (p > req_char_ptr)
|
3581
|
-
{
|
3582
|
-
if (req_char != req_char2)
|
3583
|
-
{
|
3584
|
-
while (p < end_subject)
|
3585
|
-
{
|
3586
|
-
register pcre_uint32 pp = UCHAR21INCTEST(p);
|
3587
|
-
if (pp == req_char || pp == req_char2) { p--; break; }
|
3588
|
-
}
|
3589
|
-
}
|
3590
|
-
else
|
3591
|
-
{
|
3592
|
-
while (p < end_subject)
|
3593
|
-
{
|
3594
|
-
if (UCHAR21INCTEST(p) == req_char) { p--; break; }
|
3595
|
-
}
|
3596
|
-
}
|
3597
|
-
|
3598
|
-
/* If we can't find the required pcre_uchar, break the matching loop,
|
3599
|
-
which will cause a return of PCRE_ERROR_NOMATCH. */
|
3600
|
-
|
3601
|
-
if (p >= end_subject) break;
|
3602
|
-
|
3603
|
-
/* If we have found the required pcre_uchar, save the point where we
|
3604
|
-
found it, so that we don't search again next time round the loop if
|
3605
|
-
the start hasn't passed this point yet. */
|
3606
|
-
|
3607
|
-
req_char_ptr = p;
|
3608
|
-
}
|
3609
|
-
}
|
3610
|
-
}
|
3611
|
-
} /* End of optimizations that are done when not restarting */
|
3612
|
-
|
3613
|
-
/* OK, now we can do the business */
|
3614
|
-
|
3615
|
-
md->start_used_ptr = current_subject;
|
3616
|
-
md->recursive = NULL;
|
3617
|
-
|
3618
|
-
rc = internal_dfa_exec(
|
3619
|
-
md, /* fixed match data */
|
3620
|
-
md->start_code, /* this subexpression's code */
|
3621
|
-
current_subject, /* where we currently are */
|
3622
|
-
start_offset, /* start offset in subject */
|
3623
|
-
offsets, /* offset vector */
|
3624
|
-
offsetcount, /* size of same */
|
3625
|
-
workspace, /* workspace vector */
|
3626
|
-
wscount, /* size of same */
|
3627
|
-
0); /* function recurse level */
|
3628
|
-
|
3629
|
-
/* Anything other than "no match" means we are done, always; otherwise, carry
|
3630
|
-
on only if not anchored. */
|
3631
|
-
|
3632
|
-
if (rc != PCRE_ERROR_NOMATCH || anchored)
|
3633
|
-
{
|
3634
|
-
if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
|
3635
|
-
{
|
3636
|
-
offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
|
3637
|
-
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
|
3638
|
-
if (offsetcount > 2)
|
3639
|
-
offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
|
3640
|
-
}
|
3641
|
-
return rc;
|
3642
|
-
}
|
3643
|
-
|
3644
|
-
/* Advance to the next subject character unless we are at the end of a line
|
3645
|
-
and firstline is set. */
|
3646
|
-
|
3647
|
-
if (firstline && IS_NEWLINE(current_subject)) break;
|
3648
|
-
current_subject++;
|
3649
|
-
#ifdef SUPPORT_UTF
|
3650
|
-
if (utf)
|
3651
|
-
{
|
3652
|
-
ACROSSCHAR(current_subject < end_subject, *current_subject,
|
3653
|
-
current_subject++);
|
3654
|
-
}
|
3655
|
-
#endif
|
3656
|
-
if (current_subject > end_subject) break;
|
3657
|
-
|
3658
|
-
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
3659
|
-
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
3660
|
-
or ANY or ANYCRLF, advance the match position by one more character. */
|
3661
|
-
|
3662
|
-
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
|
3663
|
-
current_subject < end_subject &&
|
3664
|
-
UCHAR21TEST(current_subject) == CHAR_NL &&
|
3665
|
-
(re->flags & PCRE_HASCRORLF) == 0 &&
|
3666
|
-
(md->nltype == NLTYPE_ANY ||
|
3667
|
-
md->nltype == NLTYPE_ANYCRLF ||
|
3668
|
-
md->nllen == 2))
|
3669
|
-
current_subject++;
|
3670
|
-
|
3671
|
-
} /* "Bumpalong" loop */
|
3672
|
-
|
3673
|
-
return PCRE_ERROR_NOMATCH;
|
3674
|
-
}
|
3675
|
-
|
3676
|
-
/* End of pcre_dfa_exec.c */
|