rugged 0.28.4.1 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/ext/rugged/extconf.rb +3 -1
- data/ext/rugged/rugged.c +35 -31
- data/ext/rugged/rugged.h +13 -0
- data/ext/rugged/rugged_blob.c +11 -9
- data/ext/rugged/rugged_commit.c +17 -15
- data/ext/rugged/rugged_config.c +1 -1
- data/ext/rugged/rugged_diff.c +4 -26
- data/ext/rugged/rugged_index.c +4 -2
- data/ext/rugged/rugged_note.c +5 -3
- data/ext/rugged/rugged_object.c +57 -10
- data/ext/rugged/rugged_rebase.c +3 -1
- data/ext/rugged/rugged_remote.c +32 -8
- data/ext/rugged/rugged_repo.c +232 -17
- data/ext/rugged/rugged_tag.c +8 -6
- data/ext/rugged/rugged_tree.c +18 -16
- data/lib/rugged/commit.rb +1 -2
- data/lib/rugged/repository.rb +5 -6
- data/lib/rugged/submodule_collection.rb +4 -4
- data/lib/rugged/version.rb +1 -1
- data/vendor/libgit2/AUTHORS +1 -0
- data/vendor/libgit2/CMakeLists.txt +39 -19
- data/vendor/libgit2/COPYING +28 -0
- data/vendor/libgit2/cmake/Modules/EnableWarnings.cmake +5 -1
- data/vendor/libgit2/cmake/Modules/FindCoreFoundation.cmake +2 -2
- data/vendor/libgit2/cmake/Modules/FindGSSAPI.cmake +1 -1
- data/vendor/libgit2/cmake/Modules/FindGSSFramework.cmake +28 -0
- data/vendor/libgit2/cmake/Modules/FindPCRE.cmake +38 -0
- data/vendor/libgit2/cmake/Modules/FindPCRE2.cmake +37 -0
- data/vendor/libgit2/cmake/Modules/FindSecurity.cmake +2 -2
- data/vendor/libgit2/cmake/Modules/FindStatNsec.cmake +6 -0
- data/vendor/libgit2/cmake/Modules/PkgBuildConfig.cmake +77 -0
- data/vendor/libgit2/cmake/Modules/SanitizeBool.cmake +20 -0
- data/vendor/libgit2/cmake/Modules/SelectGSSAPI.cmake +56 -0
- data/vendor/libgit2/cmake/Modules/SelectHTTPSBackend.cmake +127 -0
- data/vendor/libgit2/cmake/Modules/SelectHashes.cmake +69 -0
- data/vendor/libgit2/deps/http-parser/http_parser.c +11 -6
- data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +23 -0
- data/vendor/libgit2/deps/ntlmclient/compat.h +55 -0
- data/vendor/libgit2/deps/ntlmclient/crypt.h +64 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +120 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.h +18 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +145 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.h +18 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +130 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_openssl.h +21 -0
- data/vendor/libgit2/deps/ntlmclient/ntlm.c +1422 -0
- data/vendor/libgit2/deps/ntlmclient/ntlm.h +174 -0
- data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +320 -0
- data/vendor/libgit2/deps/ntlmclient/unicode.h +36 -0
- data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +445 -0
- data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +201 -0
- data/vendor/libgit2/deps/ntlmclient/utf8.h +1257 -0
- data/vendor/libgit2/deps/ntlmclient/util.c +21 -0
- data/vendor/libgit2/deps/ntlmclient/util.h +14 -0
- data/vendor/libgit2/deps/pcre/CMakeLists.txt +140 -0
- data/vendor/libgit2/deps/pcre/COPYING +5 -0
- data/vendor/libgit2/deps/pcre/cmake/COPYING-CMAKE-SCRIPTS +22 -0
- data/vendor/libgit2/deps/pcre/cmake/FindEditline.cmake +17 -0
- data/vendor/libgit2/deps/pcre/cmake/FindPackageHandleStandardArgs.cmake +58 -0
- data/vendor/libgit2/deps/pcre/cmake/FindReadline.cmake +29 -0
- data/vendor/libgit2/deps/pcre/config.h.in +57 -0
- data/vendor/libgit2/deps/pcre/pcre.h +641 -0
- data/vendor/libgit2/deps/pcre/pcre_byte_order.c +319 -0
- data/vendor/libgit2/deps/pcre/pcre_chartables.c +198 -0
- data/vendor/libgit2/deps/pcre/pcre_compile.c +9800 -0
- data/vendor/libgit2/deps/pcre/pcre_config.c +190 -0
- data/vendor/libgit2/deps/pcre/pcre_dfa_exec.c +3676 -0
- data/vendor/libgit2/deps/pcre/pcre_exec.c +7173 -0
- data/vendor/libgit2/deps/pcre/pcre_fullinfo.c +245 -0
- data/vendor/libgit2/deps/pcre/pcre_get.c +669 -0
- data/vendor/libgit2/deps/pcre/pcre_globals.c +86 -0
- data/vendor/libgit2/deps/pcre/pcre_internal.h +2787 -0
- data/vendor/libgit2/deps/pcre/pcre_jit_compile.c +11913 -0
- data/vendor/libgit2/deps/pcre/pcre_maketables.c +156 -0
- data/vendor/libgit2/deps/pcre/pcre_newline.c +210 -0
- data/vendor/libgit2/deps/pcre/pcre_ord2utf8.c +94 -0
- data/vendor/libgit2/deps/pcre/pcre_printint.c +834 -0
- data/vendor/libgit2/deps/pcre/pcre_refcount.c +92 -0
- data/vendor/libgit2/deps/pcre/pcre_string_utils.c +211 -0
- data/vendor/libgit2/deps/pcre/pcre_study.c +1686 -0
- data/vendor/libgit2/deps/pcre/pcre_tables.c +727 -0
- data/vendor/libgit2/deps/pcre/pcre_ucd.c +3644 -0
- data/vendor/libgit2/deps/pcre/pcre_valid_utf8.c +301 -0
- data/vendor/libgit2/deps/pcre/pcre_version.c +98 -0
- data/vendor/libgit2/deps/pcre/pcre_xclass.c +268 -0
- data/vendor/libgit2/deps/pcre/pcreposix.c +421 -0
- data/vendor/libgit2/deps/pcre/pcreposix.h +117 -0
- data/vendor/libgit2/deps/pcre/ucp.h +224 -0
- data/vendor/libgit2/deps/zlib/adler32.c +0 -7
- data/vendor/libgit2/deps/zlib/crc32.c +0 -7
- data/vendor/libgit2/include/git2.h +2 -0
- data/vendor/libgit2/include/git2/apply.h +22 -2
- data/vendor/libgit2/include/git2/attr.h +23 -13
- data/vendor/libgit2/include/git2/blame.h +2 -2
- data/vendor/libgit2/include/git2/blob.h +44 -12
- data/vendor/libgit2/include/git2/branch.h +74 -57
- data/vendor/libgit2/include/git2/buffer.h +20 -14
- data/vendor/libgit2/include/git2/cert.h +135 -0
- data/vendor/libgit2/include/git2/checkout.h +46 -14
- data/vendor/libgit2/include/git2/cherrypick.h +3 -3
- data/vendor/libgit2/include/git2/clone.h +2 -2
- data/vendor/libgit2/include/git2/commit.h +23 -1
- data/vendor/libgit2/include/git2/common.h +15 -6
- data/vendor/libgit2/include/git2/config.h +12 -12
- data/vendor/libgit2/include/git2/cred_helpers.h +4 -42
- data/vendor/libgit2/include/git2/credential.h +314 -0
- data/vendor/libgit2/include/git2/credential_helpers.h +52 -0
- data/vendor/libgit2/include/git2/deprecated.h +321 -3
- data/vendor/libgit2/include/git2/describe.h +4 -4
- data/vendor/libgit2/include/git2/diff.h +16 -14
- data/vendor/libgit2/include/git2/errors.h +4 -2
- data/vendor/libgit2/include/git2/filter.h +8 -0
- data/vendor/libgit2/include/git2/index.h +2 -1
- data/vendor/libgit2/include/git2/indexer.h +48 -4
- data/vendor/libgit2/include/git2/merge.h +6 -10
- data/vendor/libgit2/include/git2/net.h +0 -5
- data/vendor/libgit2/include/git2/object.h +2 -14
- data/vendor/libgit2/include/git2/odb.h +3 -2
- data/vendor/libgit2/include/git2/odb_backend.h +5 -4
- data/vendor/libgit2/include/git2/oid.h +11 -6
- data/vendor/libgit2/include/git2/pack.h +12 -1
- data/vendor/libgit2/include/git2/proxy.h +6 -4
- data/vendor/libgit2/include/git2/rebase.h +46 -2
- data/vendor/libgit2/include/git2/refs.h +19 -0
- data/vendor/libgit2/include/git2/remote.h +40 -15
- data/vendor/libgit2/include/git2/repository.h +29 -6
- data/vendor/libgit2/include/git2/revert.h +1 -1
- data/vendor/libgit2/include/git2/revwalk.h +7 -3
- data/vendor/libgit2/include/git2/stash.h +4 -4
- data/vendor/libgit2/include/git2/status.h +25 -16
- data/vendor/libgit2/include/git2/submodule.h +20 -3
- data/vendor/libgit2/include/git2/sys/alloc.h +9 -9
- data/vendor/libgit2/include/git2/sys/cred.h +15 -0
- data/vendor/libgit2/include/git2/sys/credential.h +90 -0
- data/vendor/libgit2/include/git2/sys/index.h +4 -2
- data/vendor/libgit2/include/git2/sys/mempack.h +2 -1
- data/vendor/libgit2/include/git2/sys/merge.h +1 -1
- data/vendor/libgit2/include/git2/sys/odb_backend.h +48 -4
- data/vendor/libgit2/include/git2/sys/refdb_backend.h +164 -21
- data/vendor/libgit2/include/git2/sys/repository.h +17 -6
- data/vendor/libgit2/include/git2/sys/transport.h +4 -4
- data/vendor/libgit2/include/git2/tag.h +11 -2
- data/vendor/libgit2/include/git2/trace.h +2 -2
- data/vendor/libgit2/include/git2/transport.h +11 -340
- data/vendor/libgit2/include/git2/tree.h +5 -3
- data/vendor/libgit2/include/git2/types.h +4 -89
- data/vendor/libgit2/include/git2/version.h +5 -5
- data/vendor/libgit2/include/git2/worktree.h +5 -5
- data/vendor/libgit2/src/CMakeLists.txt +99 -236
- data/vendor/libgit2/src/alloc.c +2 -14
- data/vendor/libgit2/src/{stdalloc.c → allocators/stdalloc.c} +3 -4
- data/vendor/libgit2/src/{stdalloc.h → allocators/stdalloc.h} +4 -4
- data/vendor/libgit2/src/allocators/win32_crtdbg.c +118 -0
- data/vendor/libgit2/src/{transports/cred.h → allocators/win32_crtdbg.h} +5 -4
- data/vendor/libgit2/src/apply.c +60 -30
- data/vendor/libgit2/src/attr.c +70 -64
- data/vendor/libgit2/src/attr_file.c +189 -96
- data/vendor/libgit2/src/attr_file.h +9 -9
- data/vendor/libgit2/src/attrcache.c +48 -48
- data/vendor/libgit2/src/attrcache.h +2 -1
- data/vendor/libgit2/src/blame.c +17 -5
- data/vendor/libgit2/src/blame.h +1 -1
- data/vendor/libgit2/src/blame_git.c +21 -7
- data/vendor/libgit2/src/blob.c +81 -17
- data/vendor/libgit2/src/blob.h +2 -2
- data/vendor/libgit2/src/branch.c +60 -32
- data/vendor/libgit2/src/buffer.c +19 -7
- data/vendor/libgit2/src/buffer.h +1 -0
- data/vendor/libgit2/src/cache.c +33 -36
- data/vendor/libgit2/src/cache.h +1 -1
- data/vendor/libgit2/src/cc-compat.h +5 -0
- data/vendor/libgit2/src/checkout.c +26 -16
- data/vendor/libgit2/src/cherrypick.c +9 -3
- data/vendor/libgit2/src/clone.c +29 -7
- data/vendor/libgit2/src/clone.h +4 -0
- data/vendor/libgit2/src/commit.c +70 -22
- data/vendor/libgit2/src/commit.h +6 -0
- data/vendor/libgit2/src/commit_list.c +28 -76
- data/vendor/libgit2/src/commit_list.h +2 -2
- data/vendor/libgit2/src/common.h +3 -75
- data/vendor/libgit2/src/config.c +31 -40
- data/vendor/libgit2/src/config.h +7 -6
- data/vendor/libgit2/src/config_backend.h +12 -0
- data/vendor/libgit2/src/config_cache.c +39 -39
- data/vendor/libgit2/src/config_entries.c +69 -99
- data/vendor/libgit2/src/config_entries.h +1 -0
- data/vendor/libgit2/src/config_file.c +346 -380
- data/vendor/libgit2/src/config_mem.c +12 -16
- data/vendor/libgit2/src/config_parse.c +49 -29
- data/vendor/libgit2/src/config_parse.h +13 -12
- data/vendor/libgit2/src/config_snapshot.c +206 -0
- data/vendor/libgit2/src/crlf.c +14 -14
- data/vendor/libgit2/src/describe.c +21 -20
- data/vendor/libgit2/src/diff.c +43 -58
- data/vendor/libgit2/src/diff.h +4 -3
- data/vendor/libgit2/src/diff_driver.c +37 -38
- data/vendor/libgit2/src/diff_file.c +12 -10
- data/vendor/libgit2/src/diff_file.h +2 -2
- data/vendor/libgit2/src/diff_generate.c +148 -98
- data/vendor/libgit2/src/diff_generate.h +2 -2
- data/vendor/libgit2/src/diff_parse.c +1 -1
- data/vendor/libgit2/src/diff_print.c +25 -13
- data/vendor/libgit2/src/diff_stats.c +1 -1
- data/vendor/libgit2/src/diff_tform.c +11 -11
- data/vendor/libgit2/src/errors.c +21 -25
- data/vendor/libgit2/src/errors.h +81 -0
- data/vendor/libgit2/src/features.h.in +9 -2
- data/vendor/libgit2/src/fetch.c +7 -2
- data/vendor/libgit2/src/fetchhead.c +36 -4
- data/vendor/libgit2/src/filebuf.c +6 -10
- data/vendor/libgit2/src/filebuf.h +2 -2
- data/vendor/libgit2/src/filter.c +16 -8
- data/vendor/libgit2/src/{fileops.c → futils.c} +21 -17
- data/vendor/libgit2/src/{fileops.h → futils.h} +5 -5
- data/vendor/libgit2/src/global.c +12 -40
- data/vendor/libgit2/src/global.h +0 -2
- data/vendor/libgit2/src/hash.c +61 -0
- data/vendor/libgit2/src/hash.h +19 -21
- data/vendor/libgit2/src/hash/sha1.h +38 -0
- data/vendor/libgit2/src/hash/{hash_collisiondetect.h → sha1/collisiondetect.c} +14 -17
- data/vendor/libgit2/src/{sha1_lookup.h → hash/sha1/collisiondetect.h} +8 -8
- data/vendor/libgit2/src/hash/{hash_common_crypto.h → sha1/common_crypto.c} +15 -19
- data/vendor/libgit2/src/hash/sha1/common_crypto.h +19 -0
- data/vendor/libgit2/src/hash/{hash_generic.c → sha1/generic.c} +22 -10
- data/vendor/libgit2/src/hash/{hash_generic.h → sha1/generic.h} +4 -14
- data/vendor/libgit2/src/hash/{hash_mbedtls.c → sha1/mbedtls.c} +15 -7
- data/vendor/libgit2/src/hash/{hash_mbedtls.h → sha1/mbedtls.h} +6 -11
- data/vendor/libgit2/src/hash/{hash_openssl.h → sha1/openssl.c} +14 -18
- data/vendor/libgit2/src/hash/sha1/openssl.h +19 -0
- data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.c +14 -3
- data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.h +0 -0
- data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.c +0 -0
- data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.h +0 -0
- data/vendor/libgit2/src/hash/{hash_win32.c → sha1/win32.c} +34 -24
- data/vendor/libgit2/src/hash/{hash_win32.h → sha1/win32.h} +6 -19
- data/vendor/libgit2/src/hashsig.c +1 -1
- data/vendor/libgit2/src/idxmap.c +91 -65
- data/vendor/libgit2/src/idxmap.h +151 -15
- data/vendor/libgit2/src/ignore.c +32 -38
- data/vendor/libgit2/src/index.c +105 -83
- data/vendor/libgit2/src/index.h +1 -1
- data/vendor/libgit2/src/indexer.c +71 -72
- data/vendor/libgit2/src/integer.h +39 -4
- data/vendor/libgit2/src/iterator.c +40 -35
- data/vendor/libgit2/src/iterator.h +8 -8
- data/vendor/libgit2/src/map.h +1 -1
- data/vendor/libgit2/src/merge.c +78 -51
- data/vendor/libgit2/src/merge.h +2 -2
- data/vendor/libgit2/src/merge_driver.c +5 -5
- data/vendor/libgit2/src/merge_file.c +1 -1
- data/vendor/libgit2/src/mwindow.c +18 -23
- data/vendor/libgit2/src/mwindow.h +4 -4
- data/vendor/libgit2/src/net.c +411 -0
- data/vendor/libgit2/src/net.h +57 -0
- data/vendor/libgit2/src/netops.c +6 -193
- data/vendor/libgit2/src/netops.h +1 -34
- data/vendor/libgit2/src/notes.c +8 -5
- data/vendor/libgit2/src/object.c +3 -3
- data/vendor/libgit2/src/object.h +2 -0
- data/vendor/libgit2/src/odb.c +41 -23
- data/vendor/libgit2/src/odb.h +3 -2
- data/vendor/libgit2/src/odb_loose.c +17 -10
- data/vendor/libgit2/src/odb_mempack.c +13 -24
- data/vendor/libgit2/src/odb_pack.c +4 -5
- data/vendor/libgit2/src/offmap.c +43 -55
- data/vendor/libgit2/src/offmap.h +102 -24
- data/vendor/libgit2/src/oid.c +19 -8
- data/vendor/libgit2/src/oidmap.c +39 -57
- data/vendor/libgit2/src/oidmap.h +99 -19
- data/vendor/libgit2/src/pack-objects.c +28 -33
- data/vendor/libgit2/src/pack-objects.h +1 -1
- data/vendor/libgit2/src/pack.c +117 -129
- data/vendor/libgit2/src/pack.h +15 -18
- data/vendor/libgit2/src/parse.c +10 -0
- data/vendor/libgit2/src/parse.h +3 -3
- data/vendor/libgit2/src/patch.c +1 -1
- data/vendor/libgit2/src/patch_generate.c +2 -2
- data/vendor/libgit2/src/patch_parse.c +130 -33
- data/vendor/libgit2/src/path.c +43 -6
- data/vendor/libgit2/src/path.h +2 -0
- data/vendor/libgit2/src/pathspec.c +14 -14
- data/vendor/libgit2/src/pool.c +26 -22
- data/vendor/libgit2/src/pool.h +7 -7
- data/vendor/libgit2/src/posix.c +7 -7
- data/vendor/libgit2/src/posix.h +12 -1
- data/vendor/libgit2/src/proxy.c +7 -2
- data/vendor/libgit2/src/push.c +13 -7
- data/vendor/libgit2/src/reader.c +2 -2
- data/vendor/libgit2/src/rebase.c +87 -28
- data/vendor/libgit2/src/refdb.c +12 -0
- data/vendor/libgit2/src/refdb_fs.c +219 -167
- data/vendor/libgit2/src/reflog.c +11 -13
- data/vendor/libgit2/src/refs.c +39 -23
- data/vendor/libgit2/src/refs.h +8 -1
- data/vendor/libgit2/src/refspec.c +9 -16
- data/vendor/libgit2/src/regexp.c +221 -0
- data/vendor/libgit2/src/regexp.h +97 -0
- data/vendor/libgit2/src/remote.c +57 -55
- data/vendor/libgit2/src/remote.h +2 -2
- data/vendor/libgit2/src/repository.c +187 -154
- data/vendor/libgit2/src/repository.h +49 -40
- data/vendor/libgit2/src/revert.c +8 -3
- data/vendor/libgit2/src/revparse.c +18 -19
- data/vendor/libgit2/src/revwalk.c +72 -34
- data/vendor/libgit2/src/revwalk.h +20 -0
- data/vendor/libgit2/src/settings.c +13 -1
- data/vendor/libgit2/src/sortedcache.c +12 -26
- data/vendor/libgit2/src/sortedcache.h +1 -1
- data/vendor/libgit2/src/stash.c +47 -67
- data/vendor/libgit2/src/status.c +17 -11
- data/vendor/libgit2/src/streams/openssl.c +54 -2
- data/vendor/libgit2/src/streams/socket.c +2 -2
- data/vendor/libgit2/src/strmap.c +37 -84
- data/vendor/libgit2/src/strmap.h +105 -33
- data/vendor/libgit2/src/submodule.c +151 -126
- data/vendor/libgit2/src/submodule.h +1 -1
- data/vendor/libgit2/src/sysdir.c +11 -1
- data/vendor/libgit2/src/tag.c +10 -2
- data/vendor/libgit2/src/trace.c +1 -1
- data/vendor/libgit2/src/trace.h +3 -3
- data/vendor/libgit2/src/trailer.c +46 -32
- data/vendor/libgit2/src/transaction.c +10 -9
- data/vendor/libgit2/src/transports/auth.c +16 -15
- data/vendor/libgit2/src/transports/auth.h +18 -11
- data/vendor/libgit2/src/transports/auth_negotiate.c +64 -33
- data/vendor/libgit2/src/transports/auth_negotiate.h +2 -2
- data/vendor/libgit2/src/transports/auth_ntlm.c +223 -0
- data/vendor/libgit2/src/transports/auth_ntlm.h +38 -0
- data/vendor/libgit2/src/transports/credential.c +476 -0
- data/vendor/libgit2/src/transports/{cred_helpers.c → credential_helpers.c} +21 -8
- data/vendor/libgit2/src/transports/git.c +11 -16
- data/vendor/libgit2/src/transports/http.c +488 -1248
- data/vendor/libgit2/src/transports/http.h +4 -1
- data/vendor/libgit2/src/transports/httpclient.c +1549 -0
- data/vendor/libgit2/src/transports/httpclient.h +190 -0
- data/vendor/libgit2/src/transports/local.c +10 -10
- data/vendor/libgit2/src/transports/smart.c +19 -19
- data/vendor/libgit2/src/transports/smart.h +3 -3
- data/vendor/libgit2/src/transports/smart_pkt.c +1 -1
- data/vendor/libgit2/src/transports/smart_protocol.c +40 -64
- data/vendor/libgit2/src/transports/ssh.c +77 -59
- data/vendor/libgit2/src/transports/winhttp.c +272 -242
- data/vendor/libgit2/src/tree-cache.c +14 -7
- data/vendor/libgit2/src/tree.c +16 -26
- data/vendor/libgit2/src/unix/map.c +1 -1
- data/vendor/libgit2/src/unix/posix.h +2 -12
- data/vendor/libgit2/src/userdiff.h +3 -1
- data/vendor/libgit2/src/util.c +51 -53
- data/vendor/libgit2/src/util.h +16 -21
- data/vendor/libgit2/src/wildmatch.c +320 -0
- data/vendor/libgit2/src/wildmatch.h +23 -0
- data/vendor/libgit2/src/win32/map.c +3 -5
- data/vendor/libgit2/src/win32/path_w32.c +40 -3
- data/vendor/libgit2/src/win32/path_w32.h +15 -29
- data/vendor/libgit2/src/win32/posix.h +1 -4
- data/vendor/libgit2/src/win32/posix_w32.c +47 -5
- data/vendor/libgit2/src/win32/precompiled.h +0 -2
- data/vendor/libgit2/src/win32/thread.c +5 -10
- data/vendor/libgit2/src/win32/w32_buffer.c +7 -3
- data/vendor/libgit2/src/win32/w32_common.h +39 -0
- data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.c +0 -93
- data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.h +0 -2
- data/vendor/libgit2/src/win32/w32_stack.c +4 -9
- data/vendor/libgit2/src/win32/w32_stack.h +3 -3
- data/vendor/libgit2/src/win32/w32_util.c +31 -0
- data/vendor/libgit2/src/win32/w32_util.h +6 -32
- data/vendor/libgit2/src/worktree.c +79 -49
- data/vendor/libgit2/src/xdiff/xdiffi.c +1 -1
- data/vendor/libgit2/src/xdiff/xmerge.c +12 -0
- data/vendor/libgit2/src/xdiff/xpatience.c +3 -0
- data/vendor/libgit2/src/zstream.c +5 -0
- data/vendor/libgit2/src/zstream.h +1 -0
- metadata +108 -41
- data/vendor/libgit2/deps/regex/CMakeLists.txt +0 -2
- data/vendor/libgit2/deps/regex/COPYING +0 -502
- data/vendor/libgit2/deps/regex/config.h +0 -7
- data/vendor/libgit2/deps/regex/regcomp.c +0 -3857
- data/vendor/libgit2/deps/regex/regex.c +0 -92
- data/vendor/libgit2/deps/regex/regex.h +0 -582
- data/vendor/libgit2/deps/regex/regex_internal.c +0 -1744
- data/vendor/libgit2/deps/regex/regex_internal.h +0 -819
- data/vendor/libgit2/deps/regex/regexec.c +0 -4369
- data/vendor/libgit2/include/git2/inttypes.h +0 -309
- data/vendor/libgit2/include/git2/sys/time.h +0 -31
- data/vendor/libgit2/libgit2.pc.in +0 -13
- data/vendor/libgit2/src/fnmatch.c +0 -248
- data/vendor/libgit2/src/fnmatch.h +0 -48
- data/vendor/libgit2/src/sha1_lookup.c +0 -35
- data/vendor/libgit2/src/transports/cred.c +0 -390
@@ -0,0 +1,201 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Edward Thomson. All rights reserved.
|
3
|
+
*
|
4
|
+
* This file is part of ntlmclient, distributed under the MIT license.
|
5
|
+
* For full terms and copyright information, and for third-party
|
6
|
+
* copyright information, see the included LICENSE.txt file.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include <locale.h>
|
10
|
+
#include <iconv.h>
|
11
|
+
#include <string.h>
|
12
|
+
#include <errno.h>
|
13
|
+
|
14
|
+
#include "ntlmclient.h"
|
15
|
+
#include "unicode.h"
|
16
|
+
#include "ntlm.h"
|
17
|
+
#include "compat.h"
|
18
|
+
|
19
|
+
struct ntlm_unicode_ctx {
|
20
|
+
ntlm_client *ntlm;
|
21
|
+
iconv_t utf8_to_16;
|
22
|
+
iconv_t utf16_to_8;
|
23
|
+
};
|
24
|
+
|
25
|
+
/*
 * Allocate a unicode conversion context bound to `ntlm`.
 *
 * Both iconv descriptors start out as (iconv_t)-1, i.e. "not opened";
 * they are opened on demand by unicode_iconv_init().
 *
 * Returns the new context, or NULL if the allocation failed.
 */
ntlm_unicode_ctx *ntlm_unicode_ctx_init(ntlm_client *ntlm)
{
	ntlm_unicode_ctx *unicode = calloc(1, sizeof(ntlm_unicode_ctx));

	if (unicode != NULL) {
		unicode->ntlm = ntlm;
		unicode->utf8_to_16 = (iconv_t)-1;
		unicode->utf16_to_8 = (iconv_t)-1;
	}

	return unicode;
}
|
38
|
+
|
39
|
+
typedef enum {
|
40
|
+
unicode_iconv_utf8_to_16,
|
41
|
+
unicode_iconv_utf16_to_8
|
42
|
+
} unicode_iconv_encoding_direction;
|
43
|
+
|
44
|
+
/*
 * Lazily open the two iconv descriptors (UTF-8 -> UTF-16LE and
 * UTF-16LE -> UTF-8) stored on `ctx`.
 *
 * The context counts as initialised only when BOTH descriptors are
 * open.  If a previous call opened one descriptor and failed on the
 * other, only the missing one is opened again, so the descriptor that
 * is already open is neither leaked nor reopened.
 *
 * Returns true when both descriptors are usable.  On failure an error
 * message is set on the ntlm client and false is returned.
 */
static inline bool unicode_iconv_init(ntlm_unicode_ctx *ctx)
{
	int error;

	if (ctx->utf8_to_16 == (iconv_t)-1 &&
	    (ctx->utf8_to_16 = iconv_open("UTF-16LE", "UTF-8")) == (iconv_t)-1)
		goto on_error;

	if (ctx->utf16_to_8 == (iconv_t)-1 &&
	    (ctx->utf16_to_8 = iconv_open("UTF-8", "UTF-16LE")) == (iconv_t)-1)
		goto on_error;

	return true;

on_error:
	/* Capture errno before any other library call can overwrite it. */
	error = errno;

	if (error == EINVAL)
		ntlm_client_set_errmsg(ctx->ntlm,
			"iconv does not support UTF8 <-> UTF16 conversion");
	else
		ntlm_client_set_errmsg(ctx->ntlm, strerror(error));

	return false;
}
|
62
|
+
|
63
|
+
static inline bool unicode_iconv_encoding_convert(
|
64
|
+
char **converted,
|
65
|
+
size_t *converted_len,
|
66
|
+
ntlm_unicode_ctx *ctx,
|
67
|
+
const char *string,
|
68
|
+
size_t string_len,
|
69
|
+
unicode_iconv_encoding_direction direction)
|
70
|
+
{
|
71
|
+
char *in_start, *out_start, *out, *new_out;
|
72
|
+
size_t in_start_len, out_start_len, out_size, nul_size, ret, written = 0;
|
73
|
+
iconv_t converter;
|
74
|
+
|
75
|
+
*converted = NULL;
|
76
|
+
*converted_len = 0;
|
77
|
+
|
78
|
+
if (!unicode_iconv_init(ctx))
|
79
|
+
return false;
|
80
|
+
|
81
|
+
/*
|
82
|
+
* When translating UTF8 to UTF16, these strings are only used
|
83
|
+
* internally, and we obey the given length, so we can simply
|
84
|
+
* use a buffer that is 2x the size. When translating from UTF16
|
85
|
+
* to UTF8, we may need to return to callers, so we need to NUL
|
86
|
+
* terminate and expect an extra byte for UTF8, two for UTF16.
|
87
|
+
*/
|
88
|
+
if (direction == unicode_iconv_utf8_to_16) {
|
89
|
+
converter = ctx->utf8_to_16;
|
90
|
+
out_size = (string_len * 2) + 2;
|
91
|
+
nul_size = 2;
|
92
|
+
} else {
|
93
|
+
converter = ctx->utf16_to_8;
|
94
|
+
out_size = (string_len / 2) + 1;
|
95
|
+
nul_size = 1;
|
96
|
+
}
|
97
|
+
|
98
|
+
/* Round to the nearest multiple of 8 */
|
99
|
+
out_size = (out_size + 7) & ~7;
|
100
|
+
|
101
|
+
if ((out = malloc(out_size)) == NULL) {
|
102
|
+
ntlm_client_set_errmsg(ctx->ntlm, "out of memory");
|
103
|
+
return false;
|
104
|
+
}
|
105
|
+
|
106
|
+
in_start = (char *)string;
|
107
|
+
in_start_len = string_len;
|
108
|
+
|
109
|
+
while (true) {
|
110
|
+
out_start = out + written;
|
111
|
+
out_start_len = (out_size - nul_size) - written;
|
112
|
+
|
113
|
+
ret = iconv(converter, &in_start, &in_start_len, &out_start, &out_start_len);
|
114
|
+
written = (out_size - nul_size) - out_start_len;
|
115
|
+
|
116
|
+
if (ret == 0)
|
117
|
+
break;
|
118
|
+
|
119
|
+
if (ret == (size_t)-1 && errno != E2BIG) {
|
120
|
+
ntlm_client_set_errmsg(ctx->ntlm, strerror(errno));
|
121
|
+
goto on_error;
|
122
|
+
}
|
123
|
+
|
124
|
+
/* Grow buffer size by 1.5 (rounded up to a multiple of 8) */
|
125
|
+
out_size = ((((out_size << 1) - (out_size >> 1)) + 7) & ~7);
|
126
|
+
|
127
|
+
if (out_size > NTLM_UNICODE_MAX_LEN) {
|
128
|
+
ntlm_client_set_errmsg(ctx->ntlm,
|
129
|
+
"unicode conversion too large");
|
130
|
+
goto on_error;
|
131
|
+
}
|
132
|
+
|
133
|
+
if ((new_out = realloc(out, out_size)) == NULL) {
|
134
|
+
ntlm_client_set_errmsg(ctx->ntlm, "out of memory");
|
135
|
+
goto on_error;
|
136
|
+
}
|
137
|
+
|
138
|
+
out = new_out;
|
139
|
+
}
|
140
|
+
|
141
|
+
if (in_start_len != 0) {
|
142
|
+
ntlm_client_set_errmsg(ctx->ntlm,
|
143
|
+
"invalid unicode string; trailing data remains");
|
144
|
+
goto on_error;
|
145
|
+
}
|
146
|
+
|
147
|
+
/* NUL terminate */
|
148
|
+
out[written] = '\0';
|
149
|
+
|
150
|
+
if (direction == unicode_iconv_utf8_to_16)
|
151
|
+
out[written + 1] = '\0';
|
152
|
+
|
153
|
+
*converted = out;
|
154
|
+
|
155
|
+
if (converted_len)
|
156
|
+
*converted_len = written;
|
157
|
+
|
158
|
+
return true;
|
159
|
+
|
160
|
+
on_error:
|
161
|
+
free(out);
|
162
|
+
return false;
|
163
|
+
}
|
164
|
+
|
165
|
+
bool ntlm_unicode_utf8_to_16(
|
166
|
+
char **converted,
|
167
|
+
size_t *converted_len,
|
168
|
+
ntlm_unicode_ctx *ctx,
|
169
|
+
const char *string,
|
170
|
+
size_t string_len)
|
171
|
+
{
|
172
|
+
return unicode_iconv_encoding_convert(
|
173
|
+
converted, converted_len, ctx, string, string_len,
|
174
|
+
unicode_iconv_utf8_to_16);
|
175
|
+
}
|
176
|
+
|
177
|
+
bool ntlm_unicode_utf16_to_8(
|
178
|
+
char **converted,
|
179
|
+
size_t *converted_len,
|
180
|
+
ntlm_unicode_ctx *ctx,
|
181
|
+
const char *string,
|
182
|
+
size_t string_len)
|
183
|
+
{
|
184
|
+
return unicode_iconv_encoding_convert(
|
185
|
+
converted, converted_len, ctx, string, string_len,
|
186
|
+
unicode_iconv_utf16_to_8);
|
187
|
+
}
|
188
|
+
|
189
|
+
/*
 * Release a unicode conversion context: close whichever iconv
 * descriptors were opened, then free the context itself.
 * Passing NULL is a no-op.
 */
void ntlm_unicode_ctx_free(ntlm_unicode_ctx *ctx)
{
	if (ctx != NULL) {
		/* (iconv_t)-1 marks a descriptor that was never opened. */
		if (ctx->utf16_to_8 != (iconv_t)-1)
			iconv_close(ctx->utf16_to_8);

		if (ctx->utf8_to_16 != (iconv_t)-1)
			iconv_close(ctx->utf8_to_16);

		free(ctx);
	}
}
|
@@ -0,0 +1,1257 @@
|
|
1
|
+
// The latest version of this library is available on GitHub;
|
2
|
+
// https://github.com/sheredom/utf8.h
|
3
|
+
|
4
|
+
// This is free and unencumbered software released into the public domain.
|
5
|
+
//
|
6
|
+
// Anyone is free to copy, modify, publish, use, compile, sell, or
|
7
|
+
// distribute this software, either in source code form or as a compiled
|
8
|
+
// binary, for any purpose, commercial or non-commercial, and by any
|
9
|
+
// means.
|
10
|
+
//
|
11
|
+
// In jurisdictions that recognize copyright laws, the author or authors
|
12
|
+
// of this software dedicate any and all copyright interest in the
|
13
|
+
// software to the public domain. We make this dedication for the benefit
|
14
|
+
// of the public at large and to the detriment of our heirs and
|
15
|
+
// successors. We intend this dedication to be an overt act of
|
16
|
+
// relinquishment in perpetuity of all present and future rights to this
|
17
|
+
// software under copyright law.
|
18
|
+
//
|
19
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
22
|
+
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
23
|
+
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
24
|
+
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
25
|
+
// OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
//
|
27
|
+
// For more information, please refer to <http://unlicense.org/>
|
28
|
+
|
29
|
+
#ifndef SHEREDOM_UTF8_H_INCLUDED
|
30
|
+
#define SHEREDOM_UTF8_H_INCLUDED
|
31
|
+
|
32
|
+
#if defined(_MSC_VER)
|
33
|
+
#pragma warning(push)
|
34
|
+
|
35
|
+
// disable 'bytes padding added after construct' warning
|
36
|
+
#pragma warning(disable : 4820)
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#include <stddef.h>
|
40
|
+
#include <stdlib.h>
|
41
|
+
|
42
|
+
#if defined(_MSC_VER)
|
43
|
+
#pragma warning(pop)
|
44
|
+
#endif
|
45
|
+
|
46
|
+
#if defined(_MSC_VER)
|
47
|
+
typedef __int32 utf8_int32_t;
|
48
|
+
#else
|
49
|
+
#include <stdint.h>
|
50
|
+
typedef int32_t utf8_int32_t;
|
51
|
+
#endif
|
52
|
+
|
53
|
+
#if defined(__clang__)
|
54
|
+
#pragma clang diagnostic push
|
55
|
+
#pragma clang diagnostic ignored "-Wold-style-cast"
|
56
|
+
#pragma clang diagnostic ignored "-Wcast-qual"
|
57
|
+
#endif
|
58
|
+
|
59
|
+
#ifdef __cplusplus
|
60
|
+
extern "C" {
|
61
|
+
#endif
|
62
|
+
|
63
|
+
#if defined(__clang__) || defined(__GNUC__)
|
64
|
+
#define utf8_nonnull __attribute__((nonnull))
|
65
|
+
#define utf8_pure __attribute__((pure))
|
66
|
+
#define utf8_restrict __restrict__
|
67
|
+
#define utf8_weak __attribute__((weak))
|
68
|
+
#elif defined(_MSC_VER)
|
69
|
+
#define utf8_nonnull
|
70
|
+
#define utf8_pure
|
71
|
+
#define utf8_restrict __restrict
|
72
|
+
#define utf8_weak __inline
|
73
|
+
#else
|
74
|
+
#error Non clang, non gcc, non MSVC compiler found!
|
75
|
+
#endif
|
76
|
+
|
77
|
+
#ifdef __cplusplus
|
78
|
+
#define utf8_null NULL
|
79
|
+
#else
|
80
|
+
#define utf8_null 0
|
81
|
+
#endif
|
82
|
+
|
83
|
+
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
84
|
+
// src2 respectively, case insensitive.
|
85
|
+
utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1,
|
86
|
+
const void *src2);
|
87
|
+
|
88
|
+
// Append the utf8 string src onto the utf8 string dst.
|
89
|
+
utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst,
|
90
|
+
const void *utf8_restrict src);
|
91
|
+
|
92
|
+
// Find the first match of the utf8 codepoint chr in the utf8 string src.
|
93
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src,
|
94
|
+
utf8_int32_t chr);
|
95
|
+
|
96
|
+
// Return less than 0, 0, greater than 0 if src1 < src2,
|
97
|
+
// src1 == src2, src1 > src2 respectively.
|
98
|
+
utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1,
|
99
|
+
const void *src2);
|
100
|
+
|
101
|
+
// Copy the utf8 string src onto the memory allocated in dst.
|
102
|
+
utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst,
|
103
|
+
const void *utf8_restrict src);
|
104
|
+
|
105
|
+
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
106
|
+
// of utf8 codepoints not from the utf8 string reject.
|
107
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src,
|
108
|
+
const void *reject);
|
109
|
+
|
110
|
+
// Duplicate the utf8 string src by getting its size, malloc'ing a new buffer,
|
111
|
+
// copying over the data, and returning that. Or 0 if malloc failed.
|
112
|
+
utf8_nonnull utf8_weak void *utf8dup(const void *src);
|
113
|
+
|
114
|
+
// Number of utf8 codepoints in the utf8 string str,
|
115
|
+
// excluding the null terminating byte.
|
116
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
|
117
|
+
|
118
|
+
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
119
|
+
// src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
120
|
+
// string.
|
121
|
+
utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1,
|
122
|
+
const void *src2, size_t n);
|
123
|
+
|
124
|
+
// Append the utf8 string src onto the utf8 string dst,
|
125
|
+
// writing at most n+1 bytes. Can produce an invalid utf8
|
126
|
+
// string if n falls partway through a utf8 codepoint.
|
127
|
+
utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst,
|
128
|
+
const void *utf8_restrict src, size_t n);
|
129
|
+
|
130
|
+
// Return less than 0, 0, greater than 0 if src1 < src2,
|
131
|
+
// src1 == src2, src1 > src2 respectively. Checking at most n
|
132
|
+
// bytes of each utf8 string.
|
133
|
+
utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1,
|
134
|
+
const void *src2, size_t n);
|
135
|
+
|
136
|
+
// Copy the utf8 string src onto the memory allocated in dst.
|
137
|
+
// Copies at most n bytes. If there is no terminating null byte in
|
138
|
+
// the first n bytes of src, the string placed into dst will not be
|
139
|
+
// null-terminated. If the size (in bytes) of src is less than n,
|
140
|
+
// extra null terminating bytes are appended to dst such that a
|
141
|
+
// total of n bytes are written. Can produce an invalid utf8
|
142
|
+
// string if n falls partway through a utf8 codepoint.
|
143
|
+
utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst,
|
144
|
+
const void *utf8_restrict src, size_t n);
|
145
|
+
|
146
|
+
// Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
147
|
+
// longer than n, only n bytes are copied and a null byte is added.
|
148
|
+
//
|
149
|
+
// Returns a new string if successful, 0 otherwise
|
150
|
+
utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
|
151
|
+
|
152
|
+
// Locates the first occurrence in the utf8 string str of any codepoint in the
|
153
|
+
// utf8 string accept, or 0 if no match was found.
|
154
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str,
|
155
|
+
const void *accept);
|
156
|
+
|
157
|
+
// Find the last match of the utf8 codepoint chr in the utf8 string src.
|
158
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
|
159
|
+
|
160
|
+
// Number of bytes in the utf8 string str,
|
161
|
+
// including the null terminating byte.
|
162
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
|
163
|
+
|
164
|
+
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
165
|
+
// of utf8 codepoints from the utf8 string accept.
|
166
|
+
utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src,
|
167
|
+
const void *accept);
|
168
|
+
|
169
|
+
// The position of the utf8 string needle in the utf8 string haystack.
|
170
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack,
|
171
|
+
const void *needle);
|
172
|
+
|
173
|
+
// The position of the utf8 string needle in the utf8 string haystack, case
|
174
|
+
// insensitive.
|
175
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack,
|
176
|
+
const void *needle);
|
177
|
+
|
178
|
+
// Return 0 on success, or the position of the invalid
|
179
|
+
// utf8 codepoint on failure.
|
180
|
+
utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
|
181
|
+
|
182
|
+
// Sets out_codepoint to the next utf8 codepoint in str, and returns the address
|
183
|
+
// of the utf8 codepoint after the current one in str.
|
184
|
+
utf8_nonnull utf8_weak void *
|
185
|
+
utf8codepoint(const void *utf8_restrict str,
|
186
|
+
utf8_int32_t *utf8_restrict out_codepoint);
|
187
|
+
|
188
|
+
// Returns the size of the given codepoint in bytes.
|
189
|
+
utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
|
190
|
+
|
191
|
+
// Write a codepoint to the given string, and return the address to the next
|
192
|
+
// place after the written codepoint. Pass how many bytes left in the buffer to
|
193
|
+
// n. If there is not enough space for the codepoint, this function returns
|
194
|
+
// null.
|
195
|
+
utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str,
|
196
|
+
utf8_int32_t chr, size_t n);
|
197
|
+
|
198
|
+
// Returns 1 if the given character is lowercase, or 0 if it is not.
|
199
|
+
utf8_weak int utf8islower(utf8_int32_t chr);
|
200
|
+
|
201
|
+
// Returns 1 if the given character is uppercase, or 0 if it is not.
|
202
|
+
utf8_weak int utf8isupper(utf8_int32_t chr);
|
203
|
+
|
204
|
+
// Transform the given string into all lowercase codepoints.
|
205
|
+
utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
|
206
|
+
|
207
|
+
// Transform the given string into all uppercase codepoints.
|
208
|
+
utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str);
|
209
|
+
|
210
|
+
// Make a codepoint lower case if possible.
|
211
|
+
utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
|
212
|
+
|
213
|
+
// Make a codepoint upper case if possible.
|
214
|
+
utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
|
215
|
+
|
216
|
+
#undef utf8_weak
|
217
|
+
#undef utf8_pure
|
218
|
+
#undef utf8_nonnull
|
219
|
+
|
220
|
+
int utf8casecmp(const void *src1, const void *src2) {
|
221
|
+
utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
|
222
|
+
|
223
|
+
for (;;) {
|
224
|
+
src1 = utf8codepoint(src1, &src1_cp);
|
225
|
+
src2 = utf8codepoint(src2, &src2_cp);
|
226
|
+
|
227
|
+
// Take a copy of src1 & src2
|
228
|
+
src1_orig_cp = src1_cp;
|
229
|
+
src2_orig_cp = src2_cp;
|
230
|
+
|
231
|
+
// Lower the srcs if required
|
232
|
+
src1_cp = utf8lwrcodepoint(src1_cp);
|
233
|
+
src2_cp = utf8lwrcodepoint(src2_cp);
|
234
|
+
|
235
|
+
// Check if the lowered codepoints match
|
236
|
+
if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
|
237
|
+
return 0;
|
238
|
+
} else if (src1_cp == src2_cp) {
|
239
|
+
continue;
|
240
|
+
}
|
241
|
+
|
242
|
+
// If they don't match, then we return which of the original's are less
|
243
|
+
if (src1_orig_cp < src2_orig_cp) {
|
244
|
+
return -1;
|
245
|
+
} else if (src1_orig_cp > src2_orig_cp) {
|
246
|
+
return 1;
|
247
|
+
}
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
// Append the utf8 string src to the end of the utf8 string dst and return
// dst. The terminator of dst is overwritten and a new one is written after
// the appended bytes. No bounds checking is performed on dst.
void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src) {
  char *out = (char *)dst;
  const char *in = (const char *)src;

  // walk to the terminator of dst
  for (; '\0' != *out; out++) {
  }

  // copy src byte-by-byte, starting on top of that terminator
  for (; '\0' != *in; in++, out++) {
    *out = *in;
  }

  // re-terminate the combined string
  *out = '\0';

  return dst;
}
|
270
|
+
|
271
|
+
// Find the first occurrence of the codepoint chr in the utf8 string src.
// chr is encoded into a small null-terminated utf8 string which is then
// handed to utf8str. When chr is 0 the address of the terminator of src is
// returned instead.
void *utf8chr(const void *src, utf8_int32_t chr) {
  char needle[5] = {0};

  if (0 == chr) {
    // asked for the terminator itself: run to the end of src
    const char *end = (const char *)src;

    while ('\0' != *end) {
      end++;
    }

    return (void *)end;
  }

  if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
    // 1-byte/7-bit ascii (0b0xxxxxxx)
    needle[0] = (char)chr;
  } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
    // 2-byte/11-bit codepoint (0b110xxxxx 0b10xxxxxx)
    needle[0] = 0xc0 | (char)(chr >> 6);
    needle[1] = 0x80 | (char)(chr & 0x3f);
  } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
    // 3-byte/16-bit codepoint (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
    needle[0] = 0xe0 | (char)(chr >> 12);
    needle[1] = 0x80 | (char)((chr >> 6) & 0x3f);
    needle[2] = 0x80 | (char)(chr & 0x3f);
  } else {
    // everything else is written as a 4-byte/21-bit codepoint
    // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
    needle[0] = 0xf0 | (char)(chr >> 18);
    needle[1] = 0x80 | (char)((chr >> 12) & 0x3f);
    needle[2] = 0x80 | (char)((chr >> 6) & 0x3f);
    needle[3] = 0x80 | (char)(chr & 0x3f);
  }

  // needle now holds the encoded codepoint plus its terminator
  return utf8str(src, needle);
}
|
311
|
+
|
312
|
+
// Byte-wise comparison of two null-terminated utf8 strings.
// Returns -1 if src1 < src2, 1 if src1 > src2 and 0 if they are equal;
// bytes are compared as unsigned values.
int utf8cmp(const void *src1, const void *src2) {
  const unsigned char *lhs = (const unsigned char *)src1;
  const unsigned char *rhs = (const unsigned char *)src2;

  // keep going until both strings have reached their terminator
  for (; ('\0' != *lhs) || ('\0' != *rhs); lhs++, rhs++) {
    if (*lhs != *rhs) {
      return (*lhs < *rhs) ? -1 : 1;
    }
  }

  // every byte matched
  return 0;
}
|
330
|
+
|
331
|
+
int utf8coll(const void *src1, const void *src2);
|
332
|
+
|
333
|
+
// Copy the utf8 string src, including its terminator, over whatever dst
// previously held and return dst. No bounds checking is performed on dst.
void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src) {
  char *out = (char *)dst;
  const char *in = (const char *)src;
  size_t i;

  // copy every byte that precedes the terminator
  for (i = 0; '\0' != in[i]; i++) {
    out[i] = in[i];
  }

  // then terminate the copy
  out[i] = '\0';

  return dst;
}
|
348
|
+
|
349
|
+
// Count the utf8 codepoints at the start of src that do not appear in the
// utf8 string reject. Counting stops at the first codepoint of src that
// matches any codepoint of reject, or at the end of src.
size_t utf8cspn(const void *src, const void *reject) {
  const char *s = (const char *)src;
  size_t chars = 0;

  while ('\0' != *s) {
    const char *r = (const char *)reject;
    size_t offset = 0;

    while ('\0' != *r) {
      // *r starts a new codepoint (it is not 0b10xxxxxx) and the previous
      // codepoint of reject matched completely (0 < offset): found a match
      if ((0x80 != (0xc0 & *r)) && (0 < offset)) {
        return chars;
      }

      if (*r == s[offset]) {
        // part of a codepoint matched, check the next byte
        offset++;
        r++;
      } else {
        // r may be in the middle of a non-matching codepoint, so march it
        // on to the beginning of the next one
        do {
          r++;
        } while (0x80 == (0xc0 & *r));

        // a mismatch discards any partial match
        offset = 0;
      }
    }

    // the loop above only reports a match when it reaches the *next*
    // codepoint of reject; a match against the last codepoint of reject
    // ends the loop with offset still set, so it has to be checked here
    if (0 < offset) {
      return chars;
    }

    // the current codepoint of src is not rejected; move s to the start of
    // the next codepoint
    do {
      s++;
    } while (0x80 == (0xc0 & *s));

    chars++;
  }

  return chars;
}
|
394
|
+
|
395
|
+
size_t utf8size(const void *str);
|
396
|
+
|
397
|
+
void *utf8dup(const void *src) {
|
398
|
+
const char *s = (const char *)src;
|
399
|
+
char *n = utf8_null;
|
400
|
+
|
401
|
+
// figure out how many bytes (including the terminator) we need to copy first
|
402
|
+
size_t bytes = utf8size(src);
|
403
|
+
|
404
|
+
n = (char *)malloc(bytes);
|
405
|
+
|
406
|
+
if (utf8_null == n) {
|
407
|
+
// out of memory so we bail
|
408
|
+
return utf8_null;
|
409
|
+
} else {
|
410
|
+
bytes = 0;
|
411
|
+
|
412
|
+
// copy src byte-by-byte into our new utf8 string
|
413
|
+
while ('\0' != s[bytes]) {
|
414
|
+
n[bytes] = s[bytes];
|
415
|
+
bytes++;
|
416
|
+
}
|
417
|
+
|
418
|
+
// append null terminating byte
|
419
|
+
n[bytes] = '\0';
|
420
|
+
return n;
|
421
|
+
}
|
422
|
+
}
|
423
|
+
|
424
|
+
void *utf8fry(const void *str);
|
425
|
+
|
426
|
+
// Count the utf8 codepoints in the null-terminated string str, excluding
// the terminator. The width of each codepoint is taken from its lead byte.
// A sequence that is cut short by the terminator is counted as one
// codepoint and scanning stops there, so the cursor never moves past the
// end of the string.
size_t utf8len(const void *str) {
  const unsigned char *s = (const unsigned char *)str;
  size_t length = 0;

  while ('\0' != *s) {
    size_t width;

    if (0xf0 == (0xf8 & *s)) {
      // 4-byte codepoint (lead byte 0b11110xxx)
      width = 4;
    } else if (0xe0 == (0xf0 & *s)) {
      // 3-byte codepoint (lead byte 0b1110xxxx)
      width = 3;
    } else if (0xc0 == (0xe0 & *s)) {
      // 2-byte codepoint (lead byte 0b110xxxxx)
      width = 2;
    } else {
      // 1-byte ascii (0b0xxxxxxx), or a stray byte treated as one unit
      width = 1;
    }

    // step over the lead byte, then over the trailing bytes, but never
    // over the terminator of a truncated sequence
    s++;
    width--;
    while ((0 != width) && ('\0' != *s)) {
      s++;
      width--;
    }

    // however many bytes were consumed, it was one codepoint
    length++;
  }

  return length;
}
|
452
|
+
|
453
|
+
int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
454
|
+
utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
|
455
|
+
|
456
|
+
do {
|
457
|
+
const unsigned char *const s1 = (const unsigned char *)src1;
|
458
|
+
const unsigned char *const s2 = (const unsigned char *)src2;
|
459
|
+
|
460
|
+
// first check that we have enough bytes left in n to contain an entire
|
461
|
+
// codepoint
|
462
|
+
if (0 == n) {
|
463
|
+
return 0;
|
464
|
+
}
|
465
|
+
|
466
|
+
if ((1 == n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2)))) {
|
467
|
+
const utf8_int32_t c1 = (0xe0 & *s1);
|
468
|
+
const utf8_int32_t c2 = (0xe0 & *s2);
|
469
|
+
|
470
|
+
if (c1 < c2) {
|
471
|
+
return -1;
|
472
|
+
} else if (c1 > c2) {
|
473
|
+
return 1;
|
474
|
+
} else {
|
475
|
+
return 0;
|
476
|
+
}
|
477
|
+
}
|
478
|
+
|
479
|
+
if ((2 >= n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2)))) {
|
480
|
+
const utf8_int32_t c1 = (0xf0 & *s1);
|
481
|
+
const utf8_int32_t c2 = (0xf0 & *s2);
|
482
|
+
|
483
|
+
if (c1 < c2) {
|
484
|
+
return -1;
|
485
|
+
} else if (c1 > c2) {
|
486
|
+
return 1;
|
487
|
+
} else {
|
488
|
+
return 0;
|
489
|
+
}
|
490
|
+
}
|
491
|
+
|
492
|
+
if ((3 >= n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2)))) {
|
493
|
+
const utf8_int32_t c1 = (0xf8 & *s1);
|
494
|
+
const utf8_int32_t c2 = (0xf8 & *s2);
|
495
|
+
|
496
|
+
if (c1 < c2) {
|
497
|
+
return -1;
|
498
|
+
} else if (c1 > c2) {
|
499
|
+
return 1;
|
500
|
+
} else {
|
501
|
+
return 0;
|
502
|
+
}
|
503
|
+
}
|
504
|
+
|
505
|
+
src1 = utf8codepoint(src1, &src1_cp);
|
506
|
+
src2 = utf8codepoint(src2, &src2_cp);
|
507
|
+
n -= utf8codepointsize(src1_cp);
|
508
|
+
|
509
|
+
// Take a copy of src1 & src2
|
510
|
+
src1_orig_cp = src1_cp;
|
511
|
+
src2_orig_cp = src2_cp;
|
512
|
+
|
513
|
+
// Lower srcs if required
|
514
|
+
src1_cp = utf8lwrcodepoint(src1_cp);
|
515
|
+
src2_cp = utf8lwrcodepoint(src2_cp);
|
516
|
+
|
517
|
+
// Check if the lowered codepoints match
|
518
|
+
if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
|
519
|
+
return 0;
|
520
|
+
} else if (src1_cp == src2_cp) {
|
521
|
+
continue;
|
522
|
+
}
|
523
|
+
|
524
|
+
// If they don't match, then we return which of the original's are less
|
525
|
+
if (src1_orig_cp < src2_orig_cp) {
|
526
|
+
return -1;
|
527
|
+
} else if (src1_orig_cp > src2_orig_cp) {
|
528
|
+
return 1;
|
529
|
+
}
|
530
|
+
} while (0 < n);
|
531
|
+
|
532
|
+
// both utf8 strings matched
|
533
|
+
return 0;
|
534
|
+
}
|
535
|
+
|
536
|
+
// Append at most n bytes of the utf8 string src to the utf8 string dst,
// then write a terminator (so up to n+1 bytes are written) and return dst.
// Nothing is copied when n is 0 or src is empty. The result can be invalid
// utf8 if n falls partway through a codepoint. No bounds checking is
// performed on dst.
void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src,
               size_t n) {
  char *d = (char *)dst;
  const char *s = (const char *)src;

  // find the terminator of dst
  while ('\0' != *d) {
    d++;
  }

  // append src byte-by-byte starting on top of that terminator. The budget
  // is tested before src is read, so no byte is copied or read once n bytes
  // have been consumed, and an empty src is never read past its terminator.
  while ((0 != n) && ('\0' != *s)) {
    *d++ = *s++;
    n--;
  }

  // write out the new terminator
  *d = '\0';

  return dst;
}
|
557
|
+
|
558
|
+
// Byte-wise comparison of at most n bytes of two utf8 strings.
// Returns -1 if src1 < src2, 1 if src1 > src2 and 0 if the compared bytes
// are equal; bytes are compared as unsigned values. Comparison stops early
// when both strings reach their terminator. Only bytes at indices below n
// are read.
int utf8ncmp(const void *src1, const void *src2, size_t n) {
  const unsigned char *s1 = (const unsigned char *)src1;
  const unsigned char *s2 = (const unsigned char *)src2;
  size_t i;

  // the bound is tested before either string is dereferenced, so a buffer
  // of exactly n unterminated bytes is never read past its end
  for (i = 0; i < n; i++) {
    if (s1[i] != s2[i]) {
      return (s1[i] < s2[i]) ? -1 : 1;
    }

    // equal bytes that are both the terminator: the strings matched
    if ('\0' == s1[i]) {
      break;
    }
  }

  return 0;
}
|
576
|
+
|
577
|
+
// Copy at most n bytes of the utf8 string src into dst and return dst.
// Exactly n bytes are written: if src is shorter than n the remainder of
// dst is filled with null bytes; if src has no terminator within its first
// n bytes, dst is left unterminated. Nothing is written when n is 0. The
// result can be invalid utf8 if n falls partway through a codepoint.
void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src,
               size_t n) {
  char *d = (char *)dst;
  const char *s = (const char *)src;
  size_t index = 0;

  // copy bytes of src until its terminator or until n bytes are written;
  // the bound is tested first so src is never read at or beyond index n
  for (; (index < n) && ('\0' != s[index]); index++) {
    d[index] = s[index];
  }

  // pad whatever is left of the n bytes with null bytes
  for (; index < n; index++) {
    d[index] = '\0';
  }

  return dst;
}
|
596
|
+
|
597
|
+
void *utf8ndup(const void *src, size_t n) {
|
598
|
+
const char *s = (const char *)src;
|
599
|
+
char *c = utf8_null;
|
600
|
+
size_t bytes = 0;
|
601
|
+
|
602
|
+
// Find the end of the string or stop when n is reached
|
603
|
+
while ('\0' != s[bytes] && bytes < n) {
|
604
|
+
bytes++;
|
605
|
+
}
|
606
|
+
|
607
|
+
// In case bytes is actually less than n, we need to set it
|
608
|
+
// to be used later in the copy byte by byte.
|
609
|
+
n = bytes;
|
610
|
+
|
611
|
+
c = (char *)malloc(bytes + 1);
|
612
|
+
if (utf8_null == c) {
|
613
|
+
// out of memory so we bail
|
614
|
+
return utf8_null;
|
615
|
+
}
|
616
|
+
|
617
|
+
bytes = 0;
|
618
|
+
|
619
|
+
// copy src byte-by-byte into our new utf8 string
|
620
|
+
while ('\0' != s[bytes] && bytes < n) {
|
621
|
+
c[bytes] = s[bytes];
|
622
|
+
bytes++;
|
623
|
+
}
|
624
|
+
|
625
|
+
// append null terminating byte
|
626
|
+
c[bytes] = '\0';
|
627
|
+
return c;
|
628
|
+
}
|
629
|
+
|
630
|
+
void *utf8rchr(const void *src, int chr) {
|
631
|
+
const char *s = (const char *)src;
|
632
|
+
const char *match = utf8_null;
|
633
|
+
char c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
634
|
+
|
635
|
+
if (0 == chr) {
|
636
|
+
// being asked to return position of null terminating byte, so
|
637
|
+
// just run s to the end, and return!
|
638
|
+
while ('\0' != *s) {
|
639
|
+
s++;
|
640
|
+
}
|
641
|
+
return (void *)s;
|
642
|
+
} else if (0 == ((int)0xffffff80 & chr)) {
|
643
|
+
// 1-byte/7-bit ascii
|
644
|
+
// (0b0xxxxxxx)
|
645
|
+
c[0] = (char)chr;
|
646
|
+
} else if (0 == ((int)0xfffff800 & chr)) {
|
647
|
+
// 2-byte/11-bit utf8 code point
|
648
|
+
// (0b110xxxxx 0b10xxxxxx)
|
649
|
+
c[0] = 0xc0 | (char)(chr >> 6);
|
650
|
+
c[1] = 0x80 | (char)(chr & 0x3f);
|
651
|
+
} else if (0 == ((int)0xffff0000 & chr)) {
|
652
|
+
// 3-byte/16-bit utf8 code point
|
653
|
+
// (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
|
654
|
+
c[0] = 0xe0 | (char)(chr >> 12);
|
655
|
+
c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
|
656
|
+
c[2] = 0x80 | (char)(chr & 0x3f);
|
657
|
+
} else { // if (0 == ((int)0xffe00000 & chr)) {
|
658
|
+
// 4-byte/21-bit utf8 code point
|
659
|
+
// (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
|
660
|
+
c[0] = 0xf0 | (char)(chr >> 18);
|
661
|
+
c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
|
662
|
+
c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
|
663
|
+
c[3] = 0x80 | (char)(chr & 0x3f);
|
664
|
+
}
|
665
|
+
|
666
|
+
// we've created a 2 utf8 codepoint string in c that is
|
667
|
+
// the utf8 character asked for by chr, and a null
|
668
|
+
// terminating byte
|
669
|
+
|
670
|
+
while ('\0' != *s) {
|
671
|
+
size_t offset = 0;
|
672
|
+
|
673
|
+
while (s[offset] == c[offset]) {
|
674
|
+
offset++;
|
675
|
+
}
|
676
|
+
|
677
|
+
if ('\0' == c[offset]) {
|
678
|
+
// we found a matching utf8 code point
|
679
|
+
match = s;
|
680
|
+
s += offset;
|
681
|
+
} else {
|
682
|
+
s += offset;
|
683
|
+
|
684
|
+
// need to march s along to next utf8 codepoint start
|
685
|
+
// (the next byte that doesn't match 0b10xxxxxx)
|
686
|
+
if ('\0' != *s) {
|
687
|
+
do {
|
688
|
+
s++;
|
689
|
+
} while (0x80 == (0xc0 & *s));
|
690
|
+
}
|
691
|
+
}
|
692
|
+
}
|
693
|
+
|
694
|
+
// return the last match we found (or 0 if no match was found)
|
695
|
+
return (void *)match;
|
696
|
+
}
|
697
|
+
|
698
|
+
void *utf8pbrk(const void *str, const void *accept) {
|
699
|
+
const char *s = (const char *)str;
|
700
|
+
|
701
|
+
while ('\0' != *s) {
|
702
|
+
const char *a = (const char *)accept;
|
703
|
+
size_t offset = 0;
|
704
|
+
|
705
|
+
while ('\0' != *a) {
|
706
|
+
// checking that if *a is the start of a utf8 codepoint
|
707
|
+
// (it is not 0b10xxxxxx) and we have successfully matched
|
708
|
+
// a previous character (0 < offset) - we found a match
|
709
|
+
if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
|
710
|
+
return (void *)s;
|
711
|
+
} else {
|
712
|
+
if (*a == s[offset]) {
|
713
|
+
// part of a utf8 codepoint matched, so move our checking
|
714
|
+
// onwards to the next byte
|
715
|
+
offset++;
|
716
|
+
a++;
|
717
|
+
} else {
|
718
|
+
// r could be in the middle of an unmatching utf8 code point,
|
719
|
+
// so we need to march it on to the next character beginning,
|
720
|
+
|
721
|
+
do {
|
722
|
+
a++;
|
723
|
+
} while (0x80 == (0xc0 & *a));
|
724
|
+
|
725
|
+
// reset offset too as we found a mismatch
|
726
|
+
offset = 0;
|
727
|
+
}
|
728
|
+
}
|
729
|
+
}
|
730
|
+
|
731
|
+
// we found a match on the last utf8 codepoint
|
732
|
+
if (0 < offset) {
|
733
|
+
return (void *)s;
|
734
|
+
}
|
735
|
+
|
736
|
+
// the current utf8 codepoint in src did not match accept, but src
|
737
|
+
// could have been partway through a utf8 codepoint, so we need to
|
738
|
+
// march it onto the next utf8 codepoint starting byte
|
739
|
+
do {
|
740
|
+
s++;
|
741
|
+
} while ((0x80 == (0xc0 & *s)));
|
742
|
+
}
|
743
|
+
|
744
|
+
return utf8_null;
|
745
|
+
}
|
746
|
+
|
747
|
+
// Number of bytes occupied by the utf8 string str, including the null
// terminating byte.
size_t utf8size(const void *str) {
  const char *const first = (const char *)str;
  const char *last = first;

  // walk to the terminator
  while ('\0' != *last) {
    last++;
  }

  // distance to the terminator, plus one for the terminator itself
  return (size_t)(last - first) + 1;
}
|
758
|
+
|
759
|
+
// Count the utf8 codepoints at the start of src that all appear in the
// utf8 string accept. Counting stops at the first codepoint of src that
// matches no codepoint of accept, or at the end of src.
size_t utf8spn(const void *src, const void *accept) {
  const char *s = (const char *)src;
  size_t chars = 0;

  while ('\0' != *s) {
    const char *a = (const char *)accept;
    size_t offset = 0;

    while ('\0' != *a) {
      // *a starts a new codepoint (it is not 0b10xxxxxx) and the previous
      // codepoint of accept matched completely (0 < offset): found a match,
      // no need to look at the rest of accept
      if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
        break;
      }

      if (*a == s[offset]) {
        // part of a codepoint matched, check the next byte
        offset++;
        a++;
      } else {
        // a may be in the middle of a non-matching codepoint, so march it
        // on to the beginning of the next one
        do {
          a++;
        } while (0x80 == (0xc0 & *a));

        // a mismatch discards any partial match
        offset = 0;
      }
    }

    // offset is non-zero exactly when a codepoint of accept matched: either
    // the loop stopped on the codepoint after it, or the match was against
    // the last codepoint of accept and the loop ran into the terminator.
    // With no match the span is over.
    if (0 == offset) {
      return chars;
    }

    // count the matched codepoint and step over its bytes
    chars++;
    s += offset;
  }

  return chars;
}
|
804
|
+
|
805
|
+
void *utf8str(const void *haystack, const void *needle) {
|
806
|
+
const char *h = (const char *)haystack;
|
807
|
+
|
808
|
+
// if needle has no utf8 codepoints before the null terminating
|
809
|
+
// byte then return haystack
|
810
|
+
if ('\0' == *((const char *)needle)) {
|
811
|
+
return (void *)haystack;
|
812
|
+
}
|
813
|
+
|
814
|
+
while ('\0' != *h) {
|
815
|
+
const char *maybeMatch = h;
|
816
|
+
const char *n = (const char *)needle;
|
817
|
+
|
818
|
+
while (*h == *n && (*h != '\0' && *n != '\0')) {
|
819
|
+
n++;
|
820
|
+
h++;
|
821
|
+
}
|
822
|
+
|
823
|
+
if ('\0' == *n) {
|
824
|
+
// we found the whole utf8 string for needle in haystack at
|
825
|
+
// maybeMatch, so return it
|
826
|
+
return (void *)maybeMatch;
|
827
|
+
} else {
|
828
|
+
// h could be in the middle of an unmatching utf8 codepoint,
|
829
|
+
// so we need to march it on to the next character beginning,
|
830
|
+
if ('\0' != *h) {
|
831
|
+
do {
|
832
|
+
h++;
|
833
|
+
} while (0x80 == (0xc0 & *h));
|
834
|
+
}
|
835
|
+
}
|
836
|
+
}
|
837
|
+
|
838
|
+
// no match
|
839
|
+
return utf8_null;
|
840
|
+
}
|
841
|
+
|
842
|
+
void *utf8casestr(const void *haystack, const void *needle) {
|
843
|
+
const void *h = haystack;
|
844
|
+
|
845
|
+
// if needle has no utf8 codepoints before the null terminating
|
846
|
+
// byte then return haystack
|
847
|
+
if ('\0' == *((const char *)needle)) {
|
848
|
+
return (void *)haystack;
|
849
|
+
}
|
850
|
+
|
851
|
+
for (;;) {
|
852
|
+
const void *maybeMatch = h;
|
853
|
+
const void *n = needle;
|
854
|
+
utf8_int32_t h_cp, n_cp;
|
855
|
+
|
856
|
+
h = utf8codepoint(h, &h_cp);
|
857
|
+
n = utf8codepoint(n, &n_cp);
|
858
|
+
|
859
|
+
while ((0 != h_cp) && (0 != n_cp)) {
|
860
|
+
h_cp = utf8lwrcodepoint(h_cp);
|
861
|
+
n_cp = utf8lwrcodepoint(n_cp);
|
862
|
+
|
863
|
+
// if we find a mismatch, bail out!
|
864
|
+
if (h_cp != n_cp) {
|
865
|
+
break;
|
866
|
+
}
|
867
|
+
|
868
|
+
h = utf8codepoint(h, &h_cp);
|
869
|
+
n = utf8codepoint(n, &n_cp);
|
870
|
+
}
|
871
|
+
|
872
|
+
if (0 == n_cp) {
|
873
|
+
// we found the whole utf8 string for needle in haystack at
|
874
|
+
// maybeMatch, so return it
|
875
|
+
return (void *)maybeMatch;
|
876
|
+
}
|
877
|
+
|
878
|
+
if (0 == h_cp) {
|
879
|
+
// no match
|
880
|
+
return utf8_null;
|
881
|
+
}
|
882
|
+
}
|
883
|
+
}
|
884
|
+
|
885
|
+
void *utf8valid(const void *str) {
|
886
|
+
const char *s = (const char *)str;
|
887
|
+
|
888
|
+
while ('\0' != *s) {
|
889
|
+
if (0xf0 == (0xf8 & *s)) {
|
890
|
+
// ensure each of the 3 following bytes in this 4-byte
|
891
|
+
// utf8 codepoint began with 0b10xxxxxx
|
892
|
+
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
|
893
|
+
(0x80 != (0xc0 & s[3]))) {
|
894
|
+
return (void *)s;
|
895
|
+
}
|
896
|
+
|
897
|
+
// ensure that our utf8 codepoint ended after 4 bytes
|
898
|
+
if (0x80 == (0xc0 & s[4])) {
|
899
|
+
return (void *)s;
|
900
|
+
}
|
901
|
+
|
902
|
+
// ensure that the top 5 bits of this 4-byte utf8
|
903
|
+
// codepoint were not 0, as then we could have used
|
904
|
+
// one of the smaller encodings
|
905
|
+
if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
|
906
|
+
return (void *)s;
|
907
|
+
}
|
908
|
+
|
909
|
+
// 4-byte utf8 code point (began with 0b11110xxx)
|
910
|
+
s += 4;
|
911
|
+
} else if (0xe0 == (0xf0 & *s)) {
|
912
|
+
// ensure each of the 2 following bytes in this 3-byte
|
913
|
+
// utf8 codepoint began with 0b10xxxxxx
|
914
|
+
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
|
915
|
+
return (void *)s;
|
916
|
+
}
|
917
|
+
|
918
|
+
// ensure that our utf8 codepoint ended after 3 bytes
|
919
|
+
if (0x80 == (0xc0 & s[3])) {
|
920
|
+
return (void *)s;
|
921
|
+
}
|
922
|
+
|
923
|
+
// ensure that the top 5 bits of this 3-byte utf8
|
924
|
+
// codepoint were not 0, as then we could have used
|
925
|
+
// one of the smaller encodings
|
926
|
+
if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
|
927
|
+
return (void *)s;
|
928
|
+
}
|
929
|
+
|
930
|
+
// 3-byte utf8 code point (began with 0b1110xxxx)
|
931
|
+
s += 3;
|
932
|
+
} else if (0xc0 == (0xe0 & *s)) {
|
933
|
+
// ensure the 1 following byte in this 2-byte
|
934
|
+
// utf8 codepoint began with 0b10xxxxxx
|
935
|
+
if (0x80 != (0xc0 & s[1])) {
|
936
|
+
return (void *)s;
|
937
|
+
}
|
938
|
+
|
939
|
+
// ensure that our utf8 codepoint ended after 2 bytes
|
940
|
+
if (0x80 == (0xc0 & s[2])) {
|
941
|
+
return (void *)s;
|
942
|
+
}
|
943
|
+
|
944
|
+
// ensure that the top 4 bits of this 2-byte utf8
|
945
|
+
// codepoint were not 0, as then we could have used
|
946
|
+
// one of the smaller encodings
|
947
|
+
if (0 == (0x1e & s[0])) {
|
948
|
+
return (void *)s;
|
949
|
+
}
|
950
|
+
|
951
|
+
// 2-byte utf8 code point (began with 0b110xxxxx)
|
952
|
+
s += 2;
|
953
|
+
} else if (0x00 == (0x80 & *s)) {
|
954
|
+
// 1-byte ascii (began with 0b0xxxxxxx)
|
955
|
+
s += 1;
|
956
|
+
} else {
|
957
|
+
// we have an invalid 0b1xxxxxxx utf8 code point entry
|
958
|
+
return (void *)s;
|
959
|
+
}
|
960
|
+
}
|
961
|
+
|
962
|
+
return utf8_null;
|
963
|
+
}
|
964
|
+
|
965
|
+
// Decode the utf8 codepoint that starts at str into *out_codepoint and
// return the address just past it. The width (1 to 4 bytes) is taken from
// the lead byte; the trailing bytes are not checked for validity.
void *utf8codepoint(const void *utf8_restrict str,
                    utf8_int32_t *utf8_restrict out_codepoint) {
  const char *bytes = (const char *)str;
  size_t width;

  if (0xf0 == (0xf8 & bytes[0])) {
    // 4 byte codepoint: 3 + 6 + 6 + 6 payload bits
    width = 4;
    *out_codepoint = ((0x07 & bytes[0]) << 18) | ((0x3f & bytes[1]) << 12) |
                     ((0x3f & bytes[2]) << 6) | (0x3f & bytes[3]);
  } else if (0xe0 == (0xf0 & bytes[0])) {
    // 3 byte codepoint: 4 + 6 + 6 payload bits
    width = 3;
    *out_codepoint = ((0x0f & bytes[0]) << 12) | ((0x3f & bytes[1]) << 6) |
                     (0x3f & bytes[2]);
  } else if (0xc0 == (0xe0 & bytes[0])) {
    // 2 byte codepoint: 5 + 6 payload bits
    width = 2;
    *out_codepoint = ((0x1f & bytes[0]) << 6) | (0x3f & bytes[1]);
  } else {
    // anything else is taken as a single byte
    width = 1;
    *out_codepoint = bytes[0];
  }

  return (void *)(bytes + width);
}
|
991
|
+
|
992
|
+
// Number of bytes needed to encode the codepoint chr as utf8: 1 for values
// that fit in 7 bits, 2 for 11 bits, 3 for 16 bits and 4 for everything
// else.
size_t utf8codepointsize(utf8_int32_t chr) {
  if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
    return 1;
  }

  if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
    return 2;
  }

  if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
    return 3;
  }

  return 4;
}
|
1003
|
+
|
1004
|
+
// Encode the codepoint chr as utf8 at str, where n is the number of bytes
// available. Returns the address just past the written bytes, or null
// (writing nothing) if the encoding does not fit in n bytes. No terminator
// is written.
void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n) {
  char *out = (char *)str;
  size_t width;

  // work out how many bytes the encoding takes
  if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
    width = 1;
  } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
    width = 2;
  } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
    width = 3;
  } else {
    width = 4;
  }

  // not enough room left in the buffer
  if (n < width) {
    return utf8_null;
  }

  switch (width) {
  case 1:
    // 1-byte/7-bit ascii (0b0xxxxxxx)
    out[0] = (char)chr;
    break;
  case 2:
    // 2-byte/11-bit codepoint (0b110xxxxx 0b10xxxxxx)
    out[0] = 0xc0 | (char)(chr >> 6);
    out[1] = 0x80 | (char)(chr & 0x3f);
    break;
  case 3:
    // 3-byte/16-bit codepoint (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
    out[0] = 0xe0 | (char)(chr >> 12);
    out[1] = 0x80 | (char)((chr >> 6) & 0x3f);
    out[2] = 0x80 | (char)(chr & 0x3f);
    break;
  default:
    // 4-byte/21-bit codepoint
    // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
    out[0] = 0xf0 | (char)(chr >> 18);
    out[1] = 0x80 | (char)((chr >> 12) & 0x3f);
    out[2] = 0x80 | (char)((chr >> 6) & 0x3f);
    out[3] = 0x80 | (char)(chr & 0x3f);
    break;
  }

  return out + width;
}
|
1049
|
+
|
1050
|
+
// Returns 1 if chr is lowercase, 0 otherwise. A codepoint counts as
// lowercase when utf8uprcodepoint maps it to a different codepoint.
int utf8islower(utf8_int32_t chr) {
  const utf8_int32_t upper = utf8uprcodepoint(chr);

  return (upper == chr) ? 0 : 1;
}
|
1051
|
+
|
1052
|
+
// Returns 1 if chr is uppercase, 0 otherwise. A codepoint counts as
// uppercase when utf8lwrcodepoint maps it to a different codepoint.
int utf8isupper(utf8_int32_t chr) {
  const utf8_int32_t lower = utf8lwrcodepoint(chr);

  return (lower == chr) ? 0 : 1;
}
|
1053
|
+
|
1054
|
+
// Convert the utf8 string str to lower case in place. Each codepoint is
// decoded, mapped with utf8lwrcodepoint and, when the mapping changed it,
// re-encoded at the position of the original codepoint.
void utf8lwr(void *utf8_restrict str) {
  void *here = (char *)str;
  void *after;
  utf8_int32_t cp;

  for (after = utf8codepoint(here, &cp); 0 != cp;
       after = utf8codepoint(here, &cp)) {
    const utf8_int32_t lowered = utf8lwrcodepoint(cp);
    const size_t width = utf8codepointsize(lowered);

    // only rewrite codepoints that actually have a lower case form
    if (lowered != cp) {
      utf8catcodepoint(here, lowered, width);
    }

    // continue from the position that followed the original codepoint
    here = after;
  }
}
|
1073
|
+
|
1074
|
+
// Convert the utf8 string str to upper case in place. Each codepoint is
// decoded, mapped with utf8uprcodepoint and, when the mapping changed it,
// re-encoded at the position of the original codepoint.
void utf8upr(void *utf8_restrict str) {
  void *here = (char *)str;
  void *after;
  utf8_int32_t cp;

  for (after = utf8codepoint(here, &cp); 0 != cp;
       after = utf8codepoint(here, &cp)) {
    const utf8_int32_t raised = utf8uprcodepoint(cp);
    const size_t width = utf8codepointsize(raised);

    // only rewrite codepoints that actually have an upper case form
    if (raised != cp) {
      utf8catcodepoint(here, raised, width);
    }

    // continue from the position that followed the original codepoint
    here = after;
  }
}
|
1093
|
+
|
1094
|
+
// Maps cp to its lowercase counterpart according to the tables below.
// Code points that match none of the ranges or individual cases are
// returned unchanged.
utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
  // Ranges where the lowercase form sits 32 code points above the
  // uppercase form (the first range is ASCII 'A'..'Z').
  if ((cp >= 0x0041 && cp <= 0x005a) ||
      (cp >= 0x00c0 && cp <= 0x00d6) ||
      (cp >= 0x00d8 && cp <= 0x00de) ||
      (cp >= 0x0391 && cp <= 0x03a1) ||
      (cp >= 0x03a3 && cp <= 0x03ab)) {
    return cp + 32;
  }

  // Ranges where an even code point maps to the odd one that follows it;
  // odd code points in these ranges come back unchanged.
  if ((cp >= 0x0100 && cp <= 0x012f) ||
      (cp >= 0x0132 && cp <= 0x0137) ||
      (cp >= 0x014a && cp <= 0x0177) ||
      (cp >= 0x0182 && cp <= 0x0185) ||
      (cp >= 0x01a0 && cp <= 0x01a5) ||
      (cp >= 0x01de && cp <= 0x01ef) ||
      (cp >= 0x01f8 && cp <= 0x021f) ||
      (cp >= 0x0222 && cp <= 0x0233) ||
      (cp >= 0x0246 && cp <= 0x024f) ||
      (cp >= 0x03d8 && cp <= 0x03ef)) {
    return cp | 0x1;
  }

  // Ranges where an odd code point maps to the even one that follows it;
  // even code points in these ranges come back unchanged.
  if ((cp >= 0x0139 && cp <= 0x0148) ||
      (cp >= 0x0179 && cp <= 0x017e) ||
      (cp >= 0x01af && cp <= 0x01b0) ||
      (cp >= 0x01b3 && cp <= 0x01b6) ||
      (cp >= 0x01cd && cp <= 0x01dc)) {
    return (cp + 1) & ~0x1;
  }

  // Individual mappings that follow none of the patterns above.
  switch (cp) {
  case 0x0178: return 0x00ff;
  case 0x0243: return 0x0180;
  case 0x018e: return 0x01dd;
  case 0x023d: return 0x019a;
  case 0x0220: return 0x019e;
  case 0x01b7: return 0x0292;
  case 0x01c4: return 0x01c6;
  case 0x01c7: return 0x01c9;
  case 0x01ca: return 0x01cc;
  case 0x01f1: return 0x01f3;
  case 0x01f7: return 0x01bf;
  case 0x0187: return 0x0188;
  case 0x018b: return 0x018c;
  case 0x0191: return 0x0192;
  case 0x0198: return 0x0199;
  case 0x01a7: return 0x01a8;
  case 0x01ac: return 0x01ad;
  // Never reached: 0x01af is already handled by the 0x01af..0x01b0 range.
  case 0x01af: return 0x01b0;
  case 0x01b8: return 0x01b9;
  case 0x01bc: return 0x01bd;
  case 0x01f4: return 0x01f5;
  case 0x023b: return 0x023c;
  case 0x0241: return 0x0242;
  case 0x03fd: return 0x037b;
  case 0x03fe: return 0x037c;
  case 0x03ff: return 0x037d;
  case 0x037f: return 0x03f3;
  case 0x0386: return 0x03ac;
  case 0x0388: return 0x03ad;
  case 0x0389: return 0x03ae;
  case 0x038a: return 0x03af;
  case 0x038c: return 0x03cc;
  case 0x038e: return 0x03cd;
  case 0x038f: return 0x03ce;
  case 0x0370: return 0x0371;
  case 0x0372: return 0x0373;
  case 0x0376: return 0x0377;
  // NOTE(review): UnicodeData.txt appears to list U+03B8 as the simple
  // lowercase of U+03F4 - confirm that mapping to U+03D1 is intended.
  case 0x03f4: return 0x03d1;
  case 0x03cf: return 0x03d7;
  case 0x03f9: return 0x03f2;
  case 0x03f7: return 0x03f8;
  case 0x03fa: return 0x03fb;
  default: break;
  }

  return cp;
}
|
1169
|
+
|
1170
|
+
// Maps cp to its uppercase counterpart according to the tables below.
// Code points that match none of the ranges or individual cases are
// returned unchanged.
utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
  // Ranges where the uppercase form sits 32 code points below the
  // lowercase form (the first range is ASCII 'a'..'z').
  if ((cp >= 0x0061 && cp <= 0x007a) ||
      (cp >= 0x00e0 && cp <= 0x00f6) ||
      (cp >= 0x00f8 && cp <= 0x00fe) ||
      (cp >= 0x03b1 && cp <= 0x03c1) ||
      (cp >= 0x03c3 && cp <= 0x03cb)) {
    return cp - 32;
  }

  // Ranges where an odd code point maps to the even one just before it;
  // even code points in these ranges come back unchanged.
  if ((cp >= 0x0100 && cp <= 0x012f) ||
      (cp >= 0x0132 && cp <= 0x0137) ||
      (cp >= 0x014a && cp <= 0x0177) ||
      (cp >= 0x0182 && cp <= 0x0185) ||
      (cp >= 0x01a0 && cp <= 0x01a5) ||
      (cp >= 0x01de && cp <= 0x01ef) ||
      (cp >= 0x01f8 && cp <= 0x021f) ||
      (cp >= 0x0222 && cp <= 0x0233) ||
      (cp >= 0x0246 && cp <= 0x024f) ||
      (cp >= 0x03d8 && cp <= 0x03ef)) {
    return cp & ~0x1;
  }

  // Ranges where an even code point maps to the odd one just before it;
  // odd code points in these ranges come back unchanged.
  if ((cp >= 0x0139 && cp <= 0x0148) ||
      (cp >= 0x0179 && cp <= 0x017e) ||
      (cp >= 0x01af && cp <= 0x01b0) ||
      (cp >= 0x01b3 && cp <= 0x01b6) ||
      (cp >= 0x01cd && cp <= 0x01dc)) {
    return (cp - 1) | 0x1;
  }

  // Individual mappings that follow none of the patterns above.
  switch (cp) {
  case 0x00ff: return 0x0178;
  case 0x0180: return 0x0243;
  case 0x01dd: return 0x018e;
  case 0x019a: return 0x023d;
  case 0x019e: return 0x0220;
  case 0x0292: return 0x01b7;
  case 0x01c6: return 0x01c4;
  case 0x01c9: return 0x01c7;
  case 0x01cc: return 0x01ca;
  case 0x01f3: return 0x01f1;
  case 0x01bf: return 0x01f7;
  case 0x0188: return 0x0187;
  case 0x018c: return 0x018b;
  case 0x0192: return 0x0191;
  case 0x0199: return 0x0198;
  case 0x01a8: return 0x01a7;
  case 0x01ad: return 0x01ac;
  // Never reached: 0x01b0 is already handled by the 0x01af..0x01b0 range.
  case 0x01b0: return 0x01af;
  case 0x01b9: return 0x01b8;
  case 0x01bd: return 0x01bc;
  case 0x01f5: return 0x01f4;
  case 0x023c: return 0x023b;
  case 0x0242: return 0x0241;
  case 0x037b: return 0x03fd;
  case 0x037c: return 0x03fe;
  case 0x037d: return 0x03ff;
  case 0x03f3: return 0x037f;
  case 0x03ac: return 0x0386;
  case 0x03ad: return 0x0388;
  case 0x03ae: return 0x0389;
  case 0x03af: return 0x038a;
  case 0x03cc: return 0x038c;
  case 0x03cd: return 0x038e;
  case 0x03ce: return 0x038f;
  case 0x0371: return 0x0370;
  case 0x0373: return 0x0372;
  case 0x0377: return 0x0376;
  case 0x03d1: return 0x03f4;
  case 0x03d7: return 0x03cf;
  case 0x03f2: return 0x03f9;
  case 0x03f8: return 0x03f7;
  case 0x03fb: return 0x03fa;
  default: break;
  }

  return cp;
}
|
1245
|
+
|
1246
|
+
#undef utf8_restrict
|
1247
|
+
#undef utf8_null
|
1248
|
+
|
1249
|
+
#ifdef __cplusplus
|
1250
|
+
} // extern "C"
|
1251
|
+
#endif
|
1252
|
+
|
1253
|
+
#if defined(__clang__)
|
1254
|
+
#pragma clang diagnostic pop
|
1255
|
+
#endif
|
1256
|
+
|
1257
|
+
#endif // SHEREDOM_UTF8_H_INCLUDED
|