rugged 1.3.1 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -2
- data/ext/rugged/extconf.rb +6 -3
- data/ext/rugged/rugged.c +16 -0
- data/ext/rugged/rugged.h +4 -0
- data/ext/rugged/rugged_allocator.c +0 -54
- data/ext/rugged/rugged_blame.c +2 -0
- data/ext/rugged/rugged_blob.c +3 -0
- data/ext/rugged/rugged_commit.c +1 -0
- data/ext/rugged/rugged_config.c +9 -2
- data/ext/rugged/rugged_diff.c +1 -0
- data/ext/rugged/rugged_index.c +2 -0
- data/ext/rugged/rugged_patch.c +1 -0
- data/ext/rugged/rugged_rebase.c +1 -0
- data/ext/rugged/rugged_reference.c +1 -0
- data/ext/rugged/rugged_remote.c +28 -10
- data/ext/rugged/rugged_repo.c +7 -9
- data/ext/rugged/rugged_revwalk.c +5 -1
- data/ext/rugged/rugged_settings.c +5 -0
- data/ext/rugged/rugged_submodule.c +1 -0
- data/ext/rugged/rugged_tag.c +1 -0
- data/ext/rugged/rugged_tree.c +4 -0
- data/lib/rugged/index.rb +1 -1
- data/lib/rugged/tree.rb +5 -1
- data/lib/rugged/version.rb +1 -1
- data/vendor/libgit2/AUTHORS +1 -0
- data/vendor/libgit2/CMakeLists.txt +141 -289
- data/vendor/libgit2/COPYING +301 -20
- data/vendor/libgit2/cmake/AddCFlagIfSupported.cmake +21 -21
- data/vendor/libgit2/cmake/AddClarTest.cmake +7 -0
- data/vendor/libgit2/cmake/CheckPrototypeDefinitionSafe.cmake +16 -0
- data/vendor/libgit2/cmake/DefaultCFlags.cmake +154 -0
- data/vendor/libgit2/cmake/EnableWarnings.cmake +13 -13
- data/vendor/libgit2/cmake/ExperimentalFeatures.cmake +23 -0
- data/vendor/libgit2/cmake/FindCoreFoundation.cmake +13 -13
- data/vendor/libgit2/cmake/FindGSSAPI.cmake +171 -287
- data/vendor/libgit2/cmake/FindGSSFramework.cmake +13 -13
- data/vendor/libgit2/cmake/FindHTTP_Parser.cmake +17 -17
- data/vendor/libgit2/cmake/FindIntlIconv.cmake +51 -0
- data/vendor/libgit2/cmake/FindLLHTTP.cmake +39 -0
- data/vendor/libgit2/cmake/FindLibSSH2.cmake +5 -5
- data/vendor/libgit2/cmake/FindPCRE.cmake +12 -13
- data/vendor/libgit2/cmake/FindPCRE2.cmake +12 -12
- data/vendor/libgit2/cmake/FindPkgLibraries.cmake +19 -19
- data/vendor/libgit2/cmake/FindSecurity.cmake +14 -14
- data/vendor/libgit2/cmake/FindStatNsec.cmake +12 -18
- data/vendor/libgit2/cmake/Findfutimens.cmake +8 -8
- data/vendor/libgit2/cmake/FindmbedTLS.cmake +63 -70
- data/vendor/libgit2/cmake/IdeSplitSources.cmake +18 -18
- data/vendor/libgit2/cmake/PkgBuildConfig.cmake +60 -60
- data/vendor/libgit2/cmake/SanitizeBool.cmake +20 -20
- data/vendor/libgit2/cmake/SelectGSSAPI.cmake +37 -37
- data/vendor/libgit2/cmake/SelectHTTPParser.cmake +34 -0
- data/vendor/libgit2/cmake/SelectHTTPSBackend.cmake +129 -101
- data/vendor/libgit2/cmake/SelectHashes.cmake +113 -54
- data/vendor/libgit2/cmake/SelectRegex.cmake +56 -0
- data/vendor/libgit2/cmake/SelectSSH.cmake +46 -0
- data/vendor/libgit2/cmake/SelectXdiff.cmake +9 -0
- data/vendor/libgit2/cmake/SelectZlib.cmake +38 -0
- data/vendor/libgit2/deps/chromium-zlib/CMakeLists.txt +6 -6
- data/vendor/libgit2/deps/llhttp/CMakeLists.txt +8 -0
- data/vendor/libgit2/deps/llhttp/LICENSE-MIT +22 -0
- data/vendor/libgit2/deps/llhttp/api.c +510 -0
- data/vendor/libgit2/deps/llhttp/http.c +170 -0
- data/vendor/libgit2/deps/llhttp/llhttp.c +10168 -0
- data/vendor/libgit2/deps/llhttp/llhttp.h +897 -0
- data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +33 -31
- data/vendor/libgit2/deps/ntlmclient/crypt_builtin_md4.c +311 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +2 -1
- data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +0 -20
- data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +7 -5
- data/vendor/libgit2/deps/ntlmclient/ntlm.c +25 -25
- data/vendor/libgit2/deps/ntlmclient/ntlm.h +4 -4
- data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +2 -2
- data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +5 -4
- data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +2 -1
- data/vendor/libgit2/deps/ntlmclient/utf8.h +1176 -721
- data/vendor/libgit2/deps/ntlmclient/util.h +11 -0
- data/vendor/libgit2/deps/pcre/CMakeLists.txt +89 -88
- data/vendor/libgit2/deps/pcre/LICENCE +5 -5
- data/vendor/libgit2/deps/pcre/pcre.h +2 -2
- data/vendor/libgit2/deps/pcre/pcre_compile.c +6 -3
- data/vendor/libgit2/deps/pcre/pcre_exec.c +2 -2
- data/vendor/libgit2/deps/winhttp/CMakeLists.txt +14 -16
- data/vendor/libgit2/deps/xdiff/CMakeLists.txt +28 -0
- data/vendor/libgit2/deps/xdiff/git-xdiff.h +56 -0
- data/vendor/libgit2/{src → deps}/xdiff/xdiff.h +15 -15
- data/vendor/libgit2/{src → deps}/xdiff/xdiffi.c +152 -125
- data/vendor/libgit2/{src → deps}/xdiff/xdiffi.h +2 -4
- data/vendor/libgit2/{src → deps}/xdiff/xemit.c +26 -10
- data/vendor/libgit2/{src → deps}/xdiff/xhistogram.c +92 -94
- data/vendor/libgit2/{src → deps}/xdiff/xinclude.h +1 -12
- data/vendor/libgit2/{src → deps}/xdiff/xmacros.h +18 -1
- data/vendor/libgit2/{src → deps}/xdiff/xmerge.c +126 -137
- data/vendor/libgit2/{src → deps}/xdiff/xpatience.c +26 -46
- data/vendor/libgit2/{src → deps}/xdiff/xprepare.c +24 -46
- data/vendor/libgit2/{src → deps}/xdiff/xutils.c +36 -8
- data/vendor/libgit2/{src → deps}/xdiff/xutils.h +2 -1
- data/vendor/libgit2/deps/zlib/CMakeLists.txt +6 -1
- data/vendor/libgit2/deps/zlib/LICENSE +22 -0
- data/vendor/libgit2/deps/zlib/adler32.c +7 -22
- data/vendor/libgit2/deps/zlib/crc32.c +931 -317
- data/vendor/libgit2/deps/zlib/crc32.h +9441 -436
- data/vendor/libgit2/deps/zlib/deflate.c +428 -453
- data/vendor/libgit2/deps/zlib/deflate.h +51 -23
- data/vendor/libgit2/deps/zlib/gzguts.h +15 -19
- data/vendor/libgit2/deps/zlib/infback.c +19 -31
- data/vendor/libgit2/deps/zlib/inffast.c +15 -18
- data/vendor/libgit2/deps/zlib/inffast.h +1 -1
- data/vendor/libgit2/deps/zlib/inflate.c +75 -110
- data/vendor/libgit2/deps/zlib/inflate.h +3 -2
- data/vendor/libgit2/deps/zlib/inftrees.c +6 -11
- data/vendor/libgit2/deps/zlib/inftrees.h +6 -6
- data/vendor/libgit2/deps/zlib/trees.c +294 -380
- data/vendor/libgit2/deps/zlib/zconf.h +23 -14
- data/vendor/libgit2/deps/zlib/zlib.h +310 -284
- data/vendor/libgit2/deps/zlib/zutil.c +20 -46
- data/vendor/libgit2/deps/zlib/zutil.h +24 -41
- data/vendor/libgit2/include/git2/annotated_commit.h +12 -5
- data/vendor/libgit2/include/git2/apply.h +43 -8
- data/vendor/libgit2/include/git2/attr.h +28 -6
- data/vendor/libgit2/include/git2/blame.h +137 -29
- data/vendor/libgit2/include/git2/blob.h +85 -29
- data/vendor/libgit2/include/git2/branch.h +25 -16
- data/vendor/libgit2/include/git2/buffer.h +24 -82
- data/vendor/libgit2/include/git2/cert.h +4 -3
- data/vendor/libgit2/include/git2/checkout.h +88 -34
- data/vendor/libgit2/include/git2/cherrypick.h +10 -3
- data/vendor/libgit2/include/git2/clone.h +28 -12
- data/vendor/libgit2/include/git2/commit.h +134 -3
- data/vendor/libgit2/include/git2/common.h +172 -59
- data/vendor/libgit2/include/git2/config.h +118 -32
- data/vendor/libgit2/include/git2/credential.h +32 -3
- data/vendor/libgit2/include/git2/credential_helpers.h +2 -0
- data/vendor/libgit2/include/git2/deprecated.h +141 -3
- data/vendor/libgit2/include/git2/describe.h +20 -3
- data/vendor/libgit2/include/git2/diff.h +95 -19
- data/vendor/libgit2/include/git2/email.h +10 -30
- data/vendor/libgit2/include/git2/errors.h +51 -61
- data/vendor/libgit2/include/git2/experimental.h +20 -0
- data/vendor/libgit2/include/git2/filter.h +21 -9
- data/vendor/libgit2/include/git2/global.h +8 -1
- data/vendor/libgit2/include/git2/graph.h +4 -2
- data/vendor/libgit2/include/git2/ignore.h +11 -1
- data/vendor/libgit2/include/git2/index.h +111 -11
- data/vendor/libgit2/include/git2/indexer.h +67 -2
- data/vendor/libgit2/include/git2/mailmap.h +7 -1
- data/vendor/libgit2/include/git2/merge.h +70 -5
- data/vendor/libgit2/include/git2/message.h +4 -2
- data/vendor/libgit2/include/git2/net.h +3 -1
- data/vendor/libgit2/include/git2/notes.h +9 -6
- data/vendor/libgit2/include/git2/object.h +57 -7
- data/vendor/libgit2/include/git2/odb.h +156 -33
- data/vendor/libgit2/include/git2/odb_backend.h +132 -16
- data/vendor/libgit2/include/git2/oid.h +116 -17
- data/vendor/libgit2/include/git2/oidarray.h +7 -1
- data/vendor/libgit2/include/git2/pack.h +37 -9
- data/vendor/libgit2/include/git2/patch.h +10 -3
- data/vendor/libgit2/include/git2/pathspec.h +10 -1
- data/vendor/libgit2/include/git2/proxy.h +11 -1
- data/vendor/libgit2/include/git2/rebase.h +18 -7
- data/vendor/libgit2/include/git2/refdb.h +5 -2
- data/vendor/libgit2/include/git2/reflog.h +4 -3
- data/vendor/libgit2/include/git2/refs.h +11 -8
- data/vendor/libgit2/include/git2/refspec.h +14 -4
- data/vendor/libgit2/include/git2/remote.h +295 -54
- data/vendor/libgit2/include/git2/repository.h +95 -25
- data/vendor/libgit2/include/git2/reset.h +18 -5
- data/vendor/libgit2/include/git2/revert.h +9 -4
- data/vendor/libgit2/include/git2/revparse.h +4 -4
- data/vendor/libgit2/include/git2/revwalk.h +7 -3
- data/vendor/libgit2/include/git2/signature.h +47 -2
- data/vendor/libgit2/include/git2/stash.h +78 -10
- data/vendor/libgit2/include/git2/status.h +24 -11
- data/vendor/libgit2/include/git2/stdint.h +87 -85
- data/vendor/libgit2/include/git2/strarray.h +2 -16
- data/vendor/libgit2/include/git2/submodule.h +27 -11
- data/vendor/libgit2/include/git2/sys/alloc.h +12 -34
- data/vendor/libgit2/include/git2/sys/commit.h +77 -3
- data/vendor/libgit2/include/git2/sys/commit_graph.h +110 -59
- data/vendor/libgit2/include/git2/sys/config.h +80 -4
- data/vendor/libgit2/include/git2/sys/credential.h +4 -3
- data/vendor/libgit2/include/git2/sys/diff.h +21 -1
- data/vendor/libgit2/include/git2/sys/email.h +7 -0
- data/vendor/libgit2/include/git2/sys/errors.h +76 -0
- data/vendor/libgit2/include/git2/sys/filter.h +66 -3
- data/vendor/libgit2/include/git2/sys/hashsig.h +11 -0
- data/vendor/libgit2/include/git2/sys/index.h +3 -2
- data/vendor/libgit2/include/git2/sys/mempack.h +32 -2
- data/vendor/libgit2/include/git2/sys/merge.h +55 -7
- data/vendor/libgit2/include/git2/sys/midx.h +47 -4
- data/vendor/libgit2/include/git2/sys/odb_backend.h +10 -9
- data/vendor/libgit2/include/git2/sys/openssl.h +8 -1
- data/vendor/libgit2/include/git2/sys/path.h +12 -1
- data/vendor/libgit2/include/git2/sys/refdb_backend.h +40 -36
- data/vendor/libgit2/include/git2/sys/refs.h +3 -2
- data/vendor/libgit2/include/git2/sys/remote.h +53 -0
- data/vendor/libgit2/include/git2/sys/repository.h +63 -3
- data/vendor/libgit2/include/git2/sys/stream.h +26 -3
- data/vendor/libgit2/include/git2/sys/transport.h +87 -41
- data/vendor/libgit2/include/git2/tag.h +4 -1
- data/vendor/libgit2/include/git2/trace.h +9 -3
- data/vendor/libgit2/include/git2/transaction.h +3 -2
- data/vendor/libgit2/include/git2/transport.h +11 -3
- data/vendor/libgit2/include/git2/tree.h +20 -8
- data/vendor/libgit2/include/git2/types.h +26 -10
- data/vendor/libgit2/include/git2/version.h +63 -6
- data/vendor/libgit2/include/git2/worktree.h +30 -8
- data/vendor/libgit2/include/git2.h +1 -0
- data/vendor/libgit2/src/CMakeLists.txt +203 -420
- data/vendor/libgit2/src/README.md +12 -0
- data/vendor/libgit2/src/cli/CMakeLists.txt +56 -0
- data/vendor/libgit2/src/cli/README.md +26 -0
- data/vendor/libgit2/src/{branch.h → cli/cmd.c} +10 -8
- data/vendor/libgit2/src/cli/cmd.h +37 -0
- data/vendor/libgit2/src/cli/cmd_blame.c +287 -0
- data/vendor/libgit2/src/cli/cmd_cat_file.c +202 -0
- data/vendor/libgit2/src/cli/cmd_clone.c +190 -0
- data/vendor/libgit2/src/cli/cmd_config.c +241 -0
- data/vendor/libgit2/src/cli/cmd_hash_object.c +152 -0
- data/vendor/libgit2/src/cli/cmd_help.c +85 -0
- data/vendor/libgit2/src/cli/cmd_index_pack.c +114 -0
- data/vendor/libgit2/src/cli/cmd_init.c +102 -0
- data/vendor/libgit2/src/cli/common.c +168 -0
- data/vendor/libgit2/src/cli/common.h +63 -0
- data/vendor/libgit2/src/cli/error.h +51 -0
- data/vendor/libgit2/src/cli/main.c +134 -0
- data/vendor/libgit2/src/cli/opt.c +695 -0
- data/vendor/libgit2/src/cli/opt.h +367 -0
- data/vendor/libgit2/src/cli/opt_usage.c +263 -0
- data/vendor/libgit2/src/cli/opt_usage.h +40 -0
- data/vendor/libgit2/src/cli/progress.c +395 -0
- data/vendor/libgit2/src/cli/progress.h +129 -0
- data/vendor/libgit2/src/cli/sighandler.h +20 -0
- data/vendor/libgit2/src/cli/unix/sighandler.c +37 -0
- data/vendor/libgit2/src/cli/win32/precompiled.h +3 -0
- data/vendor/libgit2/src/cli/win32/sighandler.c +37 -0
- data/vendor/libgit2/src/libgit2/CMakeLists.txt +140 -0
- data/vendor/libgit2/src/{annotated_commit.c → libgit2/annotated_commit.c} +2 -2
- data/vendor/libgit2/src/{annotated_commit.h → libgit2/annotated_commit.h} +2 -2
- data/vendor/libgit2/src/{apply.c → libgit2/apply.c} +32 -34
- data/vendor/libgit2/src/{apply.h → libgit2/apply.h} +2 -2
- data/vendor/libgit2/src/{attr.c → libgit2/attr.c} +48 -31
- data/vendor/libgit2/src/{attr_file.c → libgit2/attr_file.c} +25 -20
- data/vendor/libgit2/src/{attr_file.h → libgit2/attr_file.h} +6 -4
- data/vendor/libgit2/src/{attrcache.c → libgit2/attrcache.c} +87 -46
- data/vendor/libgit2/src/{attrcache.h → libgit2/attrcache.h} +5 -9
- data/vendor/libgit2/src/{blame.c → libgit2/blame.c} +152 -57
- data/vendor/libgit2/src/{blame.h → libgit2/blame.h} +1 -0
- data/vendor/libgit2/src/{blame_git.c → libgit2/blame_git.c} +1 -2
- data/vendor/libgit2/src/{blob.c → libgit2/blob.c} +38 -29
- data/vendor/libgit2/src/{blob.h → libgit2/blob.h} +3 -3
- data/vendor/libgit2/src/{branch.c → libgit2/branch.c} +164 -118
- data/vendor/libgit2/src/libgit2/branch.h +31 -0
- data/vendor/libgit2/src/libgit2/buf.c +126 -0
- data/vendor/libgit2/src/libgit2/buf.h +50 -0
- data/vendor/libgit2/src/{cache.c → libgit2/cache.c} +22 -17
- data/vendor/libgit2/src/{cache.h → libgit2/cache.h} +7 -9
- data/vendor/libgit2/src/{checkout.c → libgit2/checkout.c} +107 -91
- data/vendor/libgit2/src/{checkout.h → libgit2/checkout.h} +0 -2
- data/vendor/libgit2/src/{cherrypick.c → libgit2/cherrypick.c} +14 -15
- data/vendor/libgit2/src/{clone.c → libgit2/clone.c} +254 -203
- data/vendor/libgit2/src/{clone.h → libgit2/clone.h} +4 -1
- data/vendor/libgit2/src/{commit.c → libgit2/commit.c} +296 -77
- data/vendor/libgit2/src/libgit2/commit.h +87 -0
- data/vendor/libgit2/src/{commit_graph.c → libgit2/commit_graph.c} +246 -135
- data/vendor/libgit2/src/{commit_graph.h → libgit2/commit_graph.h} +33 -8
- data/vendor/libgit2/src/{commit_list.c → libgit2/commit_list.c} +17 -7
- data/vendor/libgit2/src/{commit_list.h → libgit2/commit_list.h} +1 -0
- data/vendor/libgit2/src/libgit2/common.h +55 -0
- data/vendor/libgit2/src/{config.c → libgit2/config.c} +490 -360
- data/vendor/libgit2/src/libgit2/config.cmake.in +3 -0
- data/vendor/libgit2/src/{config.h → libgit2/config.h} +24 -6
- data/vendor/libgit2/src/{config_backend.h → libgit2/config_backend.h} +8 -10
- data/vendor/libgit2/src/{config_cache.c → libgit2/config_cache.c} +4 -5
- data/vendor/libgit2/src/{config_file.c → libgit2/config_file.c} +212 -183
- data/vendor/libgit2/src/libgit2/config_list.c +285 -0
- data/vendor/libgit2/src/libgit2/config_list.h +32 -0
- data/vendor/libgit2/src/libgit2/config_mem.c +374 -0
- data/vendor/libgit2/src/{config_parse.c → libgit2/config_parse.c} +37 -32
- data/vendor/libgit2/src/{config_snapshot.c → libgit2/config_snapshot.c} +24 -31
- data/vendor/libgit2/src/{crlf.c → libgit2/crlf.c} +24 -21
- data/vendor/libgit2/src/{describe.c → libgit2/describe.c} +62 -51
- data/vendor/libgit2/src/{diff.c → libgit2/diff.c} +44 -14
- data/vendor/libgit2/src/{diff.h → libgit2/diff.h} +8 -10
- data/vendor/libgit2/src/{diff_driver.c → libgit2/diff_driver.c} +46 -55
- data/vendor/libgit2/src/{diff_driver.h → libgit2/diff_driver.h} +5 -5
- data/vendor/libgit2/src/{diff_file.c → libgit2/diff_file.c} +45 -27
- data/vendor/libgit2/src/{diff_generate.c → libgit2/diff_generate.c} +70 -20
- data/vendor/libgit2/src/{diff_generate.h → libgit2/diff_generate.h} +5 -3
- data/vendor/libgit2/src/{diff_parse.c → libgit2/diff_parse.c} +22 -6
- data/vendor/libgit2/src/{diff_print.c → libgit2/diff_print.c} +192 -105
- data/vendor/libgit2/src/{diff_stats.c → libgit2/diff_stats.c} +40 -29
- data/vendor/libgit2/src/libgit2/diff_stats.h +18 -0
- data/vendor/libgit2/src/{diff_tform.c → libgit2/diff_tform.c} +49 -16
- data/vendor/libgit2/src/{diff_xdiff.c → libgit2/diff_xdiff.c} +4 -8
- data/vendor/libgit2/src/{diff_xdiff.h → libgit2/diff_xdiff.h} +1 -1
- data/vendor/libgit2/src/{email.c → libgit2/email.c} +58 -40
- data/vendor/libgit2/src/{email.h → libgit2/email.h} +1 -1
- data/vendor/libgit2/src/{transports/ssh.h → libgit2/experimental.h.in} +3 -4
- data/vendor/libgit2/src/{fetch.c → libgit2/fetch.c} +105 -30
- data/vendor/libgit2/src/{fetch.h → libgit2/fetch.h} +1 -3
- data/vendor/libgit2/src/{fetchhead.c → libgit2/fetchhead.c} +30 -28
- data/vendor/libgit2/src/{filter.c → libgit2/filter.c} +132 -58
- data/vendor/libgit2/src/{filter.h → libgit2/filter.h} +26 -5
- data/vendor/libgit2/src/{win32 → libgit2}/git2.rc +3 -3
- data/vendor/libgit2/src/libgit2/grafts.c +270 -0
- data/vendor/libgit2/src/libgit2/grafts.h +35 -0
- data/vendor/libgit2/src/{graph.c → libgit2/graph.c} +1 -1
- data/vendor/libgit2/src/libgit2/hashmap_oid.h +30 -0
- data/vendor/libgit2/src/{ident.c → libgit2/ident.c} +20 -20
- data/vendor/libgit2/src/{ignore.c → libgit2/ignore.c} +44 -39
- data/vendor/libgit2/src/{ignore.h → libgit2/ignore.h} +2 -2
- data/vendor/libgit2/src/{index.c → libgit2/index.c} +460 -276
- data/vendor/libgit2/src/{index.h → libgit2/index.h} +21 -5
- data/vendor/libgit2/src/libgit2/index_map.c +95 -0
- data/vendor/libgit2/src/libgit2/index_map.h +28 -0
- data/vendor/libgit2/src/{indexer.c → libgit2/indexer.c} +208 -124
- data/vendor/libgit2/src/{iterator.c → libgit2/iterator.c} +102 -71
- data/vendor/libgit2/src/{iterator.h → libgit2/iterator.h} +8 -5
- data/vendor/libgit2/src/libgit2/libgit2.c +268 -0
- data/vendor/libgit2/src/{mailmap.c → libgit2/mailmap.c} +39 -37
- data/vendor/libgit2/src/{merge.c → libgit2/merge.c} +83 -73
- data/vendor/libgit2/src/{merge.h → libgit2/merge.h} +1 -14
- data/vendor/libgit2/src/{merge_driver.c → libgit2/merge_driver.c} +4 -4
- data/vendor/libgit2/src/{merge_file.c → libgit2/merge_file.c} +13 -5
- data/vendor/libgit2/src/{message.c → libgit2/message.c} +21 -10
- data/vendor/libgit2/src/{midx.c → libgit2/midx.c} +174 -112
- data/vendor/libgit2/src/{midx.h → libgit2/midx.h} +17 -6
- data/vendor/libgit2/src/{mwindow.c → libgit2/mwindow.c} +53 -57
- data/vendor/libgit2/src/{mwindow.h → libgit2/mwindow.h} +9 -2
- data/vendor/libgit2/src/{notes.c → libgit2/notes.c} +29 -37
- data/vendor/libgit2/src/{object.c → libgit2/object.c} +166 -35
- data/vendor/libgit2/src/{object.h → libgit2/object.h} +17 -2
- data/vendor/libgit2/src/{odb.c → libgit2/odb.c} +261 -88
- data/vendor/libgit2/src/{odb.h → libgit2/odb.h} +44 -5
- data/vendor/libgit2/src/{odb_loose.c → libgit2/odb_loose.c} +192 -134
- data/vendor/libgit2/src/{odb_mempack.c → libgit2/odb_mempack.c} +67 -22
- data/vendor/libgit2/src/{odb_pack.c → libgit2/odb_pack.c} +162 -89
- data/vendor/libgit2/src/{oid.c → libgit2/oid.c} +171 -92
- data/vendor/libgit2/src/libgit2/oid.h +284 -0
- data/vendor/libgit2/src/libgit2/oidarray.c +89 -0
- data/vendor/libgit2/src/{oidarray.h → libgit2/oidarray.h} +5 -1
- data/vendor/libgit2/src/{pack-objects.c → libgit2/pack-objects.c} +126 -66
- data/vendor/libgit2/src/{pack-objects.h → libgit2/pack-objects.h} +28 -12
- data/vendor/libgit2/src/{pack.c → libgit2/pack.c} +146 -111
- data/vendor/libgit2/src/{pack.h → libgit2/pack.h} +45 -25
- data/vendor/libgit2/src/{parse.c → libgit2/parse.c} +8 -4
- data/vendor/libgit2/src/{parse.h → libgit2/parse.h} +1 -1
- data/vendor/libgit2/src/{patch.c → libgit2/patch.c} +3 -3
- data/vendor/libgit2/src/{patch.h → libgit2/patch.h} +8 -1
- data/vendor/libgit2/src/{patch_generate.c → libgit2/patch_generate.c} +51 -16
- data/vendor/libgit2/src/{patch_generate.h → libgit2/patch_generate.h} +5 -5
- data/vendor/libgit2/src/{patch_parse.c → libgit2/patch_parse.c} +42 -34
- data/vendor/libgit2/src/libgit2/path.c +375 -0
- data/vendor/libgit2/src/libgit2/path.h +68 -0
- data/vendor/libgit2/src/{pathspec.c → libgit2/pathspec.c} +7 -7
- data/vendor/libgit2/src/{pathspec.h → libgit2/pathspec.h} +2 -2
- data/vendor/libgit2/src/{proxy.c → libgit2/proxy.c} +4 -1
- data/vendor/libgit2/src/{proxy.h → libgit2/proxy.h} +1 -1
- data/vendor/libgit2/src/{push.c → libgit2/push.c} +116 -60
- data/vendor/libgit2/src/{push.h → libgit2/push.h} +5 -16
- data/vendor/libgit2/src/{reader.c → libgit2/reader.c} +9 -9
- data/vendor/libgit2/src/{reader.h → libgit2/reader.h} +2 -2
- data/vendor/libgit2/src/{rebase.c → libgit2/rebase.c} +147 -147
- data/vendor/libgit2/src/{refdb_fs.c → libgit2/refdb_fs.c} +639 -254
- data/vendor/libgit2/src/{reflog.c → libgit2/reflog.c} +8 -7
- data/vendor/libgit2/src/{reflog.h → libgit2/reflog.h} +3 -2
- data/vendor/libgit2/src/{refs.c → libgit2/refs.c} +67 -39
- data/vendor/libgit2/src/{refs.h → libgit2/refs.h} +8 -3
- data/vendor/libgit2/src/{refspec.c → libgit2/refspec.c} +60 -38
- data/vendor/libgit2/src/{refspec.h → libgit2/refspec.h} +13 -2
- data/vendor/libgit2/src/{remote.c → libgit2/remote.c} +821 -454
- data/vendor/libgit2/src/libgit2/remote.h +101 -0
- data/vendor/libgit2/src/{repository.c → libgit2/repository.c} +1377 -594
- data/vendor/libgit2/src/{repository.h → libgit2/repository.h} +43 -12
- data/vendor/libgit2/src/{reset.c → libgit2/reset.c} +8 -5
- data/vendor/libgit2/src/{revert.c → libgit2/revert.c} +18 -22
- data/vendor/libgit2/src/{revparse.c → libgit2/revparse.c} +76 -44
- data/vendor/libgit2/src/{revwalk.c → libgit2/revwalk.c} +48 -19
- data/vendor/libgit2/src/{revwalk.h → libgit2/revwalk.h} +3 -3
- data/vendor/libgit2/src/{libgit2.c → libgit2/settings.c} +162 -95
- data/vendor/libgit2/src/{settings.h → libgit2/settings.h} +6 -2
- data/vendor/libgit2/src/{signature.c → libgit2/signature.c} +144 -21
- data/vendor/libgit2/src/{signature.h → libgit2/signature.h} +1 -2
- data/vendor/libgit2/src/{stash.c → libgit2/stash.c} +243 -68
- data/vendor/libgit2/src/{status.c → libgit2/status.c} +5 -2
- data/vendor/libgit2/src/{strarray.c → libgit2/strarray.c} +1 -0
- data/vendor/libgit2/src/libgit2/strarray.h +25 -0
- data/vendor/libgit2/src/{streams → libgit2/streams}/mbedtls.c +62 -67
- data/vendor/libgit2/src/{streams → libgit2/streams}/openssl.c +41 -24
- data/vendor/libgit2/src/{streams → libgit2/streams}/openssl.h +2 -0
- data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_dynamic.c +11 -3
- data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_dynamic.h +6 -3
- data/vendor/libgit2/src/libgit2/streams/schannel.c +715 -0
- data/vendor/libgit2/src/libgit2/streams/schannel.h +28 -0
- data/vendor/libgit2/src/libgit2/streams/socket.c +428 -0
- data/vendor/libgit2/src/{streams → libgit2/streams}/socket.h +3 -1
- data/vendor/libgit2/src/{streams → libgit2/streams}/stransport.c +79 -19
- data/vendor/libgit2/src/{streams → libgit2/streams}/tls.c +5 -0
- data/vendor/libgit2/src/{submodule.c → libgit2/submodule.c} +279 -220
- data/vendor/libgit2/src/{submodule.h → libgit2/submodule.h} +10 -11
- data/vendor/libgit2/src/libgit2/sysdir.c +650 -0
- data/vendor/libgit2/src/{sysdir.h → libgit2/sysdir.h} +55 -18
- data/vendor/libgit2/src/{tag.c → libgit2/tag.c} +74 -43
- data/vendor/libgit2/src/{tag.h → libgit2/tag.h} +2 -2
- data/vendor/libgit2/src/{trace.c → libgit2/trace.c} +1 -14
- data/vendor/libgit2/src/{trace.h → libgit2/trace.h} +5 -22
- data/vendor/libgit2/src/{trailer.c → libgit2/trailer.c} +7 -7
- data/vendor/libgit2/src/{transaction.c → libgit2/transaction.c} +27 -21
- data/vendor/libgit2/src/{transaction.h → libgit2/transaction.h} +4 -1
- data/vendor/libgit2/src/{transport.c → libgit2/transport.c} +14 -11
- data/vendor/libgit2/src/{transports → libgit2/transports}/auth.c +7 -9
- data/vendor/libgit2/src/{transports → libgit2/transports}/auth.h +3 -5
- data/vendor/libgit2/src/{transports/auth_negotiate.c → libgit2/transports/auth_gssapi.c} +44 -45
- data/vendor/libgit2/src/{transports → libgit2/transports}/auth_negotiate.h +1 -1
- data/vendor/libgit2/src/{transports → libgit2/transports}/auth_ntlm.h +1 -2
- data/vendor/libgit2/src/{transports/auth_ntlm.c → libgit2/transports/auth_ntlmclient.c} +22 -22
- data/vendor/libgit2/src/libgit2/transports/auth_sspi.c +341 -0
- data/vendor/libgit2/src/{transports → libgit2/transports}/credential.c +1 -1
- data/vendor/libgit2/src/{transports → libgit2/transports}/git.c +16 -19
- data/vendor/libgit2/src/{transports → libgit2/transports}/http.c +49 -24
- data/vendor/libgit2/src/{transports → libgit2/transports}/http.h +0 -11
- data/vendor/libgit2/src/{transports → libgit2/transports}/httpclient.c +188 -134
- data/vendor/libgit2/src/{transports → libgit2/transports}/httpclient.h +10 -0
- data/vendor/libgit2/src/libgit2/transports/httpparser.c +128 -0
- data/vendor/libgit2/src/libgit2/transports/httpparser.h +99 -0
- data/vendor/libgit2/src/{transports → libgit2/transports}/local.c +159 -127
- data/vendor/libgit2/src/{transports → libgit2/transports}/smart.c +142 -165
- data/vendor/libgit2/src/{transports → libgit2/transports}/smart.h +56 -36
- data/vendor/libgit2/src/{transports → libgit2/transports}/smart_pkt.c +307 -74
- data/vendor/libgit2/src/{transports → libgit2/transports}/smart_protocol.c +297 -97
- data/vendor/libgit2/src/libgit2/transports/ssh.c +85 -0
- data/vendor/libgit2/src/libgit2/transports/ssh_exec.c +347 -0
- data/vendor/libgit2/src/libgit2/transports/ssh_exec.h +26 -0
- data/vendor/libgit2/src/{transports/ssh.c → libgit2/transports/ssh_libssh2.c} +414 -268
- data/vendor/libgit2/src/libgit2/transports/ssh_libssh2.h +28 -0
- data/vendor/libgit2/src/{transports → libgit2/transports}/winhttp.c +101 -75
- data/vendor/libgit2/src/{tree-cache.c → libgit2/tree-cache.c} +30 -20
- data/vendor/libgit2/src/{tree-cache.h → libgit2/tree-cache.h} +7 -5
- data/vendor/libgit2/src/{tree.c → libgit2/tree.c} +128 -110
- data/vendor/libgit2/src/{tree.h → libgit2/tree.h} +7 -6
- data/vendor/libgit2/src/{worktree.c → libgit2/worktree.c} +160 -121
- data/vendor/libgit2/src/{worktree.h → libgit2/worktree.h} +1 -1
- data/vendor/libgit2/src/util/CMakeLists.txt +77 -0
- data/vendor/libgit2/src/{alloc.c → util/alloc.c} +69 -7
- data/vendor/libgit2/src/util/alloc.h +65 -0
- data/vendor/libgit2/src/util/allocators/debugalloc.c +73 -0
- data/vendor/libgit2/src/util/allocators/debugalloc.h +17 -0
- data/vendor/libgit2/src/util/allocators/failalloc.c +32 -0
- data/vendor/libgit2/src/util/allocators/failalloc.h +17 -0
- data/vendor/libgit2/src/util/allocators/stdalloc.c +37 -0
- data/vendor/libgit2/src/{allocators → util/allocators}/stdalloc.h +1 -1
- data/vendor/libgit2/src/util/allocators/win32_leakcheck.c +50 -0
- data/vendor/libgit2/src/{allocators → util/allocators}/win32_leakcheck.h +1 -1
- data/vendor/libgit2/src/{array.h → util/array.h} +25 -19
- data/vendor/libgit2/src/{assert_safe.h → util/assert_safe.h} +16 -0
- data/vendor/libgit2/src/{cc-compat.h → util/cc-compat.h} +5 -1
- data/vendor/libgit2/src/util/ctype_compat.h +70 -0
- data/vendor/libgit2/src/{date.c → util/date.c} +35 -33
- data/vendor/libgit2/src/util/date.h +45 -0
- data/vendor/libgit2/src/util/errors.c +401 -0
- data/vendor/libgit2/src/{errors.h → util/errors.h} +22 -19
- data/vendor/libgit2/src/{filebuf.c → util/filebuf.c} +35 -30
- data/vendor/libgit2/src/{filebuf.h → util/filebuf.h} +21 -8
- data/vendor/libgit2/src/{path.c → util/fs_path.c} +591 -615
- data/vendor/libgit2/src/{path.h → util/fs_path.h} +257 -181
- data/vendor/libgit2/src/{futils.c → util/futils.c} +144 -95
- data/vendor/libgit2/src/{futils.h → util/futils.h} +40 -18
- data/vendor/libgit2/src/{features.h.in → util/git2_features.h.in} +33 -2
- data/vendor/libgit2/src/{common.h → util/git2_util.h} +26 -59
- data/vendor/libgit2/src/util/hash/builtin.c +53 -0
- data/vendor/libgit2/src/{hash/sha1/openssl.h → util/hash/builtin.h} +6 -6
- data/vendor/libgit2/src/{hash/sha1 → util/hash}/collisiondetect.c +3 -3
- data/vendor/libgit2/src/{hash/sha1 → util/hash}/collisiondetect.h +3 -3
- data/vendor/libgit2/src/util/hash/common_crypto.c +112 -0
- data/vendor/libgit2/src/{hash/sha1 → util/hash}/common_crypto.h +11 -3
- data/vendor/libgit2/src/util/hash/mbedtls.c +92 -0
- data/vendor/libgit2/src/{hash/sha1 → util/hash}/mbedtls.h +14 -4
- data/vendor/libgit2/src/util/hash/openssl.c +347 -0
- data/vendor/libgit2/src/util/hash/openssl.h +61 -0
- data/vendor/libgit2/src/util/hash/rfc6234/sha.h +243 -0
- data/vendor/libgit2/src/util/hash/rfc6234/sha224-256.c +601 -0
- data/vendor/libgit2/src/util/hash/sha.h +73 -0
- data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/sha1.c +1 -1
- data/vendor/libgit2/src/util/hash/win32.c +549 -0
- data/vendor/libgit2/src/util/hash/win32.h +60 -0
- data/vendor/libgit2/src/util/hash.c +158 -0
- data/vendor/libgit2/src/util/hash.h +61 -0
- data/vendor/libgit2/src/util/hashmap.h +424 -0
- data/vendor/libgit2/src/util/hashmap_str.h +43 -0
- data/vendor/libgit2/src/{integer.h → util/integer.h} +3 -1
- data/vendor/libgit2/src/{map.h → util/map.h} +1 -1
- data/vendor/libgit2/src/util/net.c +1160 -0
- data/vendor/libgit2/src/{net.h → util/net.h} +45 -4
- data/vendor/libgit2/src/{pool.c → util/pool.c} +1 -1
- data/vendor/libgit2/src/{pool.h → util/pool.h} +6 -1
- data/vendor/libgit2/src/{posix.c → util/posix.c} +57 -3
- data/vendor/libgit2/src/{posix.h → util/posix.h} +26 -1
- data/vendor/libgit2/src/{pqueue.h → util/pqueue.h} +3 -3
- data/vendor/libgit2/src/util/process.h +222 -0
- data/vendor/libgit2/src/util/rand.c +230 -0
- data/vendor/libgit2/src/util/rand.h +37 -0
- data/vendor/libgit2/src/{regexp.c → util/regexp.c} +5 -5
- data/vendor/libgit2/src/{regexp.h → util/regexp.h} +1 -1
- data/vendor/libgit2/src/{runtime.c → util/runtime.c} +1 -1
- data/vendor/libgit2/src/{runtime.h → util/runtime.h} +1 -1
- data/vendor/libgit2/src/{sortedcache.c → util/sortedcache.c} +15 -14
- data/vendor/libgit2/src/{sortedcache.h → util/sortedcache.h} +5 -5
- data/vendor/libgit2/src/util/staticstr.h +66 -0
- data/vendor/libgit2/src/{buffer.c → util/str.c} +159 -153
- data/vendor/libgit2/src/util/str.h +357 -0
- data/vendor/libgit2/src/util/strlist.c +108 -0
- data/vendor/libgit2/src/util/strlist.h +36 -0
- data/vendor/libgit2/src/{thread.c → util/thread.c} +1 -1
- data/vendor/libgit2/src/{thread.h → util/thread.h} +23 -22
- data/vendor/libgit2/src/{tsort.c → util/tsort.c} +1 -1
- data/vendor/libgit2/src/{unix → util/unix}/map.c +1 -3
- data/vendor/libgit2/src/{unix → util/unix}/posix.h +1 -6
- data/vendor/libgit2/src/util/unix/process.c +629 -0
- data/vendor/libgit2/src/{unix → util/unix}/realpath.c +24 -8
- data/vendor/libgit2/src/{utf8.c → util/utf8.c} +1 -1
- data/vendor/libgit2/src/{utf8.h → util/utf8.h} +1 -1
- data/vendor/libgit2/src/{util.c → util/util.c} +24 -19
- data/vendor/libgit2/src/{util.h → util/util.h} +30 -81
- data/vendor/libgit2/src/{varint.h → util/varint.h} +1 -1
- data/vendor/libgit2/src/{vector.c → util/vector.c} +3 -3
- data/vendor/libgit2/src/{vector.h → util/vector.h} +4 -4
- data/vendor/libgit2/src/{wildmatch.h → util/wildmatch.h} +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/dir.h +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/error.c +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/error.h +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/map.c +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/path_w32.c +148 -17
- data/vendor/libgit2/src/{win32 → util/win32}/path_w32.h +3 -1
- data/vendor/libgit2/src/{win32 → util/win32}/posix.h +1 -2
- data/vendor/libgit2/src/{win32 → util/win32}/posix_w32.c +42 -35
- data/vendor/libgit2/src/util/win32/precompiled.c +1 -0
- data/vendor/libgit2/src/{win32 → util/win32}/precompiled.h +1 -1
- data/vendor/libgit2/src/util/win32/process.c +506 -0
- data/vendor/libgit2/src/{win32 → util/win32}/thread.h +1 -1
- data/vendor/libgit2/src/util/win32/utf-conv.c +144 -0
- data/vendor/libgit2/src/util/win32/utf-conv.h +127 -0
- data/vendor/libgit2/src/{win32 → util/win32}/w32_buffer.c +2 -3
- data/vendor/libgit2/src/{win32 → util/win32}/w32_buffer.h +3 -4
- data/vendor/libgit2/src/{win32 → util/win32}/w32_leakcheck.c +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/w32_leakcheck.h +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/w32_util.c +1 -1
- data/vendor/libgit2/src/{win32 → util/win32}/w32_util.h +1 -1
- data/vendor/libgit2/src/{zstream.c → util/zstream.c} +5 -5
- data/vendor/libgit2/src/{zstream.h → util/zstream.h} +5 -5
- metadata +431 -362
- data/vendor/libgit2/cmake/FindIconv.cmake +0 -45
- data/vendor/libgit2/deps/http-parser/CMakeLists.txt +0 -6
- data/vendor/libgit2/deps/http-parser/COPYING +0 -23
- data/vendor/libgit2/deps/http-parser/http_parser.c +0 -2182
- data/vendor/libgit2/deps/http-parser/http_parser.h +0 -305
- data/vendor/libgit2/deps/zlib/COPYING +0 -27
- data/vendor/libgit2/include/git2/sys/reflog.h +0 -21
- data/vendor/libgit2/src/alloc.h +0 -40
- data/vendor/libgit2/src/allocators/failalloc.c +0 -92
- data/vendor/libgit2/src/allocators/failalloc.h +0 -23
- data/vendor/libgit2/src/allocators/stdalloc.c +0 -150
- data/vendor/libgit2/src/allocators/win32_leakcheck.c +0 -118
- data/vendor/libgit2/src/buffer.h +0 -374
- data/vendor/libgit2/src/commit.h +0 -46
- data/vendor/libgit2/src/config_entries.c +0 -237
- data/vendor/libgit2/src/config_entries.h +0 -24
- data/vendor/libgit2/src/config_mem.c +0 -220
- data/vendor/libgit2/src/errors.c +0 -238
- data/vendor/libgit2/src/hash/sha1/common_crypto.c +0 -57
- data/vendor/libgit2/src/hash/sha1/generic.c +0 -300
- data/vendor/libgit2/src/hash/sha1/generic.h +0 -19
- data/vendor/libgit2/src/hash/sha1/mbedtls.c +0 -46
- data/vendor/libgit2/src/hash/sha1/openssl.c +0 -59
- data/vendor/libgit2/src/hash/sha1/win32.c +0 -333
- data/vendor/libgit2/src/hash/sha1/win32.h +0 -128
- data/vendor/libgit2/src/hash/sha1.h +0 -38
- data/vendor/libgit2/src/hash.c +0 -110
- data/vendor/libgit2/src/hash.h +0 -46
- data/vendor/libgit2/src/idxmap.c +0 -157
- data/vendor/libgit2/src/idxmap.h +0 -177
- data/vendor/libgit2/src/khash.h +0 -615
- data/vendor/libgit2/src/libgit2.h +0 -15
- data/vendor/libgit2/src/message.h +0 -17
- data/vendor/libgit2/src/net.c +0 -540
- data/vendor/libgit2/src/netops.c +0 -125
- data/vendor/libgit2/src/netops.h +0 -68
- data/vendor/libgit2/src/offmap.c +0 -101
- data/vendor/libgit2/src/offmap.h +0 -133
- data/vendor/libgit2/src/oid.h +0 -51
- data/vendor/libgit2/src/oidarray.c +0 -43
- data/vendor/libgit2/src/oidmap.c +0 -107
- data/vendor/libgit2/src/oidmap.h +0 -128
- data/vendor/libgit2/src/remote.h +0 -55
- data/vendor/libgit2/src/streams/socket.c +0 -239
- data/vendor/libgit2/src/strmap.c +0 -100
- data/vendor/libgit2/src/strmap.h +0 -131
- data/vendor/libgit2/src/sysdir.c +0 -347
- data/vendor/libgit2/src/threadstate.c +0 -84
- data/vendor/libgit2/src/threadstate.h +0 -24
- data/vendor/libgit2/src/win32/findfile.c +0 -230
- data/vendor/libgit2/src/win32/findfile.h +0 -19
- data/vendor/libgit2/src/win32/utf-conv.c +0 -146
- data/vendor/libgit2/src/win32/utf-conv.h +0 -60
- /data/vendor/libgit2/{src → deps}/xdiff/xemit.h +0 -0
- /data/vendor/libgit2/{src → deps}/xdiff/xprepare.h +0 -0
- /data/vendor/libgit2/{src → deps}/xdiff/xtypes.h +0 -0
- /data/vendor/libgit2/src/{win32 → cli/win32}/precompiled.c +0 -0
- /data/vendor/libgit2/src/{attr.h → libgit2/attr.h} +0 -0
- /data/vendor/libgit2/src/{blame_git.h → libgit2/blame_git.h} +0 -0
- /data/vendor/libgit2/src/{config_parse.h → libgit2/config_parse.h} +0 -0
- /data/vendor/libgit2/src/{delta.c → libgit2/delta.c} +0 -0
- /data/vendor/libgit2/src/{delta.h → libgit2/delta.h} +0 -0
- /data/vendor/libgit2/src/{diff_file.h → libgit2/diff_file.h} +0 -0
- /data/vendor/libgit2/src/{diff_parse.h → libgit2/diff_parse.h} +0 -0
- /data/vendor/libgit2/src/{diff_tform.h → libgit2/diff_tform.h} +0 -0
- /data/vendor/libgit2/src/{fetchhead.h → libgit2/fetchhead.h} +0 -0
- /data/vendor/libgit2/src/{hashsig.c → libgit2/hashsig.c} +0 -0
- /data/vendor/libgit2/src/{indexer.h → libgit2/indexer.h} +0 -0
- /data/vendor/libgit2/src/{mailmap.h → libgit2/mailmap.h} +0 -0
- /data/vendor/libgit2/src/{merge_driver.h → libgit2/merge_driver.h} +0 -0
- /data/vendor/libgit2/src/{notes.h → libgit2/notes.h} +0 -0
- /data/vendor/libgit2/src/{object_api.c → libgit2/object_api.c} +0 -0
- /data/vendor/libgit2/src/{patch_parse.h → libgit2/patch_parse.h} +0 -0
- /data/vendor/libgit2/src/{refdb.c → libgit2/refdb.c} +0 -0
- /data/vendor/libgit2/src/{refdb.h → libgit2/refdb.h} +0 -0
- /data/vendor/libgit2/src/{repo_template.h → libgit2/repo_template.h} +0 -0
- /data/vendor/libgit2/src/{status.h → libgit2/status.h} +0 -0
- /data/vendor/libgit2/src/{stream.h → libgit2/stream.h} +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/mbedtls.h +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_legacy.c +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_legacy.h +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/registry.c +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/registry.h +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/stransport.h +0 -0
- /data/vendor/libgit2/src/{streams → libgit2/streams}/tls.h +0 -0
- /data/vendor/libgit2/src/{transports → libgit2/transports}/credential_helpers.c +0 -0
- /data/vendor/libgit2/src/{userdiff.h → libgit2/userdiff.h} +0 -0
- /data/vendor/libgit2/src/{bitvec.h → util/bitvec.h} +0 -0
- /data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/sha1.h +0 -0
- /data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/ubc_check.c +0 -0
- /data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/ubc_check.h +0 -0
- /data/vendor/libgit2/src/{pqueue.c → util/pqueue.c} +0 -0
- /data/vendor/libgit2/src/{strnlen.h → util/strnlen.h} +0 -0
- /data/vendor/libgit2/src/{unix → util/unix}/pthread.h +0 -0
- /data/vendor/libgit2/src/{varint.c → util/varint.c} +0 -0
- /data/vendor/libgit2/src/{wildmatch.c → util/wildmatch.c} +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/dir.c +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/mingw-compat.h +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/msvc-compat.h +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/reparse.h +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/thread.c +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/version.h +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/w32_common.h +0 -0
- /data/vendor/libgit2/src/{win32 → util/win32}/win32-compat.h +0 -0
@@ -1,30 +1,30 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
1
|
+
/* The latest version of this library is available on GitHub;
|
2
|
+
* https://github.com/sheredom/utf8.h */
|
3
|
+
|
4
|
+
/* This is free and unencumbered software released into the public domain.
|
5
|
+
*
|
6
|
+
* Anyone is free to copy, modify, publish, use, compile, sell, or
|
7
|
+
* distribute this software, either in source code form or as a compiled
|
8
|
+
* binary, for any purpose, commercial or non-commercial, and by any
|
9
|
+
* means.
|
10
|
+
*
|
11
|
+
* In jurisdictions that recognize copyright laws, the author or authors
|
12
|
+
* of this software dedicate any and all copyright interest in the
|
13
|
+
* software to the public domain. We make this dedication for the benefit
|
14
|
+
* of the public at large and to the detriment of our heirs and
|
15
|
+
* successors. We intend this dedication to be an overt act of
|
16
|
+
* relinquishment in perpetuity of all present and future rights to this
|
17
|
+
* software under copyright law.
|
18
|
+
*
|
19
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
22
|
+
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
23
|
+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
24
|
+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
*
|
27
|
+
* For more information, please refer to <http://unlicense.org/> */
|
28
28
|
|
29
29
|
#ifndef SHEREDOM_UTF8_H_INCLUDED
|
30
30
|
#define SHEREDOM_UTF8_H_INCLUDED
|
@@ -32,10 +32,25 @@
|
|
32
32
|
#if defined(_MSC_VER)
|
33
33
|
#pragma warning(push)
|
34
34
|
|
35
|
-
|
35
|
+
/* disable warning: no function prototype given: converting '()' to '(void)' */
|
36
|
+
#pragma warning(disable : 4255)
|
37
|
+
|
38
|
+
/* disable warning: '__cplusplus' is not defined as a preprocessor macro,
|
39
|
+
* replacing with '0' for '#if/#elif' */
|
40
|
+
#pragma warning(disable : 4668)
|
41
|
+
|
42
|
+
/* disable warning: bytes padding added after construct */
|
36
43
|
#pragma warning(disable : 4820)
|
37
44
|
#endif
|
38
45
|
|
46
|
+
#if defined(__cplusplus)
|
47
|
+
#if defined(_MSC_VER)
|
48
|
+
#define utf8_cplusplus _MSVC_LANG
|
49
|
+
#else
|
50
|
+
#define utf8_cplusplus __cplusplus
|
51
|
+
#endif
|
52
|
+
#endif
|
53
|
+
|
39
54
|
#include <stddef.h>
|
40
55
|
#include <stdlib.h>
|
41
56
|
|
@@ -43,7 +58,7 @@
|
|
43
58
|
#pragma warning(pop)
|
44
59
|
#endif
|
45
60
|
|
46
|
-
#if defined(_MSC_VER)
|
61
|
+
#if defined(_MSC_VER) && (_MSC_VER < 1920)
|
47
62
|
typedef __int32 utf8_int32_t;
|
48
63
|
#else
|
49
64
|
#include <stdint.h>
|
@@ -54,411 +69,516 @@ typedef int32_t utf8_int32_t;
|
|
54
69
|
#pragma clang diagnostic push
|
55
70
|
#pragma clang diagnostic ignored "-Wold-style-cast"
|
56
71
|
#pragma clang diagnostic ignored "-Wcast-qual"
|
72
|
+
|
73
|
+
#if __has_warning("-Wunsafe-buffer-usage")
|
74
|
+
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
|
75
|
+
#endif
|
57
76
|
#endif
|
58
77
|
|
59
|
-
#ifdef
|
78
|
+
#ifdef utf8_cplusplus
|
60
79
|
extern "C" {
|
61
80
|
#endif
|
62
81
|
|
63
|
-
#if defined(
|
64
|
-
#define
|
65
|
-
#
|
66
|
-
#define
|
67
|
-
#
|
68
|
-
|
82
|
+
#if defined(__TINYC__)
|
83
|
+
#define UTF8_ATTRIBUTE(a) __attribute((a))
|
84
|
+
#else
|
85
|
+
#define UTF8_ATTRIBUTE(a) __attribute__((a))
|
86
|
+
#endif
|
87
|
+
|
88
|
+
#if defined(_MSC_VER)
|
69
89
|
#define utf8_nonnull
|
70
90
|
#define utf8_pure
|
71
91
|
#define utf8_restrict __restrict
|
72
92
|
#define utf8_weak __inline
|
93
|
+
#elif defined(__clang__) || defined(__GNUC__)
|
94
|
+
#define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
|
95
|
+
#define utf8_pure UTF8_ATTRIBUTE(pure)
|
96
|
+
#define utf8_restrict __restrict__
|
97
|
+
#define utf8_weak UTF8_ATTRIBUTE(weak)
|
98
|
+
#elif defined(__TINYC__)
|
99
|
+
#define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
|
100
|
+
#define utf8_pure UTF8_ATTRIBUTE(pure)
|
101
|
+
#define utf8_restrict
|
102
|
+
#define utf8_weak UTF8_ATTRIBUTE(weak)
|
73
103
|
#else
|
74
|
-
#error Non clang, non gcc, non MSVC compiler found!
|
104
|
+
#error Non clang, non gcc, non MSVC, non tcc compiler found!
|
75
105
|
#endif
|
76
106
|
|
77
|
-
#ifdef
|
107
|
+
#ifdef utf8_cplusplus
|
78
108
|
#define utf8_null NULL
|
79
109
|
#else
|
80
110
|
#define utf8_null 0
|
81
111
|
#endif
|
82
112
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
// Find the first match of the utf8 codepoint chr in the utf8 string src.
|
93
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src,
|
94
|
-
utf8_int32_t chr);
|
95
|
-
|
96
|
-
// Return less than 0, 0, greater than 0 if src1 < src2,
|
97
|
-
// src1 == src2, src1 > src2 respectively.
|
98
|
-
utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1,
|
99
|
-
const void *src2);
|
100
|
-
|
101
|
-
// Copy the utf8 string src onto the memory allocated in dst.
|
102
|
-
utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst,
|
103
|
-
const void *utf8_restrict src);
|
104
|
-
|
105
|
-
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
106
|
-
// of utf8 codepoints not from the utf8 string reject.
|
107
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src,
|
108
|
-
const void *reject);
|
109
|
-
|
110
|
-
// Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
|
111
|
-
// copying over the data, and returning that. Or 0 if malloc failed.
|
112
|
-
utf8_nonnull utf8_weak void *utf8dup(const void *src);
|
113
|
-
|
114
|
-
// Number of utf8 codepoints in the utf8 string str,
|
115
|
-
// excluding the null terminating byte.
|
116
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
|
117
|
-
|
118
|
-
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
119
|
-
// src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
120
|
-
// string.
|
121
|
-
utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1,
|
122
|
-
const void *src2, size_t n);
|
123
|
-
|
124
|
-
// Append the utf8 string src onto the utf8 string dst,
|
125
|
-
// writing at most n+1 bytes. Can produce an invalid utf8
|
126
|
-
// string if n falls partway through a utf8 codepoint.
|
127
|
-
utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst,
|
128
|
-
const void *utf8_restrict src, size_t n);
|
129
|
-
|
130
|
-
// Return less than 0, 0, greater than 0 if src1 < src2,
|
131
|
-
// src1 == src2, src1 > src2 respectively. Checking at most n
|
132
|
-
// bytes of each utf8 string.
|
133
|
-
utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1,
|
134
|
-
const void *src2, size_t n);
|
135
|
-
|
136
|
-
// Copy the utf8 string src onto the memory allocated in dst.
|
137
|
-
// Copies at most n bytes. If there is no terminating null byte in
|
138
|
-
// the first n bytes of src, the string placed into dst will not be
|
139
|
-
// null-terminated. If the size (in bytes) of src is less than n,
|
140
|
-
// extra null terminating bytes are appended to dst such that at
|
141
|
-
// total of n bytes are written. Can produce an invalid utf8
|
142
|
-
// string if n falls partway through a utf8 codepoint.
|
143
|
-
utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst,
|
144
|
-
const void *utf8_restrict src, size_t n);
|
145
|
-
|
146
|
-
// Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
147
|
-
// longer than n, only n bytes are copied and a null byte is added.
|
148
|
-
//
|
149
|
-
// Returns a new string if successful, 0 otherwise
|
150
|
-
utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
|
151
|
-
|
152
|
-
// Locates the first occurence in the utf8 string str of any byte in the
|
153
|
-
// utf8 string accept, or 0 if no match was found.
|
154
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str,
|
155
|
-
const void *accept);
|
156
|
-
|
157
|
-
// Find the last match of the utf8 codepoint chr in the utf8 string src.
|
158
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
|
159
|
-
|
160
|
-
// Number of bytes in the utf8 string str,
|
161
|
-
// including the null terminating byte.
|
162
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
|
163
|
-
|
164
|
-
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
165
|
-
// of utf8 codepoints from the utf8 string accept.
|
166
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src,
|
167
|
-
const void *accept);
|
168
|
-
|
169
|
-
// The position of the utf8 string needle in the utf8 string haystack.
|
170
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack,
|
171
|
-
const void *needle);
|
172
|
-
|
173
|
-
// The position of the utf8 string needle in the utf8 string haystack, case
|
174
|
-
// insensitive.
|
175
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack,
|
176
|
-
const void *needle);
|
177
|
-
|
178
|
-
// Return 0 on success, or the position of the invalid
|
179
|
-
// utf8 codepoint on failure.
|
180
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
|
181
|
-
|
182
|
-
// Sets out_codepoint to the next utf8 codepoint in str, and returns the address
|
183
|
-
// of the utf8 codepoint after the current one in str.
|
184
|
-
utf8_nonnull utf8_weak void *
|
185
|
-
utf8codepoint(const void *utf8_restrict str,
|
186
|
-
utf8_int32_t *utf8_restrict out_codepoint);
|
187
|
-
|
188
|
-
// Returns the size of the given codepoint in bytes.
|
189
|
-
utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
|
190
|
-
|
191
|
-
// Write a codepoint to the given string, and return the address to the next
|
192
|
-
// place after the written codepoint. Pass how many bytes left in the buffer to
|
193
|
-
// n. If there is not enough space for the codepoint, this function returns
|
194
|
-
// null.
|
195
|
-
utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str,
|
196
|
-
utf8_int32_t chr, size_t n);
|
197
|
-
|
198
|
-
// Returns 1 if the given character is lowercase, or 0 if it is not.
|
199
|
-
utf8_weak int utf8islower(utf8_int32_t chr);
|
200
|
-
|
201
|
-
// Returns 1 if the given character is uppercase, or 0 if it is not.
|
202
|
-
utf8_weak int utf8isupper(utf8_int32_t chr);
|
203
|
-
|
204
|
-
// Transform the given string into all lowercase codepoints.
|
205
|
-
utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
|
113
|
+
#if defined(utf8_cplusplus) && utf8_cplusplus >= 201402L && (!defined(_MSC_VER) || (defined(_MSC_VER) && _MSC_VER >= 1910))
|
114
|
+
#define utf8_constexpr14 constexpr
|
115
|
+
#define utf8_constexpr14_impl constexpr
|
116
|
+
#else
|
117
|
+
/* constexpr and weak are incompatible, so only enable one of them */
|
118
|
+
#define utf8_constexpr14 utf8_weak
|
119
|
+
#define utf8_constexpr14_impl
|
120
|
+
#endif
|
206
121
|
|
207
|
-
|
208
|
-
|
122
|
+
#if defined(utf8_cplusplus) && utf8_cplusplus >= 202002L
|
123
|
+
using utf8_int8_t = char8_t; /* Introduced in C++20 */
|
124
|
+
#else
|
125
|
+
typedef char utf8_int8_t;
|
126
|
+
#endif
|
209
127
|
|
210
|
-
|
211
|
-
|
128
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
129
|
+
* src2 respectively, case insensitive. */
|
130
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int
|
131
|
+
utf8casecmp(const utf8_int8_t *src1, const utf8_int8_t *src2);
|
132
|
+
|
133
|
+
/* Append the utf8 string src onto the utf8 string dst. */
|
134
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
135
|
+
utf8cat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src);
|
136
|
+
|
137
|
+
/* Find the first match of the utf8 codepoint chr in the utf8 string src. */
|
138
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
139
|
+
utf8chr(const utf8_int8_t *src, utf8_int32_t chr);
|
140
|
+
|
141
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2,
|
142
|
+
* src1 == src2, src1 > src2 respectively. */
|
143
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int utf8cmp(const utf8_int8_t *src1,
|
144
|
+
const utf8_int8_t *src2);
|
145
|
+
|
146
|
+
/* Copy the utf8 string src onto the memory allocated in dst. */
|
147
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
148
|
+
utf8cpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src);
|
149
|
+
|
150
|
+
/* Number of utf8 codepoints in the utf8 string src that consists entirely
|
151
|
+
* of utf8 codepoints not from the utf8 string reject. */
|
152
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
153
|
+
utf8cspn(const utf8_int8_t *src, const utf8_int8_t *reject);
|
154
|
+
|
155
|
+
/* Duplicate the utf8 string src by getting its size, malloc'ing a new buffer,
|
156
|
+
* copying over the data, and returning that. Or 0 if malloc failed. */
|
157
|
+
utf8_weak utf8_int8_t *utf8dup(const utf8_int8_t *src);
|
158
|
+
|
159
|
+
/* Number of utf8 codepoints in the utf8 string str,
|
160
|
+
* excluding the null terminating byte. */
|
161
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8len(const utf8_int8_t *str);
|
162
|
+
|
163
|
+
/* Similar to utf8len, except that at most n bytes of str are examined. */
|
164
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8nlen(const utf8_int8_t *str,
|
165
|
+
size_t n);
|
166
|
+
|
167
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
168
|
+
* src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
169
|
+
* string. */
|
170
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int
|
171
|
+
utf8ncasecmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n);
|
172
|
+
|
173
|
+
/* Append the utf8 string src onto the utf8 string dst,
|
174
|
+
* writing at most n+1 bytes. Can produce an invalid utf8
|
175
|
+
* string if n falls partway through a utf8 codepoint. */
|
176
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
177
|
+
utf8ncat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src,
|
178
|
+
size_t n);
|
179
|
+
|
180
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2,
|
181
|
+
* src1 == src2, src1 > src2 respectively. Checking at most n
|
182
|
+
* bytes of each utf8 string. */
|
183
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int
|
184
|
+
utf8ncmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n);
|
185
|
+
|
186
|
+
/* Copy the utf8 string src onto the memory allocated in dst.
|
187
|
+
* Copies at most n bytes. If n falls partway through a utf8
|
188
|
+
* codepoint, or if dst doesn't have enough room for a null
|
189
|
+
* terminator, the final string will be cut short to preserve
|
190
|
+
* utf8 validity. */
|
191
|
+
|
192
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
193
|
+
utf8ncpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src,
|
194
|
+
size_t n);
|
195
|
+
|
196
|
+
/* Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
197
|
+
* longer than n, only n bytes are copied and a null byte is added.
|
198
|
+
*
|
199
|
+
* Returns a new string if successful, 0 otherwise */
|
200
|
+
utf8_weak utf8_int8_t *utf8ndup(const utf8_int8_t *src, size_t n);
|
201
|
+
|
202
|
+
/* Locates the first occurrence in the utf8 string str of any byte in the
|
203
|
+
* utf8 string accept, or 0 if no match was found. */
|
204
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
205
|
+
utf8pbrk(const utf8_int8_t *str, const utf8_int8_t *accept);
|
206
|
+
|
207
|
+
/* Find the last match of the utf8 codepoint chr in the utf8 string src. */
|
208
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
209
|
+
utf8rchr(const utf8_int8_t *src, int chr);
|
210
|
+
|
211
|
+
/* Number of bytes in the utf8 string str,
|
212
|
+
* including the null terminating byte. */
|
213
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8size(const utf8_int8_t *str);
|
214
|
+
|
215
|
+
/* Similar to utf8size, except that the null terminating byte is excluded. */
|
216
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
217
|
+
utf8size_lazy(const utf8_int8_t *str);
|
218
|
+
|
219
|
+
/* Similar to utf8size, except that at most n bytes of str are examined and
|
220
|
+
* the null terminating byte is excluded. */
|
221
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
222
|
+
utf8nsize_lazy(const utf8_int8_t *str, size_t n);
|
223
|
+
|
224
|
+
/* Number of utf8 codepoints in the utf8 string src that consists entirely
|
225
|
+
* of utf8 codepoints from the utf8 string accept. */
|
226
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
227
|
+
utf8spn(const utf8_int8_t *src, const utf8_int8_t *accept);
|
228
|
+
|
229
|
+
/* The position of the utf8 string needle in the utf8 string haystack. */
|
230
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
231
|
+
utf8str(const utf8_int8_t *haystack, const utf8_int8_t *needle);
|
232
|
+
|
233
|
+
/* The position of the utf8 string needle in the utf8 string haystack, case
|
234
|
+
* insensitive. */
|
235
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
236
|
+
utf8casestr(const utf8_int8_t *haystack, const utf8_int8_t *needle);
|
237
|
+
|
238
|
+
/* Return 0 on success, or the position of the invalid
|
239
|
+
* utf8 codepoint on failure. */
|
240
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
241
|
+
utf8valid(const utf8_int8_t *str);
|
242
|
+
|
243
|
+
/* Similar to utf8valid, except that at most n bytes of str are examined. */
|
244
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
245
|
+
utf8nvalid(const utf8_int8_t *str, size_t n);
|
246
|
+
|
247
|
+
/* Given a null-terminated string, makes the string valid by replacing invalid
|
248
|
+
* codepoints with a 1-byte replacement. Returns 0 on success. */
|
249
|
+
utf8_nonnull utf8_weak int utf8makevalid(utf8_int8_t *str,
|
250
|
+
const utf8_int32_t replacement);
|
251
|
+
|
252
|
+
/* Sets out_codepoint to the current utf8 codepoint in str, and returns the
|
253
|
+
* address of the next utf8 codepoint after the current one in str. */
|
254
|
+
utf8_constexpr14 utf8_nonnull utf8_int8_t *
|
255
|
+
utf8codepoint(const utf8_int8_t *utf8_restrict str,
|
256
|
+
utf8_int32_t *utf8_restrict out_codepoint);
|
212
257
|
|
213
|
-
|
214
|
-
|
258
|
+
/* Calculates the size of the next utf8 codepoint in str. */
|
259
|
+
utf8_constexpr14 utf8_nonnull size_t
|
260
|
+
utf8codepointcalcsize(const utf8_int8_t *str);
|
261
|
+
|
262
|
+
/* Returns the size of the given codepoint in bytes. */
|
263
|
+
utf8_constexpr14 size_t utf8codepointsize(utf8_int32_t chr);
|
264
|
+
|
265
|
+
/* Write a codepoint to the given string, and return the address to the next
|
266
|
+
* place after the written codepoint. Pass how many bytes left in the buffer to
|
267
|
+
* n. If there is not enough space for the codepoint, this function returns
|
268
|
+
* null. */
|
269
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
270
|
+
utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n);
|
271
|
+
|
272
|
+
/* Returns 1 if the given character is lowercase, or 0 if it is not. */
|
273
|
+
utf8_constexpr14 int utf8islower(utf8_int32_t chr);
|
274
|
+
|
275
|
+
/* Returns 1 if the given character is uppercase, or 0 if it is not. */
|
276
|
+
utf8_constexpr14 int utf8isupper(utf8_int32_t chr);
|
277
|
+
|
278
|
+
/* Transform the given string into all lowercase codepoints. */
|
279
|
+
utf8_nonnull utf8_weak void utf8lwr(utf8_int8_t *utf8_restrict str);
|
280
|
+
|
281
|
+
/* Transform the given string into all uppercase codepoints. */
|
282
|
+
utf8_nonnull utf8_weak void utf8upr(utf8_int8_t *utf8_restrict str);
|
283
|
+
|
284
|
+
/* Make a codepoint lower case if possible. */
|
285
|
+
utf8_constexpr14 utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
|
286
|
+
|
287
|
+
/* Make a codepoint upper case if possible. */
|
288
|
+
utf8_constexpr14 utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
|
289
|
+
|
290
|
+
/* Sets out_codepoint to the current utf8 codepoint in str, and returns the
|
291
|
+
* address of the previous utf8 codepoint before the current one in str. */
|
292
|
+
utf8_constexpr14 utf8_nonnull utf8_int8_t *
|
293
|
+
utf8rcodepoint(const utf8_int8_t *utf8_restrict str,
|
294
|
+
utf8_int32_t *utf8_restrict out_codepoint);
|
295
|
+
|
296
|
+
/* Duplicate the utf8 string src by getting its size, calling alloc_func_ptr to
|
297
|
+
* copy over data to a new buffer, and returning that. Or 0 if alloc_func_ptr
|
298
|
+
* returned null. */
|
299
|
+
utf8_weak utf8_int8_t *utf8dup_ex(const utf8_int8_t *src,
|
300
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *,
|
301
|
+
size_t),
|
302
|
+
utf8_int8_t *user_data);
|
303
|
+
|
304
|
+
/* Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
305
|
+
* longer than n, only n bytes are copied and a null byte is added.
|
306
|
+
*
|
307
|
+
* Returns a new string if successful, 0 otherwise. */
|
308
|
+
utf8_weak utf8_int8_t *utf8ndup_ex(const utf8_int8_t *src, size_t n,
|
309
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *,
|
310
|
+
size_t),
|
311
|
+
utf8_int8_t *user_data);
|
215
312
|
|
216
313
|
#undef utf8_weak
|
217
314
|
#undef utf8_pure
|
218
315
|
#undef utf8_nonnull
|
219
316
|
|
220
|
-
int utf8casecmp(const
|
221
|
-
|
317
|
+
utf8_constexpr14_impl int utf8casecmp(const utf8_int8_t *src1,
|
318
|
+
const utf8_int8_t *src2) {
|
319
|
+
utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
|
320
|
+
src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
|
222
321
|
|
223
322
|
for (;;) {
|
224
|
-
src1 = utf8codepoint(src1, &
|
225
|
-
src2 = utf8codepoint(src2, &
|
323
|
+
src1 = utf8codepoint(src1, &src1_orig_cp);
|
324
|
+
src2 = utf8codepoint(src2, &src2_orig_cp);
|
226
325
|
|
227
|
-
|
228
|
-
|
229
|
-
|
326
|
+
/* lower the srcs if required */
|
327
|
+
src1_lwr_cp = utf8lwrcodepoint(src1_orig_cp);
|
328
|
+
src2_lwr_cp = utf8lwrcodepoint(src2_orig_cp);
|
230
329
|
|
231
|
-
|
232
|
-
|
233
|
-
|
330
|
+
/* upper the srcs if required */
|
331
|
+
src1_upr_cp = utf8uprcodepoint(src1_orig_cp);
|
332
|
+
src2_upr_cp = utf8uprcodepoint(src2_orig_cp);
|
234
333
|
|
235
|
-
|
334
|
+
/* check for the end of both strings, or if the lowered or uppered codepoints match */
|
236
335
|
if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
|
237
336
|
return 0;
|
238
|
-
} else if (
|
337
|
+
} else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
|
239
338
|
continue;
|
240
339
|
}
|
241
340
|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
} else if (src1_orig_cp > src2_orig_cp) {
|
246
|
-
return 1;
|
247
|
-
}
|
341
|
+
/* if they don't match, then we return the difference between the characters
|
342
|
+
*/
|
343
|
+
return src1_lwr_cp - src2_lwr_cp;
|
248
344
|
}
|
249
345
|
}
|
250
346
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
// find the null terminating byte in dst
|
347
|
+
utf8_int8_t *utf8cat(utf8_int8_t *utf8_restrict dst,
|
348
|
+
const utf8_int8_t *utf8_restrict src) {
|
349
|
+
utf8_int8_t *d = dst;
|
350
|
+
/* find the null terminating byte in dst */
|
256
351
|
while ('\0' != *d) {
|
257
352
|
d++;
|
258
353
|
}
|
259
354
|
|
260
|
-
|
261
|
-
while ('\0' != *
|
262
|
-
*d++ = *
|
355
|
+
/* overwriting the null terminating byte in dst, append src byte-by-byte */
|
356
|
+
while ('\0' != *src) {
|
357
|
+
*d++ = *src++;
|
263
358
|
}
|
264
359
|
|
265
|
-
|
360
|
+
/* write out a new null terminating byte into dst */
|
266
361
|
*d = '\0';
|
267
362
|
|
268
363
|
return dst;
|
269
364
|
}
|
270
365
|
|
271
|
-
|
272
|
-
|
366
|
+
utf8_constexpr14_impl utf8_int8_t *utf8chr(const utf8_int8_t *src,
|
367
|
+
utf8_int32_t chr) {
|
368
|
+
utf8_int8_t c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
273
369
|
|
274
370
|
if (0 == chr) {
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
s++;
|
371
|
+
/* being asked to return position of null terminating byte, so
|
372
|
+
* just run src to the end, and return! */
|
373
|
+
while ('\0' != *src) {
|
374
|
+
src++;
|
280
375
|
}
|
281
|
-
return (
|
376
|
+
return (utf8_int8_t *)src;
|
282
377
|
} else if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
|
283
|
-
|
284
|
-
|
285
|
-
c[0] = (
|
378
|
+
/* 1-byte/7-bit ascii
|
379
|
+
* (0b0xxxxxxx) */
|
380
|
+
c[0] = (utf8_int8_t)chr;
|
286
381
|
} else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
|
287
|
-
|
288
|
-
|
289
|
-
c[0] = 0xc0 | (
|
290
|
-
c[1] = 0x80 | (
|
382
|
+
/* 2-byte/11-bit utf8 code point
|
383
|
+
* (0b110xxxxx 0b10xxxxxx) */
|
384
|
+
c[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)(chr >> 6));
|
385
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
291
386
|
} else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
|
292
|
-
|
293
|
-
|
294
|
-
c[0] = 0xe0 | (
|
295
|
-
c[1] = 0x80 | (
|
296
|
-
c[2] = 0x80 | (
|
297
|
-
} else {
|
298
|
-
|
299
|
-
|
300
|
-
c[0] = 0xf0 | (
|
301
|
-
c[1] = 0x80 | (
|
302
|
-
c[2] = 0x80 | (
|
303
|
-
c[3] = 0x80 | (
|
387
|
+
/* 3-byte/16-bit utf8 code point
|
388
|
+
* (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
|
389
|
+
c[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)(chr >> 12));
|
390
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
391
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
392
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
393
|
+
/* 4-byte/21-bit utf8 code point
|
394
|
+
* (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
|
395
|
+
c[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)(chr >> 18));
|
396
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
|
397
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
398
|
+
c[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
304
399
|
}
|
305
400
|
|
306
|
-
|
307
|
-
|
308
|
-
|
401
|
+
/* we've made c into a 2 utf8 codepoint string, one for the chr we are
|
402
|
+
* seeking, another for the null terminating byte. Now use utf8str to
|
403
|
+
* search */
|
309
404
|
return utf8str(src, c);
|
310
405
|
}
|
311
406
|
|
312
|
-
int utf8cmp(const
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
while (('\0' != *s1) || ('\0' != *s2)) {
|
317
|
-
if (*s1 < *s2) {
|
407
|
+
utf8_constexpr14_impl int utf8cmp(const utf8_int8_t *src1,
|
408
|
+
const utf8_int8_t *src2) {
|
409
|
+
while (('\0' != *src1) || ('\0' != *src2)) {
|
410
|
+
if (*src1 < *src2) {
|
318
411
|
return -1;
|
319
|
-
} else if (*
|
412
|
+
} else if (*src1 > *src2) {
|
320
413
|
return 1;
|
321
414
|
}
|
322
415
|
|
323
|
-
|
324
|
-
|
416
|
+
src1++;
|
417
|
+
src2++;
|
325
418
|
}
|
326
419
|
|
327
|
-
|
420
|
+
/* both utf8 strings matched */
|
328
421
|
return 0;
|
329
422
|
}
|
330
423
|
|
331
|
-
int utf8coll(const
|
424
|
+
utf8_constexpr14_impl int utf8coll(const utf8_int8_t *src1,
|
425
|
+
const utf8_int8_t *src2);
|
332
426
|
|
333
|
-
|
334
|
-
|
335
|
-
|
427
|
+
utf8_int8_t *utf8cpy(utf8_int8_t *utf8_restrict dst,
|
428
|
+
const utf8_int8_t *utf8_restrict src) {
|
429
|
+
utf8_int8_t *d = dst;
|
336
430
|
|
337
|
-
|
338
|
-
|
339
|
-
while ('\0' != *
|
340
|
-
*d++ = *
|
431
|
+
/* overwriting anything previously in dst, write byte-by-byte
|
432
|
+
* from src */
|
433
|
+
while ('\0' != *src) {
|
434
|
+
*d++ = *src++;
|
341
435
|
}
|
342
436
|
|
343
|
-
|
437
|
+
/* append null terminating byte */
|
344
438
|
*d = '\0';
|
345
439
|
|
346
440
|
return dst;
|
347
441
|
}
|
348
442
|
|
349
|
-
size_t utf8cspn(const
|
350
|
-
|
443
|
+
utf8_constexpr14_impl size_t utf8cspn(const utf8_int8_t *src,
|
444
|
+
const utf8_int8_t *reject) {
|
351
445
|
size_t chars = 0;
|
352
446
|
|
353
|
-
while ('\0' != *
|
354
|
-
const
|
447
|
+
while ('\0' != *src) {
|
448
|
+
const utf8_int8_t *r = reject;
|
355
449
|
size_t offset = 0;
|
356
450
|
|
357
451
|
while ('\0' != *r) {
|
358
|
-
|
359
|
-
|
360
|
-
|
452
|
+
/* checking that if *r is the start of a utf8 codepoint
|
453
|
+
* (it is not 0b10xxxxxx) and we have successfully matched
|
454
|
+
* a previous character (0 < offset) - we found a match */
|
361
455
|
if ((0x80 != (0xc0 & *r)) && (0 < offset)) {
|
362
456
|
return chars;
|
363
457
|
} else {
|
364
|
-
if (*r ==
|
365
|
-
|
366
|
-
|
458
|
+
if (*r == src[offset]) {
|
459
|
+
/* part of a utf8 codepoint matched, so move our checking
|
460
|
+
* onwards to the next byte */
|
367
461
|
offset++;
|
368
462
|
r++;
|
369
463
|
} else {
|
370
|
-
|
371
|
-
|
464
|
+
/* r could be in the middle of an unmatching utf8 code point,
|
465
|
+
* so we need to march it on to the next character beginning, */
|
372
466
|
|
373
467
|
do {
|
374
468
|
r++;
|
375
469
|
} while (0x80 == (0xc0 & *r));
|
376
470
|
|
377
|
-
|
471
|
+
/* reset offset too as we found a mismatch */
|
378
472
|
offset = 0;
|
379
473
|
}
|
380
474
|
}
|
381
475
|
}
|
382
476
|
|
383
|
-
|
384
|
-
|
385
|
-
|
477
|
+
/* found a match at the end of *r, so didn't get a chance to test it */
|
478
|
+
if (0 < offset) {
|
479
|
+
return chars;
|
480
|
+
}
|
481
|
+
|
482
|
+
/* the current utf8 codepoint in src did not match reject, but src
|
483
|
+
* could have been partway through a utf8 codepoint, so we need to
|
484
|
+
* march it onto the next utf8 codepoint starting byte */
|
386
485
|
do {
|
387
|
-
|
388
|
-
} while ((0x80 == (0xc0 & *
|
486
|
+
src++;
|
487
|
+
} while ((0x80 == (0xc0 & *src)));
|
389
488
|
chars++;
|
390
489
|
}
|
391
490
|
|
392
491
|
return chars;
|
393
492
|
}
|
394
493
|
|
395
|
-
|
494
|
+
utf8_int8_t *utf8dup(const utf8_int8_t *src) {
|
495
|
+
return utf8dup_ex(src, utf8_null, utf8_null);
|
496
|
+
}
|
396
497
|
|
397
|
-
|
398
|
-
|
399
|
-
|
498
|
+
utf8_int8_t *utf8dup_ex(const utf8_int8_t *src,
|
499
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t),
|
500
|
+
utf8_int8_t *user_data) {
|
501
|
+
utf8_int8_t *n = utf8_null;
|
400
502
|
|
401
|
-
|
503
|
+
/* figure out how many bytes (including the terminator) we need to copy first
|
504
|
+
*/
|
402
505
|
size_t bytes = utf8size(src);
|
403
506
|
|
404
|
-
|
507
|
+
if (alloc_func_ptr) {
|
508
|
+
n = alloc_func_ptr(user_data, bytes);
|
509
|
+
} else {
|
510
|
+
#if !defined(UTF8_NO_STD_MALLOC)
|
511
|
+
n = (utf8_int8_t *)malloc(bytes);
|
512
|
+
#else
|
513
|
+
return utf8_null;
|
514
|
+
#endif
|
515
|
+
}
|
405
516
|
|
406
517
|
if (utf8_null == n) {
|
407
|
-
|
518
|
+
/* out of memory so we bail */
|
408
519
|
return utf8_null;
|
409
520
|
} else {
|
410
521
|
bytes = 0;
|
411
522
|
|
412
|
-
|
413
|
-
while ('\0' !=
|
414
|
-
n[bytes] =
|
523
|
+
/* copy src byte-by-byte into our new utf8 string */
|
524
|
+
while ('\0' != src[bytes]) {
|
525
|
+
n[bytes] = src[bytes];
|
415
526
|
bytes++;
|
416
527
|
}
|
417
528
|
|
418
|
-
|
529
|
+
/* append null terminating byte */
|
419
530
|
n[bytes] = '\0';
|
420
531
|
return n;
|
421
532
|
}
|
422
533
|
}
|
423
534
|
|
424
|
-
|
535
|
+
utf8_constexpr14_impl utf8_int8_t *utf8fry(const utf8_int8_t *str);
|
536
|
+
|
537
|
+
utf8_constexpr14_impl size_t utf8len(const utf8_int8_t *str) {
|
538
|
+
return utf8nlen(str, SIZE_MAX);
|
539
|
+
}
|
425
540
|
|
426
|
-
size_t
|
427
|
-
const
|
541
|
+
utf8_constexpr14_impl size_t utf8nlen(const utf8_int8_t *str, size_t n) {
|
542
|
+
const utf8_int8_t *t = str;
|
428
543
|
size_t length = 0;
|
429
544
|
|
430
|
-
while ('\0' != *
|
431
|
-
if (0xf0 == (0xf8 & *
|
432
|
-
|
433
|
-
|
434
|
-
} else if (0xe0 == (0xf0 & *
|
435
|
-
|
436
|
-
|
437
|
-
} else if (0xc0 == (0xe0 & *
|
438
|
-
|
439
|
-
|
440
|
-
} else {
|
441
|
-
|
442
|
-
|
545
|
+
while ((size_t)(str - t) < n && '\0' != *str) {
|
546
|
+
if (0xf0 == (0xf8 & *str)) {
|
547
|
+
/* 4-byte utf8 code point (began with 0b11110xxx) */
|
548
|
+
str += 4;
|
549
|
+
} else if (0xe0 == (0xf0 & *str)) {
|
550
|
+
/* 3-byte utf8 code point (began with 0b1110xxxx) */
|
551
|
+
str += 3;
|
552
|
+
} else if (0xc0 == (0xe0 & *str)) {
|
553
|
+
/* 2-byte utf8 code point (began with 0b110xxxxx) */
|
554
|
+
str += 2;
|
555
|
+
} else { /* if (0x00 == (0x80 & *s)) { */
|
556
|
+
/* 1-byte ascii (began with 0b0xxxxxxx) */
|
557
|
+
str += 1;
|
443
558
|
}
|
444
559
|
|
445
|
-
|
446
|
-
|
560
|
+
/* no matter the bytes we marched s forward by, it was
|
561
|
+
* only 1 utf8 codepoint */
|
447
562
|
length++;
|
448
563
|
}
|
449
564
|
|
565
|
+
if ((size_t)(str - t) > n) {
|
566
|
+
length--;
|
567
|
+
}
|
450
568
|
return length;
|
451
569
|
}
|
452
570
|
|
453
|
-
int utf8ncasecmp(const
|
454
|
-
|
571
|
+
utf8_constexpr14_impl int utf8ncasecmp(const utf8_int8_t *src1,
|
572
|
+
const utf8_int8_t *src2, size_t n) {
|
573
|
+
utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
|
574
|
+
src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
|
455
575
|
|
456
576
|
do {
|
457
|
-
const
|
458
|
-
const
|
577
|
+
const utf8_int8_t *const s1 = src1;
|
578
|
+
const utf8_int8_t *const s2 = src2;
|
459
579
|
|
460
|
-
|
461
|
-
|
580
|
+
/* first check that we have enough bytes left in n to contain an entire
|
581
|
+
* codepoint */
|
462
582
|
if (0 == n) {
|
463
583
|
return 0;
|
464
584
|
}
|
@@ -467,10 +587,8 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
|
467
587
|
const utf8_int32_t c1 = (0xe0 & *s1);
|
468
588
|
const utf8_int32_t c2 = (0xe0 & *s2);
|
469
589
|
|
470
|
-
if (c1
|
471
|
-
return -
|
472
|
-
} else if (c1 > c2) {
|
473
|
-
return 1;
|
590
|
+
if (c1 != c2) {
|
591
|
+
return c1 - c2;
|
474
592
|
} else {
|
475
593
|
return 0;
|
476
594
|
}
|
@@ -480,10 +598,8 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
|
480
598
|
const utf8_int32_t c1 = (0xf0 & *s1);
|
481
599
|
const utf8_int32_t c2 = (0xf0 & *s2);
|
482
600
|
|
483
|
-
if (c1
|
484
|
-
return -
|
485
|
-
} else if (c1 > c2) {
|
486
|
-
return 1;
|
601
|
+
if (c1 != c2) {
|
602
|
+
return c1 - c2;
|
487
603
|
} else {
|
488
604
|
return 0;
|
489
605
|
}
|
@@ -493,307 +609,343 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
|
493
609
|
const utf8_int32_t c1 = (0xf8 & *s1);
|
494
610
|
const utf8_int32_t c2 = (0xf8 & *s2);
|
495
611
|
|
496
|
-
if (c1
|
497
|
-
return -
|
498
|
-
} else if (c1 > c2) {
|
499
|
-
return 1;
|
612
|
+
if (c1 != c2) {
|
613
|
+
return c1 - c2;
|
500
614
|
} else {
|
501
615
|
return 0;
|
502
616
|
}
|
503
617
|
}
|
504
618
|
|
505
|
-
src1 = utf8codepoint(src1, &
|
506
|
-
src2 = utf8codepoint(src2, &
|
507
|
-
n -= utf8codepointsize(
|
619
|
+
src1 = utf8codepoint(src1, &src1_orig_cp);
|
620
|
+
src2 = utf8codepoint(src2, &src2_orig_cp);
|
621
|
+
n -= utf8codepointsize(src1_orig_cp);
|
508
622
|
|
509
|
-
|
510
|
-
|
511
|
-
src2_orig_cp = src2_cp;
|
623
|
+
src1_lwr_cp = utf8lwrcodepoint(src1_orig_cp);
|
624
|
+
src2_lwr_cp = utf8lwrcodepoint(src2_orig_cp);
|
512
625
|
|
513
|
-
|
514
|
-
|
515
|
-
src2_cp = utf8lwrcodepoint(src2_cp);
|
626
|
+
src1_upr_cp = utf8uprcodepoint(src1_orig_cp);
|
627
|
+
src2_upr_cp = utf8uprcodepoint(src2_orig_cp);
|
516
628
|
|
517
|
-
|
629
|
+
/* check if the lowered codepoints match */
|
518
630
|
if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
|
519
631
|
return 0;
|
520
|
-
} else if (
|
632
|
+
} else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
|
521
633
|
continue;
|
522
634
|
}
|
523
635
|
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
} else if (src1_orig_cp > src2_orig_cp) {
|
528
|
-
return 1;
|
529
|
-
}
|
636
|
+
/* if they don't match, then we return the difference between the characters
|
637
|
+
*/
|
638
|
+
return src1_lwr_cp - src2_lwr_cp;
|
530
639
|
} while (0 < n);
|
531
640
|
|
532
|
-
|
641
|
+
/* both utf8 strings matched */
|
533
642
|
return 0;
|
534
643
|
}
|
535
644
|
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
const char *s = (const char *)src;
|
645
|
+
utf8_int8_t *utf8ncat(utf8_int8_t *utf8_restrict dst,
|
646
|
+
const utf8_int8_t *utf8_restrict src, size_t n) {
|
647
|
+
utf8_int8_t *d = dst;
|
540
648
|
|
541
|
-
|
649
|
+
/* find the null terminating byte in dst */
|
542
650
|
while ('\0' != *d) {
|
543
651
|
d++;
|
544
652
|
}
|
545
653
|
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
*d++ = *
|
550
|
-
}
|
654
|
+
/* overwriting the null terminating byte in dst, append src byte-by-byte
|
655
|
+
* stopping if we run out of space */
|
656
|
+
while (('\0' != *src) && (0 != n--)) {
|
657
|
+
*d++ = *src++;
|
658
|
+
}
|
551
659
|
|
552
|
-
|
660
|
+
/* write out a new null terminating byte into dst */
|
553
661
|
*d = '\0';
|
554
662
|
|
555
663
|
return dst;
|
556
664
|
}
|
557
665
|
|
558
|
-
int utf8ncmp(const
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
while ((('\0' != *s1) || ('\0' != *s2)) && (0 != n--)) {
|
563
|
-
if (*s1 < *s2) {
|
666
|
+
utf8_constexpr14_impl int utf8ncmp(const utf8_int8_t *src1,
|
667
|
+
const utf8_int8_t *src2, size_t n) {
|
668
|
+
while ((0 != n--) && (('\0' != *src1) || ('\0' != *src2))) {
|
669
|
+
if (*src1 < *src2) {
|
564
670
|
return -1;
|
565
|
-
} else if (*
|
671
|
+
} else if (*src1 > *src2) {
|
566
672
|
return 1;
|
567
673
|
}
|
568
674
|
|
569
|
-
|
570
|
-
|
675
|
+
src1++;
|
676
|
+
src2++;
|
571
677
|
}
|
572
678
|
|
573
|
-
|
679
|
+
/* both utf8 strings matched */
|
574
680
|
return 0;
|
575
681
|
}
|
576
682
|
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
683
|
+
utf8_int8_t *utf8ncpy(utf8_int8_t *utf8_restrict dst,
|
684
|
+
const utf8_int8_t *utf8_restrict src, size_t n) {
|
685
|
+
utf8_int8_t *d = dst;
|
686
|
+
size_t index = 0, check_index = 0;
|
581
687
|
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
*d++ = *s++;
|
586
|
-
} while (('\0' != *s) && (0 != --n));
|
688
|
+
if (n == 0) {
|
689
|
+
return dst;
|
690
|
+
}
|
587
691
|
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
692
|
+
/* overwriting anything previously in dst, write byte-by-byte
|
693
|
+
* from src */
|
694
|
+
for (index = 0; index < n; index++) {
|
695
|
+
d[index] = src[index];
|
696
|
+
if ('\0' == src[index]) {
|
697
|
+
break;
|
698
|
+
}
|
699
|
+
}
|
700
|
+
|
701
|
+
for (check_index = index - 1;
|
702
|
+
check_index > 0 && 0x80 == (0xc0 & d[check_index]); check_index--) {
|
703
|
+
/* just moving the index */
|
704
|
+
}
|
705
|
+
|
706
|
+
if (check_index < index &&
|
707
|
+
((index - check_index) < utf8codepointcalcsize(&d[check_index]) ||
|
708
|
+
(index - check_index) == n)) {
|
709
|
+
index = check_index;
|
710
|
+
}
|
711
|
+
|
712
|
+
/* append null terminating byte */
|
713
|
+
for (; index < n; index++) {
|
714
|
+
d[index] = 0;
|
592
715
|
}
|
593
716
|
|
594
717
|
return dst;
|
595
718
|
}
|
596
719
|
|
597
|
-
|
598
|
-
|
599
|
-
|
720
|
+
utf8_int8_t *utf8ndup(const utf8_int8_t *src, size_t n) {
|
721
|
+
return utf8ndup_ex(src, n, utf8_null, utf8_null);
|
722
|
+
}
|
723
|
+
|
724
|
+
utf8_int8_t *utf8ndup_ex(const utf8_int8_t *src, size_t n,
|
725
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t),
|
726
|
+
utf8_int8_t *user_data) {
|
727
|
+
utf8_int8_t *c = utf8_null;
|
600
728
|
size_t bytes = 0;
|
601
729
|
|
602
|
-
|
603
|
-
while ('\0' !=
|
730
|
+
/* Find the end of the string or stop when n is reached */
|
731
|
+
while ('\0' != src[bytes] && bytes < n) {
|
604
732
|
bytes++;
|
605
733
|
}
|
606
734
|
|
607
|
-
|
608
|
-
|
735
|
+
/* In case bytes is actually less than n, we need to set it
|
736
|
+
* to be used later in the copy byte by byte. */
|
609
737
|
n = bytes;
|
610
738
|
|
611
|
-
|
739
|
+
if (alloc_func_ptr) {
|
740
|
+
c = alloc_func_ptr(user_data, bytes + 1);
|
741
|
+
} else {
|
742
|
+
#if !defined(UTF8_NO_STD_MALLOC)
|
743
|
+
c = (utf8_int8_t *)malloc(bytes + 1);
|
744
|
+
#else
|
745
|
+
c = utf8_null;
|
746
|
+
#endif
|
747
|
+
}
|
748
|
+
|
612
749
|
if (utf8_null == c) {
|
613
|
-
|
750
|
+
/* out of memory so we bail */
|
614
751
|
return utf8_null;
|
615
752
|
}
|
616
753
|
|
617
754
|
bytes = 0;
|
618
755
|
|
619
|
-
|
620
|
-
while ('\0' !=
|
621
|
-
c[bytes] =
|
756
|
+
/* copy src byte-by-byte into our new utf8 string */
|
757
|
+
while ('\0' != src[bytes] && bytes < n) {
|
758
|
+
c[bytes] = src[bytes];
|
622
759
|
bytes++;
|
623
760
|
}
|
624
761
|
|
625
|
-
|
762
|
+
/* append null terminating byte */
|
626
763
|
c[bytes] = '\0';
|
627
764
|
return c;
|
628
765
|
}
|
629
766
|
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
767
|
+
utf8_constexpr14_impl utf8_int8_t *utf8rchr(const utf8_int8_t *src, int chr) {
|
768
|
+
|
769
|
+
utf8_int8_t *match = utf8_null;
|
770
|
+
utf8_int8_t c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
634
771
|
|
635
772
|
if (0 == chr) {
|
636
|
-
|
637
|
-
|
638
|
-
while ('\0' != *
|
639
|
-
|
773
|
+
/* being asked to return position of null terminating byte, so
|
774
|
+
* just run s to the end, and return! */
|
775
|
+
while ('\0' != *src) {
|
776
|
+
src++;
|
640
777
|
}
|
641
|
-
return (
|
778
|
+
return (utf8_int8_t *)src;
|
642
779
|
} else if (0 == ((int)0xffffff80 & chr)) {
|
643
|
-
|
644
|
-
|
645
|
-
c[0] = (
|
780
|
+
/* 1-byte/7-bit ascii
|
781
|
+
* (0b0xxxxxxx) */
|
782
|
+
c[0] = (utf8_int8_t)chr;
|
646
783
|
} else if (0 == ((int)0xfffff800 & chr)) {
|
647
|
-
|
648
|
-
|
649
|
-
c[0] = 0xc0 | (
|
650
|
-
c[1] = 0x80 | (
|
784
|
+
/* 2-byte/11-bit utf8 code point
|
785
|
+
* (0b110xxxxx 0b10xxxxxx) */
|
786
|
+
c[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)(chr >> 6));
|
787
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
651
788
|
} else if (0 == ((int)0xffff0000 & chr)) {
|
652
|
-
|
653
|
-
|
654
|
-
c[0] = 0xe0 | (
|
655
|
-
c[1] = 0x80 | (
|
656
|
-
c[2] = 0x80 | (
|
657
|
-
} else {
|
658
|
-
|
659
|
-
|
660
|
-
c[0] = 0xf0 | (
|
661
|
-
c[1] = 0x80 | (
|
662
|
-
c[2] = 0x80 | (
|
663
|
-
c[3] = 0x80 | (
|
789
|
+
/* 3-byte/16-bit utf8 code point
|
790
|
+
* (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
|
791
|
+
c[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)(chr >> 12));
|
792
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
793
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
794
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
795
|
+
/* 4-byte/21-bit utf8 code point
|
796
|
+
* (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
|
797
|
+
c[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)(chr >> 18));
|
798
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
|
799
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
800
|
+
c[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
664
801
|
}
|
665
802
|
|
666
|
-
|
667
|
-
|
668
|
-
|
803
|
+
/* we've created a 2 utf8 codepoint string in c that is
|
804
|
+
* the utf8 character asked for by chr, and a null
|
805
|
+
* terminating byte */
|
669
806
|
|
670
|
-
while ('\0' != *
|
807
|
+
while ('\0' != *src) {
|
671
808
|
size_t offset = 0;
|
672
809
|
|
673
|
-
while (
|
810
|
+
while ((src[offset] == c[offset]) && ('\0' != src[offset])) {
|
674
811
|
offset++;
|
675
812
|
}
|
676
813
|
|
677
814
|
if ('\0' == c[offset]) {
|
678
|
-
|
679
|
-
match =
|
680
|
-
|
815
|
+
/* we found a matching utf8 code point */
|
816
|
+
match = (utf8_int8_t *)src;
|
817
|
+
src += offset;
|
818
|
+
|
819
|
+
if ('\0' == *src) {
|
820
|
+
break;
|
821
|
+
}
|
681
822
|
} else {
|
682
|
-
|
823
|
+
src += offset;
|
683
824
|
|
684
|
-
|
685
|
-
|
686
|
-
if ('\0' != *
|
825
|
+
/* need to march s along to next utf8 codepoint start
|
826
|
+
* (the next byte that doesn't match 0b10xxxxxx) */
|
827
|
+
if ('\0' != *src) {
|
687
828
|
do {
|
688
|
-
|
689
|
-
} while (0x80 == (0xc0 & *
|
829
|
+
src++;
|
830
|
+
} while (0x80 == (0xc0 & *src));
|
690
831
|
}
|
691
832
|
}
|
692
833
|
}
|
693
834
|
|
694
|
-
|
695
|
-
return
|
835
|
+
/* return the last match we found (or 0 if no match was found) */
|
836
|
+
return match;
|
696
837
|
}
|
697
838
|
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
const char *a = (const char *)accept;
|
839
|
+
utf8_constexpr14_impl utf8_int8_t *utf8pbrk(const utf8_int8_t *str,
|
840
|
+
const utf8_int8_t *accept) {
|
841
|
+
while ('\0' != *str) {
|
842
|
+
const utf8_int8_t *a = accept;
|
703
843
|
size_t offset = 0;
|
704
844
|
|
705
845
|
while ('\0' != *a) {
|
706
|
-
|
707
|
-
|
708
|
-
|
846
|
+
/* checking that if *a is the start of a utf8 codepoint
|
847
|
+
* (it is not 0b10xxxxxx) and we have successfully matched
|
848
|
+
* a previous character (0 < offset) - we found a match */
|
709
849
|
if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
|
710
|
-
return (
|
850
|
+
return (utf8_int8_t *)str;
|
711
851
|
} else {
|
712
|
-
if (*a ==
|
713
|
-
|
714
|
-
|
852
|
+
if (*a == str[offset]) {
|
853
|
+
/* part of a utf8 codepoint matched, so move our checking
|
854
|
+
* onwards to the next byte */
|
715
855
|
offset++;
|
716
856
|
a++;
|
717
857
|
} else {
|
718
|
-
|
719
|
-
|
858
|
+
/* r could be in the middle of an unmatching utf8 code point,
|
859
|
+
* so we need to march it on to the next character beginning, */
|
720
860
|
|
721
861
|
do {
|
722
862
|
a++;
|
723
863
|
} while (0x80 == (0xc0 & *a));
|
724
864
|
|
725
|
-
|
865
|
+
/* reset offset too as we found a mismatch */
|
726
866
|
offset = 0;
|
727
867
|
}
|
728
868
|
}
|
729
869
|
}
|
730
870
|
|
731
|
-
|
871
|
+
/* we found a match on the last utf8 codepoint */
|
732
872
|
if (0 < offset) {
|
733
|
-
return (
|
873
|
+
return (utf8_int8_t *)str;
|
734
874
|
}
|
735
875
|
|
736
|
-
|
737
|
-
|
738
|
-
|
876
|
+
/* the current utf8 codepoint in src did not match accept, but src
|
877
|
+
* could have been partway through a utf8 codepoint, so we need to
|
878
|
+
* march it onto the next utf8 codepoint starting byte */
|
739
879
|
do {
|
740
|
-
|
741
|
-
} while ((0x80 == (0xc0 & *
|
880
|
+
str++;
|
881
|
+
} while ((0x80 == (0xc0 & *str)));
|
742
882
|
}
|
743
883
|
|
744
884
|
return utf8_null;
|
745
885
|
}
|
746
886
|
|
747
|
-
size_t utf8size(const
|
748
|
-
|
887
|
+
utf8_constexpr14_impl size_t utf8size(const utf8_int8_t *str) {
|
888
|
+
return utf8size_lazy(str) + 1;
|
889
|
+
}
|
890
|
+
|
891
|
+
utf8_constexpr14_impl size_t utf8size_lazy(const utf8_int8_t *str) {
|
892
|
+
return utf8nsize_lazy(str, SIZE_MAX);
|
893
|
+
}
|
894
|
+
|
895
|
+
utf8_constexpr14_impl size_t utf8nsize_lazy(const utf8_int8_t *str, size_t n) {
|
749
896
|
size_t size = 0;
|
750
|
-
while ('\0' !=
|
897
|
+
while (size < n && '\0' != str[size]) {
|
751
898
|
size++;
|
752
899
|
}
|
753
|
-
|
754
|
-
// we are including the null terminating byte in the size calculation
|
755
|
-
size++;
|
756
900
|
return size;
|
757
901
|
}
|
758
902
|
|
759
|
-
size_t utf8spn(const
|
760
|
-
|
903
|
+
utf8_constexpr14_impl size_t utf8spn(const utf8_int8_t *src,
|
904
|
+
const utf8_int8_t *accept) {
|
761
905
|
size_t chars = 0;
|
762
906
|
|
763
|
-
while ('\0' != *
|
764
|
-
const
|
907
|
+
while ('\0' != *src) {
|
908
|
+
const utf8_int8_t *a = accept;
|
765
909
|
size_t offset = 0;
|
766
910
|
|
767
911
|
while ('\0' != *a) {
|
768
|
-
|
769
|
-
|
770
|
-
|
912
|
+
/* checking that if *r is the start of a utf8 codepoint
|
913
|
+
* (it is not 0b10xxxxxx) and we have successfully matched
|
914
|
+
* a previous character (0 < offset) - we found a match */
|
771
915
|
if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
|
772
|
-
|
773
|
-
|
774
|
-
|
916
|
+
/* found a match, so increment the number of utf8 codepoints
|
917
|
+
* that have matched and stop checking whether any other utf8
|
918
|
+
* codepoints in a match */
|
775
919
|
chars++;
|
776
|
-
|
920
|
+
src += offset;
|
921
|
+
offset = 0;
|
777
922
|
break;
|
778
923
|
} else {
|
779
|
-
if (*a ==
|
924
|
+
if (*a == src[offset]) {
|
780
925
|
offset++;
|
781
926
|
a++;
|
782
927
|
} else {
|
783
|
-
|
784
|
-
|
928
|
+
/* a could be in the middle of an unmatching utf8 codepoint,
|
929
|
+
* so we need to march it on to the next character beginning, */
|
785
930
|
do {
|
786
931
|
a++;
|
787
932
|
} while (0x80 == (0xc0 & *a));
|
788
933
|
|
789
|
-
|
934
|
+
/* reset offset too as we found a mismatch */
|
790
935
|
offset = 0;
|
791
936
|
}
|
792
937
|
}
|
793
938
|
}
|
794
939
|
|
795
|
-
|
796
|
-
|
940
|
+
/* found a match at the end of *a, so didn't get a chance to test it */
|
941
|
+
if (0 < offset) {
|
942
|
+
chars++;
|
943
|
+
src += offset;
|
944
|
+
continue;
|
945
|
+
}
|
946
|
+
|
947
|
+
/* if a got to its terminating null byte, then we didn't find a match.
|
948
|
+
* Return the current number of matched utf8 codepoints */
|
797
949
|
if ('\0' == *a) {
|
798
950
|
return chars;
|
799
951
|
}
|
@@ -802,302 +954,405 @@ size_t utf8spn(const void *src, const void *accept) {
|
|
802
954
|
return chars;
|
803
955
|
}
|
804
956
|
|
805
|
-
|
806
|
-
|
957
|
+
utf8_constexpr14_impl utf8_int8_t *utf8str(const utf8_int8_t *haystack,
|
958
|
+
const utf8_int8_t *needle) {
|
959
|
+
utf8_int32_t throwaway_codepoint = 0;
|
807
960
|
|
808
|
-
|
809
|
-
|
810
|
-
if ('\0' == *
|
811
|
-
return (
|
961
|
+
/* if needle has no utf8 codepoints before the null terminating
|
962
|
+
* byte then return haystack */
|
963
|
+
if ('\0' == *needle) {
|
964
|
+
return (utf8_int8_t *)haystack;
|
812
965
|
}
|
813
966
|
|
814
|
-
while ('\0' != *
|
815
|
-
const
|
816
|
-
const
|
967
|
+
while ('\0' != *haystack) {
|
968
|
+
const utf8_int8_t *maybeMatch = haystack;
|
969
|
+
const utf8_int8_t *n = needle;
|
817
970
|
|
818
|
-
while (*
|
971
|
+
while (*haystack == *n && (*haystack != '\0' && *n != '\0')) {
|
819
972
|
n++;
|
820
|
-
|
973
|
+
haystack++;
|
821
974
|
}
|
822
975
|
|
823
976
|
if ('\0' == *n) {
|
824
|
-
|
825
|
-
|
826
|
-
return (
|
977
|
+
/* we found the whole utf8 string for needle in haystack at
|
978
|
+
* maybeMatch, so return it */
|
979
|
+
return (utf8_int8_t *)maybeMatch;
|
827
980
|
} else {
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
h++;
|
833
|
-
} while (0x80 == (0xc0 & *h));
|
834
|
-
}
|
981
|
+
/* h could be in the middle of an unmatching utf8 codepoint,
|
982
|
+
* so we need to march it on to the next character beginning
|
983
|
+
* starting from the current character */
|
984
|
+
haystack = utf8codepoint(maybeMatch, &throwaway_codepoint);
|
835
985
|
}
|
836
986
|
}
|
837
987
|
|
838
|
-
|
988
|
+
/* no match */
|
839
989
|
return utf8_null;
|
840
990
|
}
|
841
991
|
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
return (void *)haystack;
|
992
|
+
utf8_constexpr14_impl utf8_int8_t *utf8casestr(const utf8_int8_t *haystack,
|
993
|
+
const utf8_int8_t *needle) {
|
994
|
+
/* if needle has no utf8 codepoints before the null terminating
|
995
|
+
* byte then return haystack */
|
996
|
+
if ('\0' == *needle) {
|
997
|
+
return (utf8_int8_t *)haystack;
|
849
998
|
}
|
850
999
|
|
851
1000
|
for (;;) {
|
852
|
-
const
|
853
|
-
const
|
854
|
-
utf8_int32_t h_cp, n_cp;
|
1001
|
+
const utf8_int8_t *maybeMatch = haystack;
|
1002
|
+
const utf8_int8_t *n = needle;
|
1003
|
+
utf8_int32_t h_cp = 0, n_cp = 0;
|
855
1004
|
|
856
|
-
|
1005
|
+
/* Get the next code point and track it */
|
1006
|
+
const utf8_int8_t *nextH = haystack = utf8codepoint(haystack, &h_cp);
|
857
1007
|
n = utf8codepoint(n, &n_cp);
|
858
1008
|
|
859
1009
|
while ((0 != h_cp) && (0 != n_cp)) {
|
860
1010
|
h_cp = utf8lwrcodepoint(h_cp);
|
861
1011
|
n_cp = utf8lwrcodepoint(n_cp);
|
862
1012
|
|
863
|
-
|
1013
|
+
/* if we find a mismatch, bail out! */
|
864
1014
|
if (h_cp != n_cp) {
|
865
1015
|
break;
|
866
1016
|
}
|
867
1017
|
|
868
|
-
|
1018
|
+
haystack = utf8codepoint(haystack, &h_cp);
|
869
1019
|
n = utf8codepoint(n, &n_cp);
|
870
1020
|
}
|
871
1021
|
|
872
1022
|
if (0 == n_cp) {
|
873
|
-
|
874
|
-
|
875
|
-
return (
|
1023
|
+
/* we found the whole utf8 string for needle in haystack at
|
1024
|
+
* maybeMatch, so return it */
|
1025
|
+
return (utf8_int8_t *)maybeMatch;
|
876
1026
|
}
|
877
1027
|
|
878
1028
|
if (0 == h_cp) {
|
879
|
-
|
1029
|
+
/* no match */
|
880
1030
|
return utf8_null;
|
881
1031
|
}
|
1032
|
+
|
1033
|
+
/* Roll back to the next code point in the haystack to test */
|
1034
|
+
haystack = nextH;
|
882
1035
|
}
|
883
1036
|
}
|
884
1037
|
|
885
|
-
|
886
|
-
|
1038
|
+
utf8_constexpr14_impl utf8_int8_t *utf8valid(const utf8_int8_t *str) {
|
1039
|
+
return utf8nvalid(str, SIZE_MAX);
|
1040
|
+
}
|
1041
|
+
|
1042
|
+
utf8_constexpr14_impl utf8_int8_t *utf8nvalid(const utf8_int8_t *str,
|
1043
|
+
size_t n) {
|
1044
|
+
const utf8_int8_t *t = str;
|
1045
|
+
size_t consumed = 0;
|
1046
|
+
|
1047
|
+
while ((void)(consumed = (size_t)(str - t)), consumed < n && '\0' != *str) {
|
1048
|
+
const size_t remaining = n - consumed;
|
1049
|
+
|
1050
|
+
if (0xf0 == (0xf8 & *str)) {
|
1051
|
+
/* ensure that there's 4 bytes or more remaining */
|
1052
|
+
if (remaining < 4) {
|
1053
|
+
return (utf8_int8_t *)str;
|
1054
|
+
}
|
1055
|
+
|
1056
|
+
/* ensure each of the 3 following bytes in this 4-byte
|
1057
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1058
|
+
if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2])) ||
|
1059
|
+
(0x80 != (0xc0 & str[3]))) {
|
1060
|
+
return (utf8_int8_t *)str;
|
1061
|
+
}
|
1062
|
+
|
1063
|
+
/* ensure that our utf8 codepoint ended after 4 bytes */
|
1064
|
+
if ((remaining != 4) && (0x80 == (0xc0 & str[4]))) {
|
1065
|
+
return (utf8_int8_t *)str;
|
1066
|
+
}
|
887
1067
|
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
(0x80 != (0xc0 & s[3]))) {
|
894
|
-
return (void *)s;
|
1068
|
+
/* ensure that the top 5 bits of this 4-byte utf8
|
1069
|
+
* codepoint were not 0, as then we could have used
|
1070
|
+
* one of the smaller encodings */
|
1071
|
+
if ((0 == (0x07 & str[0])) && (0 == (0x30 & str[1]))) {
|
1072
|
+
return (utf8_int8_t *)str;
|
895
1073
|
}
|
896
1074
|
|
897
|
-
|
898
|
-
|
899
|
-
|
1075
|
+
/* 4-byte utf8 code point (began with 0b11110xxx) */
|
1076
|
+
str += 4;
|
1077
|
+
} else if (0xe0 == (0xf0 & *str)) {
|
1078
|
+
/* ensure that there's 3 bytes or more remaining */
|
1079
|
+
if (remaining < 3) {
|
1080
|
+
return (utf8_int8_t *)str;
|
900
1081
|
}
|
901
1082
|
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
return (void *)s;
|
1083
|
+
/* ensure each of the 2 following bytes in this 3-byte
|
1084
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1085
|
+
if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2]))) {
|
1086
|
+
return (utf8_int8_t *)str;
|
907
1087
|
}
|
908
1088
|
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
// ensure each of the 2 following bytes in this 3-byte
|
913
|
-
// utf8 codepoint began with 0b10xxxxxx
|
914
|
-
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
|
915
|
-
return (void *)s;
|
1089
|
+
/* ensure that our utf8 codepoint ended after 3 bytes */
|
1090
|
+
if ((remaining != 3) && (0x80 == (0xc0 & str[3]))) {
|
1091
|
+
return (utf8_int8_t *)str;
|
916
1092
|
}
|
917
1093
|
|
918
|
-
|
919
|
-
|
920
|
-
|
1094
|
+
/* ensure that the top 5 bits of this 3-byte utf8
|
1095
|
+
* codepoint were not 0, as then we could have used
|
1096
|
+
* one of the smaller encodings */
|
1097
|
+
if ((0 == (0x0f & str[0])) && (0 == (0x20 & str[1]))) {
|
1098
|
+
return (utf8_int8_t *)str;
|
921
1099
|
}
|
922
1100
|
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
1101
|
+
/* 3-byte utf8 code point (began with 0b1110xxxx) */
|
1102
|
+
str += 3;
|
1103
|
+
} else if (0xc0 == (0xe0 & *str)) {
|
1104
|
+
/* ensure that there's 2 bytes or more remaining */
|
1105
|
+
if (remaining < 2) {
|
1106
|
+
return (utf8_int8_t *)str;
|
928
1107
|
}
|
929
1108
|
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
// utf8 codepoint began with 0b10xxxxxx
|
935
|
-
if (0x80 != (0xc0 & s[1])) {
|
936
|
-
return (void *)s;
|
1109
|
+
/* ensure the 1 following byte in this 2-byte
|
1110
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1111
|
+
if (0x80 != (0xc0 & str[1])) {
|
1112
|
+
return (utf8_int8_t *)str;
|
937
1113
|
}
|
938
1114
|
|
939
|
-
|
940
|
-
if (0x80 == (0xc0 &
|
941
|
-
return (
|
1115
|
+
/* ensure that our utf8 codepoint ended after 2 bytes */
|
1116
|
+
if ((remaining != 2) && (0x80 == (0xc0 & str[2]))) {
|
1117
|
+
return (utf8_int8_t *)str;
|
942
1118
|
}
|
943
1119
|
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
if (0 == (0x1e &
|
948
|
-
return (
|
1120
|
+
/* ensure that the top 4 bits of this 2-byte utf8
|
1121
|
+
* codepoint were not 0, as then we could have used
|
1122
|
+
* one of the smaller encodings */
|
1123
|
+
if (0 == (0x1e & str[0])) {
|
1124
|
+
return (utf8_int8_t *)str;
|
949
1125
|
}
|
950
1126
|
|
951
|
-
|
952
|
-
|
953
|
-
} else if (0x00 == (0x80 & *
|
954
|
-
|
955
|
-
|
1127
|
+
/* 2-byte utf8 code point (began with 0b110xxxxx) */
|
1128
|
+
str += 2;
|
1129
|
+
} else if (0x00 == (0x80 & *str)) {
|
1130
|
+
/* 1-byte ascii (began with 0b0xxxxxxx) */
|
1131
|
+
str += 1;
|
956
1132
|
} else {
|
957
|
-
|
958
|
-
return (
|
1133
|
+
/* we have an invalid 0b1xxxxxxx utf8 code point entry */
|
1134
|
+
return (utf8_int8_t *)str;
|
959
1135
|
}
|
960
1136
|
}
|
961
1137
|
|
962
1138
|
return utf8_null;
|
963
1139
|
}
|
964
1140
|
|
965
|
-
|
966
|
-
|
967
|
-
|
1141
|
+
int utf8makevalid(utf8_int8_t *str, const utf8_int32_t replacement) {
|
1142
|
+
utf8_int8_t *read = str;
|
1143
|
+
utf8_int8_t *write = read;
|
1144
|
+
const utf8_int8_t r = (utf8_int8_t)replacement;
|
1145
|
+
utf8_int32_t codepoint = 0;
|
968
1146
|
|
969
|
-
if (
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
1147
|
+
if (replacement > 0x7f) {
|
1148
|
+
return -1;
|
1149
|
+
}
|
1150
|
+
|
1151
|
+
while ('\0' != *read) {
|
1152
|
+
if (0xf0 == (0xf8 & *read)) {
|
1153
|
+
/* ensure each of the 3 following bytes in this 4-byte
|
1154
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1155
|
+
if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2])) ||
|
1156
|
+
(0x80 != (0xc0 & read[3]))) {
|
1157
|
+
*write++ = r;
|
1158
|
+
read++;
|
1159
|
+
continue;
|
1160
|
+
}
|
1161
|
+
|
1162
|
+
/* 4-byte utf8 code point (began with 0b11110xxx) */
|
1163
|
+
read = utf8codepoint(read, &codepoint);
|
1164
|
+
write = utf8catcodepoint(write, codepoint, 4);
|
1165
|
+
} else if (0xe0 == (0xf0 & *read)) {
|
1166
|
+
/* ensure each of the 2 following bytes in this 3-byte
|
1167
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1168
|
+
if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2]))) {
|
1169
|
+
*write++ = r;
|
1170
|
+
read++;
|
1171
|
+
continue;
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
/* 3-byte utf8 code point (began with 0b1110xxxx) */
|
1175
|
+
read = utf8codepoint(read, &codepoint);
|
1176
|
+
write = utf8catcodepoint(write, codepoint, 3);
|
1177
|
+
} else if (0xc0 == (0xe0 & *read)) {
|
1178
|
+
/* ensure the 1 following byte in this 2-byte
|
1179
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1180
|
+
if (0x80 != (0xc0 & read[1])) {
|
1181
|
+
*write++ = r;
|
1182
|
+
read++;
|
1183
|
+
continue;
|
1184
|
+
}
|
1185
|
+
|
1186
|
+
/* 2-byte utf8 code point (began with 0b110xxxxx) */
|
1187
|
+
read = utf8codepoint(read, &codepoint);
|
1188
|
+
write = utf8catcodepoint(write, codepoint, 2);
|
1189
|
+
} else if (0x00 == (0x80 & *read)) {
|
1190
|
+
/* 1-byte ascii (began with 0b0xxxxxxx) */
|
1191
|
+
read = utf8codepoint(read, &codepoint);
|
1192
|
+
write = utf8catcodepoint(write, codepoint, 1);
|
1193
|
+
} else {
|
1194
|
+
/* if we got here then we've got a dangling continuation (0b10xxxxxx) */
|
1195
|
+
*write++ = r;
|
1196
|
+
read++;
|
1197
|
+
continue;
|
1198
|
+
}
|
1199
|
+
}
|
1200
|
+
|
1201
|
+
*write = '\0';
|
1202
|
+
|
1203
|
+
return 0;
|
1204
|
+
}
|
1205
|
+
|
1206
|
+
utf8_constexpr14_impl utf8_int8_t *
|
1207
|
+
utf8codepoint(const utf8_int8_t *utf8_restrict str,
|
1208
|
+
utf8_int32_t *utf8_restrict out_codepoint) {
|
1209
|
+
if (0xf0 == (0xf8 & str[0])) {
|
1210
|
+
/* 4 byte utf8 codepoint */
|
1211
|
+
*out_codepoint = ((0x07 & str[0]) << 18) | ((0x3f & str[1]) << 12) |
|
1212
|
+
((0x3f & str[2]) << 6) | (0x3f & str[3]);
|
1213
|
+
str += 4;
|
1214
|
+
} else if (0xe0 == (0xf0 & str[0])) {
|
1215
|
+
/* 3 byte utf8 codepoint */
|
976
1216
|
*out_codepoint =
|
977
|
-
((0x0f &
|
978
|
-
|
979
|
-
} else if (0xc0 == (0xe0 &
|
980
|
-
|
981
|
-
*out_codepoint = ((0x1f &
|
982
|
-
|
1217
|
+
((0x0f & str[0]) << 12) | ((0x3f & str[1]) << 6) | (0x3f & str[2]);
|
1218
|
+
str += 3;
|
1219
|
+
} else if (0xc0 == (0xe0 & str[0])) {
|
1220
|
+
/* 2 byte utf8 codepoint */
|
1221
|
+
*out_codepoint = ((0x1f & str[0]) << 6) | (0x3f & str[1]);
|
1222
|
+
str += 2;
|
983
1223
|
} else {
|
984
|
-
|
985
|
-
*out_codepoint =
|
986
|
-
|
1224
|
+
/* 1 byte utf8 codepoint otherwise */
|
1225
|
+
*out_codepoint = str[0];
|
1226
|
+
str += 1;
|
987
1227
|
}
|
988
1228
|
|
989
|
-
return (
|
1229
|
+
return (utf8_int8_t *)str;
|
990
1230
|
}
|
991
1231
|
|
992
|
-
size_t
|
1232
|
+
utf8_constexpr14_impl size_t utf8codepointcalcsize(const utf8_int8_t *str) {
|
1233
|
+
if (0xf0 == (0xf8 & str[0])) {
|
1234
|
+
/* 4 byte utf8 codepoint */
|
1235
|
+
return 4;
|
1236
|
+
} else if (0xe0 == (0xf0 & str[0])) {
|
1237
|
+
/* 3 byte utf8 codepoint */
|
1238
|
+
return 3;
|
1239
|
+
} else if (0xc0 == (0xe0 & str[0])) {
|
1240
|
+
/* 2 byte utf8 codepoint */
|
1241
|
+
return 2;
|
1242
|
+
}
|
1243
|
+
|
1244
|
+
/* 1 byte utf8 codepoint otherwise */
|
1245
|
+
return 1;
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
utf8_constexpr14_impl size_t utf8codepointsize(utf8_int32_t chr) {
|
993
1249
|
if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
|
994
1250
|
return 1;
|
995
1251
|
} else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
|
996
1252
|
return 2;
|
997
1253
|
} else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
|
998
1254
|
return 3;
|
999
|
-
} else {
|
1255
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
1000
1256
|
return 4;
|
1001
1257
|
}
|
1002
1258
|
}
|
1003
1259
|
|
1004
|
-
|
1005
|
-
char *s = (char *)str;
|
1006
|
-
|
1260
|
+
utf8_int8_t *utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n) {
|
1007
1261
|
if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
|
1008
|
-
|
1009
|
-
|
1262
|
+
/* 1-byte/7-bit ascii
|
1263
|
+
* (0b0xxxxxxx) */
|
1010
1264
|
if (n < 1) {
|
1011
1265
|
return utf8_null;
|
1012
1266
|
}
|
1013
|
-
|
1014
|
-
|
1267
|
+
str[0] = (utf8_int8_t)chr;
|
1268
|
+
str += 1;
|
1015
1269
|
} else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
|
1016
|
-
|
1017
|
-
|
1270
|
+
/* 2-byte/11-bit utf8 code point
|
1271
|
+
* (0b110xxxxx 0b10xxxxxx) */
|
1018
1272
|
if (n < 2) {
|
1019
1273
|
return utf8_null;
|
1020
1274
|
}
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1275
|
+
str[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)((chr >> 6) & 0x1f));
|
1276
|
+
str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
1277
|
+
str += 2;
|
1024
1278
|
} else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
|
1025
|
-
|
1026
|
-
|
1279
|
+
/* 3-byte/16-bit utf8 code point
|
1280
|
+
* (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
|
1027
1281
|
if (n < 3) {
|
1028
1282
|
return utf8_null;
|
1029
1283
|
}
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
} else {
|
1035
|
-
|
1036
|
-
|
1284
|
+
str[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)((chr >> 12) & 0x0f));
|
1285
|
+
str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
1286
|
+
str[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
1287
|
+
str += 3;
|
1288
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
1289
|
+
/* 4-byte/21-bit utf8 code point
|
1290
|
+
* (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
|
1037
1291
|
if (n < 4) {
|
1038
1292
|
return utf8_null;
|
1039
1293
|
}
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1294
|
+
str[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)((chr >> 18) & 0x07));
|
1295
|
+
str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
|
1296
|
+
str[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
1297
|
+
str[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
1298
|
+
str += 4;
|
1045
1299
|
}
|
1046
1300
|
|
1047
|
-
return
|
1301
|
+
return str;
|
1048
1302
|
}
|
1049
1303
|
|
1050
|
-
int utf8islower(utf8_int32_t chr) {
|
1051
|
-
|
1052
|
-
|
1304
|
+
utf8_constexpr14_impl int utf8islower(utf8_int32_t chr) {
|
1305
|
+
return chr != utf8uprcodepoint(chr);
|
1306
|
+
}
|
1053
1307
|
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1308
|
+
utf8_constexpr14_impl int utf8isupper(utf8_int32_t chr) {
|
1309
|
+
return chr != utf8lwrcodepoint(chr);
|
1310
|
+
}
|
1057
1311
|
|
1058
|
-
|
1059
|
-
|
1312
|
+
void utf8lwr(utf8_int8_t *utf8_restrict str) {
|
1313
|
+
utf8_int32_t cp = 0;
|
1314
|
+
utf8_int8_t *pn = utf8codepoint(str, &cp);
|
1060
1315
|
|
1061
1316
|
while (cp != 0) {
|
1062
1317
|
const utf8_int32_t lwr_cp = utf8lwrcodepoint(cp);
|
1063
1318
|
const size_t size = utf8codepointsize(lwr_cp);
|
1064
1319
|
|
1065
1320
|
if (lwr_cp != cp) {
|
1066
|
-
utf8catcodepoint(
|
1321
|
+
utf8catcodepoint(str, lwr_cp, size);
|
1067
1322
|
}
|
1068
1323
|
|
1069
|
-
|
1070
|
-
pn = utf8codepoint(
|
1324
|
+
str = pn;
|
1325
|
+
pn = utf8codepoint(str, &cp);
|
1071
1326
|
}
|
1072
1327
|
}
|
1073
1328
|
|
1074
|
-
void utf8upr(
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
p = (char *)str;
|
1079
|
-
pn = utf8codepoint(p, &cp);
|
1329
|
+
void utf8upr(utf8_int8_t *utf8_restrict str) {
|
1330
|
+
utf8_int32_t cp = 0;
|
1331
|
+
utf8_int8_t *pn = utf8codepoint(str, &cp);
|
1080
1332
|
|
1081
1333
|
while (cp != 0) {
|
1082
1334
|
const utf8_int32_t lwr_cp = utf8uprcodepoint(cp);
|
1083
1335
|
const size_t size = utf8codepointsize(lwr_cp);
|
1084
1336
|
|
1085
1337
|
if (lwr_cp != cp) {
|
1086
|
-
utf8catcodepoint(
|
1338
|
+
utf8catcodepoint(str, lwr_cp, size);
|
1087
1339
|
}
|
1088
1340
|
|
1089
|
-
|
1090
|
-
pn = utf8codepoint(
|
1341
|
+
str = pn;
|
1342
|
+
pn = utf8codepoint(str, &cp);
|
1091
1343
|
}
|
1092
1344
|
}
|
1093
1345
|
|
1094
|
-
utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
1346
|
+
utf8_constexpr14_impl utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
1095
1347
|
if (((0x0041 <= cp) && (0x005a >= cp)) ||
|
1096
1348
|
((0x00c0 <= cp) && (0x00d6 >= cp)) ||
|
1097
1349
|
((0x00d8 <= cp) && (0x00de >= cp)) ||
|
1098
1350
|
((0x0391 <= cp) && (0x03a1 >= cp)) ||
|
1099
|
-
((0x03a3 <= cp) && (0x03ab >= cp))
|
1351
|
+
((0x03a3 <= cp) && (0x03ab >= cp)) ||
|
1352
|
+
((0x0410 <= cp) && (0x042f >= cp))) {
|
1100
1353
|
cp += 32;
|
1354
|
+
} else if ((0x0400 <= cp) && (0x040f >= cp)) {
|
1355
|
+
cp += 80;
|
1101
1356
|
} else if (((0x0100 <= cp) && (0x012f >= cp)) ||
|
1102
1357
|
((0x0132 <= cp) && (0x0137 >= cp)) ||
|
1103
1358
|
((0x014a <= cp) && (0x0177 >= cp)) ||
|
@@ -1107,7 +1362,9 @@ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
|
1107
1362
|
((0x01f8 <= cp) && (0x021f >= cp)) ||
|
1108
1363
|
((0x0222 <= cp) && (0x0233 >= cp)) ||
|
1109
1364
|
((0x0246 <= cp) && (0x024f >= cp)) ||
|
1110
|
-
((0x03d8 <= cp) && (0x03ef >= cp))
|
1365
|
+
((0x03d8 <= cp) && (0x03ef >= cp)) ||
|
1366
|
+
((0x0460 <= cp) && (0x0481 >= cp)) ||
|
1367
|
+
((0x048a <= cp) && (0x04ff >= cp))) {
|
1111
1368
|
cp |= 0x1;
|
1112
1369
|
} else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
|
1113
1370
|
((0x0179 <= cp) && (0x017e >= cp)) ||
|
@@ -1118,62 +1375,147 @@ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
|
1118
1375
|
cp &= ~0x1;
|
1119
1376
|
} else {
|
1120
1377
|
switch (cp) {
|
1121
|
-
default:
|
1122
|
-
|
1123
|
-
case
|
1124
|
-
|
1125
|
-
|
1126
|
-
case
|
1127
|
-
|
1128
|
-
|
1129
|
-
case
|
1130
|
-
|
1131
|
-
|
1132
|
-
case
|
1133
|
-
|
1134
|
-
|
1135
|
-
case
|
1136
|
-
|
1137
|
-
|
1138
|
-
case
|
1139
|
-
|
1140
|
-
|
1141
|
-
case
|
1142
|
-
|
1143
|
-
|
1144
|
-
case
|
1145
|
-
|
1146
|
-
|
1147
|
-
case
|
1148
|
-
|
1149
|
-
|
1150
|
-
case
|
1151
|
-
|
1152
|
-
|
1153
|
-
case
|
1154
|
-
|
1155
|
-
|
1156
|
-
case
|
1157
|
-
|
1158
|
-
|
1159
|
-
case
|
1160
|
-
|
1161
|
-
|
1162
|
-
case
|
1163
|
-
|
1164
|
-
|
1378
|
+
default:
|
1379
|
+
break;
|
1380
|
+
case 0x0178:
|
1381
|
+
cp = 0x00ff;
|
1382
|
+
break;
|
1383
|
+
case 0x0243:
|
1384
|
+
cp = 0x0180;
|
1385
|
+
break;
|
1386
|
+
case 0x018e:
|
1387
|
+
cp = 0x01dd;
|
1388
|
+
break;
|
1389
|
+
case 0x023d:
|
1390
|
+
cp = 0x019a;
|
1391
|
+
break;
|
1392
|
+
case 0x0220:
|
1393
|
+
cp = 0x019e;
|
1394
|
+
break;
|
1395
|
+
case 0x01b7:
|
1396
|
+
cp = 0x0292;
|
1397
|
+
break;
|
1398
|
+
case 0x01c4:
|
1399
|
+
cp = 0x01c6;
|
1400
|
+
break;
|
1401
|
+
case 0x01c7:
|
1402
|
+
cp = 0x01c9;
|
1403
|
+
break;
|
1404
|
+
case 0x01ca:
|
1405
|
+
cp = 0x01cc;
|
1406
|
+
break;
|
1407
|
+
case 0x01f1:
|
1408
|
+
cp = 0x01f3;
|
1409
|
+
break;
|
1410
|
+
case 0x01f7:
|
1411
|
+
cp = 0x01bf;
|
1412
|
+
break;
|
1413
|
+
case 0x0187:
|
1414
|
+
cp = 0x0188;
|
1415
|
+
break;
|
1416
|
+
case 0x018b:
|
1417
|
+
cp = 0x018c;
|
1418
|
+
break;
|
1419
|
+
case 0x0191:
|
1420
|
+
cp = 0x0192;
|
1421
|
+
break;
|
1422
|
+
case 0x0198:
|
1423
|
+
cp = 0x0199;
|
1424
|
+
break;
|
1425
|
+
case 0x01a7:
|
1426
|
+
cp = 0x01a8;
|
1427
|
+
break;
|
1428
|
+
case 0x01ac:
|
1429
|
+
cp = 0x01ad;
|
1430
|
+
break;
|
1431
|
+
case 0x01b8:
|
1432
|
+
cp = 0x01b9;
|
1433
|
+
break;
|
1434
|
+
case 0x01bc:
|
1435
|
+
cp = 0x01bd;
|
1436
|
+
break;
|
1437
|
+
case 0x01f4:
|
1438
|
+
cp = 0x01f5;
|
1439
|
+
break;
|
1440
|
+
case 0x023b:
|
1441
|
+
cp = 0x023c;
|
1442
|
+
break;
|
1443
|
+
case 0x0241:
|
1444
|
+
cp = 0x0242;
|
1445
|
+
break;
|
1446
|
+
case 0x03fd:
|
1447
|
+
cp = 0x037b;
|
1448
|
+
break;
|
1449
|
+
case 0x03fe:
|
1450
|
+
cp = 0x037c;
|
1451
|
+
break;
|
1452
|
+
case 0x03ff:
|
1453
|
+
cp = 0x037d;
|
1454
|
+
break;
|
1455
|
+
case 0x037f:
|
1456
|
+
cp = 0x03f3;
|
1457
|
+
break;
|
1458
|
+
case 0x0386:
|
1459
|
+
cp = 0x03ac;
|
1460
|
+
break;
|
1461
|
+
case 0x0388:
|
1462
|
+
cp = 0x03ad;
|
1463
|
+
break;
|
1464
|
+
case 0x0389:
|
1465
|
+
cp = 0x03ae;
|
1466
|
+
break;
|
1467
|
+
case 0x038a:
|
1468
|
+
cp = 0x03af;
|
1469
|
+
break;
|
1470
|
+
case 0x038c:
|
1471
|
+
cp = 0x03cc;
|
1472
|
+
break;
|
1473
|
+
case 0x038e:
|
1474
|
+
cp = 0x03cd;
|
1475
|
+
break;
|
1476
|
+
case 0x038f:
|
1477
|
+
cp = 0x03ce;
|
1478
|
+
break;
|
1479
|
+
case 0x0370:
|
1480
|
+
cp = 0x0371;
|
1481
|
+
break;
|
1482
|
+
case 0x0372:
|
1483
|
+
cp = 0x0373;
|
1484
|
+
break;
|
1485
|
+
case 0x0376:
|
1486
|
+
cp = 0x0377;
|
1487
|
+
break;
|
1488
|
+
case 0x03f4:
|
1489
|
+
cp = 0x03b8;
|
1490
|
+
break;
|
1491
|
+
case 0x03cf:
|
1492
|
+
cp = 0x03d7;
|
1493
|
+
break;
|
1494
|
+
case 0x03f9:
|
1495
|
+
cp = 0x03f2;
|
1496
|
+
break;
|
1497
|
+
case 0x03f7:
|
1498
|
+
cp = 0x03f8;
|
1499
|
+
break;
|
1500
|
+
case 0x03fa:
|
1501
|
+
cp = 0x03fb;
|
1502
|
+
break;
|
1503
|
+
}
|
1165
1504
|
}
|
1166
1505
|
|
1167
1506
|
return cp;
|
1168
1507
|
}
|
1169
1508
|
|
1170
|
-
utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
1509
|
+
utf8_constexpr14_impl utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
1171
1510
|
if (((0x0061 <= cp) && (0x007a >= cp)) ||
|
1172
1511
|
((0x00e0 <= cp) && (0x00f6 >= cp)) ||
|
1173
1512
|
((0x00f8 <= cp) && (0x00fe >= cp)) ||
|
1174
1513
|
((0x03b1 <= cp) && (0x03c1 >= cp)) ||
|
1175
|
-
((0x03c3 <= cp) && (0x03cb >= cp))
|
1514
|
+
((0x03c3 <= cp) && (0x03cb >= cp)) ||
|
1515
|
+
((0x0430 <= cp) && (0x044f >= cp))) {
|
1176
1516
|
cp -= 32;
|
1517
|
+
} else if ((0x0450 <= cp) && (0x045f >= cp)) {
|
1518
|
+
cp -= 80;
|
1177
1519
|
} else if (((0x0100 <= cp) && (0x012f >= cp)) ||
|
1178
1520
|
((0x0132 <= cp) && (0x0137 >= cp)) ||
|
1179
1521
|
((0x014a <= cp) && (0x0177 >= cp)) ||
|
@@ -1183,7 +1525,9 @@ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
|
1183
1525
|
((0x01f8 <= cp) && (0x021f >= cp)) ||
|
1184
1526
|
((0x0222 <= cp) && (0x0233 >= cp)) ||
|
1185
1527
|
((0x0246 <= cp) && (0x024f >= cp)) ||
|
1186
|
-
((0x03d8 <= cp) && (0x03ef >= cp))
|
1528
|
+
((0x03d8 <= cp) && (0x03ef >= cp)) ||
|
1529
|
+
((0x0460 <= cp) && (0x0481 >= cp)) ||
|
1530
|
+
((0x048a <= cp) && (0x04ff >= cp))) {
|
1187
1531
|
cp &= ~0x1;
|
1188
1532
|
} else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
|
1189
1533
|
((0x0179 <= cp) && (0x017e >= cp)) ||
|
@@ -1194,64 +1538,175 @@ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
|
1194
1538
|
cp |= 0x1;
|
1195
1539
|
} else {
|
1196
1540
|
switch (cp) {
|
1197
|
-
default:
|
1198
|
-
|
1199
|
-
case
|
1200
|
-
|
1201
|
-
|
1202
|
-
case
|
1203
|
-
|
1204
|
-
|
1205
|
-
case
|
1206
|
-
|
1207
|
-
|
1208
|
-
case
|
1209
|
-
|
1210
|
-
|
1211
|
-
case
|
1212
|
-
|
1213
|
-
|
1214
|
-
case
|
1215
|
-
|
1216
|
-
|
1217
|
-
case
|
1218
|
-
|
1219
|
-
|
1220
|
-
case
|
1221
|
-
|
1222
|
-
|
1223
|
-
case
|
1224
|
-
|
1225
|
-
|
1226
|
-
case
|
1227
|
-
|
1228
|
-
|
1229
|
-
case
|
1230
|
-
|
1231
|
-
|
1232
|
-
case
|
1233
|
-
|
1234
|
-
|
1235
|
-
case
|
1236
|
-
|
1237
|
-
|
1238
|
-
case
|
1239
|
-
|
1240
|
-
|
1541
|
+
default:
|
1542
|
+
break;
|
1543
|
+
case 0x00ff:
|
1544
|
+
cp = 0x0178;
|
1545
|
+
break;
|
1546
|
+
case 0x0180:
|
1547
|
+
cp = 0x0243;
|
1548
|
+
break;
|
1549
|
+
case 0x01dd:
|
1550
|
+
cp = 0x018e;
|
1551
|
+
break;
|
1552
|
+
case 0x019a:
|
1553
|
+
cp = 0x023d;
|
1554
|
+
break;
|
1555
|
+
case 0x019e:
|
1556
|
+
cp = 0x0220;
|
1557
|
+
break;
|
1558
|
+
case 0x0292:
|
1559
|
+
cp = 0x01b7;
|
1560
|
+
break;
|
1561
|
+
case 0x01c6:
|
1562
|
+
cp = 0x01c4;
|
1563
|
+
break;
|
1564
|
+
case 0x01c9:
|
1565
|
+
cp = 0x01c7;
|
1566
|
+
break;
|
1567
|
+
case 0x01cc:
|
1568
|
+
cp = 0x01ca;
|
1569
|
+
break;
|
1570
|
+
case 0x01f3:
|
1571
|
+
cp = 0x01f1;
|
1572
|
+
break;
|
1573
|
+
case 0x01bf:
|
1574
|
+
cp = 0x01f7;
|
1575
|
+
break;
|
1576
|
+
case 0x0188:
|
1577
|
+
cp = 0x0187;
|
1578
|
+
break;
|
1579
|
+
case 0x018c:
|
1580
|
+
cp = 0x018b;
|
1581
|
+
break;
|
1582
|
+
case 0x0192:
|
1583
|
+
cp = 0x0191;
|
1584
|
+
break;
|
1585
|
+
case 0x0199:
|
1586
|
+
cp = 0x0198;
|
1587
|
+
break;
|
1588
|
+
case 0x01a8:
|
1589
|
+
cp = 0x01a7;
|
1590
|
+
break;
|
1591
|
+
case 0x01ad:
|
1592
|
+
cp = 0x01ac;
|
1593
|
+
break;
|
1594
|
+
case 0x01b9:
|
1595
|
+
cp = 0x01b8;
|
1596
|
+
break;
|
1597
|
+
case 0x01bd:
|
1598
|
+
cp = 0x01bc;
|
1599
|
+
break;
|
1600
|
+
case 0x01f5:
|
1601
|
+
cp = 0x01f4;
|
1602
|
+
break;
|
1603
|
+
case 0x023c:
|
1604
|
+
cp = 0x023b;
|
1605
|
+
break;
|
1606
|
+
case 0x0242:
|
1607
|
+
cp = 0x0241;
|
1608
|
+
break;
|
1609
|
+
case 0x037b:
|
1610
|
+
cp = 0x03fd;
|
1611
|
+
break;
|
1612
|
+
case 0x037c:
|
1613
|
+
cp = 0x03fe;
|
1614
|
+
break;
|
1615
|
+
case 0x037d:
|
1616
|
+
cp = 0x03ff;
|
1617
|
+
break;
|
1618
|
+
case 0x03f3:
|
1619
|
+
cp = 0x037f;
|
1620
|
+
break;
|
1621
|
+
case 0x03ac:
|
1622
|
+
cp = 0x0386;
|
1623
|
+
break;
|
1624
|
+
case 0x03ad:
|
1625
|
+
cp = 0x0388;
|
1626
|
+
break;
|
1627
|
+
case 0x03ae:
|
1628
|
+
cp = 0x0389;
|
1629
|
+
break;
|
1630
|
+
case 0x03af:
|
1631
|
+
cp = 0x038a;
|
1632
|
+
break;
|
1633
|
+
case 0x03cc:
|
1634
|
+
cp = 0x038c;
|
1635
|
+
break;
|
1636
|
+
case 0x03cd:
|
1637
|
+
cp = 0x038e;
|
1638
|
+
break;
|
1639
|
+
case 0x03ce:
|
1640
|
+
cp = 0x038f;
|
1641
|
+
break;
|
1642
|
+
case 0x0371:
|
1643
|
+
cp = 0x0370;
|
1644
|
+
break;
|
1645
|
+
case 0x0373:
|
1646
|
+
cp = 0x0372;
|
1647
|
+
break;
|
1648
|
+
case 0x0377:
|
1649
|
+
cp = 0x0376;
|
1650
|
+
break;
|
1651
|
+
case 0x03d1:
|
1652
|
+
cp = 0x0398;
|
1653
|
+
break;
|
1654
|
+
case 0x03d7:
|
1655
|
+
cp = 0x03cf;
|
1656
|
+
break;
|
1657
|
+
case 0x03f2:
|
1658
|
+
cp = 0x03f9;
|
1659
|
+
break;
|
1660
|
+
case 0x03f8:
|
1661
|
+
cp = 0x03f7;
|
1662
|
+
break;
|
1663
|
+
case 0x03fb:
|
1664
|
+
cp = 0x03fa;
|
1665
|
+
break;
|
1666
|
+
}
|
1241
1667
|
}
|
1242
1668
|
|
1243
1669
|
return cp;
|
1244
1670
|
}
|
1245
1671
|
|
1672
|
+
utf8_constexpr14_impl utf8_int8_t *
|
1673
|
+
utf8rcodepoint(const utf8_int8_t *utf8_restrict str,
|
1674
|
+
utf8_int32_t *utf8_restrict out_codepoint) {
|
1675
|
+
const utf8_int8_t *s = (const utf8_int8_t *)str;
|
1676
|
+
|
1677
|
+
if (0xf0 == (0xf8 & s[0])) {
|
1678
|
+
/* 4 byte utf8 codepoint */
|
1679
|
+
*out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) |
|
1680
|
+
((0x3f & s[2]) << 6) | (0x3f & s[3]);
|
1681
|
+
} else if (0xe0 == (0xf0 & s[0])) {
|
1682
|
+
/* 3 byte utf8 codepoint */
|
1683
|
+
*out_codepoint =
|
1684
|
+
((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
|
1685
|
+
} else if (0xc0 == (0xe0 & s[0])) {
|
1686
|
+
/* 2 byte utf8 codepoint */
|
1687
|
+
*out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
|
1688
|
+
} else {
|
1689
|
+
/* 1 byte utf8 codepoint otherwise */
|
1690
|
+
*out_codepoint = s[0];
|
1691
|
+
}
|
1692
|
+
|
1693
|
+
do {
|
1694
|
+
s--;
|
1695
|
+
} while ((0 != (0x80 & s[0])) && (0x80 == (0xc0 & s[0])));
|
1696
|
+
|
1697
|
+
return (utf8_int8_t *)s;
|
1698
|
+
}
|
1699
|
+
|
1246
1700
|
#undef utf8_restrict
|
1701
|
+
#undef utf8_constexpr14
|
1247
1702
|
#undef utf8_null
|
1248
1703
|
|
1249
|
-
#ifdef
|
1250
|
-
}
|
1704
|
+
#ifdef utf8_cplusplus
|
1705
|
+
} /* extern "C" */
|
1251
1706
|
#endif
|
1252
1707
|
|
1253
1708
|
#if defined(__clang__)
|
1254
1709
|
#pragma clang diagnostic pop
|
1255
1710
|
#endif
|
1256
1711
|
|
1257
|
-
#endif
|
1712
|
+
#endif /* SHEREDOM_UTF8_H_INCLUDED */
|