rugged 1.7.2 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rugged/version.rb +1 -1
- data/vendor/libgit2/AUTHORS +1 -0
- data/vendor/libgit2/CMakeLists.txt +23 -10
- data/vendor/libgit2/COPYING +195 -1
- data/vendor/libgit2/cmake/{FindIconv.cmake → FindIntlIconv.cmake} +6 -0
- data/vendor/libgit2/cmake/FindLLHTTP.cmake +39 -0
- data/vendor/libgit2/cmake/SelectGSSAPI.cmake +1 -1
- data/vendor/libgit2/cmake/SelectHTTPParser.cmake +23 -8
- data/vendor/libgit2/cmake/SelectHTTPSBackend.cmake +17 -8
- data/vendor/libgit2/cmake/SelectHashes.cmake +28 -11
- data/vendor/libgit2/cmake/SelectRegex.cmake +6 -1
- data/vendor/libgit2/cmake/SelectSSH.cmake +22 -17
- data/vendor/libgit2/cmake/SelectZlib.cmake +4 -0
- data/vendor/libgit2/deps/llhttp/CMakeLists.txt +8 -0
- data/vendor/libgit2/deps/llhttp/LICENSE-MIT +22 -0
- data/vendor/libgit2/deps/llhttp/api.c +510 -0
- data/vendor/libgit2/deps/llhttp/http.c +170 -0
- data/vendor/libgit2/deps/llhttp/llhttp.c +10168 -0
- data/vendor/libgit2/deps/llhttp/llhttp.h +897 -0
- data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +1 -1
- data/vendor/libgit2/deps/ntlmclient/crypt_builtin_md4.c +311 -0
- data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +2 -1
- data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +0 -20
- data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +4 -4
- data/vendor/libgit2/deps/ntlmclient/ntlm.c +21 -21
- data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +5 -4
- data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +2 -1
- data/vendor/libgit2/deps/ntlmclient/utf8.h +1176 -721
- data/vendor/libgit2/deps/ntlmclient/util.h +11 -0
- data/vendor/libgit2/deps/pcre/CMakeLists.txt +1 -0
- data/vendor/libgit2/deps/xdiff/xmerge.c +2 -2
- data/vendor/libgit2/deps/zlib/CMakeLists.txt +6 -1
- data/vendor/libgit2/deps/zlib/LICENSE +22 -0
- data/vendor/libgit2/deps/zlib/adler32.c +5 -27
- data/vendor/libgit2/deps/zlib/crc32.c +94 -167
- data/vendor/libgit2/deps/zlib/deflate.c +358 -435
- data/vendor/libgit2/deps/zlib/deflate.h +41 -10
- data/vendor/libgit2/deps/zlib/gzguts.h +13 -18
- data/vendor/libgit2/deps/zlib/infback.c +17 -30
- data/vendor/libgit2/deps/zlib/inffast.c +1 -4
- data/vendor/libgit2/deps/zlib/inffast.h +1 -1
- data/vendor/libgit2/deps/zlib/inflate.c +36 -102
- data/vendor/libgit2/deps/zlib/inftrees.c +6 -11
- data/vendor/libgit2/deps/zlib/inftrees.h +6 -6
- data/vendor/libgit2/deps/zlib/trees.c +287 -352
- data/vendor/libgit2/deps/zlib/zconf.h +23 -14
- data/vendor/libgit2/deps/zlib/zlib.h +202 -202
- data/vendor/libgit2/deps/zlib/zutil.c +18 -44
- data/vendor/libgit2/deps/zlib/zutil.h +13 -33
- data/vendor/libgit2/include/git2/annotated_commit.h +12 -5
- data/vendor/libgit2/include/git2/apply.h +27 -6
- data/vendor/libgit2/include/git2/attr.h +17 -4
- data/vendor/libgit2/include/git2/blame.h +133 -28
- data/vendor/libgit2/include/git2/blob.h +71 -28
- data/vendor/libgit2/include/git2/branch.h +22 -15
- data/vendor/libgit2/include/git2/buffer.h +6 -4
- data/vendor/libgit2/include/git2/cert.h +2 -1
- data/vendor/libgit2/include/git2/checkout.h +83 -32
- data/vendor/libgit2/include/git2/cherrypick.h +10 -3
- data/vendor/libgit2/include/git2/clone.h +25 -9
- data/vendor/libgit2/include/git2/commit.h +132 -3
- data/vendor/libgit2/include/git2/common.h +120 -63
- data/vendor/libgit2/include/git2/config.h +93 -23
- data/vendor/libgit2/include/git2/credential.h +30 -2
- data/vendor/libgit2/include/git2/credential_helpers.h +1 -0
- data/vendor/libgit2/include/git2/deprecated.h +133 -3
- data/vendor/libgit2/include/git2/describe.h +13 -1
- data/vendor/libgit2/include/git2/diff.h +38 -8
- data/vendor/libgit2/include/git2/email.h +9 -29
- data/vendor/libgit2/include/git2/errors.h +46 -73
- data/vendor/libgit2/include/git2/filter.h +14 -7
- data/vendor/libgit2/include/git2/global.h +8 -1
- data/vendor/libgit2/include/git2/graph.h +3 -2
- data/vendor/libgit2/include/git2/ignore.h +10 -0
- data/vendor/libgit2/include/git2/index.h +99 -14
- data/vendor/libgit2/include/git2/indexer.h +21 -4
- data/vendor/libgit2/include/git2/mailmap.h +7 -1
- data/vendor/libgit2/include/git2/merge.h +46 -1
- data/vendor/libgit2/include/git2/message.h +2 -2
- data/vendor/libgit2/include/git2/net.h +3 -1
- data/vendor/libgit2/include/git2/notes.h +9 -6
- data/vendor/libgit2/include/git2/object.h +9 -8
- data/vendor/libgit2/include/git2/odb.h +91 -49
- data/vendor/libgit2/include/git2/odb_backend.h +80 -52
- data/vendor/libgit2/include/git2/oid.h +23 -24
- data/vendor/libgit2/include/git2/oidarray.h +7 -1
- data/vendor/libgit2/include/git2/pack.h +13 -1
- data/vendor/libgit2/include/git2/patch.h +2 -3
- data/vendor/libgit2/include/git2/pathspec.h +9 -0
- data/vendor/libgit2/include/git2/proxy.h +10 -0
- data/vendor/libgit2/include/git2/rebase.h +9 -6
- data/vendor/libgit2/include/git2/refdb.h +2 -2
- data/vendor/libgit2/include/git2/reflog.h +3 -2
- data/vendor/libgit2/include/git2/refs.h +9 -6
- data/vendor/libgit2/include/git2/refspec.h +14 -4
- data/vendor/libgit2/include/git2/remote.h +94 -18
- data/vendor/libgit2/include/git2/repository.h +57 -21
- data/vendor/libgit2/include/git2/reset.h +16 -3
- data/vendor/libgit2/include/git2/revert.h +9 -4
- data/vendor/libgit2/include/git2/revparse.h +3 -3
- data/vendor/libgit2/include/git2/revwalk.h +3 -2
- data/vendor/libgit2/include/git2/signature.h +46 -1
- data/vendor/libgit2/include/git2/stash.h +17 -3
- data/vendor/libgit2/include/git2/status.h +10 -6
- data/vendor/libgit2/include/git2/stdint.h +87 -85
- data/vendor/libgit2/include/git2/strarray.h +2 -3
- data/vendor/libgit2/include/git2/submodule.h +20 -9
- data/vendor/libgit2/include/git2/sys/alloc.h +12 -0
- data/vendor/libgit2/include/git2/sys/commit.h +77 -3
- data/vendor/libgit2/include/git2/sys/commit_graph.h +103 -62
- data/vendor/libgit2/include/git2/sys/config.h +80 -4
- data/vendor/libgit2/include/git2/sys/credential.h +4 -3
- data/vendor/libgit2/include/git2/sys/diff.h +21 -1
- data/vendor/libgit2/include/git2/sys/email.h +7 -0
- data/vendor/libgit2/include/git2/sys/errors.h +76 -0
- data/vendor/libgit2/include/git2/sys/filter.h +66 -3
- data/vendor/libgit2/include/git2/sys/hashsig.h +11 -0
- data/vendor/libgit2/include/git2/sys/index.h +3 -2
- data/vendor/libgit2/include/git2/sys/mempack.h +32 -2
- data/vendor/libgit2/include/git2/sys/merge.h +55 -7
- data/vendor/libgit2/include/git2/sys/midx.h +43 -4
- data/vendor/libgit2/include/git2/sys/odb_backend.h +7 -3
- data/vendor/libgit2/include/git2/sys/openssl.h +8 -1
- data/vendor/libgit2/include/git2/sys/path.h +12 -1
- data/vendor/libgit2/include/git2/sys/refdb_backend.h +40 -36
- data/vendor/libgit2/include/git2/sys/refs.h +3 -2
- data/vendor/libgit2/include/git2/sys/remote.h +8 -1
- data/vendor/libgit2/include/git2/sys/repository.h +63 -3
- data/vendor/libgit2/include/git2/sys/stream.h +11 -2
- data/vendor/libgit2/include/git2/sys/transport.h +24 -3
- data/vendor/libgit2/include/git2/tag.h +3 -1
- data/vendor/libgit2/include/git2/trace.h +9 -3
- data/vendor/libgit2/include/git2/transaction.h +3 -2
- data/vendor/libgit2/include/git2/transport.h +11 -3
- data/vendor/libgit2/include/git2/tree.h +16 -5
- data/vendor/libgit2/include/git2/types.h +19 -3
- data/vendor/libgit2/include/git2/version.h +44 -8
- data/vendor/libgit2/include/git2/worktree.h +16 -6
- data/vendor/libgit2/src/CMakeLists.txt +6 -4
- data/vendor/libgit2/src/cli/CMakeLists.txt +2 -2
- data/vendor/libgit2/src/cli/cmd.c +1 -1
- data/vendor/libgit2/src/cli/cmd.h +4 -0
- data/vendor/libgit2/src/cli/cmd_blame.c +287 -0
- data/vendor/libgit2/src/cli/cmd_cat_file.c +6 -8
- data/vendor/libgit2/src/cli/cmd_clone.c +5 -7
- data/vendor/libgit2/src/cli/cmd_config.c +241 -0
- data/vendor/libgit2/src/cli/cmd_hash_object.c +6 -8
- data/vendor/libgit2/src/cli/cmd_help.c +6 -7
- data/vendor/libgit2/src/cli/cmd_index_pack.c +114 -0
- data/vendor/libgit2/src/cli/cmd_init.c +102 -0
- data/vendor/libgit2/src/cli/common.c +168 -0
- data/vendor/libgit2/src/cli/common.h +63 -0
- data/vendor/libgit2/src/cli/error.h +1 -1
- data/vendor/libgit2/src/cli/main.c +52 -24
- data/vendor/libgit2/src/cli/opt.c +29 -3
- data/vendor/libgit2/src/cli/opt.h +21 -3
- data/vendor/libgit2/src/cli/opt_usage.c +102 -33
- data/vendor/libgit2/src/cli/opt_usage.h +6 -1
- data/vendor/libgit2/src/cli/progress.c +51 -2
- data/vendor/libgit2/src/cli/progress.h +12 -0
- data/vendor/libgit2/src/cli/unix/sighandler.c +2 -1
- data/vendor/libgit2/src/cli/win32/precompiled.h +1 -1
- data/vendor/libgit2/src/cli/win32/sighandler.c +1 -1
- data/vendor/libgit2/src/libgit2/CMakeLists.txt +26 -8
- data/vendor/libgit2/src/libgit2/apply.c +10 -13
- data/vendor/libgit2/src/libgit2/attr.c +30 -13
- data/vendor/libgit2/src/libgit2/attr_file.c +7 -2
- data/vendor/libgit2/src/libgit2/attr_file.h +2 -0
- data/vendor/libgit2/src/libgit2/attrcache.c +69 -33
- data/vendor/libgit2/src/libgit2/attrcache.h +5 -9
- data/vendor/libgit2/src/libgit2/blame.c +130 -44
- data/vendor/libgit2/src/libgit2/blame.h +1 -0
- data/vendor/libgit2/src/libgit2/cache.c +22 -17
- data/vendor/libgit2/src/libgit2/cache.h +7 -9
- data/vendor/libgit2/src/libgit2/checkout.c +34 -24
- data/vendor/libgit2/src/libgit2/checkout.h +0 -2
- data/vendor/libgit2/src/libgit2/cherrypick.c +1 -2
- data/vendor/libgit2/src/libgit2/clone.c +186 -166
- data/vendor/libgit2/src/libgit2/clone.h +4 -1
- data/vendor/libgit2/src/libgit2/commit.c +92 -0
- data/vendor/libgit2/src/libgit2/commit_graph.c +67 -56
- data/vendor/libgit2/src/libgit2/commit_graph.h +1 -2
- data/vendor/libgit2/src/libgit2/config.c +389 -298
- data/vendor/libgit2/src/libgit2/config.cmake.in +3 -0
- data/vendor/libgit2/src/libgit2/config.h +9 -4
- data/vendor/libgit2/src/libgit2/config_backend.h +8 -10
- data/vendor/libgit2/src/libgit2/config_cache.c +4 -5
- data/vendor/libgit2/src/libgit2/config_file.c +99 -88
- data/vendor/libgit2/src/libgit2/config_list.c +285 -0
- data/vendor/libgit2/src/libgit2/config_list.h +32 -0
- data/vendor/libgit2/src/libgit2/config_mem.c +194 -40
- data/vendor/libgit2/src/libgit2/config_parse.c +10 -9
- data/vendor/libgit2/src/libgit2/config_snapshot.c +24 -31
- data/vendor/libgit2/src/libgit2/describe.c +24 -24
- data/vendor/libgit2/src/libgit2/diff.c +1 -1
- data/vendor/libgit2/src/libgit2/diff_driver.c +12 -19
- data/vendor/libgit2/src/libgit2/diff_driver.h +2 -2
- data/vendor/libgit2/src/libgit2/diff_generate.c +3 -3
- data/vendor/libgit2/src/libgit2/diff_parse.c +2 -2
- data/vendor/libgit2/src/libgit2/diff_print.c +65 -9
- data/vendor/libgit2/src/libgit2/diff_tform.c +36 -8
- data/vendor/libgit2/src/libgit2/email.c +1 -0
- data/vendor/libgit2/src/libgit2/fetch.c +5 -3
- data/vendor/libgit2/src/libgit2/filter.c +5 -5
- data/vendor/libgit2/src/libgit2/git2.rc +3 -3
- data/vendor/libgit2/src/libgit2/grafts.c +18 -20
- data/vendor/libgit2/src/libgit2/grafts.h +0 -1
- data/vendor/libgit2/src/libgit2/graph.c +1 -1
- data/vendor/libgit2/src/libgit2/hashmap_oid.h +30 -0
- data/vendor/libgit2/src/libgit2/ignore.c +9 -5
- data/vendor/libgit2/src/libgit2/index.c +68 -90
- data/vendor/libgit2/src/libgit2/index.h +2 -2
- data/vendor/libgit2/src/libgit2/index_map.c +95 -0
- data/vendor/libgit2/src/libgit2/index_map.h +28 -0
- data/vendor/libgit2/src/libgit2/indexer.c +34 -38
- data/vendor/libgit2/src/libgit2/iterator.c +14 -8
- data/vendor/libgit2/src/libgit2/libgit2.c +153 -368
- data/vendor/libgit2/src/libgit2/mailmap.c +1 -1
- data/vendor/libgit2/src/libgit2/merge.c +42 -37
- data/vendor/libgit2/src/libgit2/merge_driver.c +2 -2
- data/vendor/libgit2/src/libgit2/midx.c +28 -15
- data/vendor/libgit2/src/libgit2/mwindow.c +38 -45
- data/vendor/libgit2/src/libgit2/mwindow.h +4 -0
- data/vendor/libgit2/src/libgit2/object.c +6 -5
- data/vendor/libgit2/src/libgit2/odb.c +5 -4
- data/vendor/libgit2/src/libgit2/odb_mempack.c +49 -17
- data/vendor/libgit2/src/libgit2/odb_pack.c +13 -5
- data/vendor/libgit2/src/libgit2/oid.c +32 -5
- data/vendor/libgit2/src/libgit2/oid.h +11 -0
- data/vendor/libgit2/src/libgit2/pack-objects.c +58 -31
- data/vendor/libgit2/src/libgit2/pack-objects.h +12 -4
- data/vendor/libgit2/src/libgit2/pack.c +30 -24
- data/vendor/libgit2/src/libgit2/pack.h +15 -10
- data/vendor/libgit2/src/libgit2/patch_parse.c +2 -2
- data/vendor/libgit2/src/libgit2/path.c +1 -1
- data/vendor/libgit2/src/libgit2/pathspec.c +1 -1
- data/vendor/libgit2/src/libgit2/push.c +79 -28
- data/vendor/libgit2/src/libgit2/push.h +1 -0
- data/vendor/libgit2/src/libgit2/refdb_fs.c +128 -61
- data/vendor/libgit2/src/libgit2/reflog.c +1 -2
- data/vendor/libgit2/src/libgit2/reflog.h +2 -0
- data/vendor/libgit2/src/libgit2/refs.c +26 -7
- data/vendor/libgit2/src/libgit2/refs.h +6 -1
- data/vendor/libgit2/src/libgit2/refspec.c +28 -1
- data/vendor/libgit2/src/libgit2/refspec.h +8 -0
- data/vendor/libgit2/src/libgit2/remote.c +121 -61
- data/vendor/libgit2/src/libgit2/repository.c +231 -51
- data/vendor/libgit2/src/libgit2/repository.h +10 -6
- data/vendor/libgit2/src/libgit2/revert.c +1 -2
- data/vendor/libgit2/src/libgit2/revparse.c +2 -2
- data/vendor/libgit2/src/libgit2/revwalk.c +13 -10
- data/vendor/libgit2/src/libgit2/revwalk.h +3 -3
- data/vendor/libgit2/src/libgit2/settings.c +468 -0
- data/vendor/libgit2/src/libgit2/settings.h +6 -2
- data/vendor/libgit2/src/libgit2/signature.c +132 -15
- data/vendor/libgit2/src/libgit2/signature.h +0 -1
- data/vendor/libgit2/src/libgit2/status.c +1 -1
- data/vendor/libgit2/src/libgit2/streams/mbedtls.c +54 -60
- data/vendor/libgit2/src/libgit2/streams/openssl.c +32 -7
- data/vendor/libgit2/src/libgit2/streams/openssl.h +2 -0
- data/vendor/libgit2/src/libgit2/streams/openssl_dynamic.c +4 -0
- data/vendor/libgit2/src/libgit2/streams/openssl_dynamic.h +3 -0
- data/vendor/libgit2/src/libgit2/streams/stransport.c +39 -7
- data/vendor/libgit2/src/libgit2/submodule.c +106 -63
- data/vendor/libgit2/src/libgit2/submodule.h +6 -7
- data/vendor/libgit2/src/libgit2/tag.c +1 -1
- data/vendor/libgit2/src/libgit2/trailer.c +6 -6
- data/vendor/libgit2/src/libgit2/transaction.c +26 -20
- data/vendor/libgit2/src/libgit2/transaction.h +4 -1
- data/vendor/libgit2/src/libgit2/transport.c +4 -1
- data/vendor/libgit2/src/libgit2/transports/credential.c +1 -1
- data/vendor/libgit2/src/libgit2/transports/http.c +1 -2
- data/vendor/libgit2/src/libgit2/transports/http.h +0 -10
- data/vendor/libgit2/src/libgit2/transports/httpclient.c +112 -72
- data/vendor/libgit2/src/libgit2/transports/httpparser.c +128 -0
- data/vendor/libgit2/src/libgit2/transports/httpparser.h +99 -0
- data/vendor/libgit2/src/libgit2/transports/local.c +8 -7
- data/vendor/libgit2/src/libgit2/transports/smart.c +20 -8
- data/vendor/libgit2/src/libgit2/transports/smart.h +4 -2
- data/vendor/libgit2/src/libgit2/transports/smart_pkt.c +2 -2
- data/vendor/libgit2/src/libgit2/transports/smart_protocol.c +55 -10
- data/vendor/libgit2/src/libgit2/transports/ssh.c +41 -1103
- data/vendor/libgit2/src/libgit2/transports/ssh_exec.c +347 -0
- data/vendor/libgit2/src/libgit2/transports/ssh_exec.h +26 -0
- data/vendor/libgit2/src/libgit2/transports/ssh_libssh2.c +1126 -0
- data/vendor/libgit2/src/libgit2/transports/ssh_libssh2.h +28 -0
- data/vendor/libgit2/src/libgit2/transports/winhttp.c +35 -7
- data/vendor/libgit2/src/libgit2/tree.c +34 -26
- data/vendor/libgit2/src/libgit2/tree.h +3 -2
- data/vendor/libgit2/src/libgit2/worktree.c +14 -17
- data/vendor/libgit2/src/util/CMakeLists.txt +4 -6
- data/vendor/libgit2/src/util/alloc.c +4 -1
- data/vendor/libgit2/src/util/allocators/debugalloc.c +73 -0
- data/vendor/libgit2/src/{cli/cli.h → util/allocators/debugalloc.h} +6 -9
- data/vendor/libgit2/src/util/allocators/stdalloc.c +0 -10
- data/vendor/libgit2/src/util/array.h +18 -17
- data/vendor/libgit2/src/util/cc-compat.h +2 -0
- data/vendor/libgit2/src/util/ctype_compat.h +70 -0
- data/vendor/libgit2/src/util/date.c +22 -14
- data/vendor/libgit2/src/util/date.h +12 -0
- data/vendor/libgit2/src/util/errors.c +401 -0
- data/vendor/libgit2/src/{libgit2 → util}/errors.h +21 -17
- data/vendor/libgit2/src/util/fs_path.c +15 -4
- data/vendor/libgit2/src/util/fs_path.h +23 -0
- data/vendor/libgit2/src/util/futils.c +6 -5
- data/vendor/libgit2/src/util/futils.h +13 -4
- data/vendor/libgit2/src/util/git2_features.h.in +12 -1
- data/vendor/libgit2/src/util/git2_util.h +6 -0
- data/vendor/libgit2/src/util/hash/openssl.c +152 -0
- data/vendor/libgit2/src/util/hash/openssl.h +17 -1
- data/vendor/libgit2/src/util/hash/sha.h +4 -1
- data/vendor/libgit2/src/util/hashmap.h +424 -0
- data/vendor/libgit2/src/util/hashmap_str.h +43 -0
- data/vendor/libgit2/src/util/integer.h +3 -1
- data/vendor/libgit2/src/util/net.c +13 -7
- data/vendor/libgit2/src/util/net.h +2 -0
- data/vendor/libgit2/src/util/pool.c +1 -1
- data/vendor/libgit2/src/util/pool.h +5 -0
- data/vendor/libgit2/src/util/pqueue.h +1 -1
- data/vendor/libgit2/src/util/process.h +222 -0
- data/vendor/libgit2/src/util/rand.c +1 -7
- data/vendor/libgit2/src/util/regexp.c +1 -1
- data/vendor/libgit2/src/util/sortedcache.c +14 -13
- data/vendor/libgit2/src/util/sortedcache.h +3 -3
- data/vendor/libgit2/src/util/str.c +2 -2
- data/vendor/libgit2/src/util/strlist.c +108 -0
- data/vendor/libgit2/src/util/strlist.h +36 -0
- data/vendor/libgit2/src/util/unix/posix.h +0 -2
- data/vendor/libgit2/src/util/unix/process.c +629 -0
- data/vendor/libgit2/src/util/unix/realpath.c +23 -5
- data/vendor/libgit2/src/util/util.c +2 -2
- data/vendor/libgit2/src/util/util.h +4 -38
- data/vendor/libgit2/src/util/vector.c +3 -3
- data/vendor/libgit2/src/util/vector.h +2 -2
- data/vendor/libgit2/src/util/win32/posix_w32.c +29 -6
- data/vendor/libgit2/src/util/win32/process.c +506 -0
- metadata +45 -28
- data/vendor/libgit2/deps/http-parser/CMakeLists.txt +0 -6
- data/vendor/libgit2/deps/http-parser/COPYING +0 -23
- data/vendor/libgit2/deps/http-parser/http_parser.c +0 -2182
- data/vendor/libgit2/deps/http-parser/http_parser.h +0 -305
- data/vendor/libgit2/deps/zlib/COPYING +0 -27
- data/vendor/libgit2/include/git2/sys/reflog.h +0 -21
- data/vendor/libgit2/src/libgit2/config_entries.c +0 -237
- data/vendor/libgit2/src/libgit2/config_entries.h +0 -24
- data/vendor/libgit2/src/libgit2/errors.c +0 -293
- data/vendor/libgit2/src/libgit2/idxmap.c +0 -157
- data/vendor/libgit2/src/libgit2/idxmap.h +0 -177
- data/vendor/libgit2/src/libgit2/libgit2.h +0 -15
- data/vendor/libgit2/src/libgit2/offmap.c +0 -101
- data/vendor/libgit2/src/libgit2/offmap.h +0 -133
- data/vendor/libgit2/src/libgit2/oidmap.c +0 -107
- data/vendor/libgit2/src/libgit2/oidmap.h +0 -128
- data/vendor/libgit2/src/libgit2/threadstate.c +0 -97
- data/vendor/libgit2/src/libgit2/threadstate.h +0 -22
- data/vendor/libgit2/src/libgit2/transports/ssh.h +0 -14
- data/vendor/libgit2/src/util/khash.h +0 -615
- data/vendor/libgit2/src/util/strmap.c +0 -100
- data/vendor/libgit2/src/util/strmap.h +0 -131
- /data/vendor/libgit2/cmake/{FindHTTPParser.cmake → FindHTTP_Parser.cmake} +0 -0
@@ -1,30 +1,30 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
1
|
+
/* The latest version of this library is available on GitHub;
|
2
|
+
* https://github.com/sheredom/utf8.h */
|
3
|
+
|
4
|
+
/* This is free and unencumbered software released into the public domain.
|
5
|
+
*
|
6
|
+
* Anyone is free to copy, modify, publish, use, compile, sell, or
|
7
|
+
* distribute this software, either in source code form or as a compiled
|
8
|
+
* binary, for any purpose, commercial or non-commercial, and by any
|
9
|
+
* means.
|
10
|
+
*
|
11
|
+
* In jurisdictions that recognize copyright laws, the author or authors
|
12
|
+
* of this software dedicate any and all copyright interest in the
|
13
|
+
* software to the public domain. We make this dedication for the benefit
|
14
|
+
* of the public at large and to the detriment of our heirs and
|
15
|
+
* successors. We intend this dedication to be an overt act of
|
16
|
+
* relinquishment in perpetuity of all present and future rights to this
|
17
|
+
* software under copyright law.
|
18
|
+
*
|
19
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
22
|
+
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
23
|
+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
24
|
+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
25
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
*
|
27
|
+
* For more information, please refer to <http://unlicense.org/> */
|
28
28
|
|
29
29
|
#ifndef SHEREDOM_UTF8_H_INCLUDED
|
30
30
|
#define SHEREDOM_UTF8_H_INCLUDED
|
@@ -32,10 +32,25 @@
|
|
32
32
|
#if defined(_MSC_VER)
|
33
33
|
#pragma warning(push)
|
34
34
|
|
35
|
-
|
35
|
+
/* disable warning: no function prototype given: converting '()' to '(void)' */
|
36
|
+
#pragma warning(disable : 4255)
|
37
|
+
|
38
|
+
/* disable warning: '__cplusplus' is not defined as a preprocessor macro,
|
39
|
+
* replacing with '0' for '#if/#elif' */
|
40
|
+
#pragma warning(disable : 4668)
|
41
|
+
|
42
|
+
/* disable warning: bytes padding added after construct */
|
36
43
|
#pragma warning(disable : 4820)
|
37
44
|
#endif
|
38
45
|
|
46
|
+
#if defined(__cplusplus)
|
47
|
+
#if defined(_MSC_VER)
|
48
|
+
#define utf8_cplusplus _MSVC_LANG
|
49
|
+
#else
|
50
|
+
#define utf8_cplusplus __cplusplus
|
51
|
+
#endif
|
52
|
+
#endif
|
53
|
+
|
39
54
|
#include <stddef.h>
|
40
55
|
#include <stdlib.h>
|
41
56
|
|
@@ -43,7 +58,7 @@
|
|
43
58
|
#pragma warning(pop)
|
44
59
|
#endif
|
45
60
|
|
46
|
-
#if defined(_MSC_VER)
|
61
|
+
#if defined(_MSC_VER) && (_MSC_VER < 1920)
|
47
62
|
typedef __int32 utf8_int32_t;
|
48
63
|
#else
|
49
64
|
#include <stdint.h>
|
@@ -54,411 +69,516 @@ typedef int32_t utf8_int32_t;
|
|
54
69
|
#pragma clang diagnostic push
|
55
70
|
#pragma clang diagnostic ignored "-Wold-style-cast"
|
56
71
|
#pragma clang diagnostic ignored "-Wcast-qual"
|
72
|
+
|
73
|
+
#if __has_warning("-Wunsafe-buffer-usage")
|
74
|
+
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
|
75
|
+
#endif
|
57
76
|
#endif
|
58
77
|
|
59
|
-
#ifdef
|
78
|
+
#ifdef utf8_cplusplus
|
60
79
|
extern "C" {
|
61
80
|
#endif
|
62
81
|
|
63
|
-
#if defined(
|
64
|
-
#define
|
65
|
-
#
|
66
|
-
#define
|
67
|
-
#
|
68
|
-
|
82
|
+
#if defined(__TINYC__)
|
83
|
+
#define UTF8_ATTRIBUTE(a) __attribute((a))
|
84
|
+
#else
|
85
|
+
#define UTF8_ATTRIBUTE(a) __attribute__((a))
|
86
|
+
#endif
|
87
|
+
|
88
|
+
#if defined(_MSC_VER)
|
69
89
|
#define utf8_nonnull
|
70
90
|
#define utf8_pure
|
71
91
|
#define utf8_restrict __restrict
|
72
92
|
#define utf8_weak __inline
|
93
|
+
#elif defined(__clang__) || defined(__GNUC__)
|
94
|
+
#define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
|
95
|
+
#define utf8_pure UTF8_ATTRIBUTE(pure)
|
96
|
+
#define utf8_restrict __restrict__
|
97
|
+
#define utf8_weak UTF8_ATTRIBUTE(weak)
|
98
|
+
#elif defined(__TINYC__)
|
99
|
+
#define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
|
100
|
+
#define utf8_pure UTF8_ATTRIBUTE(pure)
|
101
|
+
#define utf8_restrict
|
102
|
+
#define utf8_weak UTF8_ATTRIBUTE(weak)
|
73
103
|
#else
|
74
|
-
#error Non clang, non gcc, non MSVC compiler found!
|
104
|
+
#error Non clang, non gcc, non MSVC, non tcc compiler found!
|
75
105
|
#endif
|
76
106
|
|
77
|
-
#ifdef
|
107
|
+
#ifdef utf8_cplusplus
|
78
108
|
#define utf8_null NULL
|
79
109
|
#else
|
80
110
|
#define utf8_null 0
|
81
111
|
#endif
|
82
112
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
// Find the first match of the utf8 codepoint chr in the utf8 string src.
|
93
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src,
|
94
|
-
utf8_int32_t chr);
|
95
|
-
|
96
|
-
// Return less than 0, 0, greater than 0 if src1 < src2,
|
97
|
-
// src1 == src2, src1 > src2 respectively.
|
98
|
-
utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1,
|
99
|
-
const void *src2);
|
100
|
-
|
101
|
-
// Copy the utf8 string src onto the memory allocated in dst.
|
102
|
-
utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst,
|
103
|
-
const void *utf8_restrict src);
|
104
|
-
|
105
|
-
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
106
|
-
// of utf8 codepoints not from the utf8 string reject.
|
107
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src,
|
108
|
-
const void *reject);
|
109
|
-
|
110
|
-
// Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
|
111
|
-
// copying over the data, and returning that. Or 0 if malloc failed.
|
112
|
-
utf8_nonnull utf8_weak void *utf8dup(const void *src);
|
113
|
-
|
114
|
-
// Number of utf8 codepoints in the utf8 string str,
|
115
|
-
// excluding the null terminating byte.
|
116
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
|
117
|
-
|
118
|
-
// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
119
|
-
// src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
120
|
-
// string.
|
121
|
-
utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1,
|
122
|
-
const void *src2, size_t n);
|
123
|
-
|
124
|
-
// Append the utf8 string src onto the utf8 string dst,
|
125
|
-
// writing at most n+1 bytes. Can produce an invalid utf8
|
126
|
-
// string if n falls partway through a utf8 codepoint.
|
127
|
-
utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst,
|
128
|
-
const void *utf8_restrict src, size_t n);
|
129
|
-
|
130
|
-
// Return less than 0, 0, greater than 0 if src1 < src2,
|
131
|
-
// src1 == src2, src1 > src2 respectively. Checking at most n
|
132
|
-
// bytes of each utf8 string.
|
133
|
-
utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1,
|
134
|
-
const void *src2, size_t n);
|
135
|
-
|
136
|
-
// Copy the utf8 string src onto the memory allocated in dst.
|
137
|
-
// Copies at most n bytes. If there is no terminating null byte in
|
138
|
-
// the first n bytes of src, the string placed into dst will not be
|
139
|
-
// null-terminated. If the size (in bytes) of src is less than n,
|
140
|
-
// extra null terminating bytes are appended to dst such that a
|
141
|
-
// total of n bytes are written. Can produce an invalid utf8
|
142
|
-
// string if n falls partway through a utf8 codepoint.
|
143
|
-
utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst,
|
144
|
-
const void *utf8_restrict src, size_t n);
|
145
|
-
|
146
|
-
// Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
147
|
-
// longer than n, only n bytes are copied and a null byte is added.
|
148
|
-
//
|
149
|
-
// Returns a new string if successful, 0 otherwise
|
150
|
-
utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
|
151
|
-
|
152
|
-
// Locates the first occurrence in the utf8 string str of any byte in the
|
153
|
-
// utf8 string accept, or 0 if no match was found.
|
154
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str,
|
155
|
-
const void *accept);
|
156
|
-
|
157
|
-
// Find the last match of the utf8 codepoint chr in the utf8 string src.
|
158
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
|
159
|
-
|
160
|
-
// Number of bytes in the utf8 string str,
|
161
|
-
// including the null terminating byte.
|
162
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
|
163
|
-
|
164
|
-
// Number of utf8 codepoints in the utf8 string src that consists entirely
|
165
|
-
// of utf8 codepoints from the utf8 string accept.
|
166
|
-
utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src,
|
167
|
-
const void *accept);
|
168
|
-
|
169
|
-
// The position of the utf8 string needle in the utf8 string haystack.
|
170
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack,
|
171
|
-
const void *needle);
|
172
|
-
|
173
|
-
// The position of the utf8 string needle in the utf8 string haystack, case
|
174
|
-
// insensitive.
|
175
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack,
|
176
|
-
const void *needle);
|
177
|
-
|
178
|
-
// Return 0 on success, or the position of the invalid
|
179
|
-
// utf8 codepoint on failure.
|
180
|
-
utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
|
181
|
-
|
182
|
-
// Sets out_codepoint to the next utf8 codepoint in str, and returns the address
|
183
|
-
// of the utf8 codepoint after the current one in str.
|
184
|
-
utf8_nonnull utf8_weak void *
|
185
|
-
utf8codepoint(const void *utf8_restrict str,
|
186
|
-
utf8_int32_t *utf8_restrict out_codepoint);
|
187
|
-
|
188
|
-
// Returns the size of the given codepoint in bytes.
|
189
|
-
utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
|
190
|
-
|
191
|
-
// Write a codepoint to the given string, and return the address to the next
|
192
|
-
// place after the written codepoint. Pass how many bytes left in the buffer to
|
193
|
-
// n. If there is not enough space for the codepoint, this function returns
|
194
|
-
// null.
|
195
|
-
utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str,
|
196
|
-
utf8_int32_t chr, size_t n);
|
197
|
-
|
198
|
-
// Returns 1 if the given character is lowercase, or 0 if it is not.
|
199
|
-
utf8_weak int utf8islower(utf8_int32_t chr);
|
200
|
-
|
201
|
-
// Returns 1 if the given character is uppercase, or 0 if it is not.
|
202
|
-
utf8_weak int utf8isupper(utf8_int32_t chr);
|
203
|
-
|
204
|
-
// Transform the given string into all lowercase codepoints.
|
205
|
-
utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
|
113
|
+
#if defined(utf8_cplusplus) && utf8_cplusplus >= 201402L && (!defined(_MSC_VER) || (defined(_MSC_VER) && _MSC_VER >= 1910))
|
114
|
+
#define utf8_constexpr14 constexpr
|
115
|
+
#define utf8_constexpr14_impl constexpr
|
116
|
+
#else
|
117
|
+
/* constexpr and weak are incompatible. so only enable one of them */
|
118
|
+
#define utf8_constexpr14 utf8_weak
|
119
|
+
#define utf8_constexpr14_impl
|
120
|
+
#endif
|
206
121
|
|
207
|
-
|
208
|
-
|
122
|
+
#if defined(utf8_cplusplus) && utf8_cplusplus >= 202002L
|
123
|
+
using utf8_int8_t = char8_t; /* Introduced in C++20 */
|
124
|
+
#else
|
125
|
+
typedef char utf8_int8_t;
|
126
|
+
#endif
|
209
127
|
|
210
|
-
|
211
|
-
|
128
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
129
|
+
* src2 respectively, case insensitive. */
|
130
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int
|
131
|
+
utf8casecmp(const utf8_int8_t *src1, const utf8_int8_t *src2);
|
132
|
+
|
133
|
+
/* Append the utf8 string src onto the utf8 string dst. */
|
134
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
135
|
+
utf8cat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src);
|
136
|
+
|
137
|
+
/* Find the first match of the utf8 codepoint chr in the utf8 string src. */
|
138
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
139
|
+
utf8chr(const utf8_int8_t *src, utf8_int32_t chr);
|
140
|
+
|
141
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2,
|
142
|
+
* src1 == src2, src1 > src2 respectively. */
|
143
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int utf8cmp(const utf8_int8_t *src1,
|
144
|
+
const utf8_int8_t *src2);
|
145
|
+
|
146
|
+
/* Copy the utf8 string src onto the memory allocated in dst. */
|
147
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
148
|
+
utf8cpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src);
|
149
|
+
|
150
|
+
/* Number of utf8 codepoints in the utf8 string src that consists entirely
|
151
|
+
* of utf8 codepoints not from the utf8 string reject. */
|
152
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
153
|
+
utf8cspn(const utf8_int8_t *src, const utf8_int8_t *reject);
|
154
|
+
|
155
|
+
/* Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
|
156
|
+
* copying over the data, and returning that. Or 0 if malloc failed. */
|
157
|
+
utf8_weak utf8_int8_t *utf8dup(const utf8_int8_t *src);
|
158
|
+
|
159
|
+
/* Number of utf8 codepoints in the utf8 string str,
|
160
|
+
* excluding the null terminating byte. */
|
161
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8len(const utf8_int8_t *str);
|
162
|
+
|
163
|
+
/* Similar to utf8len, except that only at most n bytes of src are looked. */
|
164
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8nlen(const utf8_int8_t *str,
|
165
|
+
size_t n);
|
166
|
+
|
167
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
|
168
|
+
* src2 respectively, case insensitive. Checking at most n bytes of each utf8
|
169
|
+
* string. */
|
170
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int
|
171
|
+
utf8ncasecmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n);
|
172
|
+
|
173
|
+
/* Append the utf8 string src onto the utf8 string dst,
|
174
|
+
* writing at most n+1 bytes. Can produce an invalid utf8
|
175
|
+
* string if n falls partway through a utf8 codepoint. */
|
176
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
177
|
+
utf8ncat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src,
|
178
|
+
size_t n);
|
179
|
+
|
180
|
+
/* Return less than 0, 0, greater than 0 if src1 < src2,
|
181
|
+
* src1 == src2, src1 > src2 respectively. Checking at most n
|
182
|
+
* bytes of each utf8 string. */
|
183
|
+
utf8_constexpr14 utf8_nonnull utf8_pure int
|
184
|
+
utf8ncmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n);
|
185
|
+
|
186
|
+
/* Copy the utf8 string src onto the memory allocated in dst.
|
187
|
+
* Copies at most n bytes. If n falls partway through a utf8
|
188
|
+
* codepoint, or if dst doesn't have enough room for a null
|
189
|
+
* terminator, the final string will be cut short to preserve
|
190
|
+
* utf8 validity. */
|
191
|
+
|
192
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
193
|
+
utf8ncpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src,
|
194
|
+
size_t n);
|
195
|
+
|
196
|
+
/* Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
197
|
+
* longer than n, only n bytes are copied and a null byte is added.
|
198
|
+
*
|
199
|
+
* Returns a new string if successful, 0 otherwise */
|
200
|
+
utf8_weak utf8_int8_t *utf8ndup(const utf8_int8_t *src, size_t n);
|
201
|
+
|
202
|
+
/* Locates the first occurrence in the utf8 string str of any byte in the
|
203
|
+
* utf8 string accept, or 0 if no match was found. */
|
204
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
205
|
+
utf8pbrk(const utf8_int8_t *str, const utf8_int8_t *accept);
|
206
|
+
|
207
|
+
/* Find the last match of the utf8 codepoint chr in the utf8 string src. */
|
208
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
209
|
+
utf8rchr(const utf8_int8_t *src, int chr);
|
210
|
+
|
211
|
+
/* Number of bytes in the utf8 string str,
|
212
|
+
* including the null terminating byte. */
|
213
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8size(const utf8_int8_t *str);
|
214
|
+
|
215
|
+
/* Similar to utf8size, except that the null terminating byte is excluded. */
|
216
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
217
|
+
utf8size_lazy(const utf8_int8_t *str);
|
218
|
+
|
219
|
+
/* Similar to utf8size, except that only at most n bytes of src are looked and
|
220
|
+
* the null terminating byte is excluded. */
|
221
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
222
|
+
utf8nsize_lazy(const utf8_int8_t *str, size_t n);
|
223
|
+
|
224
|
+
/* Number of utf8 codepoints in the utf8 string src that consists entirely
|
225
|
+
* of utf8 codepoints from the utf8 string accept. */
|
226
|
+
utf8_constexpr14 utf8_nonnull utf8_pure size_t
|
227
|
+
utf8spn(const utf8_int8_t *src, const utf8_int8_t *accept);
|
228
|
+
|
229
|
+
/* The position of the utf8 string needle in the utf8 string haystack. */
|
230
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
231
|
+
utf8str(const utf8_int8_t *haystack, const utf8_int8_t *needle);
|
232
|
+
|
233
|
+
/* The position of the utf8 string needle in the utf8 string haystack, case
|
234
|
+
* insensitive. */
|
235
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
236
|
+
utf8casestr(const utf8_int8_t *haystack, const utf8_int8_t *needle);
|
237
|
+
|
238
|
+
/* Return 0 on success, or the position of the invalid
|
239
|
+
* utf8 codepoint on failure. */
|
240
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
241
|
+
utf8valid(const utf8_int8_t *str);
|
242
|
+
|
243
|
+
/* Similar to utf8valid, except that only at most n bytes of src are looked. */
|
244
|
+
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
|
245
|
+
utf8nvalid(const utf8_int8_t *str, size_t n);
|
246
|
+
|
247
|
+
/* Given a null-terminated string, makes the string valid by replacing invalid
|
248
|
+
* codepoints with a 1-byte replacement. Returns 0 on success. */
|
249
|
+
utf8_nonnull utf8_weak int utf8makevalid(utf8_int8_t *str,
|
250
|
+
const utf8_int32_t replacement);
|
251
|
+
|
252
|
+
/* Sets out_codepoint to the current utf8 codepoint in str, and returns the
|
253
|
+
* address of the next utf8 codepoint after the current one in str. */
|
254
|
+
utf8_constexpr14 utf8_nonnull utf8_int8_t *
|
255
|
+
utf8codepoint(const utf8_int8_t *utf8_restrict str,
|
256
|
+
utf8_int32_t *utf8_restrict out_codepoint);
|
212
257
|
|
213
|
-
|
214
|
-
|
258
|
+
/* Calculates the size of the next utf8 codepoint in str. */
|
259
|
+
utf8_constexpr14 utf8_nonnull size_t
|
260
|
+
utf8codepointcalcsize(const utf8_int8_t *str);
|
261
|
+
|
262
|
+
/* Returns the size of the given codepoint in bytes. */
|
263
|
+
utf8_constexpr14 size_t utf8codepointsize(utf8_int32_t chr);
|
264
|
+
|
265
|
+
/* Write a codepoint to the given string, and return the address to the next
|
266
|
+
* place after the written codepoint. Pass how many bytes left in the buffer to
|
267
|
+
* n. If there is not enough space for the codepoint, this function returns
|
268
|
+
* null. */
|
269
|
+
utf8_nonnull utf8_weak utf8_int8_t *
|
270
|
+
utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n);
|
271
|
+
|
272
|
+
/* Returns 1 if the given character is lowercase, or 0 if it is not. */
|
273
|
+
utf8_constexpr14 int utf8islower(utf8_int32_t chr);
|
274
|
+
|
275
|
+
/* Returns 1 if the given character is uppercase, or 0 if it is not. */
|
276
|
+
utf8_constexpr14 int utf8isupper(utf8_int32_t chr);
|
277
|
+
|
278
|
+
/* Transform the given string into all lowercase codepoints. */
|
279
|
+
utf8_nonnull utf8_weak void utf8lwr(utf8_int8_t *utf8_restrict str);
|
280
|
+
|
281
|
+
/* Transform the given string into all uppercase codepoints. */
|
282
|
+
utf8_nonnull utf8_weak void utf8upr(utf8_int8_t *utf8_restrict str);
|
283
|
+
|
284
|
+
/* Make a codepoint lower case if possible. */
|
285
|
+
utf8_constexpr14 utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
|
286
|
+
|
287
|
+
/* Make a codepoint upper case if possible. */
|
288
|
+
utf8_constexpr14 utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
|
289
|
+
|
290
|
+
/* Sets out_codepoint to the current utf8 codepoint in str, and returns the
|
291
|
+
* address of the previous utf8 codepoint before the current one in str. */
|
292
|
+
utf8_constexpr14 utf8_nonnull utf8_int8_t *
|
293
|
+
utf8rcodepoint(const utf8_int8_t *utf8_restrict str,
|
294
|
+
utf8_int32_t *utf8_restrict out_codepoint);
|
295
|
+
|
296
|
+
/* Duplicate the utf8 string src by getting its size, calling alloc_func_ptr to
|
297
|
+
* copy over data to a new buffer, and returning that. Or 0 if alloc_func_ptr
|
298
|
+
* returned null. */
|
299
|
+
utf8_weak utf8_int8_t *utf8dup_ex(const utf8_int8_t *src,
|
300
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *,
|
301
|
+
size_t),
|
302
|
+
utf8_int8_t *user_data);
|
303
|
+
|
304
|
+
/* Similar to utf8dup, except that at most n bytes of src are copied. If src is
|
305
|
+
* longer than n, only n bytes are copied and a null byte is added.
|
306
|
+
*
|
307
|
+
* Returns a new string if successful, 0 otherwise. */
|
308
|
+
utf8_weak utf8_int8_t *utf8ndup_ex(const utf8_int8_t *src, size_t n,
|
309
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *,
|
310
|
+
size_t),
|
311
|
+
utf8_int8_t *user_data);
|
215
312
|
|
216
313
|
#undef utf8_weak
|
217
314
|
#undef utf8_pure
|
218
315
|
#undef utf8_nonnull
|
219
316
|
|
220
|
-
int utf8casecmp(const
|
221
|
-
|
317
|
+
utf8_constexpr14_impl int utf8casecmp(const utf8_int8_t *src1,
|
318
|
+
const utf8_int8_t *src2) {
|
319
|
+
utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
|
320
|
+
src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
|
222
321
|
|
223
322
|
for (;;) {
|
224
|
-
src1 = utf8codepoint(src1, &
|
225
|
-
src2 = utf8codepoint(src2, &
|
323
|
+
src1 = utf8codepoint(src1, &src1_orig_cp);
|
324
|
+
src2 = utf8codepoint(src2, &src2_orig_cp);
|
226
325
|
|
227
|
-
|
228
|
-
|
229
|
-
|
326
|
+
/* lower the srcs if required */
|
327
|
+
src1_lwr_cp = utf8lwrcodepoint(src1_orig_cp);
|
328
|
+
src2_lwr_cp = utf8lwrcodepoint(src2_orig_cp);
|
230
329
|
|
231
|
-
|
232
|
-
|
233
|
-
|
330
|
+
/* lower the srcs if required */
|
331
|
+
src1_upr_cp = utf8uprcodepoint(src1_orig_cp);
|
332
|
+
src2_upr_cp = utf8uprcodepoint(src2_orig_cp);
|
234
333
|
|
235
|
-
|
334
|
+
/* check if the lowered codepoints match */
|
236
335
|
if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
|
237
336
|
return 0;
|
238
|
-
} else if (
|
337
|
+
} else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
|
239
338
|
continue;
|
240
339
|
}
|
241
340
|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
} else if (src1_orig_cp > src2_orig_cp) {
|
246
|
-
return 1;
|
247
|
-
}
|
341
|
+
/* if they don't match, then we return the difference between the characters
|
342
|
+
*/
|
343
|
+
return src1_lwr_cp - src2_lwr_cp;
|
248
344
|
}
|
249
345
|
}
|
250
346
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
// find the null terminating byte in dst
|
347
|
+
utf8_int8_t *utf8cat(utf8_int8_t *utf8_restrict dst,
|
348
|
+
const utf8_int8_t *utf8_restrict src) {
|
349
|
+
utf8_int8_t *d = dst;
|
350
|
+
/* find the null terminating byte in dst */
|
256
351
|
while ('\0' != *d) {
|
257
352
|
d++;
|
258
353
|
}
|
259
354
|
|
260
|
-
|
261
|
-
while ('\0' != *
|
262
|
-
*d++ = *
|
355
|
+
/* overwriting the null terminating byte in dst, append src byte-by-byte */
|
356
|
+
while ('\0' != *src) {
|
357
|
+
*d++ = *src++;
|
263
358
|
}
|
264
359
|
|
265
|
-
|
360
|
+
/* write out a new null terminating byte into dst */
|
266
361
|
*d = '\0';
|
267
362
|
|
268
363
|
return dst;
|
269
364
|
}
|
270
365
|
|
271
|
-
|
272
|
-
|
366
|
+
utf8_constexpr14_impl utf8_int8_t *utf8chr(const utf8_int8_t *src,
|
367
|
+
utf8_int32_t chr) {
|
368
|
+
utf8_int8_t c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
273
369
|
|
274
370
|
if (0 == chr) {
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
s++;
|
371
|
+
/* being asked to return position of null terminating byte, so
|
372
|
+
* just run s to the end, and return! */
|
373
|
+
while ('\0' != *src) {
|
374
|
+
src++;
|
280
375
|
}
|
281
|
-
return (
|
376
|
+
return (utf8_int8_t *)src;
|
282
377
|
} else if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
|
283
|
-
|
284
|
-
|
285
|
-
c[0] = (
|
378
|
+
/* 1-byte/7-bit ascii
|
379
|
+
* (0b0xxxxxxx) */
|
380
|
+
c[0] = (utf8_int8_t)chr;
|
286
381
|
} else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
|
287
|
-
|
288
|
-
|
289
|
-
c[0] = 0xc0 | (
|
290
|
-
c[1] = 0x80 | (
|
382
|
+
/* 2-byte/11-bit utf8 code point
|
383
|
+
* (0b110xxxxx 0b10xxxxxx) */
|
384
|
+
c[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)(chr >> 6));
|
385
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
291
386
|
} else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
|
292
|
-
|
293
|
-
|
294
|
-
c[0] = 0xe0 | (
|
295
|
-
c[1] = 0x80 | (
|
296
|
-
c[2] = 0x80 | (
|
297
|
-
} else {
|
298
|
-
|
299
|
-
|
300
|
-
c[0] = 0xf0 | (
|
301
|
-
c[1] = 0x80 | (
|
302
|
-
c[2] = 0x80 | (
|
303
|
-
c[3] = 0x80 | (
|
387
|
+
/* 3-byte/16-bit utf8 code point
|
388
|
+
* (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
|
389
|
+
c[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)(chr >> 12));
|
390
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
391
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
392
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
393
|
+
/* 4-byte/21-bit utf8 code point
|
394
|
+
* (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
|
395
|
+
c[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)(chr >> 18));
|
396
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
|
397
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
398
|
+
c[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
304
399
|
}
|
305
400
|
|
306
|
-
|
307
|
-
|
308
|
-
|
401
|
+
/* we've made c into a 2 utf8 codepoint string, one for the chr we are
|
402
|
+
* seeking, another for the null terminating byte. Now use utf8str to
|
403
|
+
* search */
|
309
404
|
return utf8str(src, c);
|
310
405
|
}
|
311
406
|
|
312
|
-
int utf8cmp(const
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
while (('\0' != *s1) || ('\0' != *s2)) {
|
317
|
-
if (*s1 < *s2) {
|
407
|
+
utf8_constexpr14_impl int utf8cmp(const utf8_int8_t *src1,
|
408
|
+
const utf8_int8_t *src2) {
|
409
|
+
while (('\0' != *src1) || ('\0' != *src2)) {
|
410
|
+
if (*src1 < *src2) {
|
318
411
|
return -1;
|
319
|
-
} else if (*
|
412
|
+
} else if (*src1 > *src2) {
|
320
413
|
return 1;
|
321
414
|
}
|
322
415
|
|
323
|
-
|
324
|
-
|
416
|
+
src1++;
|
417
|
+
src2++;
|
325
418
|
}
|
326
419
|
|
327
|
-
|
420
|
+
/* both utf8 strings matched */
|
328
421
|
return 0;
|
329
422
|
}
|
330
423
|
|
331
|
-
int utf8coll(const
|
424
|
+
utf8_constexpr14_impl int utf8coll(const utf8_int8_t *src1,
|
425
|
+
const utf8_int8_t *src2);
|
332
426
|
|
333
|
-
|
334
|
-
|
335
|
-
|
427
|
+
utf8_int8_t *utf8cpy(utf8_int8_t *utf8_restrict dst,
|
428
|
+
const utf8_int8_t *utf8_restrict src) {
|
429
|
+
utf8_int8_t *d = dst;
|
336
430
|
|
337
|
-
|
338
|
-
|
339
|
-
while ('\0' != *
|
340
|
-
*d++ = *
|
431
|
+
/* overwriting anything previously in dst, write byte-by-byte
|
432
|
+
* from src */
|
433
|
+
while ('\0' != *src) {
|
434
|
+
*d++ = *src++;
|
341
435
|
}
|
342
436
|
|
343
|
-
|
437
|
+
/* append null terminating byte */
|
344
438
|
*d = '\0';
|
345
439
|
|
346
440
|
return dst;
|
347
441
|
}
|
348
442
|
|
349
|
-
size_t utf8cspn(const
|
350
|
-
|
443
|
+
utf8_constexpr14_impl size_t utf8cspn(const utf8_int8_t *src,
|
444
|
+
const utf8_int8_t *reject) {
|
351
445
|
size_t chars = 0;
|
352
446
|
|
353
|
-
while ('\0' != *
|
354
|
-
const
|
447
|
+
while ('\0' != *src) {
|
448
|
+
const utf8_int8_t *r = reject;
|
355
449
|
size_t offset = 0;
|
356
450
|
|
357
451
|
while ('\0' != *r) {
|
358
|
-
|
359
|
-
|
360
|
-
|
452
|
+
/* checking that if *r is the start of a utf8 codepoint
|
453
|
+
* (it is not 0b10xxxxxx) and we have successfully matched
|
454
|
+
* a previous character (0 < offset) - we found a match */
|
361
455
|
if ((0x80 != (0xc0 & *r)) && (0 < offset)) {
|
362
456
|
return chars;
|
363
457
|
} else {
|
364
|
-
if (*r ==
|
365
|
-
|
366
|
-
|
458
|
+
if (*r == src[offset]) {
|
459
|
+
/* part of a utf8 codepoint matched, so move our checking
|
460
|
+
* onwards to the next byte */
|
367
461
|
offset++;
|
368
462
|
r++;
|
369
463
|
} else {
|
370
|
-
|
371
|
-
|
464
|
+
/* r could be in the middle of an unmatching utf8 code point,
|
465
|
+
* so we need to march it on to the next character beginning, */
|
372
466
|
|
373
467
|
do {
|
374
468
|
r++;
|
375
469
|
} while (0x80 == (0xc0 & *r));
|
376
470
|
|
377
|
-
|
471
|
+
/* reset offset too as we found a mismatch */
|
378
472
|
offset = 0;
|
379
473
|
}
|
380
474
|
}
|
381
475
|
}
|
382
476
|
|
383
|
-
|
384
|
-
|
385
|
-
|
477
|
+
/* found a match at the end of *r, so didn't get a chance to test it */
|
478
|
+
if (0 < offset) {
|
479
|
+
return chars;
|
480
|
+
}
|
481
|
+
|
482
|
+
/* the current utf8 codepoint in src did not match reject, but src
|
483
|
+
* could have been partway through a utf8 codepoint, so we need to
|
484
|
+
* march it onto the next utf8 codepoint starting byte */
|
386
485
|
do {
|
387
|
-
|
388
|
-
} while ((0x80 == (0xc0 & *
|
486
|
+
src++;
|
487
|
+
} while ((0x80 == (0xc0 & *src)));
|
389
488
|
chars++;
|
390
489
|
}
|
391
490
|
|
392
491
|
return chars;
|
393
492
|
}
|
394
493
|
|
395
|
-
|
494
|
+
utf8_int8_t *utf8dup(const utf8_int8_t *src) {
|
495
|
+
return utf8dup_ex(src, utf8_null, utf8_null);
|
496
|
+
}
|
396
497
|
|
397
|
-
|
398
|
-
|
399
|
-
|
498
|
+
utf8_int8_t *utf8dup_ex(const utf8_int8_t *src,
|
499
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t),
|
500
|
+
utf8_int8_t *user_data) {
|
501
|
+
utf8_int8_t *n = utf8_null;
|
400
502
|
|
401
|
-
|
503
|
+
/* figure out how many bytes (including the terminator) we need to copy first
|
504
|
+
*/
|
402
505
|
size_t bytes = utf8size(src);
|
403
506
|
|
404
|
-
|
507
|
+
if (alloc_func_ptr) {
|
508
|
+
n = alloc_func_ptr(user_data, bytes);
|
509
|
+
} else {
|
510
|
+
#if !defined(UTF8_NO_STD_MALLOC)
|
511
|
+
n = (utf8_int8_t *)malloc(bytes);
|
512
|
+
#else
|
513
|
+
return utf8_null;
|
514
|
+
#endif
|
515
|
+
}
|
405
516
|
|
406
517
|
if (utf8_null == n) {
|
407
|
-
|
518
|
+
/* out of memory so we bail */
|
408
519
|
return utf8_null;
|
409
520
|
} else {
|
410
521
|
bytes = 0;
|
411
522
|
|
412
|
-
|
413
|
-
while ('\0' !=
|
414
|
-
n[bytes] =
|
523
|
+
/* copy src byte-by-byte into our new utf8 string */
|
524
|
+
while ('\0' != src[bytes]) {
|
525
|
+
n[bytes] = src[bytes];
|
415
526
|
bytes++;
|
416
527
|
}
|
417
528
|
|
418
|
-
|
529
|
+
/* append null terminating byte */
|
419
530
|
n[bytes] = '\0';
|
420
531
|
return n;
|
421
532
|
}
|
422
533
|
}
|
423
534
|
|
424
|
-
|
535
|
+
utf8_constexpr14_impl utf8_int8_t *utf8fry(const utf8_int8_t *str);
|
536
|
+
|
537
|
+
utf8_constexpr14_impl size_t utf8len(const utf8_int8_t *str) {
|
538
|
+
return utf8nlen(str, SIZE_MAX);
|
539
|
+
}
|
425
540
|
|
426
|
-
size_t
|
427
|
-
const
|
541
|
+
utf8_constexpr14_impl size_t utf8nlen(const utf8_int8_t *str, size_t n) {
|
542
|
+
const utf8_int8_t *t = str;
|
428
543
|
size_t length = 0;
|
429
544
|
|
430
|
-
while ('\0' != *
|
431
|
-
if (0xf0 == (0xf8 & *
|
432
|
-
|
433
|
-
|
434
|
-
} else if (0xe0 == (0xf0 & *
|
435
|
-
|
436
|
-
|
437
|
-
} else if (0xc0 == (0xe0 & *
|
438
|
-
|
439
|
-
|
440
|
-
} else {
|
441
|
-
|
442
|
-
|
545
|
+
while ((size_t)(str - t) < n && '\0' != *str) {
|
546
|
+
if (0xf0 == (0xf8 & *str)) {
|
547
|
+
/* 4-byte utf8 code point (began with 0b11110xxx) */
|
548
|
+
str += 4;
|
549
|
+
} else if (0xe0 == (0xf0 & *str)) {
|
550
|
+
/* 3-byte utf8 code point (began with 0b1110xxxx) */
|
551
|
+
str += 3;
|
552
|
+
} else if (0xc0 == (0xe0 & *str)) {
|
553
|
+
/* 2-byte utf8 code point (began with 0b110xxxxx) */
|
554
|
+
str += 2;
|
555
|
+
} else { /* if (0x00 == (0x80 & *s)) { */
|
556
|
+
/* 1-byte ascii (began with 0b0xxxxxxx) */
|
557
|
+
str += 1;
|
443
558
|
}
|
444
559
|
|
445
|
-
|
446
|
-
|
560
|
+
/* no matter the bytes we marched s forward by, it was
|
561
|
+
* only 1 utf8 codepoint */
|
447
562
|
length++;
|
448
563
|
}
|
449
564
|
|
565
|
+
if ((size_t)(str - t) > n) {
|
566
|
+
length--;
|
567
|
+
}
|
450
568
|
return length;
|
451
569
|
}
|
452
570
|
|
453
|
-
int utf8ncasecmp(const
|
454
|
-
|
571
|
+
utf8_constexpr14_impl int utf8ncasecmp(const utf8_int8_t *src1,
|
572
|
+
const utf8_int8_t *src2, size_t n) {
|
573
|
+
utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
|
574
|
+
src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
|
455
575
|
|
456
576
|
do {
|
457
|
-
const
|
458
|
-
const
|
577
|
+
const utf8_int8_t *const s1 = src1;
|
578
|
+
const utf8_int8_t *const s2 = src2;
|
459
579
|
|
460
|
-
|
461
|
-
|
580
|
+
/* first check that we have enough bytes left in n to contain an entire
|
581
|
+
* codepoint */
|
462
582
|
if (0 == n) {
|
463
583
|
return 0;
|
464
584
|
}
|
@@ -467,10 +587,8 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
|
467
587
|
const utf8_int32_t c1 = (0xe0 & *s1);
|
468
588
|
const utf8_int32_t c2 = (0xe0 & *s2);
|
469
589
|
|
470
|
-
if (c1
|
471
|
-
return -
|
472
|
-
} else if (c1 > c2) {
|
473
|
-
return 1;
|
590
|
+
if (c1 != c2) {
|
591
|
+
return c1 - c2;
|
474
592
|
} else {
|
475
593
|
return 0;
|
476
594
|
}
|
@@ -480,10 +598,8 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
|
480
598
|
const utf8_int32_t c1 = (0xf0 & *s1);
|
481
599
|
const utf8_int32_t c2 = (0xf0 & *s2);
|
482
600
|
|
483
|
-
if (c1
|
484
|
-
return -
|
485
|
-
} else if (c1 > c2) {
|
486
|
-
return 1;
|
601
|
+
if (c1 != c2) {
|
602
|
+
return c1 - c2;
|
487
603
|
} else {
|
488
604
|
return 0;
|
489
605
|
}
|
@@ -493,307 +609,343 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
|
|
493
609
|
const utf8_int32_t c1 = (0xf8 & *s1);
|
494
610
|
const utf8_int32_t c2 = (0xf8 & *s2);
|
495
611
|
|
496
|
-
if (c1
|
497
|
-
return -
|
498
|
-
} else if (c1 > c2) {
|
499
|
-
return 1;
|
612
|
+
if (c1 != c2) {
|
613
|
+
return c1 - c2;
|
500
614
|
} else {
|
501
615
|
return 0;
|
502
616
|
}
|
503
617
|
}
|
504
618
|
|
505
|
-
src1 = utf8codepoint(src1, &
|
506
|
-
src2 = utf8codepoint(src2, &
|
507
|
-
n -= utf8codepointsize(
|
619
|
+
src1 = utf8codepoint(src1, &src1_orig_cp);
|
620
|
+
src2 = utf8codepoint(src2, &src2_orig_cp);
|
621
|
+
n -= utf8codepointsize(src1_orig_cp);
|
508
622
|
|
509
|
-
|
510
|
-
|
511
|
-
src2_orig_cp = src2_cp;
|
623
|
+
src1_lwr_cp = utf8lwrcodepoint(src1_orig_cp);
|
624
|
+
src2_lwr_cp = utf8lwrcodepoint(src2_orig_cp);
|
512
625
|
|
513
|
-
|
514
|
-
|
515
|
-
src2_cp = utf8lwrcodepoint(src2_cp);
|
626
|
+
src1_upr_cp = utf8uprcodepoint(src1_orig_cp);
|
627
|
+
src2_upr_cp = utf8uprcodepoint(src2_orig_cp);
|
516
628
|
|
517
|
-
|
629
|
+
/* check if the lowered codepoints match */
|
518
630
|
if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
|
519
631
|
return 0;
|
520
|
-
} else if (
|
632
|
+
} else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
|
521
633
|
continue;
|
522
634
|
}
|
523
635
|
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
} else if (src1_orig_cp > src2_orig_cp) {
|
528
|
-
return 1;
|
529
|
-
}
|
636
|
+
/* if they don't match, then we return the difference between the characters
|
637
|
+
*/
|
638
|
+
return src1_lwr_cp - src2_lwr_cp;
|
530
639
|
} while (0 < n);
|
531
640
|
|
532
|
-
|
641
|
+
/* both utf8 strings matched */
|
533
642
|
return 0;
|
534
643
|
}
|
535
644
|
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
const char *s = (const char *)src;
|
645
|
+
utf8_int8_t *utf8ncat(utf8_int8_t *utf8_restrict dst,
|
646
|
+
const utf8_int8_t *utf8_restrict src, size_t n) {
|
647
|
+
utf8_int8_t *d = dst;
|
540
648
|
|
541
|
-
|
649
|
+
/* find the null terminating byte in dst */
|
542
650
|
while ('\0' != *d) {
|
543
651
|
d++;
|
544
652
|
}
|
545
653
|
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
*d++ = *
|
550
|
-
}
|
654
|
+
/* overwriting the null terminating byte in dst, append src byte-by-byte
|
655
|
+
* stopping if we run out of space */
|
656
|
+
while (('\0' != *src) && (0 != n--)) {
|
657
|
+
*d++ = *src++;
|
658
|
+
}
|
551
659
|
|
552
|
-
|
660
|
+
/* write out a new null terminating byte into dst */
|
553
661
|
*d = '\0';
|
554
662
|
|
555
663
|
return dst;
|
556
664
|
}
|
557
665
|
|
558
|
-
int utf8ncmp(const
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
while ((('\0' != *s1) || ('\0' != *s2)) && (0 != n--)) {
|
563
|
-
if (*s1 < *s2) {
|
666
|
+
utf8_constexpr14_impl int utf8ncmp(const utf8_int8_t *src1,
|
667
|
+
const utf8_int8_t *src2, size_t n) {
|
668
|
+
while ((0 != n--) && (('\0' != *src1) || ('\0' != *src2))) {
|
669
|
+
if (*src1 < *src2) {
|
564
670
|
return -1;
|
565
|
-
} else if (*
|
671
|
+
} else if (*src1 > *src2) {
|
566
672
|
return 1;
|
567
673
|
}
|
568
674
|
|
569
|
-
|
570
|
-
|
675
|
+
src1++;
|
676
|
+
src2++;
|
571
677
|
}
|
572
678
|
|
573
|
-
|
679
|
+
/* both utf8 strings matched */
|
574
680
|
return 0;
|
575
681
|
}
|
576
682
|
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
683
|
+
utf8_int8_t *utf8ncpy(utf8_int8_t *utf8_restrict dst,
|
684
|
+
const utf8_int8_t *utf8_restrict src, size_t n) {
|
685
|
+
utf8_int8_t *d = dst;
|
686
|
+
size_t index = 0, check_index = 0;
|
581
687
|
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
*d++ = *s++;
|
586
|
-
} while (('\0' != *s) && (0 != --n));
|
688
|
+
if (n == 0) {
|
689
|
+
return dst;
|
690
|
+
}
|
587
691
|
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
692
|
+
/* overwriting anything previously in dst, write byte-by-byte
|
693
|
+
* from src */
|
694
|
+
for (index = 0; index < n; index++) {
|
695
|
+
d[index] = src[index];
|
696
|
+
if ('\0' == src[index]) {
|
697
|
+
break;
|
698
|
+
}
|
699
|
+
}
|
700
|
+
|
701
|
+
for (check_index = index - 1;
|
702
|
+
check_index > 0 && 0x80 == (0xc0 & d[check_index]); check_index--) {
|
703
|
+
/* just moving the index */
|
704
|
+
}
|
705
|
+
|
706
|
+
if (check_index < index &&
|
707
|
+
((index - check_index) < utf8codepointcalcsize(&d[check_index]) ||
|
708
|
+
(index - check_index) == n)) {
|
709
|
+
index = check_index;
|
710
|
+
}
|
711
|
+
|
712
|
+
/* append null terminating byte */
|
713
|
+
for (; index < n; index++) {
|
714
|
+
d[index] = 0;
|
592
715
|
}
|
593
716
|
|
594
717
|
return dst;
|
595
718
|
}
|
596
719
|
|
597
|
-
|
598
|
-
|
599
|
-
|
720
|
+
utf8_int8_t *utf8ndup(const utf8_int8_t *src, size_t n) {
|
721
|
+
return utf8ndup_ex(src, n, utf8_null, utf8_null);
|
722
|
+
}
|
723
|
+
|
724
|
+
utf8_int8_t *utf8ndup_ex(const utf8_int8_t *src, size_t n,
|
725
|
+
utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t),
|
726
|
+
utf8_int8_t *user_data) {
|
727
|
+
utf8_int8_t *c = utf8_null;
|
600
728
|
size_t bytes = 0;
|
601
729
|
|
602
|
-
|
603
|
-
while ('\0' !=
|
730
|
+
/* Find the end of the string or stop when n is reached */
|
731
|
+
while ('\0' != src[bytes] && bytes < n) {
|
604
732
|
bytes++;
|
605
733
|
}
|
606
734
|
|
607
|
-
|
608
|
-
|
735
|
+
/* In case bytes is actually less than n, we need to set it
|
736
|
+
* to be used later in the copy byte by byte. */
|
609
737
|
n = bytes;
|
610
738
|
|
611
|
-
|
739
|
+
if (alloc_func_ptr) {
|
740
|
+
c = alloc_func_ptr(user_data, bytes + 1);
|
741
|
+
} else {
|
742
|
+
#if !defined(UTF8_NO_STD_MALLOC)
|
743
|
+
c = (utf8_int8_t *)malloc(bytes + 1);
|
744
|
+
#else
|
745
|
+
c = utf8_null;
|
746
|
+
#endif
|
747
|
+
}
|
748
|
+
|
612
749
|
if (utf8_null == c) {
|
613
|
-
|
750
|
+
/* out of memory so we bail */
|
614
751
|
return utf8_null;
|
615
752
|
}
|
616
753
|
|
617
754
|
bytes = 0;
|
618
755
|
|
619
|
-
|
620
|
-
while ('\0' !=
|
621
|
-
c[bytes] =
|
756
|
+
/* copy src byte-by-byte into our new utf8 string */
|
757
|
+
while ('\0' != src[bytes] && bytes < n) {
|
758
|
+
c[bytes] = src[bytes];
|
622
759
|
bytes++;
|
623
760
|
}
|
624
761
|
|
625
|
-
|
762
|
+
/* append null terminating byte */
|
626
763
|
c[bytes] = '\0';
|
627
764
|
return c;
|
628
765
|
}
|
629
766
|
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
767
|
+
utf8_constexpr14_impl utf8_int8_t *utf8rchr(const utf8_int8_t *src, int chr) {
|
768
|
+
|
769
|
+
utf8_int8_t *match = utf8_null;
|
770
|
+
utf8_int8_t c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
634
771
|
|
635
772
|
if (0 == chr) {
|
636
|
-
|
637
|
-
|
638
|
-
while ('\0' != *
|
639
|
-
|
773
|
+
/* being asked to return position of null terminating byte, so
|
774
|
+
* just run s to the end, and return! */
|
775
|
+
while ('\0' != *src) {
|
776
|
+
src++;
|
640
777
|
}
|
641
|
-
return (
|
778
|
+
return (utf8_int8_t *)src;
|
642
779
|
} else if (0 == ((int)0xffffff80 & chr)) {
|
643
|
-
|
644
|
-
|
645
|
-
c[0] = (
|
780
|
+
/* 1-byte/7-bit ascii
|
781
|
+
* (0b0xxxxxxx) */
|
782
|
+
c[0] = (utf8_int8_t)chr;
|
646
783
|
} else if (0 == ((int)0xfffff800 & chr)) {
|
647
|
-
|
648
|
-
|
649
|
-
c[0] = 0xc0 | (
|
650
|
-
c[1] = 0x80 | (
|
784
|
+
/* 2-byte/11-bit utf8 code point
|
785
|
+
* (0b110xxxxx 0b10xxxxxx) */
|
786
|
+
c[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)(chr >> 6));
|
787
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
651
788
|
} else if (0 == ((int)0xffff0000 & chr)) {
|
652
|
-
|
653
|
-
|
654
|
-
c[0] = 0xe0 | (
|
655
|
-
c[1] = 0x80 | (
|
656
|
-
c[2] = 0x80 | (
|
657
|
-
} else {
|
658
|
-
|
659
|
-
|
660
|
-
c[0] = 0xf0 | (
|
661
|
-
c[1] = 0x80 | (
|
662
|
-
c[2] = 0x80 | (
|
663
|
-
c[3] = 0x80 | (
|
789
|
+
/* 3-byte/16-bit utf8 code point
|
790
|
+
* (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
|
791
|
+
c[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)(chr >> 12));
|
792
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
793
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
794
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
795
|
+
/* 4-byte/21-bit utf8 code point
|
796
|
+
* (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
|
797
|
+
c[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)(chr >> 18));
|
798
|
+
c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
|
799
|
+
c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
800
|
+
c[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
664
801
|
}
|
665
802
|
|
666
|
-
|
667
|
-
|
668
|
-
|
803
|
+
/* we've created a 2 utf8 codepoint string in c that is
|
804
|
+
* the utf8 character asked for by chr, and a null
|
805
|
+
* terminating byte */
|
669
806
|
|
670
|
-
while ('\0' != *
|
807
|
+
while ('\0' != *src) {
|
671
808
|
size_t offset = 0;
|
672
809
|
|
673
|
-
while (
|
810
|
+
while ((src[offset] == c[offset]) && ('\0' != src[offset])) {
|
674
811
|
offset++;
|
675
812
|
}
|
676
813
|
|
677
814
|
if ('\0' == c[offset]) {
|
678
|
-
|
679
|
-
match =
|
680
|
-
|
815
|
+
/* we found a matching utf8 code point */
|
816
|
+
match = (utf8_int8_t *)src;
|
817
|
+
src += offset;
|
818
|
+
|
819
|
+
if ('\0' == *src) {
|
820
|
+
break;
|
821
|
+
}
|
681
822
|
} else {
|
682
|
-
|
823
|
+
src += offset;
|
683
824
|
|
684
|
-
|
685
|
-
|
686
|
-
if ('\0' != *
|
825
|
+
/* need to march s along to next utf8 codepoint start
|
826
|
+
* (the next byte that doesn't match 0b10xxxxxx) */
|
827
|
+
if ('\0' != *src) {
|
687
828
|
do {
|
688
|
-
|
689
|
-
} while (0x80 == (0xc0 & *
|
829
|
+
src++;
|
830
|
+
} while (0x80 == (0xc0 & *src));
|
690
831
|
}
|
691
832
|
}
|
692
833
|
}
|
693
834
|
|
694
|
-
|
695
|
-
return
|
835
|
+
/* return the last match we found (or 0 if no match was found) */
|
836
|
+
return match;
|
696
837
|
}
|
697
838
|
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
const char *a = (const char *)accept;
|
839
|
+
utf8_constexpr14_impl utf8_int8_t *utf8pbrk(const utf8_int8_t *str,
|
840
|
+
const utf8_int8_t *accept) {
|
841
|
+
while ('\0' != *str) {
|
842
|
+
const utf8_int8_t *a = accept;
|
703
843
|
size_t offset = 0;
|
704
844
|
|
705
845
|
while ('\0' != *a) {
|
706
|
-
|
707
|
-
|
708
|
-
|
846
|
+
/* checking that if *a is the start of a utf8 codepoint
|
847
|
+
* (it is not 0b10xxxxxx) and we have successfully matched
|
848
|
+
* a previous character (0 < offset) - we found a match */
|
709
849
|
if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
|
710
|
-
return (
|
850
|
+
return (utf8_int8_t *)str;
|
711
851
|
} else {
|
712
|
-
if (*a ==
|
713
|
-
|
714
|
-
|
852
|
+
if (*a == str[offset]) {
|
853
|
+
/* part of a utf8 codepoint matched, so move our checking
|
854
|
+
* onwards to the next byte */
|
715
855
|
offset++;
|
716
856
|
a++;
|
717
857
|
} else {
|
718
|
-
|
719
|
-
|
858
|
+
/* r could be in the middle of an unmatching utf8 code point,
|
859
|
+
* so we need to march it on to the next character beginning, */
|
720
860
|
|
721
861
|
do {
|
722
862
|
a++;
|
723
863
|
} while (0x80 == (0xc0 & *a));
|
724
864
|
|
725
|
-
|
865
|
+
/* reset offset too as we found a mismatch */
|
726
866
|
offset = 0;
|
727
867
|
}
|
728
868
|
}
|
729
869
|
}
|
730
870
|
|
731
|
-
|
871
|
+
/* we found a match on the last utf8 codepoint */
|
732
872
|
if (0 < offset) {
|
733
|
-
return (
|
873
|
+
return (utf8_int8_t *)str;
|
734
874
|
}
|
735
875
|
|
736
|
-
|
737
|
-
|
738
|
-
|
876
|
+
/* the current utf8 codepoint in src did not match accept, but src
|
877
|
+
* could have been partway through a utf8 codepoint, so we need to
|
878
|
+
* march it onto the next utf8 codepoint starting byte */
|
739
879
|
do {
|
740
|
-
|
741
|
-
} while ((0x80 == (0xc0 & *
|
880
|
+
str++;
|
881
|
+
} while ((0x80 == (0xc0 & *str)));
|
742
882
|
}
|
743
883
|
|
744
884
|
return utf8_null;
|
745
885
|
}
|
746
886
|
|
747
|
-
size_t utf8size(const
|
748
|
-
|
887
|
+
utf8_constexpr14_impl size_t utf8size(const utf8_int8_t *str) {
|
888
|
+
return utf8size_lazy(str) + 1;
|
889
|
+
}
|
890
|
+
|
891
|
+
utf8_constexpr14_impl size_t utf8size_lazy(const utf8_int8_t *str) {
|
892
|
+
return utf8nsize_lazy(str, SIZE_MAX);
|
893
|
+
}
|
894
|
+
|
895
|
+
utf8_constexpr14_impl size_t utf8nsize_lazy(const utf8_int8_t *str, size_t n) {
|
749
896
|
size_t size = 0;
|
750
|
-
while ('\0' !=
|
897
|
+
while (size < n && '\0' != str[size]) {
|
751
898
|
size++;
|
752
899
|
}
|
753
|
-
|
754
|
-
// we are including the null terminating byte in the size calculation
|
755
|
-
size++;
|
756
900
|
return size;
|
757
901
|
}
|
758
902
|
|
759
|
-
size_t utf8spn(const
|
760
|
-
|
903
|
+
utf8_constexpr14_impl size_t utf8spn(const utf8_int8_t *src,
|
904
|
+
const utf8_int8_t *accept) {
|
761
905
|
size_t chars = 0;
|
762
906
|
|
763
|
-
while ('\0' != *
|
764
|
-
const
|
907
|
+
while ('\0' != *src) {
|
908
|
+
const utf8_int8_t *a = accept;
|
765
909
|
size_t offset = 0;
|
766
910
|
|
767
911
|
while ('\0' != *a) {
|
768
|
-
|
769
|
-
|
770
|
-
|
912
|
+
/* checking that if *r is the start of a utf8 codepoint
|
913
|
+
* (it is not 0b10xxxxxx) and we have successfully matched
|
914
|
+
* a previous character (0 < offset) - we found a match */
|
771
915
|
if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
|
772
|
-
|
773
|
-
|
774
|
-
|
916
|
+
/* found a match, so increment the number of utf8 codepoints
|
917
|
+
* that have matched and stop checking whether any other utf8
|
918
|
+
* codepoints in a match */
|
775
919
|
chars++;
|
776
|
-
|
920
|
+
src += offset;
|
921
|
+
offset = 0;
|
777
922
|
break;
|
778
923
|
} else {
|
779
|
-
if (*a ==
|
924
|
+
if (*a == src[offset]) {
|
780
925
|
offset++;
|
781
926
|
a++;
|
782
927
|
} else {
|
783
|
-
|
784
|
-
|
928
|
+
/* a could be in the middle of an unmatching utf8 codepoint,
|
929
|
+
* so we need to march it on to the next character beginning, */
|
785
930
|
do {
|
786
931
|
a++;
|
787
932
|
} while (0x80 == (0xc0 & *a));
|
788
933
|
|
789
|
-
|
934
|
+
/* reset offset too as we found a mismatch */
|
790
935
|
offset = 0;
|
791
936
|
}
|
792
937
|
}
|
793
938
|
}
|
794
939
|
|
795
|
-
|
796
|
-
|
940
|
+
/* found a match at the end of *a, so didn't get a chance to test it */
|
941
|
+
if (0 < offset) {
|
942
|
+
chars++;
|
943
|
+
src += offset;
|
944
|
+
continue;
|
945
|
+
}
|
946
|
+
|
947
|
+
/* if a got to its terminating null byte, then we didn't find a match.
|
948
|
+
* Return the current number of matched utf8 codepoints */
|
797
949
|
if ('\0' == *a) {
|
798
950
|
return chars;
|
799
951
|
}
|
@@ -802,302 +954,405 @@ size_t utf8spn(const void *src, const void *accept) {
|
|
802
954
|
return chars;
|
803
955
|
}
|
804
956
|
|
805
|
-
|
806
|
-
|
957
|
+
utf8_constexpr14_impl utf8_int8_t *utf8str(const utf8_int8_t *haystack,
|
958
|
+
const utf8_int8_t *needle) {
|
959
|
+
utf8_int32_t throwaway_codepoint = 0;
|
807
960
|
|
808
|
-
|
809
|
-
|
810
|
-
if ('\0' == *
|
811
|
-
return (
|
961
|
+
/* if needle has no utf8 codepoints before the null terminating
|
962
|
+
* byte then return haystack */
|
963
|
+
if ('\0' == *needle) {
|
964
|
+
return (utf8_int8_t *)haystack;
|
812
965
|
}
|
813
966
|
|
814
|
-
while ('\0' != *
|
815
|
-
const
|
816
|
-
const
|
967
|
+
while ('\0' != *haystack) {
|
968
|
+
const utf8_int8_t *maybeMatch = haystack;
|
969
|
+
const utf8_int8_t *n = needle;
|
817
970
|
|
818
|
-
while (*
|
971
|
+
while (*haystack == *n && (*haystack != '\0' && *n != '\0')) {
|
819
972
|
n++;
|
820
|
-
|
973
|
+
haystack++;
|
821
974
|
}
|
822
975
|
|
823
976
|
if ('\0' == *n) {
|
824
|
-
|
825
|
-
|
826
|
-
return (
|
977
|
+
/* we found the whole utf8 string for needle in haystack at
|
978
|
+
* maybeMatch, so return it */
|
979
|
+
return (utf8_int8_t *)maybeMatch;
|
827
980
|
} else {
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
h++;
|
833
|
-
} while (0x80 == (0xc0 & *h));
|
834
|
-
}
|
981
|
+
/* h could be in the middle of an unmatching utf8 codepoint,
|
982
|
+
* so we need to march it on to the next character beginning
|
983
|
+
* starting from the current character */
|
984
|
+
haystack = utf8codepoint(maybeMatch, &throwaway_codepoint);
|
835
985
|
}
|
836
986
|
}
|
837
987
|
|
838
|
-
|
988
|
+
/* no match */
|
839
989
|
return utf8_null;
|
840
990
|
}
|
841
991
|
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
return (void *)haystack;
|
992
|
+
utf8_constexpr14_impl utf8_int8_t *utf8casestr(const utf8_int8_t *haystack,
|
993
|
+
const utf8_int8_t *needle) {
|
994
|
+
/* if needle has no utf8 codepoints before the null terminating
|
995
|
+
* byte then return haystack */
|
996
|
+
if ('\0' == *needle) {
|
997
|
+
return (utf8_int8_t *)haystack;
|
849
998
|
}
|
850
999
|
|
851
1000
|
for (;;) {
|
852
|
-
const
|
853
|
-
const
|
854
|
-
utf8_int32_t h_cp, n_cp;
|
1001
|
+
const utf8_int8_t *maybeMatch = haystack;
|
1002
|
+
const utf8_int8_t *n = needle;
|
1003
|
+
utf8_int32_t h_cp = 0, n_cp = 0;
|
855
1004
|
|
856
|
-
|
1005
|
+
/* Get the next code point and track it */
|
1006
|
+
const utf8_int8_t *nextH = haystack = utf8codepoint(haystack, &h_cp);
|
857
1007
|
n = utf8codepoint(n, &n_cp);
|
858
1008
|
|
859
1009
|
while ((0 != h_cp) && (0 != n_cp)) {
|
860
1010
|
h_cp = utf8lwrcodepoint(h_cp);
|
861
1011
|
n_cp = utf8lwrcodepoint(n_cp);
|
862
1012
|
|
863
|
-
|
1013
|
+
/* if we find a mismatch, bail out! */
|
864
1014
|
if (h_cp != n_cp) {
|
865
1015
|
break;
|
866
1016
|
}
|
867
1017
|
|
868
|
-
|
1018
|
+
haystack = utf8codepoint(haystack, &h_cp);
|
869
1019
|
n = utf8codepoint(n, &n_cp);
|
870
1020
|
}
|
871
1021
|
|
872
1022
|
if (0 == n_cp) {
|
873
|
-
|
874
|
-
|
875
|
-
return (
|
1023
|
+
/* we found the whole utf8 string for needle in haystack at
|
1024
|
+
* maybeMatch, so return it */
|
1025
|
+
return (utf8_int8_t *)maybeMatch;
|
876
1026
|
}
|
877
1027
|
|
878
1028
|
if (0 == h_cp) {
|
879
|
-
|
1029
|
+
/* no match */
|
880
1030
|
return utf8_null;
|
881
1031
|
}
|
1032
|
+
|
1033
|
+
/* Roll back to the next code point in the haystack to test */
|
1034
|
+
haystack = nextH;
|
882
1035
|
}
|
883
1036
|
}
|
884
1037
|
|
885
|
-
|
886
|
-
|
1038
|
+
utf8_constexpr14_impl utf8_int8_t *utf8valid(const utf8_int8_t *str) {
|
1039
|
+
return utf8nvalid(str, SIZE_MAX);
|
1040
|
+
}
|
1041
|
+
|
1042
|
+
utf8_constexpr14_impl utf8_int8_t *utf8nvalid(const utf8_int8_t *str,
|
1043
|
+
size_t n) {
|
1044
|
+
const utf8_int8_t *t = str;
|
1045
|
+
size_t consumed = 0;
|
1046
|
+
|
1047
|
+
while ((void)(consumed = (size_t)(str - t)), consumed < n && '\0' != *str) {
|
1048
|
+
const size_t remaining = n - consumed;
|
1049
|
+
|
1050
|
+
if (0xf0 == (0xf8 & *str)) {
|
1051
|
+
/* ensure that there's 4 bytes or more remaining */
|
1052
|
+
if (remaining < 4) {
|
1053
|
+
return (utf8_int8_t *)str;
|
1054
|
+
}
|
1055
|
+
|
1056
|
+
/* ensure each of the 3 following bytes in this 4-byte
|
1057
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1058
|
+
if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2])) ||
|
1059
|
+
(0x80 != (0xc0 & str[3]))) {
|
1060
|
+
return (utf8_int8_t *)str;
|
1061
|
+
}
|
1062
|
+
|
1063
|
+
/* ensure that our utf8 codepoint ended after 4 bytes */
|
1064
|
+
if ((remaining != 4) && (0x80 == (0xc0 & str[4]))) {
|
1065
|
+
return (utf8_int8_t *)str;
|
1066
|
+
}
|
887
1067
|
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
(0x80 != (0xc0 & s[3]))) {
|
894
|
-
return (void *)s;
|
1068
|
+
/* ensure that the top 5 bits of this 4-byte utf8
|
1069
|
+
* codepoint were not 0, as then we could have used
|
1070
|
+
* one of the smaller encodings */
|
1071
|
+
if ((0 == (0x07 & str[0])) && (0 == (0x30 & str[1]))) {
|
1072
|
+
return (utf8_int8_t *)str;
|
895
1073
|
}
|
896
1074
|
|
897
|
-
|
898
|
-
|
899
|
-
|
1075
|
+
/* 4-byte utf8 code point (began with 0b11110xxx) */
|
1076
|
+
str += 4;
|
1077
|
+
} else if (0xe0 == (0xf0 & *str)) {
|
1078
|
+
/* ensure that there's 3 bytes or more remaining */
|
1079
|
+
if (remaining < 3) {
|
1080
|
+
return (utf8_int8_t *)str;
|
900
1081
|
}
|
901
1082
|
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
return (void *)s;
|
1083
|
+
/* ensure each of the 2 following bytes in this 3-byte
|
1084
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1085
|
+
if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2]))) {
|
1086
|
+
return (utf8_int8_t *)str;
|
907
1087
|
}
|
908
1088
|
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
// ensure each of the 2 following bytes in this 3-byte
|
913
|
-
// utf8 codepoint began with 0b10xxxxxx
|
914
|
-
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
|
915
|
-
return (void *)s;
|
1089
|
+
/* ensure that our utf8 codepoint ended after 3 bytes */
|
1090
|
+
if ((remaining != 3) && (0x80 == (0xc0 & str[3]))) {
|
1091
|
+
return (utf8_int8_t *)str;
|
916
1092
|
}
|
917
1093
|
|
918
|
-
|
919
|
-
|
920
|
-
|
1094
|
+
/* ensure that the top 5 bits of this 3-byte utf8
|
1095
|
+
* codepoint were not 0, as then we could have used
|
1096
|
+
* one of the smaller encodings */
|
1097
|
+
if ((0 == (0x0f & str[0])) && (0 == (0x20 & str[1]))) {
|
1098
|
+
return (utf8_int8_t *)str;
|
921
1099
|
}
|
922
1100
|
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
1101
|
+
/* 3-byte utf8 code point (began with 0b1110xxxx) */
|
1102
|
+
str += 3;
|
1103
|
+
} else if (0xc0 == (0xe0 & *str)) {
|
1104
|
+
/* ensure that there's 2 bytes or more remaining */
|
1105
|
+
if (remaining < 2) {
|
1106
|
+
return (utf8_int8_t *)str;
|
928
1107
|
}
|
929
1108
|
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
// utf8 codepoint began with 0b10xxxxxx
|
935
|
-
if (0x80 != (0xc0 & s[1])) {
|
936
|
-
return (void *)s;
|
1109
|
+
/* ensure the 1 following byte in this 2-byte
|
1110
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1111
|
+
if (0x80 != (0xc0 & str[1])) {
|
1112
|
+
return (utf8_int8_t *)str;
|
937
1113
|
}
|
938
1114
|
|
939
|
-
|
940
|
-
if (0x80 == (0xc0 &
|
941
|
-
return (
|
1115
|
+
/* ensure that our utf8 codepoint ended after 2 bytes */
|
1116
|
+
if ((remaining != 2) && (0x80 == (0xc0 & str[2]))) {
|
1117
|
+
return (utf8_int8_t *)str;
|
942
1118
|
}
|
943
1119
|
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
if (0 == (0x1e &
|
948
|
-
return (
|
1120
|
+
/* ensure that the top 4 bits of this 2-byte utf8
|
1121
|
+
* codepoint were not 0, as then we could have used
|
1122
|
+
* one of the smaller encodings */
|
1123
|
+
if (0 == (0x1e & str[0])) {
|
1124
|
+
return (utf8_int8_t *)str;
|
949
1125
|
}
|
950
1126
|
|
951
|
-
|
952
|
-
|
953
|
-
} else if (0x00 == (0x80 & *
|
954
|
-
|
955
|
-
|
1127
|
+
/* 2-byte utf8 code point (began with 0b110xxxxx) */
|
1128
|
+
str += 2;
|
1129
|
+
} else if (0x00 == (0x80 & *str)) {
|
1130
|
+
/* 1-byte ascii (began with 0b0xxxxxxx) */
|
1131
|
+
str += 1;
|
956
1132
|
} else {
|
957
|
-
|
958
|
-
return (
|
1133
|
+
/* we have an invalid 0b1xxxxxxx utf8 code point entry */
|
1134
|
+
return (utf8_int8_t *)str;
|
959
1135
|
}
|
960
1136
|
}
|
961
1137
|
|
962
1138
|
return utf8_null;
|
963
1139
|
}
|
964
1140
|
|
965
|
-
|
966
|
-
|
967
|
-
|
1141
|
+
int utf8makevalid(utf8_int8_t *str, const utf8_int32_t replacement) {
|
1142
|
+
utf8_int8_t *read = str;
|
1143
|
+
utf8_int8_t *write = read;
|
1144
|
+
const utf8_int8_t r = (utf8_int8_t)replacement;
|
1145
|
+
utf8_int32_t codepoint = 0;
|
968
1146
|
|
969
|
-
if (
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
1147
|
+
if (replacement > 0x7f) {
|
1148
|
+
return -1;
|
1149
|
+
}
|
1150
|
+
|
1151
|
+
while ('\0' != *read) {
|
1152
|
+
if (0xf0 == (0xf8 & *read)) {
|
1153
|
+
/* ensure each of the 3 following bytes in this 4-byte
|
1154
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1155
|
+
if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2])) ||
|
1156
|
+
(0x80 != (0xc0 & read[3]))) {
|
1157
|
+
*write++ = r;
|
1158
|
+
read++;
|
1159
|
+
continue;
|
1160
|
+
}
|
1161
|
+
|
1162
|
+
/* 4-byte utf8 code point (began with 0b11110xxx) */
|
1163
|
+
read = utf8codepoint(read, &codepoint);
|
1164
|
+
write = utf8catcodepoint(write, codepoint, 4);
|
1165
|
+
} else if (0xe0 == (0xf0 & *read)) {
|
1166
|
+
/* ensure each of the 2 following bytes in this 3-byte
|
1167
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1168
|
+
if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2]))) {
|
1169
|
+
*write++ = r;
|
1170
|
+
read++;
|
1171
|
+
continue;
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
/* 3-byte utf8 code point (began with 0b1110xxxx) */
|
1175
|
+
read = utf8codepoint(read, &codepoint);
|
1176
|
+
write = utf8catcodepoint(write, codepoint, 3);
|
1177
|
+
} else if (0xc0 == (0xe0 & *read)) {
|
1178
|
+
/* ensure the 1 following byte in this 2-byte
|
1179
|
+
* utf8 codepoint began with 0b10xxxxxx */
|
1180
|
+
if (0x80 != (0xc0 & read[1])) {
|
1181
|
+
*write++ = r;
|
1182
|
+
read++;
|
1183
|
+
continue;
|
1184
|
+
}
|
1185
|
+
|
1186
|
+
/* 2-byte utf8 code point (began with 0b110xxxxx) */
|
1187
|
+
read = utf8codepoint(read, &codepoint);
|
1188
|
+
write = utf8catcodepoint(write, codepoint, 2);
|
1189
|
+
} else if (0x00 == (0x80 & *read)) {
|
1190
|
+
/* 1-byte ascii (began with 0b0xxxxxxx) */
|
1191
|
+
read = utf8codepoint(read, &codepoint);
|
1192
|
+
write = utf8catcodepoint(write, codepoint, 1);
|
1193
|
+
} else {
|
1194
|
+
/* if we got here then we've got a dangling continuation (0b10xxxxxx) */
|
1195
|
+
*write++ = r;
|
1196
|
+
read++;
|
1197
|
+
continue;
|
1198
|
+
}
|
1199
|
+
}
|
1200
|
+
|
1201
|
+
*write = '\0';
|
1202
|
+
|
1203
|
+
return 0;
|
1204
|
+
}
|
1205
|
+
|
1206
|
+
utf8_constexpr14_impl utf8_int8_t *
|
1207
|
+
utf8codepoint(const utf8_int8_t *utf8_restrict str,
|
1208
|
+
utf8_int32_t *utf8_restrict out_codepoint) {
|
1209
|
+
if (0xf0 == (0xf8 & str[0])) {
|
1210
|
+
/* 4 byte utf8 codepoint */
|
1211
|
+
*out_codepoint = ((0x07 & str[0]) << 18) | ((0x3f & str[1]) << 12) |
|
1212
|
+
((0x3f & str[2]) << 6) | (0x3f & str[3]);
|
1213
|
+
str += 4;
|
1214
|
+
} else if (0xe0 == (0xf0 & str[0])) {
|
1215
|
+
/* 3 byte utf8 codepoint */
|
976
1216
|
*out_codepoint =
|
977
|
-
((0x0f &
|
978
|
-
|
979
|
-
} else if (0xc0 == (0xe0 &
|
980
|
-
|
981
|
-
*out_codepoint = ((0x1f &
|
982
|
-
|
1217
|
+
((0x0f & str[0]) << 12) | ((0x3f & str[1]) << 6) | (0x3f & str[2]);
|
1218
|
+
str += 3;
|
1219
|
+
} else if (0xc0 == (0xe0 & str[0])) {
|
1220
|
+
/* 2 byte utf8 codepoint */
|
1221
|
+
*out_codepoint = ((0x1f & str[0]) << 6) | (0x3f & str[1]);
|
1222
|
+
str += 2;
|
983
1223
|
} else {
|
984
|
-
|
985
|
-
*out_codepoint =
|
986
|
-
|
1224
|
+
/* 1 byte utf8 codepoint otherwise */
|
1225
|
+
*out_codepoint = str[0];
|
1226
|
+
str += 1;
|
987
1227
|
}
|
988
1228
|
|
989
|
-
return (
|
1229
|
+
return (utf8_int8_t *)str;
|
990
1230
|
}
|
991
1231
|
|
992
|
-
size_t
|
1232
|
+
utf8_constexpr14_impl size_t utf8codepointcalcsize(const utf8_int8_t *str) {
|
1233
|
+
if (0xf0 == (0xf8 & str[0])) {
|
1234
|
+
/* 4 byte utf8 codepoint */
|
1235
|
+
return 4;
|
1236
|
+
} else if (0xe0 == (0xf0 & str[0])) {
|
1237
|
+
/* 3 byte utf8 codepoint */
|
1238
|
+
return 3;
|
1239
|
+
} else if (0xc0 == (0xe0 & str[0])) {
|
1240
|
+
/* 2 byte utf8 codepoint */
|
1241
|
+
return 2;
|
1242
|
+
}
|
1243
|
+
|
1244
|
+
/* 1 byte utf8 codepoint otherwise */
|
1245
|
+
return 1;
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
utf8_constexpr14_impl size_t utf8codepointsize(utf8_int32_t chr) {
|
993
1249
|
if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
|
994
1250
|
return 1;
|
995
1251
|
} else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
|
996
1252
|
return 2;
|
997
1253
|
} else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
|
998
1254
|
return 3;
|
999
|
-
} else {
|
1255
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
1000
1256
|
return 4;
|
1001
1257
|
}
|
1002
1258
|
}
|
1003
1259
|
|
1004
|
-
|
1005
|
-
char *s = (char *)str;
|
1006
|
-
|
1260
|
+
utf8_int8_t *utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n) {
|
1007
1261
|
if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
|
1008
|
-
|
1009
|
-
|
1262
|
+
/* 1-byte/7-bit ascii
|
1263
|
+
* (0b0xxxxxxx) */
|
1010
1264
|
if (n < 1) {
|
1011
1265
|
return utf8_null;
|
1012
1266
|
}
|
1013
|
-
|
1014
|
-
|
1267
|
+
str[0] = (utf8_int8_t)chr;
|
1268
|
+
str += 1;
|
1015
1269
|
} else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
|
1016
|
-
|
1017
|
-
|
1270
|
+
/* 2-byte/11-bit utf8 code point
|
1271
|
+
* (0b110xxxxx 0b10xxxxxx) */
|
1018
1272
|
if (n < 2) {
|
1019
1273
|
return utf8_null;
|
1020
1274
|
}
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1275
|
+
str[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)((chr >> 6) & 0x1f));
|
1276
|
+
str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
1277
|
+
str += 2;
|
1024
1278
|
} else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
|
1025
|
-
|
1026
|
-
|
1279
|
+
/* 3-byte/16-bit utf8 code point
|
1280
|
+
* (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
|
1027
1281
|
if (n < 3) {
|
1028
1282
|
return utf8_null;
|
1029
1283
|
}
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
} else {
|
1035
|
-
|
1036
|
-
|
1284
|
+
str[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)((chr >> 12) & 0x0f));
|
1285
|
+
str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
1286
|
+
str[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
1287
|
+
str += 3;
|
1288
|
+
} else { /* if (0 == ((int)0xffe00000 & chr)) { */
|
1289
|
+
/* 4-byte/21-bit utf8 code point
|
1290
|
+
* (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
|
1037
1291
|
if (n < 4) {
|
1038
1292
|
return utf8_null;
|
1039
1293
|
}
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1294
|
+
str[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)((chr >> 18) & 0x07));
|
1295
|
+
str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
|
1296
|
+
str[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
|
1297
|
+
str[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
|
1298
|
+
str += 4;
|
1045
1299
|
}
|
1046
1300
|
|
1047
|
-
return
|
1301
|
+
return str;
|
1048
1302
|
}
|
1049
1303
|
|
1050
|
-
int utf8islower(utf8_int32_t chr) {
|
1051
|
-
|
1052
|
-
|
1304
|
+
utf8_constexpr14_impl int utf8islower(utf8_int32_t chr) {
|
1305
|
+
return chr != utf8uprcodepoint(chr);
|
1306
|
+
}
|
1053
1307
|
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1308
|
+
utf8_constexpr14_impl int utf8isupper(utf8_int32_t chr) {
|
1309
|
+
return chr != utf8lwrcodepoint(chr);
|
1310
|
+
}
|
1057
1311
|
|
1058
|
-
|
1059
|
-
|
1312
|
+
void utf8lwr(utf8_int8_t *utf8_restrict str) {
|
1313
|
+
utf8_int32_t cp = 0;
|
1314
|
+
utf8_int8_t *pn = utf8codepoint(str, &cp);
|
1060
1315
|
|
1061
1316
|
while (cp != 0) {
|
1062
1317
|
const utf8_int32_t lwr_cp = utf8lwrcodepoint(cp);
|
1063
1318
|
const size_t size = utf8codepointsize(lwr_cp);
|
1064
1319
|
|
1065
1320
|
if (lwr_cp != cp) {
|
1066
|
-
utf8catcodepoint(
|
1321
|
+
utf8catcodepoint(str, lwr_cp, size);
|
1067
1322
|
}
|
1068
1323
|
|
1069
|
-
|
1070
|
-
pn = utf8codepoint(
|
1324
|
+
str = pn;
|
1325
|
+
pn = utf8codepoint(str, &cp);
|
1071
1326
|
}
|
1072
1327
|
}
|
1073
1328
|
|
1074
|
-
void utf8upr(
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
p = (char *)str;
|
1079
|
-
pn = utf8codepoint(p, &cp);
|
1329
|
+
void utf8upr(utf8_int8_t *utf8_restrict str) {
|
1330
|
+
utf8_int32_t cp = 0;
|
1331
|
+
utf8_int8_t *pn = utf8codepoint(str, &cp);
|
1080
1332
|
|
1081
1333
|
while (cp != 0) {
|
1082
1334
|
const utf8_int32_t lwr_cp = utf8uprcodepoint(cp);
|
1083
1335
|
const size_t size = utf8codepointsize(lwr_cp);
|
1084
1336
|
|
1085
1337
|
if (lwr_cp != cp) {
|
1086
|
-
utf8catcodepoint(
|
1338
|
+
utf8catcodepoint(str, lwr_cp, size);
|
1087
1339
|
}
|
1088
1340
|
|
1089
|
-
|
1090
|
-
pn = utf8codepoint(
|
1341
|
+
str = pn;
|
1342
|
+
pn = utf8codepoint(str, &cp);
|
1091
1343
|
}
|
1092
1344
|
}
|
1093
1345
|
|
1094
|
-
utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
1346
|
+
utf8_constexpr14_impl utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
1095
1347
|
if (((0x0041 <= cp) && (0x005a >= cp)) ||
|
1096
1348
|
((0x00c0 <= cp) && (0x00d6 >= cp)) ||
|
1097
1349
|
((0x00d8 <= cp) && (0x00de >= cp)) ||
|
1098
1350
|
((0x0391 <= cp) && (0x03a1 >= cp)) ||
|
1099
|
-
((0x03a3 <= cp) && (0x03ab >= cp))
|
1351
|
+
((0x03a3 <= cp) && (0x03ab >= cp)) ||
|
1352
|
+
((0x0410 <= cp) && (0x042f >= cp))) {
|
1100
1353
|
cp += 32;
|
1354
|
+
} else if ((0x0400 <= cp) && (0x040f >= cp)) {
|
1355
|
+
cp += 80;
|
1101
1356
|
} else if (((0x0100 <= cp) && (0x012f >= cp)) ||
|
1102
1357
|
((0x0132 <= cp) && (0x0137 >= cp)) ||
|
1103
1358
|
((0x014a <= cp) && (0x0177 >= cp)) ||
|
@@ -1107,7 +1362,9 @@ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
|
1107
1362
|
((0x01f8 <= cp) && (0x021f >= cp)) ||
|
1108
1363
|
((0x0222 <= cp) && (0x0233 >= cp)) ||
|
1109
1364
|
((0x0246 <= cp) && (0x024f >= cp)) ||
|
1110
|
-
((0x03d8 <= cp) && (0x03ef >= cp))
|
1365
|
+
((0x03d8 <= cp) && (0x03ef >= cp)) ||
|
1366
|
+
((0x0460 <= cp) && (0x0481 >= cp)) ||
|
1367
|
+
((0x048a <= cp) && (0x04ff >= cp))) {
|
1111
1368
|
cp |= 0x1;
|
1112
1369
|
} else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
|
1113
1370
|
((0x0179 <= cp) && (0x017e >= cp)) ||
|
@@ -1118,62 +1375,147 @@ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
|
|
1118
1375
|
cp &= ~0x1;
|
1119
1376
|
} else {
|
1120
1377
|
switch (cp) {
|
1121
|
-
default:
|
1122
|
-
|
1123
|
-
case
|
1124
|
-
|
1125
|
-
|
1126
|
-
case
|
1127
|
-
|
1128
|
-
|
1129
|
-
case
|
1130
|
-
|
1131
|
-
|
1132
|
-
case
|
1133
|
-
|
1134
|
-
|
1135
|
-
case
|
1136
|
-
|
1137
|
-
|
1138
|
-
case
|
1139
|
-
|
1140
|
-
|
1141
|
-
case
|
1142
|
-
|
1143
|
-
|
1144
|
-
case
|
1145
|
-
|
1146
|
-
|
1147
|
-
case
|
1148
|
-
|
1149
|
-
|
1150
|
-
case
|
1151
|
-
|
1152
|
-
|
1153
|
-
case
|
1154
|
-
|
1155
|
-
|
1156
|
-
case
|
1157
|
-
|
1158
|
-
|
1159
|
-
case
|
1160
|
-
|
1161
|
-
|
1162
|
-
case
|
1163
|
-
|
1164
|
-
|
1378
|
+
default:
|
1379
|
+
break;
|
1380
|
+
case 0x0178:
|
1381
|
+
cp = 0x00ff;
|
1382
|
+
break;
|
1383
|
+
case 0x0243:
|
1384
|
+
cp = 0x0180;
|
1385
|
+
break;
|
1386
|
+
case 0x018e:
|
1387
|
+
cp = 0x01dd;
|
1388
|
+
break;
|
1389
|
+
case 0x023d:
|
1390
|
+
cp = 0x019a;
|
1391
|
+
break;
|
1392
|
+
case 0x0220:
|
1393
|
+
cp = 0x019e;
|
1394
|
+
break;
|
1395
|
+
case 0x01b7:
|
1396
|
+
cp = 0x0292;
|
1397
|
+
break;
|
1398
|
+
case 0x01c4:
|
1399
|
+
cp = 0x01c6;
|
1400
|
+
break;
|
1401
|
+
case 0x01c7:
|
1402
|
+
cp = 0x01c9;
|
1403
|
+
break;
|
1404
|
+
case 0x01ca:
|
1405
|
+
cp = 0x01cc;
|
1406
|
+
break;
|
1407
|
+
case 0x01f1:
|
1408
|
+
cp = 0x01f3;
|
1409
|
+
break;
|
1410
|
+
case 0x01f7:
|
1411
|
+
cp = 0x01bf;
|
1412
|
+
break;
|
1413
|
+
case 0x0187:
|
1414
|
+
cp = 0x0188;
|
1415
|
+
break;
|
1416
|
+
case 0x018b:
|
1417
|
+
cp = 0x018c;
|
1418
|
+
break;
|
1419
|
+
case 0x0191:
|
1420
|
+
cp = 0x0192;
|
1421
|
+
break;
|
1422
|
+
case 0x0198:
|
1423
|
+
cp = 0x0199;
|
1424
|
+
break;
|
1425
|
+
case 0x01a7:
|
1426
|
+
cp = 0x01a8;
|
1427
|
+
break;
|
1428
|
+
case 0x01ac:
|
1429
|
+
cp = 0x01ad;
|
1430
|
+
break;
|
1431
|
+
case 0x01b8:
|
1432
|
+
cp = 0x01b9;
|
1433
|
+
break;
|
1434
|
+
case 0x01bc:
|
1435
|
+
cp = 0x01bd;
|
1436
|
+
break;
|
1437
|
+
case 0x01f4:
|
1438
|
+
cp = 0x01f5;
|
1439
|
+
break;
|
1440
|
+
case 0x023b:
|
1441
|
+
cp = 0x023c;
|
1442
|
+
break;
|
1443
|
+
case 0x0241:
|
1444
|
+
cp = 0x0242;
|
1445
|
+
break;
|
1446
|
+
case 0x03fd:
|
1447
|
+
cp = 0x037b;
|
1448
|
+
break;
|
1449
|
+
case 0x03fe:
|
1450
|
+
cp = 0x037c;
|
1451
|
+
break;
|
1452
|
+
case 0x03ff:
|
1453
|
+
cp = 0x037d;
|
1454
|
+
break;
|
1455
|
+
case 0x037f:
|
1456
|
+
cp = 0x03f3;
|
1457
|
+
break;
|
1458
|
+
case 0x0386:
|
1459
|
+
cp = 0x03ac;
|
1460
|
+
break;
|
1461
|
+
case 0x0388:
|
1462
|
+
cp = 0x03ad;
|
1463
|
+
break;
|
1464
|
+
case 0x0389:
|
1465
|
+
cp = 0x03ae;
|
1466
|
+
break;
|
1467
|
+
case 0x038a:
|
1468
|
+
cp = 0x03af;
|
1469
|
+
break;
|
1470
|
+
case 0x038c:
|
1471
|
+
cp = 0x03cc;
|
1472
|
+
break;
|
1473
|
+
case 0x038e:
|
1474
|
+
cp = 0x03cd;
|
1475
|
+
break;
|
1476
|
+
case 0x038f:
|
1477
|
+
cp = 0x03ce;
|
1478
|
+
break;
|
1479
|
+
case 0x0370:
|
1480
|
+
cp = 0x0371;
|
1481
|
+
break;
|
1482
|
+
case 0x0372:
|
1483
|
+
cp = 0x0373;
|
1484
|
+
break;
|
1485
|
+
case 0x0376:
|
1486
|
+
cp = 0x0377;
|
1487
|
+
break;
|
1488
|
+
case 0x03f4:
|
1489
|
+
cp = 0x03b8;
|
1490
|
+
break;
|
1491
|
+
case 0x03cf:
|
1492
|
+
cp = 0x03d7;
|
1493
|
+
break;
|
1494
|
+
case 0x03f9:
|
1495
|
+
cp = 0x03f2;
|
1496
|
+
break;
|
1497
|
+
case 0x03f7:
|
1498
|
+
cp = 0x03f8;
|
1499
|
+
break;
|
1500
|
+
case 0x03fa:
|
1501
|
+
cp = 0x03fb;
|
1502
|
+
break;
|
1503
|
+
}
|
1165
1504
|
}
|
1166
1505
|
|
1167
1506
|
return cp;
|
1168
1507
|
}
|
1169
1508
|
|
1170
|
-
utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
1509
|
+
utf8_constexpr14_impl utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
1171
1510
|
if (((0x0061 <= cp) && (0x007a >= cp)) ||
|
1172
1511
|
((0x00e0 <= cp) && (0x00f6 >= cp)) ||
|
1173
1512
|
((0x00f8 <= cp) && (0x00fe >= cp)) ||
|
1174
1513
|
((0x03b1 <= cp) && (0x03c1 >= cp)) ||
|
1175
|
-
((0x03c3 <= cp) && (0x03cb >= cp))
|
1514
|
+
((0x03c3 <= cp) && (0x03cb >= cp)) ||
|
1515
|
+
((0x0430 <= cp) && (0x044f >= cp))) {
|
1176
1516
|
cp -= 32;
|
1517
|
+
} else if ((0x0450 <= cp) && (0x045f >= cp)) {
|
1518
|
+
cp -= 80;
|
1177
1519
|
} else if (((0x0100 <= cp) && (0x012f >= cp)) ||
|
1178
1520
|
((0x0132 <= cp) && (0x0137 >= cp)) ||
|
1179
1521
|
((0x014a <= cp) && (0x0177 >= cp)) ||
|
@@ -1183,7 +1525,9 @@ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
|
1183
1525
|
((0x01f8 <= cp) && (0x021f >= cp)) ||
|
1184
1526
|
((0x0222 <= cp) && (0x0233 >= cp)) ||
|
1185
1527
|
((0x0246 <= cp) && (0x024f >= cp)) ||
|
1186
|
-
((0x03d8 <= cp) && (0x03ef >= cp))
|
1528
|
+
((0x03d8 <= cp) && (0x03ef >= cp)) ||
|
1529
|
+
((0x0460 <= cp) && (0x0481 >= cp)) ||
|
1530
|
+
((0x048a <= cp) && (0x04ff >= cp))) {
|
1187
1531
|
cp &= ~0x1;
|
1188
1532
|
} else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
|
1189
1533
|
((0x0179 <= cp) && (0x017e >= cp)) ||
|
@@ -1194,64 +1538,175 @@ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
|
|
1194
1538
|
cp |= 0x1;
|
1195
1539
|
} else {
|
1196
1540
|
switch (cp) {
|
1197
|
-
default:
|
1198
|
-
|
1199
|
-
case
|
1200
|
-
|
1201
|
-
|
1202
|
-
case
|
1203
|
-
|
1204
|
-
|
1205
|
-
case
|
1206
|
-
|
1207
|
-
|
1208
|
-
case
|
1209
|
-
|
1210
|
-
|
1211
|
-
case
|
1212
|
-
|
1213
|
-
|
1214
|
-
case
|
1215
|
-
|
1216
|
-
|
1217
|
-
case
|
1218
|
-
|
1219
|
-
|
1220
|
-
case
|
1221
|
-
|
1222
|
-
|
1223
|
-
case
|
1224
|
-
|
1225
|
-
|
1226
|
-
case
|
1227
|
-
|
1228
|
-
|
1229
|
-
case
|
1230
|
-
|
1231
|
-
|
1232
|
-
case
|
1233
|
-
|
1234
|
-
|
1235
|
-
case
|
1236
|
-
|
1237
|
-
|
1238
|
-
case
|
1239
|
-
|
1240
|
-
|
1541
|
+
default:
|
1542
|
+
break;
|
1543
|
+
case 0x00ff:
|
1544
|
+
cp = 0x0178;
|
1545
|
+
break;
|
1546
|
+
case 0x0180:
|
1547
|
+
cp = 0x0243;
|
1548
|
+
break;
|
1549
|
+
case 0x01dd:
|
1550
|
+
cp = 0x018e;
|
1551
|
+
break;
|
1552
|
+
case 0x019a:
|
1553
|
+
cp = 0x023d;
|
1554
|
+
break;
|
1555
|
+
case 0x019e:
|
1556
|
+
cp = 0x0220;
|
1557
|
+
break;
|
1558
|
+
case 0x0292:
|
1559
|
+
cp = 0x01b7;
|
1560
|
+
break;
|
1561
|
+
case 0x01c6:
|
1562
|
+
cp = 0x01c4;
|
1563
|
+
break;
|
1564
|
+
case 0x01c9:
|
1565
|
+
cp = 0x01c7;
|
1566
|
+
break;
|
1567
|
+
case 0x01cc:
|
1568
|
+
cp = 0x01ca;
|
1569
|
+
break;
|
1570
|
+
case 0x01f3:
|
1571
|
+
cp = 0x01f1;
|
1572
|
+
break;
|
1573
|
+
case 0x01bf:
|
1574
|
+
cp = 0x01f7;
|
1575
|
+
break;
|
1576
|
+
case 0x0188:
|
1577
|
+
cp = 0x0187;
|
1578
|
+
break;
|
1579
|
+
case 0x018c:
|
1580
|
+
cp = 0x018b;
|
1581
|
+
break;
|
1582
|
+
case 0x0192:
|
1583
|
+
cp = 0x0191;
|
1584
|
+
break;
|
1585
|
+
case 0x0199:
|
1586
|
+
cp = 0x0198;
|
1587
|
+
break;
|
1588
|
+
case 0x01a8:
|
1589
|
+
cp = 0x01a7;
|
1590
|
+
break;
|
1591
|
+
case 0x01ad:
|
1592
|
+
cp = 0x01ac;
|
1593
|
+
break;
|
1594
|
+
case 0x01b9:
|
1595
|
+
cp = 0x01b8;
|
1596
|
+
break;
|
1597
|
+
case 0x01bd:
|
1598
|
+
cp = 0x01bc;
|
1599
|
+
break;
|
1600
|
+
case 0x01f5:
|
1601
|
+
cp = 0x01f4;
|
1602
|
+
break;
|
1603
|
+
case 0x023c:
|
1604
|
+
cp = 0x023b;
|
1605
|
+
break;
|
1606
|
+
case 0x0242:
|
1607
|
+
cp = 0x0241;
|
1608
|
+
break;
|
1609
|
+
case 0x037b:
|
1610
|
+
cp = 0x03fd;
|
1611
|
+
break;
|
1612
|
+
case 0x037c:
|
1613
|
+
cp = 0x03fe;
|
1614
|
+
break;
|
1615
|
+
case 0x037d:
|
1616
|
+
cp = 0x03ff;
|
1617
|
+
break;
|
1618
|
+
case 0x03f3:
|
1619
|
+
cp = 0x037f;
|
1620
|
+
break;
|
1621
|
+
case 0x03ac:
|
1622
|
+
cp = 0x0386;
|
1623
|
+
break;
|
1624
|
+
case 0x03ad:
|
1625
|
+
cp = 0x0388;
|
1626
|
+
break;
|
1627
|
+
case 0x03ae:
|
1628
|
+
cp = 0x0389;
|
1629
|
+
break;
|
1630
|
+
case 0x03af:
|
1631
|
+
cp = 0x038a;
|
1632
|
+
break;
|
1633
|
+
case 0x03cc:
|
1634
|
+
cp = 0x038c;
|
1635
|
+
break;
|
1636
|
+
case 0x03cd:
|
1637
|
+
cp = 0x038e;
|
1638
|
+
break;
|
1639
|
+
case 0x03ce:
|
1640
|
+
cp = 0x038f;
|
1641
|
+
break;
|
1642
|
+
case 0x0371:
|
1643
|
+
cp = 0x0370;
|
1644
|
+
break;
|
1645
|
+
case 0x0373:
|
1646
|
+
cp = 0x0372;
|
1647
|
+
break;
|
1648
|
+
case 0x0377:
|
1649
|
+
cp = 0x0376;
|
1650
|
+
break;
|
1651
|
+
case 0x03d1:
|
1652
|
+
cp = 0x0398;
|
1653
|
+
break;
|
1654
|
+
case 0x03d7:
|
1655
|
+
cp = 0x03cf;
|
1656
|
+
break;
|
1657
|
+
case 0x03f2:
|
1658
|
+
cp = 0x03f9;
|
1659
|
+
break;
|
1660
|
+
case 0x03f8:
|
1661
|
+
cp = 0x03f7;
|
1662
|
+
break;
|
1663
|
+
case 0x03fb:
|
1664
|
+
cp = 0x03fa;
|
1665
|
+
break;
|
1666
|
+
}
|
1241
1667
|
}
|
1242
1668
|
|
1243
1669
|
return cp;
|
1244
1670
|
}
|
1245
1671
|
|
1672
|
+
utf8_constexpr14_impl utf8_int8_t *
|
1673
|
+
utf8rcodepoint(const utf8_int8_t *utf8_restrict str,
|
1674
|
+
utf8_int32_t *utf8_restrict out_codepoint) {
|
1675
|
+
const utf8_int8_t *s = (const utf8_int8_t *)str;
|
1676
|
+
|
1677
|
+
if (0xf0 == (0xf8 & s[0])) {
|
1678
|
+
/* 4 byte utf8 codepoint */
|
1679
|
+
*out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) |
|
1680
|
+
((0x3f & s[2]) << 6) | (0x3f & s[3]);
|
1681
|
+
} else if (0xe0 == (0xf0 & s[0])) {
|
1682
|
+
/* 3 byte utf8 codepoint */
|
1683
|
+
*out_codepoint =
|
1684
|
+
((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
|
1685
|
+
} else if (0xc0 == (0xe0 & s[0])) {
|
1686
|
+
/* 2 byte utf8 codepoint */
|
1687
|
+
*out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
|
1688
|
+
} else {
|
1689
|
+
/* 1 byte utf8 codepoint otherwise */
|
1690
|
+
*out_codepoint = s[0];
|
1691
|
+
}
|
1692
|
+
|
1693
|
+
do {
|
1694
|
+
s--;
|
1695
|
+
} while ((0 != (0x80 & s[0])) && (0x80 == (0xc0 & s[0])));
|
1696
|
+
|
1697
|
+
return (utf8_int8_t *)s;
|
1698
|
+
}
|
1699
|
+
|
1246
1700
|
#undef utf8_restrict
|
1701
|
+
#undef utf8_constexpr14
|
1247
1702
|
#undef utf8_null
|
1248
1703
|
|
1249
|
-
#ifdef
|
1250
|
-
}
|
1704
|
+
#ifdef utf8_cplusplus
|
1705
|
+
} /* extern "C" */
|
1251
1706
|
#endif
|
1252
1707
|
|
1253
1708
|
#if defined(__clang__)
|
1254
1709
|
#pragma clang diagnostic pop
|
1255
1710
|
#endif
|
1256
1711
|
|
1257
|
-
#endif
|
1712
|
+
#endif /* SHEREDOM_UTF8_H_INCLUDED */
|