rugged 1.3.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (656)
  1. checksums.yaml +4 -4
  2. data/README.md +34 -2
  3. data/ext/rugged/extconf.rb +6 -3
  4. data/ext/rugged/rugged.c +16 -0
  5. data/ext/rugged/rugged.h +4 -0
  6. data/ext/rugged/rugged_allocator.c +0 -54
  7. data/ext/rugged/rugged_blame.c +2 -0
  8. data/ext/rugged/rugged_blob.c +3 -0
  9. data/ext/rugged/rugged_commit.c +1 -0
  10. data/ext/rugged/rugged_config.c +9 -2
  11. data/ext/rugged/rugged_diff.c +1 -0
  12. data/ext/rugged/rugged_index.c +2 -0
  13. data/ext/rugged/rugged_patch.c +1 -0
  14. data/ext/rugged/rugged_rebase.c +1 -0
  15. data/ext/rugged/rugged_reference.c +1 -0
  16. data/ext/rugged/rugged_remote.c +28 -10
  17. data/ext/rugged/rugged_repo.c +7 -9
  18. data/ext/rugged/rugged_revwalk.c +5 -1
  19. data/ext/rugged/rugged_settings.c +5 -0
  20. data/ext/rugged/rugged_submodule.c +1 -0
  21. data/ext/rugged/rugged_tag.c +1 -0
  22. data/ext/rugged/rugged_tree.c +4 -0
  23. data/lib/rugged/index.rb +1 -1
  24. data/lib/rugged/tree.rb +5 -1
  25. data/lib/rugged/version.rb +1 -1
  26. data/vendor/libgit2/AUTHORS +1 -0
  27. data/vendor/libgit2/CMakeLists.txt +141 -289
  28. data/vendor/libgit2/COPYING +301 -20
  29. data/vendor/libgit2/cmake/AddCFlagIfSupported.cmake +21 -21
  30. data/vendor/libgit2/cmake/AddClarTest.cmake +7 -0
  31. data/vendor/libgit2/cmake/CheckPrototypeDefinitionSafe.cmake +16 -0
  32. data/vendor/libgit2/cmake/DefaultCFlags.cmake +154 -0
  33. data/vendor/libgit2/cmake/EnableWarnings.cmake +13 -13
  34. data/vendor/libgit2/cmake/ExperimentalFeatures.cmake +23 -0
  35. data/vendor/libgit2/cmake/FindCoreFoundation.cmake +13 -13
  36. data/vendor/libgit2/cmake/FindGSSAPI.cmake +171 -287
  37. data/vendor/libgit2/cmake/FindGSSFramework.cmake +13 -13
  38. data/vendor/libgit2/cmake/FindHTTP_Parser.cmake +17 -17
  39. data/vendor/libgit2/cmake/FindIntlIconv.cmake +51 -0
  40. data/vendor/libgit2/cmake/FindLLHTTP.cmake +39 -0
  41. data/vendor/libgit2/cmake/FindLibSSH2.cmake +5 -5
  42. data/vendor/libgit2/cmake/FindPCRE.cmake +12 -13
  43. data/vendor/libgit2/cmake/FindPCRE2.cmake +12 -12
  44. data/vendor/libgit2/cmake/FindPkgLibraries.cmake +19 -19
  45. data/vendor/libgit2/cmake/FindSecurity.cmake +14 -14
  46. data/vendor/libgit2/cmake/FindStatNsec.cmake +12 -18
  47. data/vendor/libgit2/cmake/Findfutimens.cmake +8 -8
  48. data/vendor/libgit2/cmake/FindmbedTLS.cmake +63 -70
  49. data/vendor/libgit2/cmake/IdeSplitSources.cmake +18 -18
  50. data/vendor/libgit2/cmake/PkgBuildConfig.cmake +60 -60
  51. data/vendor/libgit2/cmake/SanitizeBool.cmake +20 -20
  52. data/vendor/libgit2/cmake/SelectGSSAPI.cmake +37 -37
  53. data/vendor/libgit2/cmake/SelectHTTPParser.cmake +34 -0
  54. data/vendor/libgit2/cmake/SelectHTTPSBackend.cmake +129 -101
  55. data/vendor/libgit2/cmake/SelectHashes.cmake +113 -54
  56. data/vendor/libgit2/cmake/SelectRegex.cmake +56 -0
  57. data/vendor/libgit2/cmake/SelectSSH.cmake +46 -0
  58. data/vendor/libgit2/cmake/SelectXdiff.cmake +9 -0
  59. data/vendor/libgit2/cmake/SelectZlib.cmake +38 -0
  60. data/vendor/libgit2/deps/chromium-zlib/CMakeLists.txt +6 -6
  61. data/vendor/libgit2/deps/llhttp/CMakeLists.txt +8 -0
  62. data/vendor/libgit2/deps/llhttp/LICENSE-MIT +22 -0
  63. data/vendor/libgit2/deps/llhttp/api.c +510 -0
  64. data/vendor/libgit2/deps/llhttp/http.c +170 -0
  65. data/vendor/libgit2/deps/llhttp/llhttp.c +10168 -0
  66. data/vendor/libgit2/deps/llhttp/llhttp.h +897 -0
  67. data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +33 -31
  68. data/vendor/libgit2/deps/ntlmclient/crypt_builtin_md4.c +311 -0
  69. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +2 -1
  70. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +0 -20
  71. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +7 -5
  72. data/vendor/libgit2/deps/ntlmclient/ntlm.c +25 -25
  73. data/vendor/libgit2/deps/ntlmclient/ntlm.h +4 -4
  74. data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +2 -2
  75. data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +5 -4
  76. data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +2 -1
  77. data/vendor/libgit2/deps/ntlmclient/utf8.h +1176 -721
  78. data/vendor/libgit2/deps/ntlmclient/util.h +11 -0
  79. data/vendor/libgit2/deps/pcre/CMakeLists.txt +89 -88
  80. data/vendor/libgit2/deps/pcre/LICENCE +5 -5
  81. data/vendor/libgit2/deps/pcre/pcre.h +2 -2
  82. data/vendor/libgit2/deps/pcre/pcre_compile.c +6 -3
  83. data/vendor/libgit2/deps/pcre/pcre_exec.c +2 -2
  84. data/vendor/libgit2/deps/winhttp/CMakeLists.txt +14 -16
  85. data/vendor/libgit2/deps/xdiff/CMakeLists.txt +28 -0
  86. data/vendor/libgit2/deps/xdiff/git-xdiff.h +56 -0
  87. data/vendor/libgit2/{src → deps}/xdiff/xdiff.h +15 -15
  88. data/vendor/libgit2/{src → deps}/xdiff/xdiffi.c +152 -125
  89. data/vendor/libgit2/{src → deps}/xdiff/xdiffi.h +2 -4
  90. data/vendor/libgit2/{src → deps}/xdiff/xemit.c +26 -10
  91. data/vendor/libgit2/{src → deps}/xdiff/xhistogram.c +92 -94
  92. data/vendor/libgit2/{src → deps}/xdiff/xinclude.h +1 -12
  93. data/vendor/libgit2/{src → deps}/xdiff/xmacros.h +18 -1
  94. data/vendor/libgit2/{src → deps}/xdiff/xmerge.c +126 -137
  95. data/vendor/libgit2/{src → deps}/xdiff/xpatience.c +26 -46
  96. data/vendor/libgit2/{src → deps}/xdiff/xprepare.c +24 -46
  97. data/vendor/libgit2/{src → deps}/xdiff/xutils.c +36 -8
  98. data/vendor/libgit2/{src → deps}/xdiff/xutils.h +2 -1
  99. data/vendor/libgit2/deps/zlib/CMakeLists.txt +6 -1
  100. data/vendor/libgit2/deps/zlib/LICENSE +22 -0
  101. data/vendor/libgit2/deps/zlib/adler32.c +7 -22
  102. data/vendor/libgit2/deps/zlib/crc32.c +931 -317
  103. data/vendor/libgit2/deps/zlib/crc32.h +9441 -436
  104. data/vendor/libgit2/deps/zlib/deflate.c +428 -453
  105. data/vendor/libgit2/deps/zlib/deflate.h +51 -23
  106. data/vendor/libgit2/deps/zlib/gzguts.h +15 -19
  107. data/vendor/libgit2/deps/zlib/infback.c +19 -31
  108. data/vendor/libgit2/deps/zlib/inffast.c +15 -18
  109. data/vendor/libgit2/deps/zlib/inffast.h +1 -1
  110. data/vendor/libgit2/deps/zlib/inflate.c +75 -110
  111. data/vendor/libgit2/deps/zlib/inflate.h +3 -2
  112. data/vendor/libgit2/deps/zlib/inftrees.c +6 -11
  113. data/vendor/libgit2/deps/zlib/inftrees.h +6 -6
  114. data/vendor/libgit2/deps/zlib/trees.c +294 -380
  115. data/vendor/libgit2/deps/zlib/zconf.h +23 -14
  116. data/vendor/libgit2/deps/zlib/zlib.h +310 -284
  117. data/vendor/libgit2/deps/zlib/zutil.c +20 -46
  118. data/vendor/libgit2/deps/zlib/zutil.h +24 -41
  119. data/vendor/libgit2/include/git2/annotated_commit.h +12 -5
  120. data/vendor/libgit2/include/git2/apply.h +43 -8
  121. data/vendor/libgit2/include/git2/attr.h +28 -6
  122. data/vendor/libgit2/include/git2/blame.h +137 -29
  123. data/vendor/libgit2/include/git2/blob.h +85 -29
  124. data/vendor/libgit2/include/git2/branch.h +25 -16
  125. data/vendor/libgit2/include/git2/buffer.h +24 -82
  126. data/vendor/libgit2/include/git2/cert.h +4 -3
  127. data/vendor/libgit2/include/git2/checkout.h +88 -34
  128. data/vendor/libgit2/include/git2/cherrypick.h +10 -3
  129. data/vendor/libgit2/include/git2/clone.h +28 -12
  130. data/vendor/libgit2/include/git2/commit.h +134 -3
  131. data/vendor/libgit2/include/git2/common.h +172 -59
  132. data/vendor/libgit2/include/git2/config.h +118 -32
  133. data/vendor/libgit2/include/git2/credential.h +32 -3
  134. data/vendor/libgit2/include/git2/credential_helpers.h +2 -0
  135. data/vendor/libgit2/include/git2/deprecated.h +141 -3
  136. data/vendor/libgit2/include/git2/describe.h +20 -3
  137. data/vendor/libgit2/include/git2/diff.h +95 -19
  138. data/vendor/libgit2/include/git2/email.h +10 -30
  139. data/vendor/libgit2/include/git2/errors.h +51 -61
  140. data/vendor/libgit2/include/git2/experimental.h +20 -0
  141. data/vendor/libgit2/include/git2/filter.h +21 -9
  142. data/vendor/libgit2/include/git2/global.h +8 -1
  143. data/vendor/libgit2/include/git2/graph.h +4 -2
  144. data/vendor/libgit2/include/git2/ignore.h +11 -1
  145. data/vendor/libgit2/include/git2/index.h +111 -11
  146. data/vendor/libgit2/include/git2/indexer.h +67 -2
  147. data/vendor/libgit2/include/git2/mailmap.h +7 -1
  148. data/vendor/libgit2/include/git2/merge.h +70 -5
  149. data/vendor/libgit2/include/git2/message.h +4 -2
  150. data/vendor/libgit2/include/git2/net.h +3 -1
  151. data/vendor/libgit2/include/git2/notes.h +9 -6
  152. data/vendor/libgit2/include/git2/object.h +57 -7
  153. data/vendor/libgit2/include/git2/odb.h +156 -33
  154. data/vendor/libgit2/include/git2/odb_backend.h +132 -16
  155. data/vendor/libgit2/include/git2/oid.h +116 -17
  156. data/vendor/libgit2/include/git2/oidarray.h +7 -1
  157. data/vendor/libgit2/include/git2/pack.h +37 -9
  158. data/vendor/libgit2/include/git2/patch.h +10 -3
  159. data/vendor/libgit2/include/git2/pathspec.h +10 -1
  160. data/vendor/libgit2/include/git2/proxy.h +11 -1
  161. data/vendor/libgit2/include/git2/rebase.h +18 -7
  162. data/vendor/libgit2/include/git2/refdb.h +5 -2
  163. data/vendor/libgit2/include/git2/reflog.h +4 -3
  164. data/vendor/libgit2/include/git2/refs.h +11 -8
  165. data/vendor/libgit2/include/git2/refspec.h +14 -4
  166. data/vendor/libgit2/include/git2/remote.h +295 -54
  167. data/vendor/libgit2/include/git2/repository.h +95 -25
  168. data/vendor/libgit2/include/git2/reset.h +18 -5
  169. data/vendor/libgit2/include/git2/revert.h +9 -4
  170. data/vendor/libgit2/include/git2/revparse.h +4 -4
  171. data/vendor/libgit2/include/git2/revwalk.h +7 -3
  172. data/vendor/libgit2/include/git2/signature.h +47 -2
  173. data/vendor/libgit2/include/git2/stash.h +78 -10
  174. data/vendor/libgit2/include/git2/status.h +24 -11
  175. data/vendor/libgit2/include/git2/stdint.h +87 -85
  176. data/vendor/libgit2/include/git2/strarray.h +2 -16
  177. data/vendor/libgit2/include/git2/submodule.h +27 -11
  178. data/vendor/libgit2/include/git2/sys/alloc.h +12 -34
  179. data/vendor/libgit2/include/git2/sys/commit.h +77 -3
  180. data/vendor/libgit2/include/git2/sys/commit_graph.h +110 -59
  181. data/vendor/libgit2/include/git2/sys/config.h +80 -4
  182. data/vendor/libgit2/include/git2/sys/credential.h +4 -3
  183. data/vendor/libgit2/include/git2/sys/diff.h +21 -1
  184. data/vendor/libgit2/include/git2/sys/email.h +7 -0
  185. data/vendor/libgit2/include/git2/sys/errors.h +76 -0
  186. data/vendor/libgit2/include/git2/sys/filter.h +66 -3
  187. data/vendor/libgit2/include/git2/sys/hashsig.h +11 -0
  188. data/vendor/libgit2/include/git2/sys/index.h +3 -2
  189. data/vendor/libgit2/include/git2/sys/mempack.h +32 -2
  190. data/vendor/libgit2/include/git2/sys/merge.h +55 -7
  191. data/vendor/libgit2/include/git2/sys/midx.h +47 -4
  192. data/vendor/libgit2/include/git2/sys/odb_backend.h +10 -9
  193. data/vendor/libgit2/include/git2/sys/openssl.h +8 -1
  194. data/vendor/libgit2/include/git2/sys/path.h +12 -1
  195. data/vendor/libgit2/include/git2/sys/refdb_backend.h +40 -36
  196. data/vendor/libgit2/include/git2/sys/refs.h +3 -2
  197. data/vendor/libgit2/include/git2/sys/remote.h +53 -0
  198. data/vendor/libgit2/include/git2/sys/repository.h +63 -3
  199. data/vendor/libgit2/include/git2/sys/stream.h +26 -3
  200. data/vendor/libgit2/include/git2/sys/transport.h +87 -41
  201. data/vendor/libgit2/include/git2/tag.h +4 -1
  202. data/vendor/libgit2/include/git2/trace.h +9 -3
  203. data/vendor/libgit2/include/git2/transaction.h +3 -2
  204. data/vendor/libgit2/include/git2/transport.h +11 -3
  205. data/vendor/libgit2/include/git2/tree.h +20 -8
  206. data/vendor/libgit2/include/git2/types.h +26 -10
  207. data/vendor/libgit2/include/git2/version.h +63 -6
  208. data/vendor/libgit2/include/git2/worktree.h +30 -8
  209. data/vendor/libgit2/include/git2.h +1 -0
  210. data/vendor/libgit2/src/CMakeLists.txt +203 -420
  211. data/vendor/libgit2/src/README.md +12 -0
  212. data/vendor/libgit2/src/cli/CMakeLists.txt +56 -0
  213. data/vendor/libgit2/src/cli/README.md +26 -0
  214. data/vendor/libgit2/src/{branch.h → cli/cmd.c} +10 -8
  215. data/vendor/libgit2/src/cli/cmd.h +37 -0
  216. data/vendor/libgit2/src/cli/cmd_blame.c +287 -0
  217. data/vendor/libgit2/src/cli/cmd_cat_file.c +202 -0
  218. data/vendor/libgit2/src/cli/cmd_clone.c +190 -0
  219. data/vendor/libgit2/src/cli/cmd_config.c +241 -0
  220. data/vendor/libgit2/src/cli/cmd_hash_object.c +152 -0
  221. data/vendor/libgit2/src/cli/cmd_help.c +85 -0
  222. data/vendor/libgit2/src/cli/cmd_index_pack.c +114 -0
  223. data/vendor/libgit2/src/cli/cmd_init.c +102 -0
  224. data/vendor/libgit2/src/cli/common.c +168 -0
  225. data/vendor/libgit2/src/cli/common.h +63 -0
  226. data/vendor/libgit2/src/cli/error.h +51 -0
  227. data/vendor/libgit2/src/cli/main.c +134 -0
  228. data/vendor/libgit2/src/cli/opt.c +695 -0
  229. data/vendor/libgit2/src/cli/opt.h +367 -0
  230. data/vendor/libgit2/src/cli/opt_usage.c +263 -0
  231. data/vendor/libgit2/src/cli/opt_usage.h +40 -0
  232. data/vendor/libgit2/src/cli/progress.c +395 -0
  233. data/vendor/libgit2/src/cli/progress.h +129 -0
  234. data/vendor/libgit2/src/cli/sighandler.h +20 -0
  235. data/vendor/libgit2/src/cli/unix/sighandler.c +37 -0
  236. data/vendor/libgit2/src/cli/win32/precompiled.h +3 -0
  237. data/vendor/libgit2/src/cli/win32/sighandler.c +37 -0
  238. data/vendor/libgit2/src/libgit2/CMakeLists.txt +140 -0
  239. data/vendor/libgit2/src/{annotated_commit.c → libgit2/annotated_commit.c} +2 -2
  240. data/vendor/libgit2/src/{annotated_commit.h → libgit2/annotated_commit.h} +2 -2
  241. data/vendor/libgit2/src/{apply.c → libgit2/apply.c} +32 -34
  242. data/vendor/libgit2/src/{apply.h → libgit2/apply.h} +2 -2
  243. data/vendor/libgit2/src/{attr.c → libgit2/attr.c} +48 -31
  244. data/vendor/libgit2/src/{attr_file.c → libgit2/attr_file.c} +25 -20
  245. data/vendor/libgit2/src/{attr_file.h → libgit2/attr_file.h} +6 -4
  246. data/vendor/libgit2/src/{attrcache.c → libgit2/attrcache.c} +87 -46
  247. data/vendor/libgit2/src/{attrcache.h → libgit2/attrcache.h} +5 -9
  248. data/vendor/libgit2/src/{blame.c → libgit2/blame.c} +152 -57
  249. data/vendor/libgit2/src/{blame.h → libgit2/blame.h} +1 -0
  250. data/vendor/libgit2/src/{blame_git.c → libgit2/blame_git.c} +1 -2
  251. data/vendor/libgit2/src/{blob.c → libgit2/blob.c} +38 -29
  252. data/vendor/libgit2/src/{blob.h → libgit2/blob.h} +3 -3
  253. data/vendor/libgit2/src/{branch.c → libgit2/branch.c} +164 -118
  254. data/vendor/libgit2/src/libgit2/branch.h +31 -0
  255. data/vendor/libgit2/src/libgit2/buf.c +126 -0
  256. data/vendor/libgit2/src/libgit2/buf.h +50 -0
  257. data/vendor/libgit2/src/{cache.c → libgit2/cache.c} +22 -17
  258. data/vendor/libgit2/src/{cache.h → libgit2/cache.h} +7 -9
  259. data/vendor/libgit2/src/{checkout.c → libgit2/checkout.c} +107 -91
  260. data/vendor/libgit2/src/{checkout.h → libgit2/checkout.h} +0 -2
  261. data/vendor/libgit2/src/{cherrypick.c → libgit2/cherrypick.c} +14 -15
  262. data/vendor/libgit2/src/{clone.c → libgit2/clone.c} +254 -203
  263. data/vendor/libgit2/src/{clone.h → libgit2/clone.h} +4 -1
  264. data/vendor/libgit2/src/{commit.c → libgit2/commit.c} +296 -77
  265. data/vendor/libgit2/src/libgit2/commit.h +87 -0
  266. data/vendor/libgit2/src/{commit_graph.c → libgit2/commit_graph.c} +246 -135
  267. data/vendor/libgit2/src/{commit_graph.h → libgit2/commit_graph.h} +33 -8
  268. data/vendor/libgit2/src/{commit_list.c → libgit2/commit_list.c} +17 -7
  269. data/vendor/libgit2/src/{commit_list.h → libgit2/commit_list.h} +1 -0
  270. data/vendor/libgit2/src/libgit2/common.h +55 -0
  271. data/vendor/libgit2/src/{config.c → libgit2/config.c} +490 -360
  272. data/vendor/libgit2/src/libgit2/config.cmake.in +3 -0
  273. data/vendor/libgit2/src/{config.h → libgit2/config.h} +24 -6
  274. data/vendor/libgit2/src/{config_backend.h → libgit2/config_backend.h} +8 -10
  275. data/vendor/libgit2/src/{config_cache.c → libgit2/config_cache.c} +4 -5
  276. data/vendor/libgit2/src/{config_file.c → libgit2/config_file.c} +212 -183
  277. data/vendor/libgit2/src/libgit2/config_list.c +285 -0
  278. data/vendor/libgit2/src/libgit2/config_list.h +32 -0
  279. data/vendor/libgit2/src/libgit2/config_mem.c +374 -0
  280. data/vendor/libgit2/src/{config_parse.c → libgit2/config_parse.c} +37 -32
  281. data/vendor/libgit2/src/{config_snapshot.c → libgit2/config_snapshot.c} +24 -31
  282. data/vendor/libgit2/src/{crlf.c → libgit2/crlf.c} +24 -21
  283. data/vendor/libgit2/src/{describe.c → libgit2/describe.c} +62 -51
  284. data/vendor/libgit2/src/{diff.c → libgit2/diff.c} +44 -14
  285. data/vendor/libgit2/src/{diff.h → libgit2/diff.h} +8 -10
  286. data/vendor/libgit2/src/{diff_driver.c → libgit2/diff_driver.c} +46 -55
  287. data/vendor/libgit2/src/{diff_driver.h → libgit2/diff_driver.h} +5 -5
  288. data/vendor/libgit2/src/{diff_file.c → libgit2/diff_file.c} +45 -27
  289. data/vendor/libgit2/src/{diff_generate.c → libgit2/diff_generate.c} +70 -20
  290. data/vendor/libgit2/src/{diff_generate.h → libgit2/diff_generate.h} +5 -3
  291. data/vendor/libgit2/src/{diff_parse.c → libgit2/diff_parse.c} +22 -6
  292. data/vendor/libgit2/src/{diff_print.c → libgit2/diff_print.c} +192 -105
  293. data/vendor/libgit2/src/{diff_stats.c → libgit2/diff_stats.c} +40 -29
  294. data/vendor/libgit2/src/libgit2/diff_stats.h +18 -0
  295. data/vendor/libgit2/src/{diff_tform.c → libgit2/diff_tform.c} +49 -16
  296. data/vendor/libgit2/src/{diff_xdiff.c → libgit2/diff_xdiff.c} +4 -8
  297. data/vendor/libgit2/src/{diff_xdiff.h → libgit2/diff_xdiff.h} +1 -1
  298. data/vendor/libgit2/src/{email.c → libgit2/email.c} +58 -40
  299. data/vendor/libgit2/src/{email.h → libgit2/email.h} +1 -1
  300. data/vendor/libgit2/src/{transports/ssh.h → libgit2/experimental.h.in} +3 -4
  301. data/vendor/libgit2/src/{fetch.c → libgit2/fetch.c} +105 -30
  302. data/vendor/libgit2/src/{fetch.h → libgit2/fetch.h} +1 -3
  303. data/vendor/libgit2/src/{fetchhead.c → libgit2/fetchhead.c} +30 -28
  304. data/vendor/libgit2/src/{filter.c → libgit2/filter.c} +132 -58
  305. data/vendor/libgit2/src/{filter.h → libgit2/filter.h} +26 -5
  306. data/vendor/libgit2/src/{win32 → libgit2}/git2.rc +3 -3
  307. data/vendor/libgit2/src/libgit2/grafts.c +270 -0
  308. data/vendor/libgit2/src/libgit2/grafts.h +35 -0
  309. data/vendor/libgit2/src/{graph.c → libgit2/graph.c} +1 -1
  310. data/vendor/libgit2/src/libgit2/hashmap_oid.h +30 -0
  311. data/vendor/libgit2/src/{ident.c → libgit2/ident.c} +20 -20
  312. data/vendor/libgit2/src/{ignore.c → libgit2/ignore.c} +44 -39
  313. data/vendor/libgit2/src/{ignore.h → libgit2/ignore.h} +2 -2
  314. data/vendor/libgit2/src/{index.c → libgit2/index.c} +460 -276
  315. data/vendor/libgit2/src/{index.h → libgit2/index.h} +21 -5
  316. data/vendor/libgit2/src/libgit2/index_map.c +95 -0
  317. data/vendor/libgit2/src/libgit2/index_map.h +28 -0
  318. data/vendor/libgit2/src/{indexer.c → libgit2/indexer.c} +208 -124
  319. data/vendor/libgit2/src/{iterator.c → libgit2/iterator.c} +102 -71
  320. data/vendor/libgit2/src/{iterator.h → libgit2/iterator.h} +8 -5
  321. data/vendor/libgit2/src/libgit2/libgit2.c +268 -0
  322. data/vendor/libgit2/src/{mailmap.c → libgit2/mailmap.c} +39 -37
  323. data/vendor/libgit2/src/{merge.c → libgit2/merge.c} +83 -73
  324. data/vendor/libgit2/src/{merge.h → libgit2/merge.h} +1 -14
  325. data/vendor/libgit2/src/{merge_driver.c → libgit2/merge_driver.c} +4 -4
  326. data/vendor/libgit2/src/{merge_file.c → libgit2/merge_file.c} +13 -5
  327. data/vendor/libgit2/src/{message.c → libgit2/message.c} +21 -10
  328. data/vendor/libgit2/src/{midx.c → libgit2/midx.c} +174 -112
  329. data/vendor/libgit2/src/{midx.h → libgit2/midx.h} +17 -6
  330. data/vendor/libgit2/src/{mwindow.c → libgit2/mwindow.c} +53 -57
  331. data/vendor/libgit2/src/{mwindow.h → libgit2/mwindow.h} +9 -2
  332. data/vendor/libgit2/src/{notes.c → libgit2/notes.c} +29 -37
  333. data/vendor/libgit2/src/{object.c → libgit2/object.c} +166 -35
  334. data/vendor/libgit2/src/{object.h → libgit2/object.h} +17 -2
  335. data/vendor/libgit2/src/{odb.c → libgit2/odb.c} +261 -88
  336. data/vendor/libgit2/src/{odb.h → libgit2/odb.h} +44 -5
  337. data/vendor/libgit2/src/{odb_loose.c → libgit2/odb_loose.c} +192 -134
  338. data/vendor/libgit2/src/{odb_mempack.c → libgit2/odb_mempack.c} +67 -22
  339. data/vendor/libgit2/src/{odb_pack.c → libgit2/odb_pack.c} +162 -89
  340. data/vendor/libgit2/src/{oid.c → libgit2/oid.c} +171 -92
  341. data/vendor/libgit2/src/libgit2/oid.h +284 -0
  342. data/vendor/libgit2/src/libgit2/oidarray.c +89 -0
  343. data/vendor/libgit2/src/{oidarray.h → libgit2/oidarray.h} +5 -1
  344. data/vendor/libgit2/src/{pack-objects.c → libgit2/pack-objects.c} +126 -66
  345. data/vendor/libgit2/src/{pack-objects.h → libgit2/pack-objects.h} +28 -12
  346. data/vendor/libgit2/src/{pack.c → libgit2/pack.c} +146 -111
  347. data/vendor/libgit2/src/{pack.h → libgit2/pack.h} +45 -25
  348. data/vendor/libgit2/src/{parse.c → libgit2/parse.c} +8 -4
  349. data/vendor/libgit2/src/{parse.h → libgit2/parse.h} +1 -1
  350. data/vendor/libgit2/src/{patch.c → libgit2/patch.c} +3 -3
  351. data/vendor/libgit2/src/{patch.h → libgit2/patch.h} +8 -1
  352. data/vendor/libgit2/src/{patch_generate.c → libgit2/patch_generate.c} +51 -16
  353. data/vendor/libgit2/src/{patch_generate.h → libgit2/patch_generate.h} +5 -5
  354. data/vendor/libgit2/src/{patch_parse.c → libgit2/patch_parse.c} +42 -34
  355. data/vendor/libgit2/src/libgit2/path.c +375 -0
  356. data/vendor/libgit2/src/libgit2/path.h +68 -0
  357. data/vendor/libgit2/src/{pathspec.c → libgit2/pathspec.c} +7 -7
  358. data/vendor/libgit2/src/{pathspec.h → libgit2/pathspec.h} +2 -2
  359. data/vendor/libgit2/src/{proxy.c → libgit2/proxy.c} +4 -1
  360. data/vendor/libgit2/src/{proxy.h → libgit2/proxy.h} +1 -1
  361. data/vendor/libgit2/src/{push.c → libgit2/push.c} +116 -60
  362. data/vendor/libgit2/src/{push.h → libgit2/push.h} +5 -16
  363. data/vendor/libgit2/src/{reader.c → libgit2/reader.c} +9 -9
  364. data/vendor/libgit2/src/{reader.h → libgit2/reader.h} +2 -2
  365. data/vendor/libgit2/src/{rebase.c → libgit2/rebase.c} +147 -147
  366. data/vendor/libgit2/src/{refdb_fs.c → libgit2/refdb_fs.c} +639 -254
  367. data/vendor/libgit2/src/{reflog.c → libgit2/reflog.c} +8 -7
  368. data/vendor/libgit2/src/{reflog.h → libgit2/reflog.h} +3 -2
  369. data/vendor/libgit2/src/{refs.c → libgit2/refs.c} +67 -39
  370. data/vendor/libgit2/src/{refs.h → libgit2/refs.h} +8 -3
  371. data/vendor/libgit2/src/{refspec.c → libgit2/refspec.c} +60 -38
  372. data/vendor/libgit2/src/{refspec.h → libgit2/refspec.h} +13 -2
  373. data/vendor/libgit2/src/{remote.c → libgit2/remote.c} +821 -454
  374. data/vendor/libgit2/src/libgit2/remote.h +101 -0
  375. data/vendor/libgit2/src/{repository.c → libgit2/repository.c} +1377 -594
  376. data/vendor/libgit2/src/{repository.h → libgit2/repository.h} +43 -12
  377. data/vendor/libgit2/src/{reset.c → libgit2/reset.c} +8 -5
  378. data/vendor/libgit2/src/{revert.c → libgit2/revert.c} +18 -22
  379. data/vendor/libgit2/src/{revparse.c → libgit2/revparse.c} +76 -44
  380. data/vendor/libgit2/src/{revwalk.c → libgit2/revwalk.c} +48 -19
  381. data/vendor/libgit2/src/{revwalk.h → libgit2/revwalk.h} +3 -3
  382. data/vendor/libgit2/src/{libgit2.c → libgit2/settings.c} +162 -95
  383. data/vendor/libgit2/src/{settings.h → libgit2/settings.h} +6 -2
  384. data/vendor/libgit2/src/{signature.c → libgit2/signature.c} +144 -21
  385. data/vendor/libgit2/src/{signature.h → libgit2/signature.h} +1 -2
  386. data/vendor/libgit2/src/{stash.c → libgit2/stash.c} +243 -68
  387. data/vendor/libgit2/src/{status.c → libgit2/status.c} +5 -2
  388. data/vendor/libgit2/src/{strarray.c → libgit2/strarray.c} +1 -0
  389. data/vendor/libgit2/src/libgit2/strarray.h +25 -0
  390. data/vendor/libgit2/src/{streams → libgit2/streams}/mbedtls.c +62 -67
  391. data/vendor/libgit2/src/{streams → libgit2/streams}/openssl.c +41 -24
  392. data/vendor/libgit2/src/{streams → libgit2/streams}/openssl.h +2 -0
  393. data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_dynamic.c +11 -3
  394. data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_dynamic.h +6 -3
  395. data/vendor/libgit2/src/libgit2/streams/schannel.c +715 -0
  396. data/vendor/libgit2/src/libgit2/streams/schannel.h +28 -0
  397. data/vendor/libgit2/src/libgit2/streams/socket.c +428 -0
  398. data/vendor/libgit2/src/{streams → libgit2/streams}/socket.h +3 -1
  399. data/vendor/libgit2/src/{streams → libgit2/streams}/stransport.c +79 -19
  400. data/vendor/libgit2/src/{streams → libgit2/streams}/tls.c +5 -0
  401. data/vendor/libgit2/src/{submodule.c → libgit2/submodule.c} +279 -220
  402. data/vendor/libgit2/src/{submodule.h → libgit2/submodule.h} +10 -11
  403. data/vendor/libgit2/src/libgit2/sysdir.c +650 -0
  404. data/vendor/libgit2/src/{sysdir.h → libgit2/sysdir.h} +55 -18
  405. data/vendor/libgit2/src/{tag.c → libgit2/tag.c} +74 -43
  406. data/vendor/libgit2/src/{tag.h → libgit2/tag.h} +2 -2
  407. data/vendor/libgit2/src/{trace.c → libgit2/trace.c} +1 -14
  408. data/vendor/libgit2/src/{trace.h → libgit2/trace.h} +5 -22
  409. data/vendor/libgit2/src/{trailer.c → libgit2/trailer.c} +7 -7
  410. data/vendor/libgit2/src/{transaction.c → libgit2/transaction.c} +27 -21
  411. data/vendor/libgit2/src/{transaction.h → libgit2/transaction.h} +4 -1
  412. data/vendor/libgit2/src/{transport.c → libgit2/transport.c} +14 -11
  413. data/vendor/libgit2/src/{transports → libgit2/transports}/auth.c +7 -9
  414. data/vendor/libgit2/src/{transports → libgit2/transports}/auth.h +3 -5
  415. data/vendor/libgit2/src/{transports/auth_negotiate.c → libgit2/transports/auth_gssapi.c} +44 -45
  416. data/vendor/libgit2/src/{transports → libgit2/transports}/auth_negotiate.h +1 -1
  417. data/vendor/libgit2/src/{transports → libgit2/transports}/auth_ntlm.h +1 -2
  418. data/vendor/libgit2/src/{transports/auth_ntlm.c → libgit2/transports/auth_ntlmclient.c} +22 -22
  419. data/vendor/libgit2/src/libgit2/transports/auth_sspi.c +341 -0
  420. data/vendor/libgit2/src/{transports → libgit2/transports}/credential.c +1 -1
  421. data/vendor/libgit2/src/{transports → libgit2/transports}/git.c +16 -19
  422. data/vendor/libgit2/src/{transports → libgit2/transports}/http.c +49 -24
  423. data/vendor/libgit2/src/{transports → libgit2/transports}/http.h +0 -11
  424. data/vendor/libgit2/src/{transports → libgit2/transports}/httpclient.c +188 -134
  425. data/vendor/libgit2/src/{transports → libgit2/transports}/httpclient.h +10 -0
  426. data/vendor/libgit2/src/libgit2/transports/httpparser.c +128 -0
  427. data/vendor/libgit2/src/libgit2/transports/httpparser.h +99 -0
  428. data/vendor/libgit2/src/{transports → libgit2/transports}/local.c +159 -127
  429. data/vendor/libgit2/src/{transports → libgit2/transports}/smart.c +142 -165
  430. data/vendor/libgit2/src/{transports → libgit2/transports}/smart.h +56 -36
  431. data/vendor/libgit2/src/{transports → libgit2/transports}/smart_pkt.c +307 -74
  432. data/vendor/libgit2/src/{transports → libgit2/transports}/smart_protocol.c +297 -97
  433. data/vendor/libgit2/src/libgit2/transports/ssh.c +85 -0
  434. data/vendor/libgit2/src/libgit2/transports/ssh_exec.c +347 -0
  435. data/vendor/libgit2/src/libgit2/transports/ssh_exec.h +26 -0
  436. data/vendor/libgit2/src/{transports/ssh.c → libgit2/transports/ssh_libssh2.c} +414 -268
  437. data/vendor/libgit2/src/libgit2/transports/ssh_libssh2.h +28 -0
  438. data/vendor/libgit2/src/{transports → libgit2/transports}/winhttp.c +101 -75
  439. data/vendor/libgit2/src/{tree-cache.c → libgit2/tree-cache.c} +30 -20
  440. data/vendor/libgit2/src/{tree-cache.h → libgit2/tree-cache.h} +7 -5
  441. data/vendor/libgit2/src/{tree.c → libgit2/tree.c} +128 -110
  442. data/vendor/libgit2/src/{tree.h → libgit2/tree.h} +7 -6
  443. data/vendor/libgit2/src/{worktree.c → libgit2/worktree.c} +160 -121
  444. data/vendor/libgit2/src/{worktree.h → libgit2/worktree.h} +1 -1
  445. data/vendor/libgit2/src/util/CMakeLists.txt +77 -0
  446. data/vendor/libgit2/src/{alloc.c → util/alloc.c} +69 -7
  447. data/vendor/libgit2/src/util/alloc.h +65 -0
  448. data/vendor/libgit2/src/util/allocators/debugalloc.c +73 -0
  449. data/vendor/libgit2/src/util/allocators/debugalloc.h +17 -0
  450. data/vendor/libgit2/src/util/allocators/failalloc.c +32 -0
  451. data/vendor/libgit2/src/util/allocators/failalloc.h +17 -0
  452. data/vendor/libgit2/src/util/allocators/stdalloc.c +37 -0
  453. data/vendor/libgit2/src/{allocators → util/allocators}/stdalloc.h +1 -1
  454. data/vendor/libgit2/src/util/allocators/win32_leakcheck.c +50 -0
  455. data/vendor/libgit2/src/{allocators → util/allocators}/win32_leakcheck.h +1 -1
  456. data/vendor/libgit2/src/{array.h → util/array.h} +25 -19
  457. data/vendor/libgit2/src/{assert_safe.h → util/assert_safe.h} +16 -0
  458. data/vendor/libgit2/src/{cc-compat.h → util/cc-compat.h} +5 -1
  459. data/vendor/libgit2/src/util/ctype_compat.h +70 -0
  460. data/vendor/libgit2/src/{date.c → util/date.c} +35 -33
  461. data/vendor/libgit2/src/util/date.h +45 -0
  462. data/vendor/libgit2/src/util/errors.c +401 -0
  463. data/vendor/libgit2/src/{errors.h → util/errors.h} +22 -19
  464. data/vendor/libgit2/src/{filebuf.c → util/filebuf.c} +35 -30
  465. data/vendor/libgit2/src/{filebuf.h → util/filebuf.h} +21 -8
  466. data/vendor/libgit2/src/{path.c → util/fs_path.c} +591 -615
  467. data/vendor/libgit2/src/{path.h → util/fs_path.h} +257 -181
  468. data/vendor/libgit2/src/{futils.c → util/futils.c} +144 -95
  469. data/vendor/libgit2/src/{futils.h → util/futils.h} +40 -18
  470. data/vendor/libgit2/src/{features.h.in → util/git2_features.h.in} +33 -2
  471. data/vendor/libgit2/src/{common.h → util/git2_util.h} +26 -59
  472. data/vendor/libgit2/src/util/hash/builtin.c +53 -0
  473. data/vendor/libgit2/src/{hash/sha1/openssl.h → util/hash/builtin.h} +6 -6
  474. data/vendor/libgit2/src/{hash/sha1 → util/hash}/collisiondetect.c +3 -3
  475. data/vendor/libgit2/src/{hash/sha1 → util/hash}/collisiondetect.h +3 -3
  476. data/vendor/libgit2/src/util/hash/common_crypto.c +112 -0
  477. data/vendor/libgit2/src/{hash/sha1 → util/hash}/common_crypto.h +11 -3
  478. data/vendor/libgit2/src/util/hash/mbedtls.c +92 -0
  479. data/vendor/libgit2/src/{hash/sha1 → util/hash}/mbedtls.h +14 -4
  480. data/vendor/libgit2/src/util/hash/openssl.c +347 -0
  481. data/vendor/libgit2/src/util/hash/openssl.h +61 -0
  482. data/vendor/libgit2/src/util/hash/rfc6234/sha.h +243 -0
  483. data/vendor/libgit2/src/util/hash/rfc6234/sha224-256.c +601 -0
  484. data/vendor/libgit2/src/util/hash/sha.h +73 -0
  485. data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/sha1.c +1 -1
  486. data/vendor/libgit2/src/util/hash/win32.c +549 -0
  487. data/vendor/libgit2/src/util/hash/win32.h +60 -0
  488. data/vendor/libgit2/src/util/hash.c +158 -0
  489. data/vendor/libgit2/src/util/hash.h +61 -0
  490. data/vendor/libgit2/src/util/hashmap.h +424 -0
  491. data/vendor/libgit2/src/util/hashmap_str.h +43 -0
  492. data/vendor/libgit2/src/{integer.h → util/integer.h} +3 -1
  493. data/vendor/libgit2/src/{map.h → util/map.h} +1 -1
  494. data/vendor/libgit2/src/util/net.c +1160 -0
  495. data/vendor/libgit2/src/{net.h → util/net.h} +45 -4
  496. data/vendor/libgit2/src/{pool.c → util/pool.c} +1 -1
  497. data/vendor/libgit2/src/{pool.h → util/pool.h} +6 -1
  498. data/vendor/libgit2/src/{posix.c → util/posix.c} +57 -3
  499. data/vendor/libgit2/src/{posix.h → util/posix.h} +26 -1
  500. data/vendor/libgit2/src/{pqueue.h → util/pqueue.h} +3 -3
  501. data/vendor/libgit2/src/util/process.h +222 -0
  502. data/vendor/libgit2/src/util/rand.c +230 -0
  503. data/vendor/libgit2/src/util/rand.h +37 -0
  504. data/vendor/libgit2/src/{regexp.c → util/regexp.c} +5 -5
  505. data/vendor/libgit2/src/{regexp.h → util/regexp.h} +1 -1
  506. data/vendor/libgit2/src/{runtime.c → util/runtime.c} +1 -1
  507. data/vendor/libgit2/src/{runtime.h → util/runtime.h} +1 -1
  508. data/vendor/libgit2/src/{sortedcache.c → util/sortedcache.c} +15 -14
  509. data/vendor/libgit2/src/{sortedcache.h → util/sortedcache.h} +5 -5
  510. data/vendor/libgit2/src/util/staticstr.h +66 -0
  511. data/vendor/libgit2/src/{buffer.c → util/str.c} +159 -153
  512. data/vendor/libgit2/src/util/str.h +357 -0
  513. data/vendor/libgit2/src/util/strlist.c +108 -0
  514. data/vendor/libgit2/src/util/strlist.h +36 -0
  515. data/vendor/libgit2/src/{thread.c → util/thread.c} +1 -1
  516. data/vendor/libgit2/src/{thread.h → util/thread.h} +23 -22
  517. data/vendor/libgit2/src/{tsort.c → util/tsort.c} +1 -1
  518. data/vendor/libgit2/src/{unix → util/unix}/map.c +1 -3
  519. data/vendor/libgit2/src/{unix → util/unix}/posix.h +1 -6
  520. data/vendor/libgit2/src/util/unix/process.c +629 -0
  521. data/vendor/libgit2/src/{unix → util/unix}/realpath.c +24 -8
  522. data/vendor/libgit2/src/{utf8.c → util/utf8.c} +1 -1
  523. data/vendor/libgit2/src/{utf8.h → util/utf8.h} +1 -1
  524. data/vendor/libgit2/src/{util.c → util/util.c} +24 -19
  525. data/vendor/libgit2/src/{util.h → util/util.h} +30 -81
  526. data/vendor/libgit2/src/{varint.h → util/varint.h} +1 -1
  527. data/vendor/libgit2/src/{vector.c → util/vector.c} +3 -3
  528. data/vendor/libgit2/src/{vector.h → util/vector.h} +4 -4
  529. data/vendor/libgit2/src/{wildmatch.h → util/wildmatch.h} +1 -1
  530. data/vendor/libgit2/src/{win32 → util/win32}/dir.h +1 -1
  531. data/vendor/libgit2/src/{win32 → util/win32}/error.c +1 -1
  532. data/vendor/libgit2/src/{win32 → util/win32}/error.h +1 -1
  533. data/vendor/libgit2/src/{win32 → util/win32}/map.c +1 -1
  534. data/vendor/libgit2/src/{win32 → util/win32}/path_w32.c +148 -17
  535. data/vendor/libgit2/src/{win32 → util/win32}/path_w32.h +3 -1
  536. data/vendor/libgit2/src/{win32 → util/win32}/posix.h +1 -2
  537. data/vendor/libgit2/src/{win32 → util/win32}/posix_w32.c +42 -35
  538. data/vendor/libgit2/src/util/win32/precompiled.c +1 -0
  539. data/vendor/libgit2/src/{win32 → util/win32}/precompiled.h +1 -1
  540. data/vendor/libgit2/src/util/win32/process.c +506 -0
  541. data/vendor/libgit2/src/{win32 → util/win32}/thread.h +1 -1
  542. data/vendor/libgit2/src/util/win32/utf-conv.c +144 -0
  543. data/vendor/libgit2/src/util/win32/utf-conv.h +127 -0
  544. data/vendor/libgit2/src/{win32 → util/win32}/w32_buffer.c +2 -3
  545. data/vendor/libgit2/src/{win32 → util/win32}/w32_buffer.h +3 -4
  546. data/vendor/libgit2/src/{win32 → util/win32}/w32_leakcheck.c +1 -1
  547. data/vendor/libgit2/src/{win32 → util/win32}/w32_leakcheck.h +1 -1
  548. data/vendor/libgit2/src/{win32 → util/win32}/w32_util.c +1 -1
  549. data/vendor/libgit2/src/{win32 → util/win32}/w32_util.h +1 -1
  550. data/vendor/libgit2/src/{zstream.c → util/zstream.c} +5 -5
  551. data/vendor/libgit2/src/{zstream.h → util/zstream.h} +5 -5
  552. metadata +431 -362
  553. data/vendor/libgit2/cmake/FindIconv.cmake +0 -45
  554. data/vendor/libgit2/deps/http-parser/CMakeLists.txt +0 -6
  555. data/vendor/libgit2/deps/http-parser/COPYING +0 -23
  556. data/vendor/libgit2/deps/http-parser/http_parser.c +0 -2182
  557. data/vendor/libgit2/deps/http-parser/http_parser.h +0 -305
  558. data/vendor/libgit2/deps/zlib/COPYING +0 -27
  559. data/vendor/libgit2/include/git2/sys/reflog.h +0 -21
  560. data/vendor/libgit2/src/alloc.h +0 -40
  561. data/vendor/libgit2/src/allocators/failalloc.c +0 -92
  562. data/vendor/libgit2/src/allocators/failalloc.h +0 -23
  563. data/vendor/libgit2/src/allocators/stdalloc.c +0 -150
  564. data/vendor/libgit2/src/allocators/win32_leakcheck.c +0 -118
  565. data/vendor/libgit2/src/buffer.h +0 -374
  566. data/vendor/libgit2/src/commit.h +0 -46
  567. data/vendor/libgit2/src/config_entries.c +0 -237
  568. data/vendor/libgit2/src/config_entries.h +0 -24
  569. data/vendor/libgit2/src/config_mem.c +0 -220
  570. data/vendor/libgit2/src/errors.c +0 -238
  571. data/vendor/libgit2/src/hash/sha1/common_crypto.c +0 -57
  572. data/vendor/libgit2/src/hash/sha1/generic.c +0 -300
  573. data/vendor/libgit2/src/hash/sha1/generic.h +0 -19
  574. data/vendor/libgit2/src/hash/sha1/mbedtls.c +0 -46
  575. data/vendor/libgit2/src/hash/sha1/openssl.c +0 -59
  576. data/vendor/libgit2/src/hash/sha1/win32.c +0 -333
  577. data/vendor/libgit2/src/hash/sha1/win32.h +0 -128
  578. data/vendor/libgit2/src/hash/sha1.h +0 -38
  579. data/vendor/libgit2/src/hash.c +0 -110
  580. data/vendor/libgit2/src/hash.h +0 -46
  581. data/vendor/libgit2/src/idxmap.c +0 -157
  582. data/vendor/libgit2/src/idxmap.h +0 -177
  583. data/vendor/libgit2/src/khash.h +0 -615
  584. data/vendor/libgit2/src/libgit2.h +0 -15
  585. data/vendor/libgit2/src/message.h +0 -17
  586. data/vendor/libgit2/src/net.c +0 -540
  587. data/vendor/libgit2/src/netops.c +0 -125
  588. data/vendor/libgit2/src/netops.h +0 -68
  589. data/vendor/libgit2/src/offmap.c +0 -101
  590. data/vendor/libgit2/src/offmap.h +0 -133
  591. data/vendor/libgit2/src/oid.h +0 -51
  592. data/vendor/libgit2/src/oidarray.c +0 -43
  593. data/vendor/libgit2/src/oidmap.c +0 -107
  594. data/vendor/libgit2/src/oidmap.h +0 -128
  595. data/vendor/libgit2/src/remote.h +0 -55
  596. data/vendor/libgit2/src/streams/socket.c +0 -239
  597. data/vendor/libgit2/src/strmap.c +0 -100
  598. data/vendor/libgit2/src/strmap.h +0 -131
  599. data/vendor/libgit2/src/sysdir.c +0 -347
  600. data/vendor/libgit2/src/threadstate.c +0 -84
  601. data/vendor/libgit2/src/threadstate.h +0 -24
  602. data/vendor/libgit2/src/win32/findfile.c +0 -230
  603. data/vendor/libgit2/src/win32/findfile.h +0 -19
  604. data/vendor/libgit2/src/win32/utf-conv.c +0 -146
  605. data/vendor/libgit2/src/win32/utf-conv.h +0 -60
  606. /data/vendor/libgit2/{src → deps}/xdiff/xemit.h +0 -0
  607. /data/vendor/libgit2/{src → deps}/xdiff/xprepare.h +0 -0
  608. /data/vendor/libgit2/{src → deps}/xdiff/xtypes.h +0 -0
  609. /data/vendor/libgit2/src/{win32 → cli/win32}/precompiled.c +0 -0
  610. /data/vendor/libgit2/src/{attr.h → libgit2/attr.h} +0 -0
  611. /data/vendor/libgit2/src/{blame_git.h → libgit2/blame_git.h} +0 -0
  612. /data/vendor/libgit2/src/{config_parse.h → libgit2/config_parse.h} +0 -0
  613. /data/vendor/libgit2/src/{delta.c → libgit2/delta.c} +0 -0
  614. /data/vendor/libgit2/src/{delta.h → libgit2/delta.h} +0 -0
  615. /data/vendor/libgit2/src/{diff_file.h → libgit2/diff_file.h} +0 -0
  616. /data/vendor/libgit2/src/{diff_parse.h → libgit2/diff_parse.h} +0 -0
  617. /data/vendor/libgit2/src/{diff_tform.h → libgit2/diff_tform.h} +0 -0
  618. /data/vendor/libgit2/src/{fetchhead.h → libgit2/fetchhead.h} +0 -0
  619. /data/vendor/libgit2/src/{hashsig.c → libgit2/hashsig.c} +0 -0
  620. /data/vendor/libgit2/src/{indexer.h → libgit2/indexer.h} +0 -0
  621. /data/vendor/libgit2/src/{mailmap.h → libgit2/mailmap.h} +0 -0
  622. /data/vendor/libgit2/src/{merge_driver.h → libgit2/merge_driver.h} +0 -0
  623. /data/vendor/libgit2/src/{notes.h → libgit2/notes.h} +0 -0
  624. /data/vendor/libgit2/src/{object_api.c → libgit2/object_api.c} +0 -0
  625. /data/vendor/libgit2/src/{patch_parse.h → libgit2/patch_parse.h} +0 -0
  626. /data/vendor/libgit2/src/{refdb.c → libgit2/refdb.c} +0 -0
  627. /data/vendor/libgit2/src/{refdb.h → libgit2/refdb.h} +0 -0
  628. /data/vendor/libgit2/src/{repo_template.h → libgit2/repo_template.h} +0 -0
  629. /data/vendor/libgit2/src/{status.h → libgit2/status.h} +0 -0
  630. /data/vendor/libgit2/src/{stream.h → libgit2/stream.h} +0 -0
  631. /data/vendor/libgit2/src/{streams → libgit2/streams}/mbedtls.h +0 -0
  632. /data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_legacy.c +0 -0
  633. /data/vendor/libgit2/src/{streams → libgit2/streams}/openssl_legacy.h +0 -0
  634. /data/vendor/libgit2/src/{streams → libgit2/streams}/registry.c +0 -0
  635. /data/vendor/libgit2/src/{streams → libgit2/streams}/registry.h +0 -0
  636. /data/vendor/libgit2/src/{streams → libgit2/streams}/stransport.h +0 -0
  637. /data/vendor/libgit2/src/{streams → libgit2/streams}/tls.h +0 -0
  638. /data/vendor/libgit2/src/{transports → libgit2/transports}/credential_helpers.c +0 -0
  639. /data/vendor/libgit2/src/{userdiff.h → libgit2/userdiff.h} +0 -0
  640. /data/vendor/libgit2/src/{bitvec.h → util/bitvec.h} +0 -0
  641. /data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/sha1.h +0 -0
  642. /data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/ubc_check.c +0 -0
  643. /data/vendor/libgit2/src/{hash/sha1 → util/hash}/sha1dc/ubc_check.h +0 -0
  644. /data/vendor/libgit2/src/{pqueue.c → util/pqueue.c} +0 -0
  645. /data/vendor/libgit2/src/{strnlen.h → util/strnlen.h} +0 -0
  646. /data/vendor/libgit2/src/{unix → util/unix}/pthread.h +0 -0
  647. /data/vendor/libgit2/src/{varint.c → util/varint.c} +0 -0
  648. /data/vendor/libgit2/src/{wildmatch.c → util/wildmatch.c} +0 -0
  649. /data/vendor/libgit2/src/{win32 → util/win32}/dir.c +0 -0
  650. /data/vendor/libgit2/src/{win32 → util/win32}/mingw-compat.h +0 -0
  651. /data/vendor/libgit2/src/{win32 → util/win32}/msvc-compat.h +0 -0
  652. /data/vendor/libgit2/src/{win32 → util/win32}/reparse.h +0 -0
  653. /data/vendor/libgit2/src/{win32 → util/win32}/thread.c +0 -0
  654. /data/vendor/libgit2/src/{win32 → util/win32}/version.h +0 -0
  655. /data/vendor/libgit2/src/{win32 → util/win32}/w32_common.h +0 -0
  656. /data/vendor/libgit2/src/{win32 → util/win32}/win32-compat.h +0 -0
@@ -1,30 +1,30 @@
1
- // The latest version of this library is available on GitHub;
2
- // https://github.com/sheredom/utf8.h
3
-
4
- // This is free and unencumbered software released into the public domain.
5
- //
6
- // Anyone is free to copy, modify, publish, use, compile, sell, or
7
- // distribute this software, either in source code form or as a compiled
8
- // binary, for any purpose, commercial or non-commercial, and by any
9
- // means.
10
- //
11
- // In jurisdictions that recognize copyright laws, the author or authors
12
- // of this software dedicate any and all copyright interest in the
13
- // software to the public domain. We make this dedication for the benefit
14
- // of the public at large and to the detriment of our heirs and
15
- // successors. We intend this dedication to be an overt act of
16
- // relinquishment in perpetuity of all present and future rights to this
17
- // software under copyright law.
18
- //
19
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
- // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
- // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
- // OTHER DEALINGS IN THE SOFTWARE.
26
- //
27
- // For more information, please refer to <http://unlicense.org/>
1
+ /* The latest version of this library is available on GitHub;
2
+ * https://github.com/sheredom/utf8.h */
3
+
4
+ /* This is free and unencumbered software released into the public domain.
5
+ *
6
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
7
+ * distribute this software, either in source code form or as a compiled
8
+ * binary, for any purpose, commercial or non-commercial, and by any
9
+ * means.
10
+ *
11
+ * In jurisdictions that recognize copyright laws, the author or authors
12
+ * of this software dedicate any and all copyright interest in the
13
+ * software to the public domain. We make this dedication for the benefit
14
+ * of the public at large and to the detriment of our heirs and
15
+ * successors. We intend this dedication to be an overt act of
16
+ * relinquishment in perpetuity of all present and future rights to this
17
+ * software under copyright law.
18
+ *
19
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ * OTHER DEALINGS IN THE SOFTWARE.
26
+ *
27
+ * For more information, please refer to <http://unlicense.org/> */
28
28
 
29
29
  #ifndef SHEREDOM_UTF8_H_INCLUDED
30
30
  #define SHEREDOM_UTF8_H_INCLUDED
@@ -32,10 +32,25 @@
32
32
  #if defined(_MSC_VER)
33
33
  #pragma warning(push)
34
34
 
35
- // disable 'bytes padding added after construct' warning
35
+ /* disable warning: no function prototype given: converting '()' to '(void)' */
36
+ #pragma warning(disable : 4255)
37
+
38
+ /* disable warning: '__cplusplus' is not defined as a preprocessor macro,
39
+ * replacing with '0' for '#if/#elif' */
40
+ #pragma warning(disable : 4668)
41
+
42
+ /* disable warning: bytes padding added after construct */
36
43
  #pragma warning(disable : 4820)
37
44
  #endif
38
45
 
46
+ #if defined(__cplusplus)
47
+ #if defined(_MSC_VER)
48
+ #define utf8_cplusplus _MSVC_LANG
49
+ #else
50
+ #define utf8_cplusplus __cplusplus
51
+ #endif
52
+ #endif
53
+
39
54
  #include <stddef.h>
40
55
  #include <stdlib.h>
41
56
 
@@ -43,7 +58,7 @@
43
58
  #pragma warning(pop)
44
59
  #endif
45
60
 
46
- #if defined(_MSC_VER)
61
+ #if defined(_MSC_VER) && (_MSC_VER < 1920)
47
62
  typedef __int32 utf8_int32_t;
48
63
  #else
49
64
  #include <stdint.h>
@@ -54,411 +69,516 @@ typedef int32_t utf8_int32_t;
54
69
  #pragma clang diagnostic push
55
70
  #pragma clang diagnostic ignored "-Wold-style-cast"
56
71
  #pragma clang diagnostic ignored "-Wcast-qual"
72
+
73
+ #if __has_warning("-Wunsafe-buffer-usage")
74
+ #pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
75
+ #endif
57
76
  #endif
58
77
 
59
- #ifdef __cplusplus
78
+ #ifdef utf8_cplusplus
60
79
  extern "C" {
61
80
  #endif
62
81
 
63
- #if defined(__clang__) || defined(__GNUC__)
64
- #define utf8_nonnull __attribute__((nonnull))
65
- #define utf8_pure __attribute__((pure))
66
- #define utf8_restrict __restrict__
67
- #define utf8_weak __attribute__((weak))
68
- #elif defined(_MSC_VER)
82
+ #if defined(__TINYC__)
83
+ #define UTF8_ATTRIBUTE(a) __attribute((a))
84
+ #else
85
+ #define UTF8_ATTRIBUTE(a) __attribute__((a))
86
+ #endif
87
+
88
+ #if defined(_MSC_VER)
69
89
  #define utf8_nonnull
70
90
  #define utf8_pure
71
91
  #define utf8_restrict __restrict
72
92
  #define utf8_weak __inline
93
+ #elif defined(__clang__) || defined(__GNUC__)
94
+ #define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
95
+ #define utf8_pure UTF8_ATTRIBUTE(pure)
96
+ #define utf8_restrict __restrict__
97
+ #define utf8_weak UTF8_ATTRIBUTE(weak)
98
+ #elif defined(__TINYC__)
99
+ #define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
100
+ #define utf8_pure UTF8_ATTRIBUTE(pure)
101
+ #define utf8_restrict
102
+ #define utf8_weak UTF8_ATTRIBUTE(weak)
73
103
  #else
74
- #error Non clang, non gcc, non MSVC compiler found!
104
+ #error Non clang, non gcc, non MSVC, non tcc compiler found!
75
105
  #endif
76
106
 
77
- #ifdef __cplusplus
107
+ #ifdef utf8_cplusplus
78
108
  #define utf8_null NULL
79
109
  #else
80
110
  #define utf8_null 0
81
111
  #endif
82
112
 
83
- // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
84
- // src2 respectively, case insensitive.
85
- utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1,
86
- const void *src2);
87
-
88
- // Append the utf8 string src onto the utf8 string dst.
89
- utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst,
90
- const void *utf8_restrict src);
91
-
92
- // Find the first match of the utf8 codepoint chr in the utf8 string src.
93
- utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src,
94
- utf8_int32_t chr);
95
-
96
- // Return less than 0, 0, greater than 0 if src1 < src2,
97
- // src1 == src2, src1 > src2 respectively.
98
- utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1,
99
- const void *src2);
100
-
101
- // Copy the utf8 string src onto the memory allocated in dst.
102
- utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst,
103
- const void *utf8_restrict src);
104
-
105
- // Number of utf8 codepoints in the utf8 string src that consists entirely
106
- // of utf8 codepoints not from the utf8 string reject.
107
- utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src,
108
- const void *reject);
109
-
110
- // Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
111
- // copying over the data, and returning that. Or 0 if malloc failed.
112
- utf8_nonnull utf8_weak void *utf8dup(const void *src);
113
-
114
- // Number of utf8 codepoints in the utf8 string str,
115
- // excluding the null terminating byte.
116
- utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
117
-
118
- // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
119
- // src2 respectively, case insensitive. Checking at most n bytes of each utf8
120
- // string.
121
- utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1,
122
- const void *src2, size_t n);
123
-
124
- // Append the utf8 string src onto the utf8 string dst,
125
- // writing at most n+1 bytes. Can produce an invalid utf8
126
- // string if n falls partway through a utf8 codepoint.
127
- utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst,
128
- const void *utf8_restrict src, size_t n);
129
-
130
- // Return less than 0, 0, greater than 0 if src1 < src2,
131
- // src1 == src2, src1 > src2 respectively. Checking at most n
132
- // bytes of each utf8 string.
133
- utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1,
134
- const void *src2, size_t n);
135
-
136
- // Copy the utf8 string src onto the memory allocated in dst.
137
- // Copies at most n bytes. If there is no terminating null byte in
138
- // the first n bytes of src, the string placed into dst will not be
139
- // null-terminated. If the size (in bytes) of src is less than n,
140
- // extra null terminating bytes are appended to dst such that at
141
- // total of n bytes are written. Can produce an invalid utf8
142
- // string if n falls partway through a utf8 codepoint.
143
- utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst,
144
- const void *utf8_restrict src, size_t n);
145
-
146
- // Similar to utf8dup, except that at most n bytes of src are copied. If src is
147
- // longer than n, only n bytes are copied and a null byte is added.
148
- //
149
- // Returns a new string if successful, 0 otherwise
150
- utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
151
-
152
- // Locates the first occurence in the utf8 string str of any byte in the
153
- // utf8 string accept, or 0 if no match was found.
154
- utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str,
155
- const void *accept);
156
-
157
- // Find the last match of the utf8 codepoint chr in the utf8 string src.
158
- utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
159
-
160
- // Number of bytes in the utf8 string str,
161
- // including the null terminating byte.
162
- utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
163
-
164
- // Number of utf8 codepoints in the utf8 string src that consists entirely
165
- // of utf8 codepoints from the utf8 string accept.
166
- utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src,
167
- const void *accept);
168
-
169
- // The position of the utf8 string needle in the utf8 string haystack.
170
- utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack,
171
- const void *needle);
172
-
173
- // The position of the utf8 string needle in the utf8 string haystack, case
174
- // insensitive.
175
- utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack,
176
- const void *needle);
177
-
178
- // Return 0 on success, or the position of the invalid
179
- // utf8 codepoint on failure.
180
- utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
181
-
182
- // Sets out_codepoint to the next utf8 codepoint in str, and returns the address
183
- // of the utf8 codepoint after the current one in str.
184
- utf8_nonnull utf8_weak void *
185
- utf8codepoint(const void *utf8_restrict str,
186
- utf8_int32_t *utf8_restrict out_codepoint);
187
-
188
- // Returns the size of the given codepoint in bytes.
189
- utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
190
-
191
- // Write a codepoint to the given string, and return the address to the next
192
- // place after the written codepoint. Pass how many bytes left in the buffer to
193
- // n. If there is not enough space for the codepoint, this function returns
194
- // null.
195
- utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str,
196
- utf8_int32_t chr, size_t n);
197
-
198
- // Returns 1 if the given character is lowercase, or 0 if it is not.
199
- utf8_weak int utf8islower(utf8_int32_t chr);
200
-
201
- // Returns 1 if the given character is uppercase, or 0 if it is not.
202
- utf8_weak int utf8isupper(utf8_int32_t chr);
203
-
204
- // Transform the given string into all lowercase codepoints.
205
- utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
113
+ #if defined(utf8_cplusplus) && utf8_cplusplus >= 201402L && (!defined(_MSC_VER) || (defined(_MSC_VER) && _MSC_VER >= 1910))
114
+ #define utf8_constexpr14 constexpr
115
+ #define utf8_constexpr14_impl constexpr
116
+ #else
117
+ /* constexpr and weak are incompatible, so only enable one of them */
118
+ #define utf8_constexpr14 utf8_weak
119
+ #define utf8_constexpr14_impl
120
+ #endif
206
121
 
207
- // Transform the given string into all uppercase codepoints.
208
- utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str);
122
+ #if defined(utf8_cplusplus) && utf8_cplusplus >= 202002L
123
+ using utf8_int8_t = char8_t; /* Introduced in C++20 */
124
+ #else
125
+ typedef char utf8_int8_t;
126
+ #endif
209
127
 
210
- // Make a codepoint lower case if possible.
211
- utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
128
+ /* Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
129
+ * src2 respectively, case insensitive. */
130
+ utf8_constexpr14 utf8_nonnull utf8_pure int
131
+ utf8casecmp(const utf8_int8_t *src1, const utf8_int8_t *src2);
132
+
133
+ /* Append the utf8 string src onto the utf8 string dst. */
134
+ utf8_nonnull utf8_weak utf8_int8_t *
135
+ utf8cat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src);
136
+
137
+ /* Find the first match of the utf8 codepoint chr in the utf8 string src. */
138
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
139
+ utf8chr(const utf8_int8_t *src, utf8_int32_t chr);
140
+
141
+ /* Return less than 0, 0, greater than 0 if src1 < src2,
142
+ * src1 == src2, src1 > src2 respectively. */
143
+ utf8_constexpr14 utf8_nonnull utf8_pure int utf8cmp(const utf8_int8_t *src1,
144
+ const utf8_int8_t *src2);
145
+
146
+ /* Copy the utf8 string src onto the memory allocated in dst. */
147
+ utf8_nonnull utf8_weak utf8_int8_t *
148
+ utf8cpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src);
149
+
150
+ /* Number of utf8 codepoints in the utf8 string src that consists entirely
151
+ * of utf8 codepoints not from the utf8 string reject. */
152
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t
153
+ utf8cspn(const utf8_int8_t *src, const utf8_int8_t *reject);
154
+
155
+ /* Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
156
+ * copying over the data, and returning that. Or 0 if malloc failed. */
157
+ utf8_weak utf8_int8_t *utf8dup(const utf8_int8_t *src);
158
+
159
+ /* Number of utf8 codepoints in the utf8 string str,
160
+ * excluding the null terminating byte. */
161
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8len(const utf8_int8_t *str);
162
+
163
+ /* Similar to utf8len, except that at most n bytes of str are examined. */
164
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8nlen(const utf8_int8_t *str,
165
+ size_t n);
166
+
167
+ /* Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
168
+ * src2 respectively, case insensitive. Checking at most n bytes of each utf8
169
+ * string. */
170
+ utf8_constexpr14 utf8_nonnull utf8_pure int
171
+ utf8ncasecmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n);
172
+
173
+ /* Append the utf8 string src onto the utf8 string dst,
174
+ * writing at most n+1 bytes. Can produce an invalid utf8
175
+ * string if n falls partway through a utf8 codepoint. */
176
+ utf8_nonnull utf8_weak utf8_int8_t *
177
+ utf8ncat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src,
178
+ size_t n);
179
+
180
+ /* Return less than 0, 0, greater than 0 if src1 < src2,
181
+ * src1 == src2, src1 > src2 respectively. Checking at most n
182
+ * bytes of each utf8 string. */
183
+ utf8_constexpr14 utf8_nonnull utf8_pure int
184
+ utf8ncmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n);
185
+
186
+ /* Copy the utf8 string src onto the memory allocated in dst.
187
+ * Copies at most n bytes. If n falls partway through a utf8
188
+ * codepoint, or if dst doesn't have enough room for a null
189
+ * terminator, the final string will be cut short to preserve
190
+ * utf8 validity. */
191
+
192
+ utf8_nonnull utf8_weak utf8_int8_t *
193
+ utf8ncpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src,
194
+ size_t n);
195
+
196
+ /* Similar to utf8dup, except that at most n bytes of src are copied. If src is
197
+ * longer than n, only n bytes are copied and a null byte is added.
198
+ *
199
+ * Returns a new string if successful, 0 otherwise */
200
+ utf8_weak utf8_int8_t *utf8ndup(const utf8_int8_t *src, size_t n);
201
+
202
+ /* Locates the first occurrence in the utf8 string str of any byte in the
203
+ * utf8 string accept, or 0 if no match was found. */
204
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
205
+ utf8pbrk(const utf8_int8_t *str, const utf8_int8_t *accept);
206
+
207
+ /* Find the last match of the utf8 codepoint chr in the utf8 string src. */
208
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
209
+ utf8rchr(const utf8_int8_t *src, int chr);
210
+
211
+ /* Number of bytes in the utf8 string str,
212
+ * including the null terminating byte. */
213
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8size(const utf8_int8_t *str);
214
+
215
+ /* Similar to utf8size, except that the null terminating byte is excluded. */
216
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t
217
+ utf8size_lazy(const utf8_int8_t *str);
218
+
219
+ /* Similar to utf8size, except that at most n bytes of str are examined and
220
+ * the null terminating byte is excluded. */
221
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t
222
+ utf8nsize_lazy(const utf8_int8_t *str, size_t n);
223
+
224
+ /* Number of utf8 codepoints in the utf8 string src that consists entirely
225
+ * of utf8 codepoints from the utf8 string accept. */
226
+ utf8_constexpr14 utf8_nonnull utf8_pure size_t
227
+ utf8spn(const utf8_int8_t *src, const utf8_int8_t *accept);
228
+
229
+ /* The position of the utf8 string needle in the utf8 string haystack. */
230
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
231
+ utf8str(const utf8_int8_t *haystack, const utf8_int8_t *needle);
232
+
233
+ /* The position of the utf8 string needle in the utf8 string haystack, case
234
+ * insensitive. */
235
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
236
+ utf8casestr(const utf8_int8_t *haystack, const utf8_int8_t *needle);
237
+
238
+ /* Return 0 on success, or the position of the invalid
239
+ * utf8 codepoint on failure. */
240
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
241
+ utf8valid(const utf8_int8_t *str);
242
+
243
+ /* Similar to utf8valid, except that at most n bytes of str are examined. */
244
+ utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t *
245
+ utf8nvalid(const utf8_int8_t *str, size_t n);
246
+
247
+ /* Given a null-terminated string, makes the string valid by replacing invalid
248
+ * codepoints with a 1-byte replacement. Returns 0 on success. */
249
+ utf8_nonnull utf8_weak int utf8makevalid(utf8_int8_t *str,
250
+ const utf8_int32_t replacement);
251
+
252
+ /* Sets out_codepoint to the current utf8 codepoint in str, and returns the
253
+ * address of the next utf8 codepoint after the current one in str. */
254
+ utf8_constexpr14 utf8_nonnull utf8_int8_t *
255
+ utf8codepoint(const utf8_int8_t *utf8_restrict str,
256
+ utf8_int32_t *utf8_restrict out_codepoint);
212
257
 
213
- // Make a codepoint upper case if possible.
214
- utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
258
+ /* Calculates the size of the next utf8 codepoint in str. */
259
+ utf8_constexpr14 utf8_nonnull size_t
260
+ utf8codepointcalcsize(const utf8_int8_t *str);
261
+
262
+ /* Returns the size of the given codepoint in bytes. */
263
+ utf8_constexpr14 size_t utf8codepointsize(utf8_int32_t chr);
264
+
265
+ /* Write a codepoint to the given string, and return the address to the next
266
+ * place after the written codepoint. Pass how many bytes left in the buffer to
267
+ * n. If there is not enough space for the codepoint, this function returns
268
+ * null. */
269
+ utf8_nonnull utf8_weak utf8_int8_t *
270
+ utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n);
271
+
272
+ /* Returns 1 if the given character is lowercase, or 0 if it is not. */
273
+ utf8_constexpr14 int utf8islower(utf8_int32_t chr);
274
+
275
+ /* Returns 1 if the given character is uppercase, or 0 if it is not. */
276
+ utf8_constexpr14 int utf8isupper(utf8_int32_t chr);
277
+
278
+ /* Transform the given string into all lowercase codepoints. */
279
+ utf8_nonnull utf8_weak void utf8lwr(utf8_int8_t *utf8_restrict str);
280
+
281
+ /* Transform the given string into all uppercase codepoints. */
282
+ utf8_nonnull utf8_weak void utf8upr(utf8_int8_t *utf8_restrict str);
283
+
284
+ /* Make a codepoint lower case if possible. */
285
+ utf8_constexpr14 utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
286
+
287
+ /* Make a codepoint upper case if possible. */
288
+ utf8_constexpr14 utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
289
+
290
+ /* Sets out_codepoint to the current utf8 codepoint in str, and returns the
291
+ * address of the previous utf8 codepoint before the current one in str. */
292
+ utf8_constexpr14 utf8_nonnull utf8_int8_t *
293
+ utf8rcodepoint(const utf8_int8_t *utf8_restrict str,
294
+ utf8_int32_t *utf8_restrict out_codepoint);
295
+
296
+ /* Duplicate the utf8 string src by getting its size, calling alloc_func_ptr to
297
+ * copy over data to a new buffer, and returning that. Or 0 if alloc_func_ptr
298
+ * returned null. */
299
+ utf8_weak utf8_int8_t *utf8dup_ex(const utf8_int8_t *src,
300
+ utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *,
301
+ size_t),
302
+ utf8_int8_t *user_data);
303
+
304
+ /* Similar to utf8dup_ex, except that at most n bytes of src are copied. If src is
305
+ * longer than n, only n bytes are copied and a null byte is added.
306
+ *
307
+ * Returns a new string if successful, 0 otherwise. */
308
+ utf8_weak utf8_int8_t *utf8ndup_ex(const utf8_int8_t *src, size_t n,
309
+ utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *,
310
+ size_t),
311
+ utf8_int8_t *user_data);
215
312
 
216
313
  #undef utf8_weak
217
314
  #undef utf8_pure
218
315
  #undef utf8_nonnull
219
316
 
220
- int utf8casecmp(const void *src1, const void *src2) {
221
- utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
317
+ utf8_constexpr14_impl int utf8casecmp(const utf8_int8_t *src1,
318
+ const utf8_int8_t *src2) {
319
+ utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
320
+ src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
222
321
 
223
322
  for (;;) {
224
- src1 = utf8codepoint(src1, &src1_cp);
225
- src2 = utf8codepoint(src2, &src2_cp);
323
+ src1 = utf8codepoint(src1, &src1_orig_cp);
324
+ src2 = utf8codepoint(src2, &src2_orig_cp);
226
325
 
227
- // Take a copy of src1 & src2
228
- src1_orig_cp = src1_cp;
229
- src2_orig_cp = src2_cp;
326
+ /* lower the srcs if required */
327
+ src1_lwr_cp = utf8lwrcodepoint(src1_orig_cp);
328
+ src2_lwr_cp = utf8lwrcodepoint(src2_orig_cp);
230
329
 
231
- // Lower the srcs if required
232
- src1_cp = utf8lwrcodepoint(src1_cp);
233
- src2_cp = utf8lwrcodepoint(src2_cp);
330
+ /* lower the srcs if required */
331
+ src1_upr_cp = utf8uprcodepoint(src1_orig_cp);
332
+ src2_upr_cp = utf8uprcodepoint(src2_orig_cp);
234
333
 
235
- // Check if the lowered codepoints match
334
+ /* check if the lowered codepoints match */
236
335
  if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
237
336
  return 0;
238
- } else if (src1_cp == src2_cp) {
337
+ } else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
239
338
  continue;
240
339
  }
241
340
 
242
- // If they don't match, then we return which of the original's are less
243
- if (src1_orig_cp < src2_orig_cp) {
244
- return -1;
245
- } else if (src1_orig_cp > src2_orig_cp) {
246
- return 1;
247
- }
341
+ /* if they don't match, then we return the difference between the characters
342
+ */
343
+ return src1_lwr_cp - src2_lwr_cp;
248
344
  }
249
345
  }
250
346
 
251
- void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src) {
252
- char *d = (char *)dst;
253
- const char *s = (const char *)src;
254
-
255
- // find the null terminating byte in dst
347
+ utf8_int8_t *utf8cat(utf8_int8_t *utf8_restrict dst,
348
+ const utf8_int8_t *utf8_restrict src) {
349
+ utf8_int8_t *d = dst;
350
+ /* find the null terminating byte in dst */
256
351
  while ('\0' != *d) {
257
352
  d++;
258
353
  }
259
354
 
260
- // overwriting the null terminating byte in dst, append src byte-by-byte
261
- while ('\0' != *s) {
262
- *d++ = *s++;
355
+ /* overwriting the null terminating byte in dst, append src byte-by-byte */
356
+ while ('\0' != *src) {
357
+ *d++ = *src++;
263
358
  }
264
359
 
265
- // write out a new null terminating byte into dst
360
+ /* write out a new null terminating byte into dst */
266
361
  *d = '\0';
267
362
 
268
363
  return dst;
269
364
  }
270
365
 
271
- void *utf8chr(const void *src, utf8_int32_t chr) {
272
- char c[5] = {'\0', '\0', '\0', '\0', '\0'};
366
+ utf8_constexpr14_impl utf8_int8_t *utf8chr(const utf8_int8_t *src,
367
+ utf8_int32_t chr) {
368
+ utf8_int8_t c[5] = {'\0', '\0', '\0', '\0', '\0'};
273
369
 
274
370
  if (0 == chr) {
275
- // being asked to return position of null terminating byte, so
276
- // just run s to the end, and return!
277
- const char *s = (const char *)src;
278
- while ('\0' != *s) {
279
- s++;
371
+ /* being asked to return position of null terminating byte, so
372
+ * just run s to the end, and return! */
373
+ while ('\0' != *src) {
374
+ src++;
280
375
  }
281
- return (void *)s;
376
+ return (utf8_int8_t *)src;
282
377
  } else if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
283
- // 1-byte/7-bit ascii
284
- // (0b0xxxxxxx)
285
- c[0] = (char)chr;
378
+ /* 1-byte/7-bit ascii
379
+ * (0b0xxxxxxx) */
380
+ c[0] = (utf8_int8_t)chr;
286
381
  } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
287
- // 2-byte/11-bit utf8 code point
288
- // (0b110xxxxx 0b10xxxxxx)
289
- c[0] = 0xc0 | (char)(chr >> 6);
290
- c[1] = 0x80 | (char)(chr & 0x3f);
382
+ /* 2-byte/11-bit utf8 code point
383
+ * (0b110xxxxx 0b10xxxxxx) */
384
+ c[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)(chr >> 6));
385
+ c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
291
386
  } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
292
- // 3-byte/16-bit utf8 code point
293
- // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
294
- c[0] = 0xe0 | (char)(chr >> 12);
295
- c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
296
- c[2] = 0x80 | (char)(chr & 0x3f);
297
- } else { // if (0 == ((int)0xffe00000 & chr)) {
298
- // 4-byte/21-bit utf8 code point
299
- // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
300
- c[0] = 0xf0 | (char)(chr >> 18);
301
- c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
302
- c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
303
- c[3] = 0x80 | (char)(chr & 0x3f);
387
+ /* 3-byte/16-bit utf8 code point
388
+ * (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
389
+ c[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)(chr >> 12));
390
+ c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
391
+ c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
392
+ } else { /* if (0 == ((int)0xffe00000 & chr)) { */
393
+ /* 4-byte/21-bit utf8 code point
394
+ * (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
395
+ c[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)(chr >> 18));
396
+ c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
397
+ c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
398
+ c[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
304
399
  }
305
400
 
306
- // we've made c into a 2 utf8 codepoint string, one for the chr we are
307
- // seeking, another for the null terminating byte. Now use utf8str to
308
- // search
401
+ /* we've made c into a 2 utf8 codepoint string, one for the chr we are
402
+ * seeking, another for the null terminating byte. Now use utf8str to
403
+ * search */
309
404
  return utf8str(src, c);
310
405
  }
311
406
 
312
- int utf8cmp(const void *src1, const void *src2) {
313
- const unsigned char *s1 = (const unsigned char *)src1;
314
- const unsigned char *s2 = (const unsigned char *)src2;
315
-
316
- while (('\0' != *s1) || ('\0' != *s2)) {
317
- if (*s1 < *s2) {
407
+ utf8_constexpr14_impl int utf8cmp(const utf8_int8_t *src1,
408
+ const utf8_int8_t *src2) {
409
+ while (('\0' != *src1) || ('\0' != *src2)) {
410
+ if (*src1 < *src2) {
318
411
  return -1;
319
- } else if (*s1 > *s2) {
412
+ } else if (*src1 > *src2) {
320
413
  return 1;
321
414
  }
322
415
 
323
- s1++;
324
- s2++;
416
+ src1++;
417
+ src2++;
325
418
  }
326
419
 
327
- // both utf8 strings matched
420
+ /* both utf8 strings matched */
328
421
  return 0;
329
422
  }
330
423
 
331
- int utf8coll(const void *src1, const void *src2);
424
+ utf8_constexpr14_impl int utf8coll(const utf8_int8_t *src1,
425
+ const utf8_int8_t *src2);
332
426
 
333
- void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src) {
334
- char *d = (char *)dst;
335
- const char *s = (const char *)src;
427
+ utf8_int8_t *utf8cpy(utf8_int8_t *utf8_restrict dst,
428
+ const utf8_int8_t *utf8_restrict src) {
429
+ utf8_int8_t *d = dst;
336
430
 
337
- // overwriting anything previously in dst, write byte-by-byte
338
- // from src
339
- while ('\0' != *s) {
340
- *d++ = *s++;
431
+ /* overwriting anything previously in dst, write byte-by-byte
432
+ * from src */
433
+ while ('\0' != *src) {
434
+ *d++ = *src++;
341
435
  }
342
436
 
343
- // append null terminating byte
437
+ /* append null terminating byte */
344
438
  *d = '\0';
345
439
 
346
440
  return dst;
347
441
  }
348
442
 
349
- size_t utf8cspn(const void *src, const void *reject) {
350
- const char *s = (const char *)src;
443
+ utf8_constexpr14_impl size_t utf8cspn(const utf8_int8_t *src,
444
+ const utf8_int8_t *reject) {
351
445
  size_t chars = 0;
352
446
 
353
- while ('\0' != *s) {
354
- const char *r = (const char *)reject;
447
+ while ('\0' != *src) {
448
+ const utf8_int8_t *r = reject;
355
449
  size_t offset = 0;
356
450
 
357
451
  while ('\0' != *r) {
358
- // checking that if *r is the start of a utf8 codepoint
359
- // (it is not 0b10xxxxxx) and we have successfully matched
360
- // a previous character (0 < offset) - we found a match
452
+ /* checking that if *r is the start of a utf8 codepoint
453
+ * (it is not 0b10xxxxxx) and we have successfully matched
454
+ * a previous character (0 < offset) - we found a match */
361
455
  if ((0x80 != (0xc0 & *r)) && (0 < offset)) {
362
456
  return chars;
363
457
  } else {
364
- if (*r == s[offset]) {
365
- // part of a utf8 codepoint matched, so move our checking
366
- // onwards to the next byte
458
+ if (*r == src[offset]) {
459
+ /* part of a utf8 codepoint matched, so move our checking
460
+ * onwards to the next byte */
367
461
  offset++;
368
462
  r++;
369
463
  } else {
370
- // r could be in the middle of an unmatching utf8 code point,
371
- // so we need to march it on to the next character beginning,
464
+ /* r could be in the middle of an unmatching utf8 code point,
465
+ * so we need to march it on to the next character beginning, */
372
466
 
373
467
  do {
374
468
  r++;
375
469
  } while (0x80 == (0xc0 & *r));
376
470
 
377
- // reset offset too as we found a mismatch
471
+ /* reset offset too as we found a mismatch */
378
472
  offset = 0;
379
473
  }
380
474
  }
381
475
  }
382
476
 
383
- // the current utf8 codepoint in src did not match reject, but src
384
- // could have been partway through a utf8 codepoint, so we need to
385
- // march it onto the next utf8 codepoint starting byte
477
+ /* found a match at the end of *r, so didn't get a chance to test it */
478
+ if (0 < offset) {
479
+ return chars;
480
+ }
481
+
482
+ /* the current utf8 codepoint in src did not match reject, but src
483
+ * could have been partway through a utf8 codepoint, so we need to
484
+ * march it onto the next utf8 codepoint starting byte */
386
485
  do {
387
- s++;
388
- } while ((0x80 == (0xc0 & *s)));
486
+ src++;
487
+ } while ((0x80 == (0xc0 & *src)));
389
488
  chars++;
390
489
  }
391
490
 
392
491
  return chars;
393
492
  }
394
493
 
395
- size_t utf8size(const void *str);
494
+ utf8_int8_t *utf8dup(const utf8_int8_t *src) {
495
+ return utf8dup_ex(src, utf8_null, utf8_null);
496
+ }
396
497
 
397
- void *utf8dup(const void *src) {
398
- const char *s = (const char *)src;
399
- char *n = utf8_null;
498
+ utf8_int8_t *utf8dup_ex(const utf8_int8_t *src,
499
+ utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t),
500
+ utf8_int8_t *user_data) {
501
+ utf8_int8_t *n = utf8_null;
400
502
 
401
- // figure out how many bytes (including the terminator) we need to copy first
503
+ /* figure out how many bytes (including the terminator) we need to copy first
504
+ */
402
505
  size_t bytes = utf8size(src);
403
506
 
404
- n = (char *)malloc(bytes);
507
+ if (alloc_func_ptr) {
508
+ n = alloc_func_ptr(user_data, bytes);
509
+ } else {
510
+ #if !defined(UTF8_NO_STD_MALLOC)
511
+ n = (utf8_int8_t *)malloc(bytes);
512
+ #else
513
+ return utf8_null;
514
+ #endif
515
+ }
405
516
 
406
517
  if (utf8_null == n) {
407
- // out of memory so we bail
518
+ /* out of memory so we bail */
408
519
  return utf8_null;
409
520
  } else {
410
521
  bytes = 0;
411
522
 
412
- // copy src byte-by-byte into our new utf8 string
413
- while ('\0' != s[bytes]) {
414
- n[bytes] = s[bytes];
523
+ /* copy src byte-by-byte into our new utf8 string */
524
+ while ('\0' != src[bytes]) {
525
+ n[bytes] = src[bytes];
415
526
  bytes++;
416
527
  }
417
528
 
418
- // append null terminating byte
529
+ /* append null terminating byte */
419
530
  n[bytes] = '\0';
420
531
  return n;
421
532
  }
422
533
  }
423
534
 
424
- void *utf8fry(const void *str);
535
+ utf8_constexpr14_impl utf8_int8_t *utf8fry(const utf8_int8_t *str);
536
+
537
+ utf8_constexpr14_impl size_t utf8len(const utf8_int8_t *str) {
538
+ return utf8nlen(str, SIZE_MAX);
539
+ }
425
540
 
426
- size_t utf8len(const void *str) {
427
- const unsigned char *s = (const unsigned char *)str;
541
+ utf8_constexpr14_impl size_t utf8nlen(const utf8_int8_t *str, size_t n) {
542
+ const utf8_int8_t *t = str;
428
543
  size_t length = 0;
429
544
 
430
- while ('\0' != *s) {
431
- if (0xf0 == (0xf8 & *s)) {
432
- // 4-byte utf8 code point (began with 0b11110xxx)
433
- s += 4;
434
- } else if (0xe0 == (0xf0 & *s)) {
435
- // 3-byte utf8 code point (began with 0b1110xxxx)
436
- s += 3;
437
- } else if (0xc0 == (0xe0 & *s)) {
438
- // 2-byte utf8 code point (began with 0b110xxxxx)
439
- s += 2;
440
- } else { // if (0x00 == (0x80 & *s)) {
441
- // 1-byte ascii (began with 0b0xxxxxxx)
442
- s += 1;
545
+ while ((size_t)(str - t) < n && '\0' != *str) {
546
+ if (0xf0 == (0xf8 & *str)) {
547
+ /* 4-byte utf8 code point (began with 0b11110xxx) */
548
+ str += 4;
549
+ } else if (0xe0 == (0xf0 & *str)) {
550
+ /* 3-byte utf8 code point (began with 0b1110xxxx) */
551
+ str += 3;
552
+ } else if (0xc0 == (0xe0 & *str)) {
553
+ /* 2-byte utf8 code point (began with 0b110xxxxx) */
554
+ str += 2;
555
+ } else { /* if (0x00 == (0x80 & *s)) { */
556
+ /* 1-byte ascii (began with 0b0xxxxxxx) */
557
+ str += 1;
443
558
  }
444
559
 
445
- // no matter the bytes we marched s forward by, it was
446
- // only 1 utf8 codepoint
560
+ /* no matter the bytes we marched s forward by, it was
561
+ * only 1 utf8 codepoint */
447
562
  length++;
448
563
  }
449
564
 
565
+ if ((size_t)(str - t) > n) {
566
+ length--;
567
+ }
450
568
  return length;
451
569
  }
452
570
 
453
- int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
454
- utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
571
+ utf8_constexpr14_impl int utf8ncasecmp(const utf8_int8_t *src1,
572
+ const utf8_int8_t *src2, size_t n) {
573
+ utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
574
+ src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
455
575
 
456
576
  do {
457
- const unsigned char *const s1 = (const unsigned char *)src1;
458
- const unsigned char *const s2 = (const unsigned char *)src2;
577
+ const utf8_int8_t *const s1 = src1;
578
+ const utf8_int8_t *const s2 = src2;
459
579
 
460
- // first check that we have enough bytes left in n to contain an entire
461
- // codepoint
580
+ /* first check that we have enough bytes left in n to contain an entire
581
+ * codepoint */
462
582
  if (0 == n) {
463
583
  return 0;
464
584
  }
@@ -467,10 +587,8 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
467
587
  const utf8_int32_t c1 = (0xe0 & *s1);
468
588
  const utf8_int32_t c2 = (0xe0 & *s2);
469
589
 
470
- if (c1 < c2) {
471
- return -1;
472
- } else if (c1 > c2) {
473
- return 1;
590
+ if (c1 != c2) {
591
+ return c1 - c2;
474
592
  } else {
475
593
  return 0;
476
594
  }
@@ -480,10 +598,8 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
480
598
  const utf8_int32_t c1 = (0xf0 & *s1);
481
599
  const utf8_int32_t c2 = (0xf0 & *s2);
482
600
 
483
- if (c1 < c2) {
484
- return -1;
485
- } else if (c1 > c2) {
486
- return 1;
601
+ if (c1 != c2) {
602
+ return c1 - c2;
487
603
  } else {
488
604
  return 0;
489
605
  }
@@ -493,307 +609,343 @@ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
493
609
  const utf8_int32_t c1 = (0xf8 & *s1);
494
610
  const utf8_int32_t c2 = (0xf8 & *s2);
495
611
 
496
- if (c1 < c2) {
497
- return -1;
498
- } else if (c1 > c2) {
499
- return 1;
612
+ if (c1 != c2) {
613
+ return c1 - c2;
500
614
  } else {
501
615
  return 0;
502
616
  }
503
617
  }
504
618
 
505
- src1 = utf8codepoint(src1, &src1_cp);
506
- src2 = utf8codepoint(src2, &src2_cp);
507
- n -= utf8codepointsize(src1_cp);
619
+ src1 = utf8codepoint(src1, &src1_orig_cp);
620
+ src2 = utf8codepoint(src2, &src2_orig_cp);
621
+ n -= utf8codepointsize(src1_orig_cp);
508
622
 
509
- // Take a copy of src1 & src2
510
- src1_orig_cp = src1_cp;
511
- src2_orig_cp = src2_cp;
623
+ src1_lwr_cp = utf8lwrcodepoint(src1_orig_cp);
624
+ src2_lwr_cp = utf8lwrcodepoint(src2_orig_cp);
512
625
 
513
- // Lower srcs if required
514
- src1_cp = utf8lwrcodepoint(src1_cp);
515
- src2_cp = utf8lwrcodepoint(src2_cp);
626
+ src1_upr_cp = utf8uprcodepoint(src1_orig_cp);
627
+ src2_upr_cp = utf8uprcodepoint(src2_orig_cp);
516
628
 
517
- // Check if the lowered codepoints match
629
+ /* check if the lowered codepoints match */
518
630
  if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
519
631
  return 0;
520
- } else if (src1_cp == src2_cp) {
632
+ } else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
521
633
  continue;
522
634
  }
523
635
 
524
- // If they don't match, then we return which of the original's are less
525
- if (src1_orig_cp < src2_orig_cp) {
526
- return -1;
527
- } else if (src1_orig_cp > src2_orig_cp) {
528
- return 1;
529
- }
636
+ /* if they don't match, then we return the difference between the characters
637
+ */
638
+ return src1_lwr_cp - src2_lwr_cp;
530
639
  } while (0 < n);
531
640
 
532
- // both utf8 strings matched
641
+ /* both utf8 strings matched */
533
642
  return 0;
534
643
  }
535
644
 
536
- void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src,
537
- size_t n) {
538
- char *d = (char *)dst;
539
- const char *s = (const char *)src;
645
+ utf8_int8_t *utf8ncat(utf8_int8_t *utf8_restrict dst,
646
+ const utf8_int8_t *utf8_restrict src, size_t n) {
647
+ utf8_int8_t *d = dst;
540
648
 
541
- // find the null terminating byte in dst
649
+ /* find the null terminating byte in dst */
542
650
  while ('\0' != *d) {
543
651
  d++;
544
652
  }
545
653
 
546
- // overwriting the null terminating byte in dst, append src byte-by-byte
547
- // stopping if we run out of space
548
- do {
549
- *d++ = *s++;
550
- } while (('\0' != *s) && (0 != --n));
654
+ /* overwriting the null terminating byte in dst, append src byte-by-byte
655
+ * stopping if we run out of space */
656
+ while (('\0' != *src) && (0 != n--)) {
657
+ *d++ = *src++;
658
+ }
551
659
 
552
- // write out a new null terminating byte into dst
660
+ /* write out a new null terminating byte into dst */
553
661
  *d = '\0';
554
662
 
555
663
  return dst;
556
664
  }
557
665
 
558
- int utf8ncmp(const void *src1, const void *src2, size_t n) {
559
- const unsigned char *s1 = (const unsigned char *)src1;
560
- const unsigned char *s2 = (const unsigned char *)src2;
561
-
562
- while ((('\0' != *s1) || ('\0' != *s2)) && (0 != n--)) {
563
- if (*s1 < *s2) {
666
+ utf8_constexpr14_impl int utf8ncmp(const utf8_int8_t *src1,
667
+ const utf8_int8_t *src2, size_t n) {
668
+ while ((0 != n--) && (('\0' != *src1) || ('\0' != *src2))) {
669
+ if (*src1 < *src2) {
564
670
  return -1;
565
- } else if (*s1 > *s2) {
671
+ } else if (*src1 > *src2) {
566
672
  return 1;
567
673
  }
568
674
 
569
- s1++;
570
- s2++;
675
+ src1++;
676
+ src2++;
571
677
  }
572
678
 
573
- // both utf8 strings matched
679
+ /* both utf8 strings matched */
574
680
  return 0;
575
681
  }
576
682
 
577
- void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src,
578
- size_t n) {
579
- char *d = (char *)dst;
580
- const char *s = (const char *)src;
683
+ utf8_int8_t *utf8ncpy(utf8_int8_t *utf8_restrict dst,
684
+ const utf8_int8_t *utf8_restrict src, size_t n) {
685
+ utf8_int8_t *d = dst;
686
+ size_t index = 0, check_index = 0;
581
687
 
582
- // overwriting anything previously in dst, write byte-by-byte
583
- // from src
584
- do {
585
- *d++ = *s++;
586
- } while (('\0' != *s) && (0 != --n));
688
+ if (n == 0) {
689
+ return dst;
690
+ }
587
691
 
588
- // append null terminating byte
589
- while (0 != n) {
590
- *d++ = '\0';
591
- n--;
692
+ /* overwriting anything previously in dst, write byte-by-byte
693
+ * from src */
694
+ for (index = 0; index < n; index++) {
695
+ d[index] = src[index];
696
+ if ('\0' == src[index]) {
697
+ break;
698
+ }
699
+ }
700
+
701
+ for (check_index = index - 1;
702
+ check_index > 0 && 0x80 == (0xc0 & d[check_index]); check_index--) {
703
+ /* just moving the index */
704
+ }
705
+
706
+ if (check_index < index &&
707
+ ((index - check_index) < utf8codepointcalcsize(&d[check_index]) ||
708
+ (index - check_index) == n)) {
709
+ index = check_index;
710
+ }
711
+
712
+ /* append null terminating byte */
713
+ for (; index < n; index++) {
714
+ d[index] = 0;
592
715
  }
593
716
 
594
717
  return dst;
595
718
  }
596
719
 
597
- void *utf8ndup(const void *src, size_t n) {
598
- const char *s = (const char *)src;
599
- char *c = utf8_null;
720
+ utf8_int8_t *utf8ndup(const utf8_int8_t *src, size_t n) {
721
+ return utf8ndup_ex(src, n, utf8_null, utf8_null);
722
+ }
723
+
724
+ utf8_int8_t *utf8ndup_ex(const utf8_int8_t *src, size_t n,
725
+ utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t),
726
+ utf8_int8_t *user_data) {
727
+ utf8_int8_t *c = utf8_null;
600
728
  size_t bytes = 0;
601
729
 
602
- // Find the end of the string or stop when n is reached
603
- while ('\0' != s[bytes] && bytes < n) {
730
+ /* Find the end of the string or stop when n is reached */
731
+ while ('\0' != src[bytes] && bytes < n) {
604
732
  bytes++;
605
733
  }
606
734
 
607
- // In case bytes is actually less than n, we need to set it
608
- // to be used later in the copy byte by byte.
735
+ /* In case bytes is actually less than n, we need to set it
736
+ * to be used later in the copy byte by byte. */
609
737
  n = bytes;
610
738
 
611
- c = (char *)malloc(bytes + 1);
739
+ if (alloc_func_ptr) {
740
+ c = alloc_func_ptr(user_data, bytes + 1);
741
+ } else {
742
+ #if !defined(UTF8_NO_STD_MALLOC)
743
+ c = (utf8_int8_t *)malloc(bytes + 1);
744
+ #else
745
+ c = utf8_null;
746
+ #endif
747
+ }
748
+
612
749
  if (utf8_null == c) {
613
- // out of memory so we bail
750
+ /* out of memory so we bail */
614
751
  return utf8_null;
615
752
  }
616
753
 
617
754
  bytes = 0;
618
755
 
619
- // copy src byte-by-byte into our new utf8 string
620
- while ('\0' != s[bytes] && bytes < n) {
621
- c[bytes] = s[bytes];
756
+ /* copy src byte-by-byte into our new utf8 string */
757
+ while ('\0' != src[bytes] && bytes < n) {
758
+ c[bytes] = src[bytes];
622
759
  bytes++;
623
760
  }
624
761
 
625
- // append null terminating byte
762
+ /* append null terminating byte */
626
763
  c[bytes] = '\0';
627
764
  return c;
628
765
  }
629
766
 
630
- void *utf8rchr(const void *src, int chr) {
631
- const char *s = (const char *)src;
632
- const char *match = utf8_null;
633
- char c[5] = {'\0', '\0', '\0', '\0', '\0'};
767
+ utf8_constexpr14_impl utf8_int8_t *utf8rchr(const utf8_int8_t *src, int chr) {
768
+
769
+ utf8_int8_t *match = utf8_null;
770
+ utf8_int8_t c[5] = {'\0', '\0', '\0', '\0', '\0'};
634
771
 
635
772
  if (0 == chr) {
636
- // being asked to return position of null terminating byte, so
637
- // just run s to the end, and return!
638
- while ('\0' != *s) {
639
- s++;
773
+ /* being asked to return position of null terminating byte, so
774
+ * just run s to the end, and return! */
775
+ while ('\0' != *src) {
776
+ src++;
640
777
  }
641
- return (void *)s;
778
+ return (utf8_int8_t *)src;
642
779
  } else if (0 == ((int)0xffffff80 & chr)) {
643
- // 1-byte/7-bit ascii
644
- // (0b0xxxxxxx)
645
- c[0] = (char)chr;
780
+ /* 1-byte/7-bit ascii
781
+ * (0b0xxxxxxx) */
782
+ c[0] = (utf8_int8_t)chr;
646
783
  } else if (0 == ((int)0xfffff800 & chr)) {
647
- // 2-byte/11-bit utf8 code point
648
- // (0b110xxxxx 0b10xxxxxx)
649
- c[0] = 0xc0 | (char)(chr >> 6);
650
- c[1] = 0x80 | (char)(chr & 0x3f);
784
+ /* 2-byte/11-bit utf8 code point
785
+ * (0b110xxxxx 0b10xxxxxx) */
786
+ c[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)(chr >> 6));
787
+ c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
651
788
  } else if (0 == ((int)0xffff0000 & chr)) {
652
- // 3-byte/16-bit utf8 code point
653
- // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
654
- c[0] = 0xe0 | (char)(chr >> 12);
655
- c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
656
- c[2] = 0x80 | (char)(chr & 0x3f);
657
- } else { // if (0 == ((int)0xffe00000 & chr)) {
658
- // 4-byte/21-bit utf8 code point
659
- // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
660
- c[0] = 0xf0 | (char)(chr >> 18);
661
- c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
662
- c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
663
- c[3] = 0x80 | (char)(chr & 0x3f);
789
+ /* 3-byte/16-bit utf8 code point
790
+ * (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
791
+ c[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)(chr >> 12));
792
+ c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
793
+ c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
794
+ } else { /* if (0 == ((int)0xffe00000 & chr)) { */
795
+ /* 4-byte/21-bit utf8 code point
796
+ * (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
797
+ c[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)(chr >> 18));
798
+ c[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
799
+ c[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
800
+ c[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
664
801
  }
665
802
 
666
- // we've created a 2 utf8 codepoint string in c that is
667
- // the utf8 character asked for by chr, and a null
668
- // terminating byte
803
+ /* we've created a 2 utf8 codepoint string in c that is
804
+ * the utf8 character asked for by chr, and a null
805
+ * terminating byte */
669
806
 
670
- while ('\0' != *s) {
807
+ while ('\0' != *src) {
671
808
  size_t offset = 0;
672
809
 
673
- while (s[offset] == c[offset]) {
810
+ while ((src[offset] == c[offset]) && ('\0' != src[offset])) {
674
811
  offset++;
675
812
  }
676
813
 
677
814
  if ('\0' == c[offset]) {
678
- // we found a matching utf8 code point
679
- match = s;
680
- s += offset;
815
+ /* we found a matching utf8 code point */
816
+ match = (utf8_int8_t *)src;
817
+ src += offset;
818
+
819
+ if ('\0' == *src) {
820
+ break;
821
+ }
681
822
  } else {
682
- s += offset;
823
+ src += offset;
683
824
 
684
- // need to march s along to next utf8 codepoint start
685
- // (the next byte that doesn't match 0b10xxxxxx)
686
- if ('\0' != *s) {
825
+ /* need to march s along to next utf8 codepoint start
826
+ * (the next byte that doesn't match 0b10xxxxxx) */
827
+ if ('\0' != *src) {
687
828
  do {
688
- s++;
689
- } while (0x80 == (0xc0 & *s));
829
+ src++;
830
+ } while (0x80 == (0xc0 & *src));
690
831
  }
691
832
  }
692
833
  }
693
834
 
694
- // return the last match we found (or 0 if no match was found)
695
- return (void *)match;
835
+ /* return the last match we found (or 0 if no match was found) */
836
+ return match;
696
837
  }
697
838
 
698
- void *utf8pbrk(const void *str, const void *accept) {
699
- const char *s = (const char *)str;
700
-
701
- while ('\0' != *s) {
702
- const char *a = (const char *)accept;
839
+ utf8_constexpr14_impl utf8_int8_t *utf8pbrk(const utf8_int8_t *str,
840
+ const utf8_int8_t *accept) {
841
+ while ('\0' != *str) {
842
+ const utf8_int8_t *a = accept;
703
843
  size_t offset = 0;
704
844
 
705
845
  while ('\0' != *a) {
706
- // checking that if *a is the start of a utf8 codepoint
707
- // (it is not 0b10xxxxxx) and we have successfully matched
708
- // a previous character (0 < offset) - we found a match
846
+ /* checking that if *a is the start of a utf8 codepoint
847
+ * (it is not 0b10xxxxxx) and we have successfully matched
848
+ * a previous character (0 < offset) - we found a match */
709
849
  if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
710
- return (void *)s;
850
+ return (utf8_int8_t *)str;
711
851
  } else {
712
- if (*a == s[offset]) {
713
- // part of a utf8 codepoint matched, so move our checking
714
- // onwards to the next byte
852
+ if (*a == str[offset]) {
853
+ /* part of a utf8 codepoint matched, so move our checking
854
+ * onwards to the next byte */
715
855
  offset++;
716
856
  a++;
717
857
  } else {
718
- // r could be in the middle of an unmatching utf8 code point,
719
- // so we need to march it on to the next character beginning,
858
+ /* r could be in the middle of an unmatching utf8 code point,
859
+ * so we need to march it on to the next character beginning, */
720
860
 
721
861
  do {
722
862
  a++;
723
863
  } while (0x80 == (0xc0 & *a));
724
864
 
725
- // reset offset too as we found a mismatch
865
+ /* reset offset too as we found a mismatch */
726
866
  offset = 0;
727
867
  }
728
868
  }
729
869
  }
730
870
 
731
- // we found a match on the last utf8 codepoint
871
+ /* we found a match on the last utf8 codepoint */
732
872
  if (0 < offset) {
733
- return (void *)s;
873
+ return (utf8_int8_t *)str;
734
874
  }
735
875
 
736
- // the current utf8 codepoint in src did not match accept, but src
737
- // could have been partway through a utf8 codepoint, so we need to
738
- // march it onto the next utf8 codepoint starting byte
876
+ /* the current utf8 codepoint in src did not match accept, but src
877
+ * could have been partway through a utf8 codepoint, so we need to
878
+ * march it onto the next utf8 codepoint starting byte */
739
879
  do {
740
- s++;
741
- } while ((0x80 == (0xc0 & *s)));
880
+ str++;
881
+ } while ((0x80 == (0xc0 & *str)));
742
882
  }
743
883
 
744
884
  return utf8_null;
745
885
  }
746
886
 
747
- size_t utf8size(const void *str) {
748
- const char *s = (const char *)str;
887
+ utf8_constexpr14_impl size_t utf8size(const utf8_int8_t *str) {
888
+ return utf8size_lazy(str) + 1;
889
+ }
890
+
891
+ utf8_constexpr14_impl size_t utf8size_lazy(const utf8_int8_t *str) {
892
+ return utf8nsize_lazy(str, SIZE_MAX);
893
+ }
894
+
895
+ utf8_constexpr14_impl size_t utf8nsize_lazy(const utf8_int8_t *str, size_t n) {
749
896
  size_t size = 0;
750
- while ('\0' != s[size]) {
897
+ while (size < n && '\0' != str[size]) {
751
898
  size++;
752
899
  }
753
-
754
- // we are including the null terminating byte in the size calculation
755
- size++;
756
900
  return size;
757
901
  }
758
902
 
759
- size_t utf8spn(const void *src, const void *accept) {
760
- const char *s = (const char *)src;
903
+ utf8_constexpr14_impl size_t utf8spn(const utf8_int8_t *src,
904
+ const utf8_int8_t *accept) {
761
905
  size_t chars = 0;
762
906
 
763
- while ('\0' != *s) {
764
- const char *a = (const char *)accept;
907
+ while ('\0' != *src) {
908
+ const utf8_int8_t *a = accept;
765
909
  size_t offset = 0;
766
910
 
767
911
  while ('\0' != *a) {
768
- // checking that if *r is the start of a utf8 codepoint
769
- // (it is not 0b10xxxxxx) and we have successfully matched
770
- // a previous character (0 < offset) - we found a match
912
+ /* checking that if *r is the start of a utf8 codepoint
913
+ * (it is not 0b10xxxxxx) and we have successfully matched
914
+ * a previous character (0 < offset) - we found a match */
771
915
  if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
772
- // found a match, so increment the number of utf8 codepoints
773
- // that have matched and stop checking whether any other utf8
774
- // codepoints in a match
916
+ /* found a match, so increment the number of utf8 codepoints
917
+ * that have matched and stop checking whether any other utf8
918
+ * codepoints in a match */
775
919
  chars++;
776
- s += offset;
920
+ src += offset;
921
+ offset = 0;
777
922
  break;
778
923
  } else {
779
- if (*a == s[offset]) {
924
+ if (*a == src[offset]) {
780
925
  offset++;
781
926
  a++;
782
927
  } else {
783
- // a could be in the middle of an unmatching utf8 codepoint,
784
- // so we need to march it on to the next character beginning,
928
+ /* a could be in the middle of an unmatching utf8 codepoint,
929
+ * so we need to march it on to the next character beginning, */
785
930
  do {
786
931
  a++;
787
932
  } while (0x80 == (0xc0 & *a));
788
933
 
789
- // reset offset too as we found a mismatch
934
+ /* reset offset too as we found a mismatch */
790
935
  offset = 0;
791
936
  }
792
937
  }
793
938
  }
794
939
 
795
- // if a got to its terminating null byte, then we didn't find a match.
796
- // Return the current number of matched utf8 codepoints
940
+ /* found a match at the end of *a, so didn't get a chance to test it */
941
+ if (0 < offset) {
942
+ chars++;
943
+ src += offset;
944
+ continue;
945
+ }
946
+
947
+ /* if a got to its terminating null byte, then we didn't find a match.
948
+ * Return the current number of matched utf8 codepoints */
797
949
  if ('\0' == *a) {
798
950
  return chars;
799
951
  }
@@ -802,302 +954,405 @@ size_t utf8spn(const void *src, const void *accept) {
802
954
  return chars;
803
955
  }
804
956
 
805
- void *utf8str(const void *haystack, const void *needle) {
806
- const char *h = (const char *)haystack;
957
+ utf8_constexpr14_impl utf8_int8_t *utf8str(const utf8_int8_t *haystack,
958
+ const utf8_int8_t *needle) {
959
+ utf8_int32_t throwaway_codepoint = 0;
807
960
 
808
- // if needle has no utf8 codepoints before the null terminating
809
- // byte then return haystack
810
- if ('\0' == *((const char *)needle)) {
811
- return (void *)haystack;
961
+ /* if needle has no utf8 codepoints before the null terminating
962
+ * byte then return haystack */
963
+ if ('\0' == *needle) {
964
+ return (utf8_int8_t *)haystack;
812
965
  }
813
966
 
814
- while ('\0' != *h) {
815
- const char *maybeMatch = h;
816
- const char *n = (const char *)needle;
967
+ while ('\0' != *haystack) {
968
+ const utf8_int8_t *maybeMatch = haystack;
969
+ const utf8_int8_t *n = needle;
817
970
 
818
- while (*h == *n && (*h != '\0' && *n != '\0')) {
971
+ while (*haystack == *n && (*haystack != '\0' && *n != '\0')) {
819
972
  n++;
820
- h++;
973
+ haystack++;
821
974
  }
822
975
 
823
976
  if ('\0' == *n) {
824
- // we found the whole utf8 string for needle in haystack at
825
- // maybeMatch, so return it
826
- return (void *)maybeMatch;
977
+ /* we found the whole utf8 string for needle in haystack at
978
+ * maybeMatch, so return it */
979
+ return (utf8_int8_t *)maybeMatch;
827
980
  } else {
828
- // h could be in the middle of an unmatching utf8 codepoint,
829
- // so we need to march it on to the next character beginning,
830
- if ('\0' != *h) {
831
- do {
832
- h++;
833
- } while (0x80 == (0xc0 & *h));
834
- }
981
+ /* h could be in the middle of an unmatching utf8 codepoint,
982
+ * so we need to march it on to the next character beginning
983
+ * starting from the current character */
984
+ haystack = utf8codepoint(maybeMatch, &throwaway_codepoint);
835
985
  }
836
986
  }
837
987
 
838
- // no match
988
+ /* no match */
839
989
  return utf8_null;
840
990
  }
841
991
 
842
- void *utf8casestr(const void *haystack, const void *needle) {
843
- const void *h = haystack;
844
-
845
- // if needle has no utf8 codepoints before the null terminating
846
- // byte then return haystack
847
- if ('\0' == *((const char *)needle)) {
848
- return (void *)haystack;
992
+ utf8_constexpr14_impl utf8_int8_t *utf8casestr(const utf8_int8_t *haystack,
993
+ const utf8_int8_t *needle) {
994
+ /* if needle has no utf8 codepoints before the null terminating
995
+ * byte then return haystack */
996
+ if ('\0' == *needle) {
997
+ return (utf8_int8_t *)haystack;
849
998
  }
850
999
 
851
1000
  for (;;) {
852
- const void *maybeMatch = h;
853
- const void *n = needle;
854
- utf8_int32_t h_cp, n_cp;
1001
+ const utf8_int8_t *maybeMatch = haystack;
1002
+ const utf8_int8_t *n = needle;
1003
+ utf8_int32_t h_cp = 0, n_cp = 0;
855
1004
 
856
- h = utf8codepoint(h, &h_cp);
1005
+ /* Get the next code point and track it */
1006
+ const utf8_int8_t *nextH = haystack = utf8codepoint(haystack, &h_cp);
857
1007
  n = utf8codepoint(n, &n_cp);
858
1008
 
859
1009
  while ((0 != h_cp) && (0 != n_cp)) {
860
1010
  h_cp = utf8lwrcodepoint(h_cp);
861
1011
  n_cp = utf8lwrcodepoint(n_cp);
862
1012
 
863
- // if we find a mismatch, bail out!
1013
+ /* if we find a mismatch, bail out! */
864
1014
  if (h_cp != n_cp) {
865
1015
  break;
866
1016
  }
867
1017
 
868
- h = utf8codepoint(h, &h_cp);
1018
+ haystack = utf8codepoint(haystack, &h_cp);
869
1019
  n = utf8codepoint(n, &n_cp);
870
1020
  }
871
1021
 
872
1022
  if (0 == n_cp) {
873
- // we found the whole utf8 string for needle in haystack at
874
- // maybeMatch, so return it
875
- return (void *)maybeMatch;
1023
+ /* we found the whole utf8 string for needle in haystack at
1024
+ * maybeMatch, so return it */
1025
+ return (utf8_int8_t *)maybeMatch;
876
1026
  }
877
1027
 
878
1028
  if (0 == h_cp) {
879
- // no match
1029
+ /* no match */
880
1030
  return utf8_null;
881
1031
  }
1032
+
1033
+ /* Roll back to the next code point in the haystack to test */
1034
+ haystack = nextH;
882
1035
  }
883
1036
  }
884
1037
 
885
- void *utf8valid(const void *str) {
886
- const char *s = (const char *)str;
1038
+ utf8_constexpr14_impl utf8_int8_t *utf8valid(const utf8_int8_t *str) {
1039
+ return utf8nvalid(str, SIZE_MAX);
1040
+ }
1041
+
1042
+ utf8_constexpr14_impl utf8_int8_t *utf8nvalid(const utf8_int8_t *str,
1043
+ size_t n) {
1044
+ const utf8_int8_t *t = str;
1045
+ size_t consumed = 0;
1046
+
1047
+ while ((void)(consumed = (size_t)(str - t)), consumed < n && '\0' != *str) {
1048
+ const size_t remaining = n - consumed;
1049
+
1050
+ if (0xf0 == (0xf8 & *str)) {
1051
+ /* ensure that there's 4 bytes or more remaining */
1052
+ if (remaining < 4) {
1053
+ return (utf8_int8_t *)str;
1054
+ }
1055
+
1056
+ /* ensure each of the 3 following bytes in this 4-byte
1057
+ * utf8 codepoint began with 0b10xxxxxx */
1058
+ if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2])) ||
1059
+ (0x80 != (0xc0 & str[3]))) {
1060
+ return (utf8_int8_t *)str;
1061
+ }
1062
+
1063
+ /* ensure that our utf8 codepoint ended after 4 bytes */
1064
+ if ((remaining != 4) && (0x80 == (0xc0 & str[4]))) {
1065
+ return (utf8_int8_t *)str;
1066
+ }
887
1067
 
888
- while ('\0' != *s) {
889
- if (0xf0 == (0xf8 & *s)) {
890
- // ensure each of the 3 following bytes in this 4-byte
891
- // utf8 codepoint began with 0b10xxxxxx
892
- if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
893
- (0x80 != (0xc0 & s[3]))) {
894
- return (void *)s;
1068
+ /* ensure that the top 5 bits of this 4-byte utf8
1069
+ * codepoint were not 0, as then we could have used
1070
+ * one of the smaller encodings */
1071
+ if ((0 == (0x07 & str[0])) && (0 == (0x30 & str[1]))) {
1072
+ return (utf8_int8_t *)str;
895
1073
  }
896
1074
 
897
- // ensure that our utf8 codepoint ended after 4 bytes
898
- if (0x80 == (0xc0 & s[4])) {
899
- return (void *)s;
1075
+ /* 4-byte utf8 code point (began with 0b11110xxx) */
1076
+ str += 4;
1077
+ } else if (0xe0 == (0xf0 & *str)) {
1078
+ /* ensure that there's 3 bytes or more remaining */
1079
+ if (remaining < 3) {
1080
+ return (utf8_int8_t *)str;
900
1081
  }
901
1082
 
902
- // ensure that the top 5 bits of this 4-byte utf8
903
- // codepoint were not 0, as then we could have used
904
- // one of the smaller encodings
905
- if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
906
- return (void *)s;
1083
+ /* ensure each of the 2 following bytes in this 3-byte
1084
+ * utf8 codepoint began with 0b10xxxxxx */
1085
+ if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2]))) {
1086
+ return (utf8_int8_t *)str;
907
1087
  }
908
1088
 
909
- // 4-byte utf8 code point (began with 0b11110xxx)
910
- s += 4;
911
- } else if (0xe0 == (0xf0 & *s)) {
912
- // ensure each of the 2 following bytes in this 3-byte
913
- // utf8 codepoint began with 0b10xxxxxx
914
- if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
915
- return (void *)s;
1089
+ /* ensure that our utf8 codepoint ended after 3 bytes */
1090
+ if ((remaining != 3) && (0x80 == (0xc0 & str[3]))) {
1091
+ return (utf8_int8_t *)str;
916
1092
  }
917
1093
 
918
- // ensure that our utf8 codepoint ended after 3 bytes
919
- if (0x80 == (0xc0 & s[3])) {
920
- return (void *)s;
1094
+ /* ensure that the top 5 bits of this 3-byte utf8
1095
+ * codepoint were not 0, as then we could have used
1096
+ * one of the smaller encodings */
1097
+ if ((0 == (0x0f & str[0])) && (0 == (0x20 & str[1]))) {
1098
+ return (utf8_int8_t *)str;
921
1099
  }
922
1100
 
923
- // ensure that the top 5 bits of this 3-byte utf8
924
- // codepoint were not 0, as then we could have used
925
- // one of the smaller encodings
926
- if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
927
- return (void *)s;
1101
+ /* 3-byte utf8 code point (began with 0b1110xxxx) */
1102
+ str += 3;
1103
+ } else if (0xc0 == (0xe0 & *str)) {
1104
+ /* ensure that there's 2 bytes or more remaining */
1105
+ if (remaining < 2) {
1106
+ return (utf8_int8_t *)str;
928
1107
  }
929
1108
 
930
- // 3-byte utf8 code point (began with 0b1110xxxx)
931
- s += 3;
932
- } else if (0xc0 == (0xe0 & *s)) {
933
- // ensure the 1 following byte in this 2-byte
934
- // utf8 codepoint began with 0b10xxxxxx
935
- if (0x80 != (0xc0 & s[1])) {
936
- return (void *)s;
1109
+ /* ensure the 1 following byte in this 2-byte
1110
+ * utf8 codepoint began with 0b10xxxxxx */
1111
+ if (0x80 != (0xc0 & str[1])) {
1112
+ return (utf8_int8_t *)str;
937
1113
  }
938
1114
 
939
- // ensure that our utf8 codepoint ended after 2 bytes
940
- if (0x80 == (0xc0 & s[2])) {
941
- return (void *)s;
1115
+ /* ensure that our utf8 codepoint ended after 2 bytes */
1116
+ if ((remaining != 2) && (0x80 == (0xc0 & str[2]))) {
1117
+ return (utf8_int8_t *)str;
942
1118
  }
943
1119
 
944
- // ensure that the top 4 bits of this 2-byte utf8
945
- // codepoint were not 0, as then we could have used
946
- // one of the smaller encodings
947
- if (0 == (0x1e & s[0])) {
948
- return (void *)s;
1120
+ /* ensure that the top 4 bits of this 2-byte utf8
1121
+ * codepoint were not 0, as then we could have used
1122
+ * one of the smaller encodings */
1123
+ if (0 == (0x1e & str[0])) {
1124
+ return (utf8_int8_t *)str;
949
1125
  }
950
1126
 
951
- // 2-byte utf8 code point (began with 0b110xxxxx)
952
- s += 2;
953
- } else if (0x00 == (0x80 & *s)) {
954
- // 1-byte ascii (began with 0b0xxxxxxx)
955
- s += 1;
1127
+ /* 2-byte utf8 code point (began with 0b110xxxxx) */
1128
+ str += 2;
1129
+ } else if (0x00 == (0x80 & *str)) {
1130
+ /* 1-byte ascii (began with 0b0xxxxxxx) */
1131
+ str += 1;
956
1132
  } else {
957
- // we have an invalid 0b1xxxxxxx utf8 code point entry
958
- return (void *)s;
1133
+ /* we have an invalid 0b1xxxxxxx utf8 code point entry */
1134
+ return (utf8_int8_t *)str;
959
1135
  }
960
1136
  }
961
1137
 
962
1138
  return utf8_null;
963
1139
  }
964
1140
 
965
- void *utf8codepoint(const void *utf8_restrict str,
966
- utf8_int32_t *utf8_restrict out_codepoint) {
967
- const char *s = (const char *)str;
1141
+ int utf8makevalid(utf8_int8_t *str, const utf8_int32_t replacement) {
1142
+ utf8_int8_t *read = str;
1143
+ utf8_int8_t *write = read;
1144
+ const utf8_int8_t r = (utf8_int8_t)replacement;
1145
+ utf8_int32_t codepoint = 0;
968
1146
 
969
- if (0xf0 == (0xf8 & s[0])) {
970
- // 4 byte utf8 codepoint
971
- *out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) |
972
- ((0x3f & s[2]) << 6) | (0x3f & s[3]);
973
- s += 4;
974
- } else if (0xe0 == (0xf0 & s[0])) {
975
- // 3 byte utf8 codepoint
1147
+ if (replacement > 0x7f) {
1148
+ return -1;
1149
+ }
1150
+
1151
+ while ('\0' != *read) {
1152
+ if (0xf0 == (0xf8 & *read)) {
1153
+ /* ensure each of the 3 following bytes in this 4-byte
1154
+ * utf8 codepoint began with 0b10xxxxxx */
1155
+ if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2])) ||
1156
+ (0x80 != (0xc0 & read[3]))) {
1157
+ *write++ = r;
1158
+ read++;
1159
+ continue;
1160
+ }
1161
+
1162
+ /* 4-byte utf8 code point (began with 0b11110xxx) */
1163
+ read = utf8codepoint(read, &codepoint);
1164
+ write = utf8catcodepoint(write, codepoint, 4);
1165
+ } else if (0xe0 == (0xf0 & *read)) {
1166
+ /* ensure each of the 2 following bytes in this 3-byte
1167
+ * utf8 codepoint began with 0b10xxxxxx */
1168
+ if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2]))) {
1169
+ *write++ = r;
1170
+ read++;
1171
+ continue;
1172
+ }
1173
+
1174
+ /* 3-byte utf8 code point (began with 0b1110xxxx) */
1175
+ read = utf8codepoint(read, &codepoint);
1176
+ write = utf8catcodepoint(write, codepoint, 3);
1177
+ } else if (0xc0 == (0xe0 & *read)) {
1178
+ /* ensure the 1 following byte in this 2-byte
1179
+ * utf8 codepoint began with 0b10xxxxxx */
1180
+ if (0x80 != (0xc0 & read[1])) {
1181
+ *write++ = r;
1182
+ read++;
1183
+ continue;
1184
+ }
1185
+
1186
+ /* 2-byte utf8 code point (began with 0b110xxxxx) */
1187
+ read = utf8codepoint(read, &codepoint);
1188
+ write = utf8catcodepoint(write, codepoint, 2);
1189
+ } else if (0x00 == (0x80 & *read)) {
1190
+ /* 1-byte ascii (began with 0b0xxxxxxx) */
1191
+ read = utf8codepoint(read, &codepoint);
1192
+ write = utf8catcodepoint(write, codepoint, 1);
1193
+ } else {
1194
+ /* if we got here then we've got a dangling continuation (0b10xxxxxx) */
1195
+ *write++ = r;
1196
+ read++;
1197
+ continue;
1198
+ }
1199
+ }
1200
+
1201
+ *write = '\0';
1202
+
1203
+ return 0;
1204
+ }
1205
+
1206
+ utf8_constexpr14_impl utf8_int8_t *
1207
+ utf8codepoint(const utf8_int8_t *utf8_restrict str,
1208
+ utf8_int32_t *utf8_restrict out_codepoint) {
1209
+ if (0xf0 == (0xf8 & str[0])) {
1210
+ /* 4 byte utf8 codepoint */
1211
+ *out_codepoint = ((0x07 & str[0]) << 18) | ((0x3f & str[1]) << 12) |
1212
+ ((0x3f & str[2]) << 6) | (0x3f & str[3]);
1213
+ str += 4;
1214
+ } else if (0xe0 == (0xf0 & str[0])) {
1215
+ /* 3 byte utf8 codepoint */
976
1216
  *out_codepoint =
977
- ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
978
- s += 3;
979
- } else if (0xc0 == (0xe0 & s[0])) {
980
- // 2 byte utf8 codepoint
981
- *out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
982
- s += 2;
1217
+ ((0x0f & str[0]) << 12) | ((0x3f & str[1]) << 6) | (0x3f & str[2]);
1218
+ str += 3;
1219
+ } else if (0xc0 == (0xe0 & str[0])) {
1220
+ /* 2 byte utf8 codepoint */
1221
+ *out_codepoint = ((0x1f & str[0]) << 6) | (0x3f & str[1]);
1222
+ str += 2;
983
1223
  } else {
984
- // 1 byte utf8 codepoint otherwise
985
- *out_codepoint = s[0];
986
- s += 1;
1224
+ /* 1 byte utf8 codepoint otherwise */
1225
+ *out_codepoint = str[0];
1226
+ str += 1;
987
1227
  }
988
1228
 
989
- return (void *)s;
1229
+ return (utf8_int8_t *)str;
990
1230
  }
991
1231
 
992
- size_t utf8codepointsize(utf8_int32_t chr) {
1232
+ utf8_constexpr14_impl size_t utf8codepointcalcsize(const utf8_int8_t *str) {
1233
+ if (0xf0 == (0xf8 & str[0])) {
1234
+ /* 4 byte utf8 codepoint */
1235
+ return 4;
1236
+ } else if (0xe0 == (0xf0 & str[0])) {
1237
+ /* 3 byte utf8 codepoint */
1238
+ return 3;
1239
+ } else if (0xc0 == (0xe0 & str[0])) {
1240
+ /* 2 byte utf8 codepoint */
1241
+ return 2;
1242
+ }
1243
+
1244
+ /* 1 byte utf8 codepoint otherwise */
1245
+ return 1;
1246
+ }
1247
+
1248
+ utf8_constexpr14_impl size_t utf8codepointsize(utf8_int32_t chr) {
993
1249
  if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
994
1250
  return 1;
995
1251
  } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
996
1252
  return 2;
997
1253
  } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
998
1254
  return 3;
999
- } else { // if (0 == ((int)0xffe00000 & chr)) {
1255
+ } else { /* if (0 == ((int)0xffe00000 & chr)) { */
1000
1256
  return 4;
1001
1257
  }
1002
1258
  }
1003
1259
 
1004
- void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n) {
1005
- char *s = (char *)str;
1006
-
1260
+ utf8_int8_t *utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n) {
1007
1261
  if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
1008
- // 1-byte/7-bit ascii
1009
- // (0b0xxxxxxx)
1262
+ /* 1-byte/7-bit ascii
1263
+ * (0b0xxxxxxx) */
1010
1264
  if (n < 1) {
1011
1265
  return utf8_null;
1012
1266
  }
1013
- s[0] = (char)chr;
1014
- s += 1;
1267
+ str[0] = (utf8_int8_t)chr;
1268
+ str += 1;
1015
1269
  } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
1016
- // 2-byte/11-bit utf8 code point
1017
- // (0b110xxxxx 0b10xxxxxx)
1270
+ /* 2-byte/11-bit utf8 code point
1271
+ * (0b110xxxxx 0b10xxxxxx) */
1018
1272
  if (n < 2) {
1019
1273
  return utf8_null;
1020
1274
  }
1021
- s[0] = 0xc0 | (char)(chr >> 6);
1022
- s[1] = 0x80 | (char)(chr & 0x3f);
1023
- s += 2;
1275
+ str[0] = (utf8_int8_t)(0xc0 | (utf8_int8_t)((chr >> 6) & 0x1f));
1276
+ str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
1277
+ str += 2;
1024
1278
  } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
1025
- // 3-byte/16-bit utf8 code point
1026
- // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
1279
+ /* 3-byte/16-bit utf8 code point
1280
+ * (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
1027
1281
  if (n < 3) {
1028
1282
  return utf8_null;
1029
1283
  }
1030
- s[0] = 0xe0 | (char)(chr >> 12);
1031
- s[1] = 0x80 | (char)((chr >> 6) & 0x3f);
1032
- s[2] = 0x80 | (char)(chr & 0x3f);
1033
- s += 3;
1034
- } else { // if (0 == ((int)0xffe00000 & chr)) {
1035
- // 4-byte/21-bit utf8 code point
1036
- // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
1284
+ str[0] = (utf8_int8_t)(0xe0 | (utf8_int8_t)((chr >> 12) & 0x0f));
1285
+ str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
1286
+ str[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
1287
+ str += 3;
1288
+ } else { /* if (0 == ((int)0xffe00000 & chr)) { */
1289
+ /* 4-byte/21-bit utf8 code point
1290
+ * (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) */
1037
1291
  if (n < 4) {
1038
1292
  return utf8_null;
1039
1293
  }
1040
- s[0] = 0xf0 | (char)(chr >> 18);
1041
- s[1] = 0x80 | (char)((chr >> 12) & 0x3f);
1042
- s[2] = 0x80 | (char)((chr >> 6) & 0x3f);
1043
- s[3] = 0x80 | (char)(chr & 0x3f);
1044
- s += 4;
1294
+ str[0] = (utf8_int8_t)(0xf0 | (utf8_int8_t)((chr >> 18) & 0x07));
1295
+ str[1] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 12) & 0x3f));
1296
+ str[2] = (utf8_int8_t)(0x80 | (utf8_int8_t)((chr >> 6) & 0x3f));
1297
+ str[3] = (utf8_int8_t)(0x80 | (utf8_int8_t)(chr & 0x3f));
1298
+ str += 4;
1045
1299
  }
1046
1300
 
1047
- return s;
1301
+ return str;
1048
1302
  }
1049
1303
 
1050
- int utf8islower(utf8_int32_t chr) { return chr != utf8uprcodepoint(chr); }
1051
-
1052
- int utf8isupper(utf8_int32_t chr) { return chr != utf8lwrcodepoint(chr); }
1304
+ utf8_constexpr14_impl int utf8islower(utf8_int32_t chr) {
1305
+ return chr != utf8uprcodepoint(chr);
1306
+ }
1053
1307
 
1054
- void utf8lwr(void *utf8_restrict str) {
1055
- void *p, *pn;
1056
- utf8_int32_t cp;
1308
+ utf8_constexpr14_impl int utf8isupper(utf8_int32_t chr) {
1309
+ return chr != utf8lwrcodepoint(chr);
1310
+ }
1057
1311
 
1058
- p = (char *)str;
1059
- pn = utf8codepoint(p, &cp);
1312
+ void utf8lwr(utf8_int8_t *utf8_restrict str) {
1313
+ utf8_int32_t cp = 0;
1314
+ utf8_int8_t *pn = utf8codepoint(str, &cp);
1060
1315
 
1061
1316
  while (cp != 0) {
1062
1317
  const utf8_int32_t lwr_cp = utf8lwrcodepoint(cp);
1063
1318
  const size_t size = utf8codepointsize(lwr_cp);
1064
1319
 
1065
1320
  if (lwr_cp != cp) {
1066
- utf8catcodepoint(p, lwr_cp, size);
1321
+ utf8catcodepoint(str, lwr_cp, size);
1067
1322
  }
1068
1323
 
1069
- p = pn;
1070
- pn = utf8codepoint(p, &cp);
1324
+ str = pn;
1325
+ pn = utf8codepoint(str, &cp);
1071
1326
  }
1072
1327
  }
1073
1328
 
1074
- void utf8upr(void *utf8_restrict str) {
1075
- void *p, *pn;
1076
- utf8_int32_t cp;
1077
-
1078
- p = (char *)str;
1079
- pn = utf8codepoint(p, &cp);
1329
+ void utf8upr(utf8_int8_t *utf8_restrict str) {
1330
+ utf8_int32_t cp = 0;
1331
+ utf8_int8_t *pn = utf8codepoint(str, &cp);
1080
1332
 
1081
1333
  while (cp != 0) {
1082
1334
  const utf8_int32_t lwr_cp = utf8uprcodepoint(cp);
1083
1335
  const size_t size = utf8codepointsize(lwr_cp);
1084
1336
 
1085
1337
  if (lwr_cp != cp) {
1086
- utf8catcodepoint(p, lwr_cp, size);
1338
+ utf8catcodepoint(str, lwr_cp, size);
1087
1339
  }
1088
1340
 
1089
- p = pn;
1090
- pn = utf8codepoint(p, &cp);
1341
+ str = pn;
1342
+ pn = utf8codepoint(str, &cp);
1091
1343
  }
1092
1344
  }
1093
1345
 
1094
- utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
1346
+ utf8_constexpr14_impl utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
1095
1347
  if (((0x0041 <= cp) && (0x005a >= cp)) ||
1096
1348
  ((0x00c0 <= cp) && (0x00d6 >= cp)) ||
1097
1349
  ((0x00d8 <= cp) && (0x00de >= cp)) ||
1098
1350
  ((0x0391 <= cp) && (0x03a1 >= cp)) ||
1099
- ((0x03a3 <= cp) && (0x03ab >= cp))) {
1351
+ ((0x03a3 <= cp) && (0x03ab >= cp)) ||
1352
+ ((0x0410 <= cp) && (0x042f >= cp))) {
1100
1353
  cp += 32;
1354
+ } else if ((0x0400 <= cp) && (0x040f >= cp)) {
1355
+ cp += 80;
1101
1356
  } else if (((0x0100 <= cp) && (0x012f >= cp)) ||
1102
1357
  ((0x0132 <= cp) && (0x0137 >= cp)) ||
1103
1358
  ((0x014a <= cp) && (0x0177 >= cp)) ||
@@ -1107,7 +1362,9 @@ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
1107
1362
  ((0x01f8 <= cp) && (0x021f >= cp)) ||
1108
1363
  ((0x0222 <= cp) && (0x0233 >= cp)) ||
1109
1364
  ((0x0246 <= cp) && (0x024f >= cp)) ||
1110
- ((0x03d8 <= cp) && (0x03ef >= cp))) {
1365
+ ((0x03d8 <= cp) && (0x03ef >= cp)) ||
1366
+ ((0x0460 <= cp) && (0x0481 >= cp)) ||
1367
+ ((0x048a <= cp) && (0x04ff >= cp))) {
1111
1368
  cp |= 0x1;
1112
1369
  } else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
1113
1370
  ((0x0179 <= cp) && (0x017e >= cp)) ||
@@ -1118,62 +1375,147 @@ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
1118
1375
  cp &= ~0x1;
1119
1376
  } else {
1120
1377
  switch (cp) {
1121
- default: break;
1122
- case 0x0178: cp = 0x00ff; break;
1123
- case 0x0243: cp = 0x0180; break;
1124
- case 0x018e: cp = 0x01dd; break;
1125
- case 0x023d: cp = 0x019a; break;
1126
- case 0x0220: cp = 0x019e; break;
1127
- case 0x01b7: cp = 0x0292; break;
1128
- case 0x01c4: cp = 0x01c6; break;
1129
- case 0x01c7: cp = 0x01c9; break;
1130
- case 0x01ca: cp = 0x01cc; break;
1131
- case 0x01f1: cp = 0x01f3; break;
1132
- case 0x01f7: cp = 0x01bf; break;
1133
- case 0x0187: cp = 0x0188; break;
1134
- case 0x018b: cp = 0x018c; break;
1135
- case 0x0191: cp = 0x0192; break;
1136
- case 0x0198: cp = 0x0199; break;
1137
- case 0x01a7: cp = 0x01a8; break;
1138
- case 0x01ac: cp = 0x01ad; break;
1139
- case 0x01af: cp = 0x01b0; break;
1140
- case 0x01b8: cp = 0x01b9; break;
1141
- case 0x01bc: cp = 0x01bd; break;
1142
- case 0x01f4: cp = 0x01f5; break;
1143
- case 0x023b: cp = 0x023c; break;
1144
- case 0x0241: cp = 0x0242; break;
1145
- case 0x03fd: cp = 0x037b; break;
1146
- case 0x03fe: cp = 0x037c; break;
1147
- case 0x03ff: cp = 0x037d; break;
1148
- case 0x037f: cp = 0x03f3; break;
1149
- case 0x0386: cp = 0x03ac; break;
1150
- case 0x0388: cp = 0x03ad; break;
1151
- case 0x0389: cp = 0x03ae; break;
1152
- case 0x038a: cp = 0x03af; break;
1153
- case 0x038c: cp = 0x03cc; break;
1154
- case 0x038e: cp = 0x03cd; break;
1155
- case 0x038f: cp = 0x03ce; break;
1156
- case 0x0370: cp = 0x0371; break;
1157
- case 0x0372: cp = 0x0373; break;
1158
- case 0x0376: cp = 0x0377; break;
1159
- case 0x03f4: cp = 0x03d1; break;
1160
- case 0x03cf: cp = 0x03d7; break;
1161
- case 0x03f9: cp = 0x03f2; break;
1162
- case 0x03f7: cp = 0x03f8; break;
1163
- case 0x03fa: cp = 0x03fb; break;
1164
- };
1378
+ default:
1379
+ break;
1380
+ case 0x0178:
1381
+ cp = 0x00ff;
1382
+ break;
1383
+ case 0x0243:
1384
+ cp = 0x0180;
1385
+ break;
1386
+ case 0x018e:
1387
+ cp = 0x01dd;
1388
+ break;
1389
+ case 0x023d:
1390
+ cp = 0x019a;
1391
+ break;
1392
+ case 0x0220:
1393
+ cp = 0x019e;
1394
+ break;
1395
+ case 0x01b7:
1396
+ cp = 0x0292;
1397
+ break;
1398
+ case 0x01c4:
1399
+ cp = 0x01c6;
1400
+ break;
1401
+ case 0x01c7:
1402
+ cp = 0x01c9;
1403
+ break;
1404
+ case 0x01ca:
1405
+ cp = 0x01cc;
1406
+ break;
1407
+ case 0x01f1:
1408
+ cp = 0x01f3;
1409
+ break;
1410
+ case 0x01f7:
1411
+ cp = 0x01bf;
1412
+ break;
1413
+ case 0x0187:
1414
+ cp = 0x0188;
1415
+ break;
1416
+ case 0x018b:
1417
+ cp = 0x018c;
1418
+ break;
1419
+ case 0x0191:
1420
+ cp = 0x0192;
1421
+ break;
1422
+ case 0x0198:
1423
+ cp = 0x0199;
1424
+ break;
1425
+ case 0x01a7:
1426
+ cp = 0x01a8;
1427
+ break;
1428
+ case 0x01ac:
1429
+ cp = 0x01ad;
1430
+ break;
1431
+ case 0x01b8:
1432
+ cp = 0x01b9;
1433
+ break;
1434
+ case 0x01bc:
1435
+ cp = 0x01bd;
1436
+ break;
1437
+ case 0x01f4:
1438
+ cp = 0x01f5;
1439
+ break;
1440
+ case 0x023b:
1441
+ cp = 0x023c;
1442
+ break;
1443
+ case 0x0241:
1444
+ cp = 0x0242;
1445
+ break;
1446
+ case 0x03fd:
1447
+ cp = 0x037b;
1448
+ break;
1449
+ case 0x03fe:
1450
+ cp = 0x037c;
1451
+ break;
1452
+ case 0x03ff:
1453
+ cp = 0x037d;
1454
+ break;
1455
+ case 0x037f:
1456
+ cp = 0x03f3;
1457
+ break;
1458
+ case 0x0386:
1459
+ cp = 0x03ac;
1460
+ break;
1461
+ case 0x0388:
1462
+ cp = 0x03ad;
1463
+ break;
1464
+ case 0x0389:
1465
+ cp = 0x03ae;
1466
+ break;
1467
+ case 0x038a:
1468
+ cp = 0x03af;
1469
+ break;
1470
+ case 0x038c:
1471
+ cp = 0x03cc;
1472
+ break;
1473
+ case 0x038e:
1474
+ cp = 0x03cd;
1475
+ break;
1476
+ case 0x038f:
1477
+ cp = 0x03ce;
1478
+ break;
1479
+ case 0x0370:
1480
+ cp = 0x0371;
1481
+ break;
1482
+ case 0x0372:
1483
+ cp = 0x0373;
1484
+ break;
1485
+ case 0x0376:
1486
+ cp = 0x0377;
1487
+ break;
1488
+ case 0x03f4:
1489
+ cp = 0x03b8;
1490
+ break;
1491
+ case 0x03cf:
1492
+ cp = 0x03d7;
1493
+ break;
1494
+ case 0x03f9:
1495
+ cp = 0x03f2;
1496
+ break;
1497
+ case 0x03f7:
1498
+ cp = 0x03f8;
1499
+ break;
1500
+ case 0x03fa:
1501
+ cp = 0x03fb;
1502
+ break;
1503
+ }
1165
1504
  }
1166
1505
 
1167
1506
  return cp;
1168
1507
  }
1169
1508
 
1170
- utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
1509
+ utf8_constexpr14_impl utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
1171
1510
  if (((0x0061 <= cp) && (0x007a >= cp)) ||
1172
1511
  ((0x00e0 <= cp) && (0x00f6 >= cp)) ||
1173
1512
  ((0x00f8 <= cp) && (0x00fe >= cp)) ||
1174
1513
  ((0x03b1 <= cp) && (0x03c1 >= cp)) ||
1175
- ((0x03c3 <= cp) && (0x03cb >= cp))) {
1514
+ ((0x03c3 <= cp) && (0x03cb >= cp)) ||
1515
+ ((0x0430 <= cp) && (0x044f >= cp))) {
1176
1516
  cp -= 32;
1517
+ } else if ((0x0450 <= cp) && (0x045f >= cp)) {
1518
+ cp -= 80;
1177
1519
  } else if (((0x0100 <= cp) && (0x012f >= cp)) ||
1178
1520
  ((0x0132 <= cp) && (0x0137 >= cp)) ||
1179
1521
  ((0x014a <= cp) && (0x0177 >= cp)) ||
@@ -1183,7 +1525,9 @@ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
1183
1525
  ((0x01f8 <= cp) && (0x021f >= cp)) ||
1184
1526
  ((0x0222 <= cp) && (0x0233 >= cp)) ||
1185
1527
  ((0x0246 <= cp) && (0x024f >= cp)) ||
1186
- ((0x03d8 <= cp) && (0x03ef >= cp))) {
1528
+ ((0x03d8 <= cp) && (0x03ef >= cp)) ||
1529
+ ((0x0460 <= cp) && (0x0481 >= cp)) ||
1530
+ ((0x048a <= cp) && (0x04ff >= cp))) {
1187
1531
  cp &= ~0x1;
1188
1532
  } else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
1189
1533
  ((0x0179 <= cp) && (0x017e >= cp)) ||
@@ -1194,64 +1538,175 @@ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
1194
1538
  cp |= 0x1;
1195
1539
  } else {
1196
1540
  switch (cp) {
1197
- default: break;
1198
- case 0x00ff: cp = 0x0178; break;
1199
- case 0x0180: cp = 0x0243; break;
1200
- case 0x01dd: cp = 0x018e; break;
1201
- case 0x019a: cp = 0x023d; break;
1202
- case 0x019e: cp = 0x0220; break;
1203
- case 0x0292: cp = 0x01b7; break;
1204
- case 0x01c6: cp = 0x01c4; break;
1205
- case 0x01c9: cp = 0x01c7; break;
1206
- case 0x01cc: cp = 0x01ca; break;
1207
- case 0x01f3: cp = 0x01f1; break;
1208
- case 0x01bf: cp = 0x01f7; break;
1209
- case 0x0188: cp = 0x0187; break;
1210
- case 0x018c: cp = 0x018b; break;
1211
- case 0x0192: cp = 0x0191; break;
1212
- case 0x0199: cp = 0x0198; break;
1213
- case 0x01a8: cp = 0x01a7; break;
1214
- case 0x01ad: cp = 0x01ac; break;
1215
- case 0x01b0: cp = 0x01af; break;
1216
- case 0x01b9: cp = 0x01b8; break;
1217
- case 0x01bd: cp = 0x01bc; break;
1218
- case 0x01f5: cp = 0x01f4; break;
1219
- case 0x023c: cp = 0x023b; break;
1220
- case 0x0242: cp = 0x0241; break;
1221
- case 0x037b: cp = 0x03fd; break;
1222
- case 0x037c: cp = 0x03fe; break;
1223
- case 0x037d: cp = 0x03ff; break;
1224
- case 0x03f3: cp = 0x037f; break;
1225
- case 0x03ac: cp = 0x0386; break;
1226
- case 0x03ad: cp = 0x0388; break;
1227
- case 0x03ae: cp = 0x0389; break;
1228
- case 0x03af: cp = 0x038a; break;
1229
- case 0x03cc: cp = 0x038c; break;
1230
- case 0x03cd: cp = 0x038e; break;
1231
- case 0x03ce: cp = 0x038f; break;
1232
- case 0x0371: cp = 0x0370; break;
1233
- case 0x0373: cp = 0x0372; break;
1234
- case 0x0377: cp = 0x0376; break;
1235
- case 0x03d1: cp = 0x03f4; break;
1236
- case 0x03d7: cp = 0x03cf; break;
1237
- case 0x03f2: cp = 0x03f9; break;
1238
- case 0x03f8: cp = 0x03f7; break;
1239
- case 0x03fb: cp = 0x03fa; break;
1240
- };
1541
+ default:
1542
+ break;
1543
+ case 0x00ff:
1544
+ cp = 0x0178;
1545
+ break;
1546
+ case 0x0180:
1547
+ cp = 0x0243;
1548
+ break;
1549
+ case 0x01dd:
1550
+ cp = 0x018e;
1551
+ break;
1552
+ case 0x019a:
1553
+ cp = 0x023d;
1554
+ break;
1555
+ case 0x019e:
1556
+ cp = 0x0220;
1557
+ break;
1558
+ case 0x0292:
1559
+ cp = 0x01b7;
1560
+ break;
1561
+ case 0x01c6:
1562
+ cp = 0x01c4;
1563
+ break;
1564
+ case 0x01c9:
1565
+ cp = 0x01c7;
1566
+ break;
1567
+ case 0x01cc:
1568
+ cp = 0x01ca;
1569
+ break;
1570
+ case 0x01f3:
1571
+ cp = 0x01f1;
1572
+ break;
1573
+ case 0x01bf:
1574
+ cp = 0x01f7;
1575
+ break;
1576
+ case 0x0188:
1577
+ cp = 0x0187;
1578
+ break;
1579
+ case 0x018c:
1580
+ cp = 0x018b;
1581
+ break;
1582
+ case 0x0192:
1583
+ cp = 0x0191;
1584
+ break;
1585
+ case 0x0199:
1586
+ cp = 0x0198;
1587
+ break;
1588
+ case 0x01a8:
1589
+ cp = 0x01a7;
1590
+ break;
1591
+ case 0x01ad:
1592
+ cp = 0x01ac;
1593
+ break;
1594
+ case 0x01b9:
1595
+ cp = 0x01b8;
1596
+ break;
1597
+ case 0x01bd:
1598
+ cp = 0x01bc;
1599
+ break;
1600
+ case 0x01f5:
1601
+ cp = 0x01f4;
1602
+ break;
1603
+ case 0x023c:
1604
+ cp = 0x023b;
1605
+ break;
1606
+ case 0x0242:
1607
+ cp = 0x0241;
1608
+ break;
1609
+ case 0x037b:
1610
+ cp = 0x03fd;
1611
+ break;
1612
+ case 0x037c:
1613
+ cp = 0x03fe;
1614
+ break;
1615
+ case 0x037d:
1616
+ cp = 0x03ff;
1617
+ break;
1618
+ case 0x03f3:
1619
+ cp = 0x037f;
1620
+ break;
1621
+ case 0x03ac:
1622
+ cp = 0x0386;
1623
+ break;
1624
+ case 0x03ad:
1625
+ cp = 0x0388;
1626
+ break;
1627
+ case 0x03ae:
1628
+ cp = 0x0389;
1629
+ break;
1630
+ case 0x03af:
1631
+ cp = 0x038a;
1632
+ break;
1633
+ case 0x03cc:
1634
+ cp = 0x038c;
1635
+ break;
1636
+ case 0x03cd:
1637
+ cp = 0x038e;
1638
+ break;
1639
+ case 0x03ce:
1640
+ cp = 0x038f;
1641
+ break;
1642
+ case 0x0371:
1643
+ cp = 0x0370;
1644
+ break;
1645
+ case 0x0373:
1646
+ cp = 0x0372;
1647
+ break;
1648
+ case 0x0377:
1649
+ cp = 0x0376;
1650
+ break;
1651
+ case 0x03d1:
1652
+ cp = 0x0398;
1653
+ break;
1654
+ case 0x03d7:
1655
+ cp = 0x03cf;
1656
+ break;
1657
+ case 0x03f2:
1658
+ cp = 0x03f9;
1659
+ break;
1660
+ case 0x03f8:
1661
+ cp = 0x03f7;
1662
+ break;
1663
+ case 0x03fb:
1664
+ cp = 0x03fa;
1665
+ break;
1666
+ }
1241
1667
  }
1242
1668
 
1243
1669
  return cp;
1244
1670
  }
1245
1671
 
1672
+ utf8_constexpr14_impl utf8_int8_t *
1673
+ utf8rcodepoint(const utf8_int8_t *utf8_restrict str,
1674
+ utf8_int32_t *utf8_restrict out_codepoint) {
1675
+ const utf8_int8_t *s = (const utf8_int8_t *)str;
1676
+
1677
+ if (0xf0 == (0xf8 & s[0])) {
1678
+ /* 4 byte utf8 codepoint */
1679
+ *out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) |
1680
+ ((0x3f & s[2]) << 6) | (0x3f & s[3]);
1681
+ } else if (0xe0 == (0xf0 & s[0])) {
1682
+ /* 3 byte utf8 codepoint */
1683
+ *out_codepoint =
1684
+ ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
1685
+ } else if (0xc0 == (0xe0 & s[0])) {
1686
+ /* 2 byte utf8 codepoint */
1687
+ *out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
1688
+ } else {
1689
+ /* 1 byte utf8 codepoint otherwise */
1690
+ *out_codepoint = s[0];
1691
+ }
1692
+
1693
+ do {
1694
+ s--;
1695
+ } while ((0 != (0x80 & s[0])) && (0x80 == (0xc0 & s[0])));
1696
+
1697
+ return (utf8_int8_t *)s;
1698
+ }
1699
+
1246
1700
  #undef utf8_restrict
1701
+ #undef utf8_constexpr14
1247
1702
  #undef utf8_null
1248
1703
 
1249
- #ifdef __cplusplus
1250
- } // extern "C"
1704
+ #ifdef utf8_cplusplus
1705
+ } /* extern "C" */
1251
1706
  #endif
1252
1707
 
1253
1708
  #if defined(__clang__)
1254
1709
  #pragma clang diagnostic pop
1255
1710
  #endif
1256
1711
 
1257
- #endif // SHEREDOM_UTF8_H_INCLUDED
1712
+ #endif /* SHEREDOM_UTF8_H_INCLUDED */