rugged 0.28.5 → 0.99.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (379) hide show
  1. checksums.yaml +4 -4
  2. data/ext/rugged/extconf.rb +3 -1
  3. data/ext/rugged/rugged.c +35 -31
  4. data/ext/rugged/rugged.h +13 -0
  5. data/ext/rugged/rugged_blob.c +11 -9
  6. data/ext/rugged/rugged_commit.c +17 -15
  7. data/ext/rugged/rugged_diff.c +4 -26
  8. data/ext/rugged/rugged_index.c +4 -2
  9. data/ext/rugged/rugged_note.c +5 -3
  10. data/ext/rugged/rugged_object.c +57 -10
  11. data/ext/rugged/rugged_rebase.c +3 -1
  12. data/ext/rugged/rugged_remote.c +0 -6
  13. data/ext/rugged/rugged_repo.c +222 -17
  14. data/ext/rugged/rugged_tag.c +8 -6
  15. data/ext/rugged/rugged_tree.c +18 -16
  16. data/lib/rugged/version.rb +1 -1
  17. data/vendor/libgit2/CMakeLists.txt +38 -19
  18. data/vendor/libgit2/COPYING +28 -0
  19. data/vendor/libgit2/cmake/Modules/EnableWarnings.cmake +5 -1
  20. data/vendor/libgit2/cmake/Modules/FindCoreFoundation.cmake +2 -2
  21. data/vendor/libgit2/cmake/Modules/FindGSSAPI.cmake +1 -1
  22. data/vendor/libgit2/cmake/Modules/FindGSSFramework.cmake +28 -0
  23. data/vendor/libgit2/cmake/Modules/FindPCRE.cmake +38 -0
  24. data/vendor/libgit2/cmake/Modules/FindPCRE2.cmake +37 -0
  25. data/vendor/libgit2/cmake/Modules/FindSecurity.cmake +2 -2
  26. data/vendor/libgit2/cmake/Modules/FindStatNsec.cmake +6 -0
  27. data/vendor/libgit2/cmake/Modules/PkgBuildConfig.cmake +110 -0
  28. data/vendor/libgit2/cmake/Modules/SelectGSSAPI.cmake +53 -0
  29. data/vendor/libgit2/cmake/Modules/SelectHTTPSBackend.cmake +124 -0
  30. data/vendor/libgit2/cmake/Modules/SelectHashes.cmake +66 -0
  31. data/vendor/libgit2/deps/http-parser/http_parser.c +11 -6
  32. data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +21 -0
  33. data/vendor/libgit2/deps/ntlmclient/compat.h +33 -0
  34. data/vendor/libgit2/deps/ntlmclient/crypt.h +64 -0
  35. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +120 -0
  36. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.h +18 -0
  37. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +145 -0
  38. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.h +18 -0
  39. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +130 -0
  40. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.h +21 -0
  41. data/vendor/libgit2/deps/ntlmclient/ntlm.c +1422 -0
  42. data/vendor/libgit2/deps/ntlmclient/ntlm.h +174 -0
  43. data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +320 -0
  44. data/vendor/libgit2/deps/ntlmclient/unicode.h +36 -0
  45. data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +445 -0
  46. data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +201 -0
  47. data/vendor/libgit2/deps/ntlmclient/utf8.h +1257 -0
  48. data/vendor/libgit2/deps/ntlmclient/util.c +21 -0
  49. data/vendor/libgit2/deps/ntlmclient/util.h +14 -0
  50. data/vendor/libgit2/deps/pcre/CMakeLists.txt +140 -0
  51. data/vendor/libgit2/deps/pcre/COPYING +5 -0
  52. data/vendor/libgit2/deps/pcre/cmake/COPYING-CMAKE-SCRIPTS +22 -0
  53. data/vendor/libgit2/deps/pcre/cmake/FindEditline.cmake +17 -0
  54. data/vendor/libgit2/deps/pcre/cmake/FindPackageHandleStandardArgs.cmake +58 -0
  55. data/vendor/libgit2/deps/pcre/cmake/FindReadline.cmake +29 -0
  56. data/vendor/libgit2/deps/pcre/config.h.in +57 -0
  57. data/vendor/libgit2/deps/pcre/pcre.h +641 -0
  58. data/vendor/libgit2/deps/pcre/pcre_byte_order.c +319 -0
  59. data/vendor/libgit2/deps/pcre/pcre_chartables.c +198 -0
  60. data/vendor/libgit2/deps/pcre/pcre_compile.c +9800 -0
  61. data/vendor/libgit2/deps/pcre/pcre_config.c +190 -0
  62. data/vendor/libgit2/deps/pcre/pcre_dfa_exec.c +3676 -0
  63. data/vendor/libgit2/deps/pcre/pcre_exec.c +7173 -0
  64. data/vendor/libgit2/deps/pcre/pcre_fullinfo.c +245 -0
  65. data/vendor/libgit2/deps/pcre/pcre_get.c +669 -0
  66. data/vendor/libgit2/deps/pcre/pcre_globals.c +86 -0
  67. data/vendor/libgit2/deps/pcre/pcre_internal.h +2787 -0
  68. data/vendor/libgit2/deps/pcre/pcre_jit_compile.c +11913 -0
  69. data/vendor/libgit2/deps/pcre/pcre_maketables.c +156 -0
  70. data/vendor/libgit2/deps/pcre/pcre_newline.c +210 -0
  71. data/vendor/libgit2/deps/pcre/pcre_ord2utf8.c +94 -0
  72. data/vendor/libgit2/deps/pcre/pcre_printint.c +834 -0
  73. data/vendor/libgit2/deps/pcre/pcre_refcount.c +92 -0
  74. data/vendor/libgit2/deps/pcre/pcre_string_utils.c +211 -0
  75. data/vendor/libgit2/deps/pcre/pcre_study.c +1686 -0
  76. data/vendor/libgit2/deps/pcre/pcre_tables.c +727 -0
  77. data/vendor/libgit2/deps/pcre/pcre_ucd.c +3644 -0
  78. data/vendor/libgit2/deps/pcre/pcre_valid_utf8.c +301 -0
  79. data/vendor/libgit2/deps/pcre/pcre_version.c +98 -0
  80. data/vendor/libgit2/deps/pcre/pcre_xclass.c +268 -0
  81. data/vendor/libgit2/deps/pcre/pcreposix.c +421 -0
  82. data/vendor/libgit2/deps/pcre/pcreposix.h +117 -0
  83. data/vendor/libgit2/deps/pcre/ucp.h +224 -0
  84. data/vendor/libgit2/deps/zlib/adler32.c +0 -7
  85. data/vendor/libgit2/deps/zlib/crc32.c +0 -7
  86. data/vendor/libgit2/include/git2.h +2 -0
  87. data/vendor/libgit2/include/git2/apply.h +22 -2
  88. data/vendor/libgit2/include/git2/attr.h +23 -13
  89. data/vendor/libgit2/include/git2/blame.h +2 -2
  90. data/vendor/libgit2/include/git2/blob.h +44 -12
  91. data/vendor/libgit2/include/git2/branch.h +74 -57
  92. data/vendor/libgit2/include/git2/buffer.h +20 -14
  93. data/vendor/libgit2/include/git2/cert.h +135 -0
  94. data/vendor/libgit2/include/git2/checkout.h +46 -14
  95. data/vendor/libgit2/include/git2/cherrypick.h +3 -3
  96. data/vendor/libgit2/include/git2/clone.h +2 -2
  97. data/vendor/libgit2/include/git2/commit.h +23 -1
  98. data/vendor/libgit2/include/git2/common.h +15 -6
  99. data/vendor/libgit2/include/git2/config.h +12 -12
  100. data/vendor/libgit2/include/git2/cred_helpers.h +4 -42
  101. data/vendor/libgit2/include/git2/credential.h +314 -0
  102. data/vendor/libgit2/include/git2/credential_helpers.h +52 -0
  103. data/vendor/libgit2/include/git2/deprecated.h +314 -3
  104. data/vendor/libgit2/include/git2/describe.h +4 -4
  105. data/vendor/libgit2/include/git2/diff.h +16 -14
  106. data/vendor/libgit2/include/git2/errors.h +4 -2
  107. data/vendor/libgit2/include/git2/filter.h +8 -0
  108. data/vendor/libgit2/include/git2/index.h +2 -1
  109. data/vendor/libgit2/include/git2/indexer.h +48 -4
  110. data/vendor/libgit2/include/git2/merge.h +6 -10
  111. data/vendor/libgit2/include/git2/net.h +0 -5
  112. data/vendor/libgit2/include/git2/object.h +2 -14
  113. data/vendor/libgit2/include/git2/odb.h +3 -2
  114. data/vendor/libgit2/include/git2/odb_backend.h +5 -4
  115. data/vendor/libgit2/include/git2/oid.h +11 -6
  116. data/vendor/libgit2/include/git2/pack.h +12 -1
  117. data/vendor/libgit2/include/git2/proxy.h +6 -4
  118. data/vendor/libgit2/include/git2/rebase.h +46 -2
  119. data/vendor/libgit2/include/git2/refs.h +19 -0
  120. data/vendor/libgit2/include/git2/remote.h +40 -15
  121. data/vendor/libgit2/include/git2/repository.h +24 -2
  122. data/vendor/libgit2/include/git2/revert.h +1 -1
  123. data/vendor/libgit2/include/git2/revwalk.h +7 -3
  124. data/vendor/libgit2/include/git2/stash.h +4 -4
  125. data/vendor/libgit2/include/git2/status.h +25 -16
  126. data/vendor/libgit2/include/git2/submodule.h +20 -3
  127. data/vendor/libgit2/include/git2/sys/alloc.h +9 -9
  128. data/vendor/libgit2/include/git2/sys/cred.h +15 -0
  129. data/vendor/libgit2/include/git2/sys/credential.h +90 -0
  130. data/vendor/libgit2/include/git2/sys/index.h +4 -2
  131. data/vendor/libgit2/include/git2/sys/mempack.h +2 -1
  132. data/vendor/libgit2/include/git2/sys/merge.h +1 -1
  133. data/vendor/libgit2/include/git2/sys/odb_backend.h +48 -4
  134. data/vendor/libgit2/include/git2/sys/refdb_backend.h +57 -21
  135. data/vendor/libgit2/include/git2/sys/repository.h +17 -6
  136. data/vendor/libgit2/include/git2/sys/transport.h +4 -4
  137. data/vendor/libgit2/include/git2/tag.h +11 -2
  138. data/vendor/libgit2/include/git2/trace.h +2 -2
  139. data/vendor/libgit2/include/git2/transport.h +11 -340
  140. data/vendor/libgit2/include/git2/tree.h +5 -3
  141. data/vendor/libgit2/include/git2/types.h +4 -89
  142. data/vendor/libgit2/include/git2/version.h +4 -4
  143. data/vendor/libgit2/include/git2/worktree.h +5 -5
  144. data/vendor/libgit2/src/CMakeLists.txt +89 -224
  145. data/vendor/libgit2/src/alloc.c +2 -14
  146. data/vendor/libgit2/src/{stdalloc.c → allocators/stdalloc.c} +3 -4
  147. data/vendor/libgit2/src/{stdalloc.h → allocators/stdalloc.h} +4 -4
  148. data/vendor/libgit2/src/allocators/win32_crtdbg.c +118 -0
  149. data/vendor/libgit2/src/{transports/cred.h → allocators/win32_crtdbg.h} +5 -4
  150. data/vendor/libgit2/src/apply.c +31 -15
  151. data/vendor/libgit2/src/attr.c +70 -64
  152. data/vendor/libgit2/src/attr_file.c +189 -96
  153. data/vendor/libgit2/src/attr_file.h +9 -9
  154. data/vendor/libgit2/src/attrcache.c +47 -47
  155. data/vendor/libgit2/src/attrcache.h +2 -1
  156. data/vendor/libgit2/src/blame.c +17 -5
  157. data/vendor/libgit2/src/blame.h +1 -1
  158. data/vendor/libgit2/src/blame_git.c +21 -7
  159. data/vendor/libgit2/src/blob.c +81 -17
  160. data/vendor/libgit2/src/blob.h +2 -2
  161. data/vendor/libgit2/src/branch.c +60 -32
  162. data/vendor/libgit2/src/buffer.c +5 -0
  163. data/vendor/libgit2/src/buffer.h +1 -0
  164. data/vendor/libgit2/src/cache.c +26 -33
  165. data/vendor/libgit2/src/cache.h +1 -1
  166. data/vendor/libgit2/src/cc-compat.h +5 -0
  167. data/vendor/libgit2/src/checkout.c +26 -16
  168. data/vendor/libgit2/src/cherrypick.c +7 -1
  169. data/vendor/libgit2/src/clone.c +29 -7
  170. data/vendor/libgit2/src/clone.h +4 -0
  171. data/vendor/libgit2/src/commit.c +70 -22
  172. data/vendor/libgit2/src/commit.h +6 -0
  173. data/vendor/libgit2/src/commit_list.c +28 -76
  174. data/vendor/libgit2/src/commit_list.h +2 -2
  175. data/vendor/libgit2/src/common.h +3 -75
  176. data/vendor/libgit2/src/config.c +31 -40
  177. data/vendor/libgit2/src/config.h +7 -6
  178. data/vendor/libgit2/src/config_backend.h +12 -0
  179. data/vendor/libgit2/src/config_cache.c +39 -39
  180. data/vendor/libgit2/src/config_entries.c +69 -99
  181. data/vendor/libgit2/src/config_entries.h +1 -0
  182. data/vendor/libgit2/src/config_file.c +337 -380
  183. data/vendor/libgit2/src/config_mem.c +12 -16
  184. data/vendor/libgit2/src/config_parse.c +49 -29
  185. data/vendor/libgit2/src/config_parse.h +13 -12
  186. data/vendor/libgit2/src/config_snapshot.c +206 -0
  187. data/vendor/libgit2/src/crlf.c +14 -14
  188. data/vendor/libgit2/src/describe.c +21 -20
  189. data/vendor/libgit2/src/diff.c +43 -66
  190. data/vendor/libgit2/src/diff.h +4 -3
  191. data/vendor/libgit2/src/diff_driver.c +37 -38
  192. data/vendor/libgit2/src/diff_file.c +12 -10
  193. data/vendor/libgit2/src/diff_file.h +2 -2
  194. data/vendor/libgit2/src/diff_generate.c +26 -26
  195. data/vendor/libgit2/src/diff_generate.h +2 -2
  196. data/vendor/libgit2/src/diff_parse.c +1 -1
  197. data/vendor/libgit2/src/diff_print.c +25 -13
  198. data/vendor/libgit2/src/diff_stats.c +1 -1
  199. data/vendor/libgit2/src/diff_tform.c +11 -11
  200. data/vendor/libgit2/src/errors.c +21 -25
  201. data/vendor/libgit2/src/errors.h +81 -0
  202. data/vendor/libgit2/src/features.h.in +9 -2
  203. data/vendor/libgit2/src/fetch.c +7 -2
  204. data/vendor/libgit2/src/fetchhead.c +9 -9
  205. data/vendor/libgit2/src/filebuf.c +1 -1
  206. data/vendor/libgit2/src/filebuf.h +1 -1
  207. data/vendor/libgit2/src/filter.c +16 -8
  208. data/vendor/libgit2/src/{fileops.c → futils.c} +20 -17
  209. data/vendor/libgit2/src/{fileops.h → futils.h} +5 -5
  210. data/vendor/libgit2/src/hash.c +61 -0
  211. data/vendor/libgit2/src/hash.h +19 -21
  212. data/vendor/libgit2/src/hash/sha1.h +38 -0
  213. data/vendor/libgit2/src/hash/{hash_collisiondetect.h → sha1/collisiondetect.c} +14 -17
  214. data/vendor/libgit2/src/hash/sha1/collisiondetect.h +19 -0
  215. data/vendor/libgit2/src/hash/{hash_common_crypto.h → sha1/common_crypto.c} +15 -19
  216. data/vendor/libgit2/src/hash/sha1/common_crypto.h +19 -0
  217. data/vendor/libgit2/src/hash/{hash_generic.c → sha1/generic.c} +22 -10
  218. data/vendor/libgit2/src/hash/{hash_generic.h → sha1/generic.h} +4 -14
  219. data/vendor/libgit2/src/hash/{hash_mbedtls.c → sha1/mbedtls.c} +15 -7
  220. data/vendor/libgit2/src/hash/{hash_mbedtls.h → sha1/mbedtls.h} +6 -11
  221. data/vendor/libgit2/src/hash/{hash_openssl.h → sha1/openssl.c} +14 -18
  222. data/vendor/libgit2/src/hash/sha1/openssl.h +19 -0
  223. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.c +14 -3
  224. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.h +0 -0
  225. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.c +0 -0
  226. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.h +0 -0
  227. data/vendor/libgit2/src/hash/{hash_win32.c → sha1/win32.c} +34 -24
  228. data/vendor/libgit2/src/hash/{hash_win32.h → sha1/win32.h} +6 -19
  229. data/vendor/libgit2/src/hashsig.c +1 -1
  230. data/vendor/libgit2/src/idxmap.c +91 -65
  231. data/vendor/libgit2/src/idxmap.h +151 -15
  232. data/vendor/libgit2/src/ignore.c +26 -35
  233. data/vendor/libgit2/src/index.c +103 -81
  234. data/vendor/libgit2/src/index.h +1 -1
  235. data/vendor/libgit2/src/indexer.c +69 -70
  236. data/vendor/libgit2/src/integer.h +11 -4
  237. data/vendor/libgit2/src/iterator.c +32 -28
  238. data/vendor/libgit2/src/iterator.h +8 -8
  239. data/vendor/libgit2/src/map.h +1 -1
  240. data/vendor/libgit2/src/merge.c +55 -41
  241. data/vendor/libgit2/src/merge.h +2 -2
  242. data/vendor/libgit2/src/merge_driver.c +5 -5
  243. data/vendor/libgit2/src/merge_file.c +1 -1
  244. data/vendor/libgit2/src/mwindow.c +18 -23
  245. data/vendor/libgit2/src/mwindow.h +4 -4
  246. data/vendor/libgit2/src/net.c +411 -0
  247. data/vendor/libgit2/src/net.h +57 -0
  248. data/vendor/libgit2/src/netops.c +6 -222
  249. data/vendor/libgit2/src/netops.h +1 -37
  250. data/vendor/libgit2/src/notes.c +2 -2
  251. data/vendor/libgit2/src/object.c +3 -3
  252. data/vendor/libgit2/src/object.h +2 -0
  253. data/vendor/libgit2/src/odb.c +41 -23
  254. data/vendor/libgit2/src/odb.h +3 -2
  255. data/vendor/libgit2/src/odb_loose.c +17 -10
  256. data/vendor/libgit2/src/odb_mempack.c +13 -24
  257. data/vendor/libgit2/src/odb_pack.c +4 -4
  258. data/vendor/libgit2/src/offmap.c +43 -55
  259. data/vendor/libgit2/src/offmap.h +102 -24
  260. data/vendor/libgit2/src/oid.c +19 -8
  261. data/vendor/libgit2/src/oidmap.c +39 -57
  262. data/vendor/libgit2/src/oidmap.h +99 -19
  263. data/vendor/libgit2/src/pack-objects.c +25 -32
  264. data/vendor/libgit2/src/pack-objects.h +1 -1
  265. data/vendor/libgit2/src/pack.c +97 -129
  266. data/vendor/libgit2/src/pack.h +15 -18
  267. data/vendor/libgit2/src/parse.c +10 -0
  268. data/vendor/libgit2/src/parse.h +3 -3
  269. data/vendor/libgit2/src/patch.c +1 -1
  270. data/vendor/libgit2/src/patch_generate.c +1 -1
  271. data/vendor/libgit2/src/patch_parse.c +30 -9
  272. data/vendor/libgit2/src/path.c +43 -6
  273. data/vendor/libgit2/src/path.h +2 -0
  274. data/vendor/libgit2/src/pathspec.c +14 -14
  275. data/vendor/libgit2/src/pool.c +26 -22
  276. data/vendor/libgit2/src/pool.h +7 -7
  277. data/vendor/libgit2/src/posix.c +7 -7
  278. data/vendor/libgit2/src/posix.h +12 -1
  279. data/vendor/libgit2/src/proxy.c +7 -2
  280. data/vendor/libgit2/src/push.c +10 -5
  281. data/vendor/libgit2/src/reader.c +2 -2
  282. data/vendor/libgit2/src/rebase.c +87 -28
  283. data/vendor/libgit2/src/refdb.c +12 -0
  284. data/vendor/libgit2/src/refdb_fs.c +215 -169
  285. data/vendor/libgit2/src/reflog.c +11 -13
  286. data/vendor/libgit2/src/refs.c +34 -23
  287. data/vendor/libgit2/src/refs.h +8 -1
  288. data/vendor/libgit2/src/refspec.c +9 -16
  289. data/vendor/libgit2/src/regexp.c +221 -0
  290. data/vendor/libgit2/src/regexp.h +97 -0
  291. data/vendor/libgit2/src/remote.c +57 -55
  292. data/vendor/libgit2/src/remote.h +2 -2
  293. data/vendor/libgit2/src/repository.c +120 -103
  294. data/vendor/libgit2/src/repository.h +49 -40
  295. data/vendor/libgit2/src/revert.c +6 -1
  296. data/vendor/libgit2/src/revparse.c +18 -19
  297. data/vendor/libgit2/src/revwalk.c +71 -33
  298. data/vendor/libgit2/src/revwalk.h +20 -0
  299. data/vendor/libgit2/src/settings.c +13 -1
  300. data/vendor/libgit2/src/sortedcache.c +12 -26
  301. data/vendor/libgit2/src/sortedcache.h +1 -1
  302. data/vendor/libgit2/src/stash.c +45 -65
  303. data/vendor/libgit2/src/status.c +17 -11
  304. data/vendor/libgit2/src/streams/openssl.c +53 -1
  305. data/vendor/libgit2/src/streams/socket.c +2 -2
  306. data/vendor/libgit2/src/strmap.c +37 -84
  307. data/vendor/libgit2/src/strmap.h +105 -33
  308. data/vendor/libgit2/src/submodule.c +151 -126
  309. data/vendor/libgit2/src/submodule.h +1 -1
  310. data/vendor/libgit2/src/tag.c +10 -2
  311. data/vendor/libgit2/src/trace.c +1 -1
  312. data/vendor/libgit2/src/trace.h +3 -3
  313. data/vendor/libgit2/src/trailer.c +46 -32
  314. data/vendor/libgit2/src/transaction.c +3 -8
  315. data/vendor/libgit2/src/transports/auth.c +16 -15
  316. data/vendor/libgit2/src/transports/auth.h +18 -11
  317. data/vendor/libgit2/src/transports/auth_negotiate.c +64 -33
  318. data/vendor/libgit2/src/transports/auth_negotiate.h +2 -2
  319. data/vendor/libgit2/src/transports/auth_ntlm.c +223 -0
  320. data/vendor/libgit2/src/transports/auth_ntlm.h +38 -0
  321. data/vendor/libgit2/src/transports/credential.c +476 -0
  322. data/vendor/libgit2/src/transports/{cred_helpers.c → credential_helpers.c} +21 -8
  323. data/vendor/libgit2/src/transports/git.c +11 -16
  324. data/vendor/libgit2/src/transports/http.c +488 -1248
  325. data/vendor/libgit2/src/transports/http.h +4 -1
  326. data/vendor/libgit2/src/transports/httpclient.c +1526 -0
  327. data/vendor/libgit2/src/transports/httpclient.h +190 -0
  328. data/vendor/libgit2/src/transports/local.c +10 -10
  329. data/vendor/libgit2/src/transports/smart.c +19 -19
  330. data/vendor/libgit2/src/transports/smart.h +3 -3
  331. data/vendor/libgit2/src/transports/smart_protocol.c +40 -64
  332. data/vendor/libgit2/src/transports/ssh.c +77 -59
  333. data/vendor/libgit2/src/transports/winhttp.c +266 -241
  334. data/vendor/libgit2/src/tree-cache.c +14 -7
  335. data/vendor/libgit2/src/tree.c +16 -26
  336. data/vendor/libgit2/src/unix/map.c +1 -1
  337. data/vendor/libgit2/src/unix/posix.h +2 -12
  338. data/vendor/libgit2/src/userdiff.h +3 -1
  339. data/vendor/libgit2/src/util.c +51 -53
  340. data/vendor/libgit2/src/util.h +16 -21
  341. data/vendor/libgit2/src/wildmatch.c +320 -0
  342. data/vendor/libgit2/src/wildmatch.h +23 -0
  343. data/vendor/libgit2/src/win32/map.c +3 -5
  344. data/vendor/libgit2/src/win32/path_w32.c +12 -2
  345. data/vendor/libgit2/src/win32/path_w32.h +0 -29
  346. data/vendor/libgit2/src/win32/posix.h +1 -4
  347. data/vendor/libgit2/src/win32/posix_w32.c +48 -5
  348. data/vendor/libgit2/src/win32/precompiled.h +0 -2
  349. data/vendor/libgit2/src/win32/thread.c +5 -5
  350. data/vendor/libgit2/src/win32/w32_buffer.c +7 -3
  351. data/vendor/libgit2/src/win32/w32_common.h +39 -0
  352. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.c +0 -93
  353. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.h +0 -2
  354. data/vendor/libgit2/src/win32/w32_stack.c +4 -9
  355. data/vendor/libgit2/src/win32/w32_stack.h +3 -3
  356. data/vendor/libgit2/src/win32/w32_util.c +31 -0
  357. data/vendor/libgit2/src/win32/w32_util.h +6 -32
  358. data/vendor/libgit2/src/worktree.c +36 -22
  359. data/vendor/libgit2/src/xdiff/xdiffi.c +1 -1
  360. data/vendor/libgit2/src/xdiff/xmerge.c +12 -0
  361. data/vendor/libgit2/src/xdiff/xpatience.c +3 -0
  362. data/vendor/libgit2/src/zstream.c +5 -0
  363. data/vendor/libgit2/src/zstream.h +1 -0
  364. metadata +108 -38
  365. data/vendor/libgit2/deps/regex/CMakeLists.txt +0 -2
  366. data/vendor/libgit2/deps/regex/COPYING +0 -502
  367. data/vendor/libgit2/deps/regex/config.h +0 -7
  368. data/vendor/libgit2/deps/regex/regcomp.c +0 -3857
  369. data/vendor/libgit2/deps/regex/regex.c +0 -92
  370. data/vendor/libgit2/deps/regex/regex.h +0 -582
  371. data/vendor/libgit2/deps/regex/regex_internal.c +0 -1744
  372. data/vendor/libgit2/deps/regex/regex_internal.h +0 -819
  373. data/vendor/libgit2/deps/regex/regexec.c +0 -4369
  374. data/vendor/libgit2/include/git2/inttypes.h +0 -309
  375. data/vendor/libgit2/include/git2/sys/time.h +0 -31
  376. data/vendor/libgit2/libgit2.pc.in +0 -13
  377. data/vendor/libgit2/src/fnmatch.c +0 -248
  378. data/vendor/libgit2/src/fnmatch.h +0 -48
  379. data/vendor/libgit2/src/transports/cred.c +0 -390
@@ -0,0 +1,201 @@
1
+ /*
2
+ * Copyright (c) Edward Thomson. All rights reserved.
3
+ *
4
+ * This file is part of ntlmclient, distributed under the MIT license.
5
+ * For full terms and copyright information, and for third-party
6
+ * copyright information, see the included LICENSE.txt file.
7
+ */
8
+
9
+ #include <locale.h>
10
+ #include <iconv.h>
11
+ #include <string.h>
12
+ #include <errno.h>
13
+
14
+ #include "ntlmclient.h"
15
+ #include "unicode.h"
16
+ #include "ntlm.h"
17
+ #include "compat.h"
18
+
19
+ struct ntlm_unicode_ctx {
20
+ ntlm_client *ntlm;
21
+ iconv_t utf8_to_16;
22
+ iconv_t utf16_to_8;
23
+ };
24
+
25
+ ntlm_unicode_ctx *ntlm_unicode_ctx_init(ntlm_client *ntlm)
26
+ {
27
+ ntlm_unicode_ctx *ctx;
28
+
29
+ if ((ctx = calloc(1, sizeof(ntlm_unicode_ctx))) == NULL)
30
+ return NULL;
31
+
32
+ ctx->ntlm = ntlm;
33
+ ctx->utf8_to_16 = (iconv_t)-1;
34
+ ctx->utf16_to_8 = (iconv_t)-1;
35
+
36
+ return ctx;
37
+ }
38
+
39
/* Selects which of the two iconv descriptors a conversion runs through. */
typedef enum {
	/* input is UTF-8, output is UTF-16LE */
	unicode_iconv_utf8_to_16,

	/* input is UTF-16LE, output is UTF-8 */
	unicode_iconv_utf16_to_8
} unicode_iconv_encoding_direction;
43
+
44
+ static inline bool unicode_iconv_init(ntlm_unicode_ctx *ctx)
45
+ {
46
+ if (ctx->utf8_to_16 != (iconv_t)-1 || ctx->utf16_to_8 != (iconv_t)-1)
47
+ return true;
48
+
49
+ if ((ctx->utf8_to_16 = iconv_open("UTF-16LE", "UTF-8")) == (iconv_t)-1 ||
50
+ (ctx->utf16_to_8 = iconv_open("UTF-8", "UTF-16LE")) == (iconv_t)-1) {
51
+ if (errno == EINVAL)
52
+ ntlm_client_set_errmsg(ctx->ntlm,
53
+ "iconv does not support UTF8 <-> UTF16 conversion");
54
+ else
55
+ ntlm_client_set_errmsg(ctx->ntlm, strerror(errno));
56
+
57
+ return false;
58
+ }
59
+
60
+ return true;
61
+ }
62
+
63
+ static inline bool unicode_iconv_encoding_convert(
64
+ char **converted,
65
+ size_t *converted_len,
66
+ ntlm_unicode_ctx *ctx,
67
+ const char *string,
68
+ size_t string_len,
69
+ unicode_iconv_encoding_direction direction)
70
+ {
71
+ char *in_start, *out_start, *out, *new_out;
72
+ size_t in_start_len, out_start_len, out_size, nul_size, ret, written = 0;
73
+ iconv_t converter;
74
+
75
+ *converted = NULL;
76
+ *converted_len = 0;
77
+
78
+ if (!unicode_iconv_init(ctx))
79
+ return false;
80
+
81
+ /*
82
+ * When translating UTF8 to UTF16, these strings are only used
83
+ * internally, and we obey the given length, so we can simply
84
+ * use a buffer that is 2x the size. When translating from UTF16
85
+ * to UTF8, we may need to return to callers, so we need to NUL
86
+ * terminate and expect an extra byte for UTF8, two for UTF16.
87
+ */
88
+ if (direction == unicode_iconv_utf8_to_16) {
89
+ converter = ctx->utf8_to_16;
90
+ out_size = (string_len * 2) + 2;
91
+ nul_size = 2;
92
+ } else {
93
+ converter = ctx->utf16_to_8;
94
+ out_size = (string_len / 2) + 1;
95
+ nul_size = 1;
96
+ }
97
+
98
+ /* Round to the nearest multiple of 8 */
99
+ out_size = (out_size + 7) & ~7;
100
+
101
+ if ((out = malloc(out_size)) == NULL) {
102
+ ntlm_client_set_errmsg(ctx->ntlm, "out of memory");
103
+ return false;
104
+ }
105
+
106
+ in_start = (char *)string;
107
+ in_start_len = string_len;
108
+
109
+ while (true) {
110
+ out_start = out + written;
111
+ out_start_len = (out_size - nul_size) - written;
112
+
113
+ ret = iconv(converter, &in_start, &in_start_len, &out_start, &out_start_len);
114
+ written = (out_size - nul_size) - out_start_len;
115
+
116
+ if (ret == 0)
117
+ break;
118
+
119
+ if (ret == (size_t)-1 && errno != E2BIG) {
120
+ ntlm_client_set_errmsg(ctx->ntlm, strerror(errno));
121
+ goto on_error;
122
+ }
123
+
124
+ /* Grow buffer size by 1.5 (rounded up to a multiple of 8) */
125
+ out_size = ((((out_size << 1) - (out_size >> 1)) + 7) & ~7);
126
+
127
+ if (out_size > NTLM_UNICODE_MAX_LEN) {
128
+ ntlm_client_set_errmsg(ctx->ntlm,
129
+ "unicode conversion too large");
130
+ goto on_error;
131
+ }
132
+
133
+ if ((new_out = realloc(out, out_size)) == NULL) {
134
+ ntlm_client_set_errmsg(ctx->ntlm, "out of memory");
135
+ goto on_error;
136
+ }
137
+
138
+ out = new_out;
139
+ }
140
+
141
+ if (in_start_len != 0) {
142
+ ntlm_client_set_errmsg(ctx->ntlm,
143
+ "invalid unicode string; trailing data remains");
144
+ goto on_error;
145
+ }
146
+
147
+ /* NUL terminate */
148
+ out[written] = '\0';
149
+
150
+ if (direction == unicode_iconv_utf8_to_16)
151
+ out[written + 1] = '\0';
152
+
153
+ *converted = out;
154
+
155
+ if (converted_len)
156
+ *converted_len = written;
157
+
158
+ return true;
159
+
160
+ on_error:
161
+ free(out);
162
+ return false;
163
+ }
164
+
165
+ bool ntlm_unicode_utf8_to_16(
166
+ char **converted,
167
+ size_t *converted_len,
168
+ ntlm_unicode_ctx *ctx,
169
+ const char *string,
170
+ size_t string_len)
171
+ {
172
+ return unicode_iconv_encoding_convert(
173
+ converted, converted_len, ctx, string, string_len,
174
+ unicode_iconv_utf8_to_16);
175
+ }
176
+
177
+ bool ntlm_unicode_utf16_to_8(
178
+ char **converted,
179
+ size_t *converted_len,
180
+ ntlm_unicode_ctx *ctx,
181
+ const char *string,
182
+ size_t string_len)
183
+ {
184
+ return unicode_iconv_encoding_convert(
185
+ converted, converted_len, ctx, string, string_len,
186
+ unicode_iconv_utf16_to_8);
187
+ }
188
+
189
+ void ntlm_unicode_ctx_free(ntlm_unicode_ctx *ctx)
190
+ {
191
+ if (!ctx)
192
+ return;
193
+
194
+ if (ctx->utf16_to_8 != (iconv_t)-1)
195
+ iconv_close(ctx->utf16_to_8);
196
+
197
+ if (ctx->utf8_to_16 != (iconv_t)-1)
198
+ iconv_close(ctx->utf8_to_16);
199
+
200
+ free(ctx);
201
+ }
@@ -0,0 +1,1257 @@
1
+ // The latest version of this library is available on GitHub;
2
+ // https://github.com/sheredom/utf8.h
3
+
4
+ // This is free and unencumbered software released into the public domain.
5
+ //
6
+ // Anyone is free to copy, modify, publish, use, compile, sell, or
7
+ // distribute this software, either in source code form or as a compiled
8
+ // binary, for any purpose, commercial or non-commercial, and by any
9
+ // means.
10
+ //
11
+ // In jurisdictions that recognize copyright laws, the author or authors
12
+ // of this software dedicate any and all copyright interest in the
13
+ // software to the public domain. We make this dedication for the benefit
14
+ // of the public at large and to the detriment of our heirs and
15
+ // successors. We intend this dedication to be an overt act of
16
+ // relinquishment in perpetuity of all present and future rights to this
17
+ // software under copyright law.
18
+ //
19
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23
+ // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24
+ // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ // OTHER DEALINGS IN THE SOFTWARE.
26
+ //
27
+ // For more information, please refer to <http://unlicense.org/>
28
+
29
+ #ifndef SHEREDOM_UTF8_H_INCLUDED
30
+ #define SHEREDOM_UTF8_H_INCLUDED
31
+
32
+ #if defined(_MSC_VER)
33
+ #pragma warning(push)
34
+
35
+ // disable 'bytes padding added after construct' warning
36
+ #pragma warning(disable : 4820)
37
+ #endif
38
+
39
+ #include <stddef.h>
40
+ #include <stdlib.h>
41
+
42
+ #if defined(_MSC_VER)
43
+ #pragma warning(pop)
44
+ #endif
45
+
46
+ #if defined(_MSC_VER)
47
+ typedef __int32 utf8_int32_t;
48
+ #else
49
+ #include <stdint.h>
50
+ typedef int32_t utf8_int32_t;
51
+ #endif
52
+
53
+ #if defined(__clang__)
54
+ #pragma clang diagnostic push
55
+ #pragma clang diagnostic ignored "-Wold-style-cast"
56
+ #pragma clang diagnostic ignored "-Wcast-qual"
57
+ #endif
58
+
59
+ #ifdef __cplusplus
60
+ extern "C" {
61
+ #endif
62
+
63
+ #if defined(__clang__) || defined(__GNUC__)
64
+ #define utf8_nonnull __attribute__((nonnull))
65
+ #define utf8_pure __attribute__((pure))
66
+ #define utf8_restrict __restrict__
67
+ #define utf8_weak __attribute__((weak))
68
+ #elif defined(_MSC_VER)
69
+ #define utf8_nonnull
70
+ #define utf8_pure
71
+ #define utf8_restrict __restrict
72
+ #define utf8_weak __inline
73
+ #else
74
+ #error Non clang, non gcc, non MSVC compiler found!
75
+ #endif
76
+
77
+ #ifdef __cplusplus
78
+ #define utf8_null NULL
79
+ #else
80
+ #define utf8_null 0
81
+ #endif
82
+
83
+ // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
84
+ // src2 respectively, case insensitive.
85
+ utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1,
86
+ const void *src2);
87
+
88
+ // Append the utf8 string src onto the utf8 string dst.
89
+ utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst,
90
+ const void *utf8_restrict src);
91
+
92
+ // Find the first match of the utf8 codepoint chr in the utf8 string src.
93
+ utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src,
94
+ utf8_int32_t chr);
95
+
96
+ // Return less than 0, 0, greater than 0 if src1 < src2,
97
+ // src1 == src2, src1 > src2 respectively.
98
+ utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1,
99
+ const void *src2);
100
+
101
+ // Copy the utf8 string src onto the memory allocated in dst.
102
+ utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst,
103
+ const void *utf8_restrict src);
104
+
105
+ // Number of utf8 codepoints in the utf8 string src that consists entirely
106
+ // of utf8 codepoints not from the utf8 string reject.
107
+ utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src,
108
+ const void *reject);
109
+
110
+ // Duplicate the utf8 string src by getting its size, malloc'ing a new buffer
111
+ // copying over the data, and returning that. Or 0 if malloc failed.
112
+ utf8_nonnull utf8_weak void *utf8dup(const void *src);
113
+
114
+ // Number of utf8 codepoints in the utf8 string str,
115
+ // excluding the null terminating byte.
116
+ utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str);
117
+
118
+ // Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 >
119
+ // src2 respectively, case insensitive. Checking at most n bytes of each utf8
120
+ // string.
121
+ utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1,
122
+ const void *src2, size_t n);
123
+
124
+ // Append the utf8 string src onto the utf8 string dst,
125
+ // writing at most n+1 bytes. Can produce an invalid utf8
126
+ // string if n falls partway through a utf8 codepoint.
127
+ utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst,
128
+ const void *utf8_restrict src, size_t n);
129
+
130
+ // Return less than 0, 0, greater than 0 if src1 < src2,
131
+ // src1 == src2, src1 > src2 respectively. Checking at most n
132
+ // bytes of each utf8 string.
133
+ utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1,
134
+ const void *src2, size_t n);
135
+
136
+ // Copy the utf8 string src onto the memory allocated in dst.
137
+ // Copies at most n bytes. If there is no terminating null byte in
138
+ // the first n bytes of src, the string placed into dst will not be
139
+ // null-terminated. If the size (in bytes) of src is less than n,
140
+ // extra null terminating bytes are appended to dst such that a
141
+ // total of n bytes are written. Can produce an invalid utf8
142
+ // string if n falls partway through a utf8 codepoint.
143
+ utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst,
144
+ const void *utf8_restrict src, size_t n);
145
+
146
+ // Similar to utf8dup, except that at most n bytes of src are copied. If src is
147
+ // longer than n, only n bytes are copied and a null byte is added.
148
+ //
149
+ // Returns a new string if successful, 0 otherwise
150
+ utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n);
151
+
152
+ // Locates the first occurrence in the utf8 string str of any byte in the
153
+ // utf8 string accept, or 0 if no match was found.
154
+ utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str,
155
+ const void *accept);
156
+
157
+ // Find the last match of the utf8 codepoint chr in the utf8 string src.
158
+ utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr);
159
+
160
+ // Number of bytes in the utf8 string str,
161
+ // including the null terminating byte.
162
+ utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str);
163
+
164
+ // Number of utf8 codepoints in the utf8 string src that consists entirely
165
+ // of utf8 codepoints from the utf8 string accept.
166
+ utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src,
167
+ const void *accept);
168
+
169
+ // The position of the utf8 string needle in the utf8 string haystack.
170
+ utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack,
171
+ const void *needle);
172
+
173
+ // The position of the utf8 string needle in the utf8 string haystack, case
174
+ // insensitive.
175
+ utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack,
176
+ const void *needle);
177
+
178
+ // Return 0 on success, or the position of the invalid
179
+ // utf8 codepoint on failure.
180
+ utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str);
181
+
182
+ // Sets out_codepoint to the next utf8 codepoint in str, and returns the address
183
+ // of the utf8 codepoint after the current one in str.
184
+ utf8_nonnull utf8_weak void *
185
+ utf8codepoint(const void *utf8_restrict str,
186
+ utf8_int32_t *utf8_restrict out_codepoint);
187
+
188
+ // Returns the size of the given codepoint in bytes.
189
+ utf8_weak size_t utf8codepointsize(utf8_int32_t chr);
190
+
191
+ // Write a codepoint to the given string, and return the address to the next
192
+ // place after the written codepoint. Pass how many bytes left in the buffer to
193
+ // n. If there is not enough space for the codepoint, this function returns
194
+ // null.
195
+ utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str,
196
+ utf8_int32_t chr, size_t n);
197
+
198
+ // Returns 1 if the given character is lowercase, or 0 if it is not.
199
+ utf8_weak int utf8islower(utf8_int32_t chr);
200
+
201
+ // Returns 1 if the given character is uppercase, or 0 if it is not.
202
+ utf8_weak int utf8isupper(utf8_int32_t chr);
203
+
204
+ // Transform the given string into all lowercase codepoints.
205
+ utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str);
206
+
207
+ // Transform the given string into all uppercase codepoints.
208
+ utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str);
209
+
210
+ // Make a codepoint lower case if possible.
211
+ utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp);
212
+
213
+ // Make a codepoint upper case if possible.
214
+ utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp);
215
+
216
+ #undef utf8_weak
217
+ #undef utf8_pure
218
+ #undef utf8_nonnull
219
+
220
+ int utf8casecmp(const void *src1, const void *src2) {
221
+ utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
222
+
223
+ for (;;) {
224
+ src1 = utf8codepoint(src1, &src1_cp);
225
+ src2 = utf8codepoint(src2, &src2_cp);
226
+
227
+ // Take a copy of src1 & src2
228
+ src1_orig_cp = src1_cp;
229
+ src2_orig_cp = src2_cp;
230
+
231
+ // Lower the srcs if required
232
+ src1_cp = utf8lwrcodepoint(src1_cp);
233
+ src2_cp = utf8lwrcodepoint(src2_cp);
234
+
235
+ // Check if the lowered codepoints match
236
+ if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
237
+ return 0;
238
+ } else if (src1_cp == src2_cp) {
239
+ continue;
240
+ }
241
+
242
+ // If they don't match, then we return which of the original's are less
243
+ if (src1_orig_cp < src2_orig_cp) {
244
+ return -1;
245
+ } else if (src1_orig_cp > src2_orig_cp) {
246
+ return 1;
247
+ }
248
+ }
249
+ }
250
+
251
+ void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src) {
252
+ char *d = (char *)dst;
253
+ const char *s = (const char *)src;
254
+
255
+ // find the null terminating byte in dst
256
+ while ('\0' != *d) {
257
+ d++;
258
+ }
259
+
260
+ // overwriting the null terminating byte in dst, append src byte-by-byte
261
+ while ('\0' != *s) {
262
+ *d++ = *s++;
263
+ }
264
+
265
+ // write out a new null terminating byte into dst
266
+ *d = '\0';
267
+
268
+ return dst;
269
+ }
270
+
271
+ void *utf8chr(const void *src, utf8_int32_t chr) {
272
+ char c[5] = {'\0', '\0', '\0', '\0', '\0'};
273
+
274
+ if (0 == chr) {
275
+ // being asked to return position of null terminating byte, so
276
+ // just run s to the end, and return!
277
+ const char *s = (const char *)src;
278
+ while ('\0' != *s) {
279
+ s++;
280
+ }
281
+ return (void *)s;
282
+ } else if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
283
+ // 1-byte/7-bit ascii
284
+ // (0b0xxxxxxx)
285
+ c[0] = (char)chr;
286
+ } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
287
+ // 2-byte/11-bit utf8 code point
288
+ // (0b110xxxxx 0b10xxxxxx)
289
+ c[0] = 0xc0 | (char)(chr >> 6);
290
+ c[1] = 0x80 | (char)(chr & 0x3f);
291
+ } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
292
+ // 3-byte/16-bit utf8 code point
293
+ // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
294
+ c[0] = 0xe0 | (char)(chr >> 12);
295
+ c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
296
+ c[2] = 0x80 | (char)(chr & 0x3f);
297
+ } else { // if (0 == ((int)0xffe00000 & chr)) {
298
+ // 4-byte/21-bit utf8 code point
299
+ // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
300
+ c[0] = 0xf0 | (char)(chr >> 18);
301
+ c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
302
+ c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
303
+ c[3] = 0x80 | (char)(chr & 0x3f);
304
+ }
305
+
306
+ // we've made c into a 2 utf8 codepoint string, one for the chr we are
307
+ // seeking, another for the null terminating byte. Now use utf8str to
308
+ // search
309
+ return utf8str(src, c);
310
+ }
311
+
312
// Byte-wise comparison of two null-terminated utf8 strings.
//
// Returns -1 if the first differing byte of src1 is smaller (compared as
// unsigned char), 1 if it is larger, and 0 if the strings are identical.
int utf8cmp(const void *src1, const void *src2) {
  const unsigned char *a = (const unsigned char *)src1;
  const unsigned char *b = (const unsigned char *)src2;

  // keep going until both strings sit on their terminator
  for (; ('\0' != *a) || ('\0' != *b); ++a, ++b) {
    if (*a != *b) {
      return (*a < *b) ? -1 : 1;
    }
  }

  // no byte differed
  return 0;
}
330
+
331
+ int utf8coll(const void *src1, const void *src2);
332
+
333
+ void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src) {
334
+ char *d = (char *)dst;
335
+ const char *s = (const char *)src;
336
+
337
+ // overwriting anything previously in dst, write byte-by-byte
338
+ // from src
339
+ while ('\0' != *s) {
340
+ *d++ = *s++;
341
+ }
342
+
343
+ // append null terminating byte
344
+ *d = '\0';
345
+
346
+ return dst;
347
+ }
348
+
349
// Count the utf8 codepoints at the start of src that do not appear in the
// utf8 string reject.
//
// Returns the number of leading codepoints of src before the first one that
// is also present in reject, or the total number of codepoints in src if
// none of them is.
size_t utf8cspn(const void *src, const void *reject) {
  const char *s = (const char *)src;
  size_t chars = 0;

  while ('\0' != *s) {
    const char *r = (const char *)reject;
    size_t offset = 0;

    while ('\0' != *r) {
      // *r starts a new codepoint (it is not 0b10xxxxxx) and every byte of
      // the previous reject codepoint matched (0 < offset): found a match
      if ((0x80 != (0xc0 & *r)) && (0 < offset)) {
        return chars;
      }

      if (*r == s[offset]) {
        // part of a codepoint matched, move on to its next byte
        offset++;
        r++;
      } else {
        // r may be in the middle of a non-matching codepoint, so march it
        // on to the start of the next one
        do {
          r++;
        } while (0x80 == (0xc0 & *r));

        // a mismatch discards any partial match
        offset = 0;
      }
    }

    // the last codepoint of reject matched; the loop ended on the
    // terminator before the check at its top could report it
    if (0 < offset) {
      return chars;
    }

    // the current codepoint of src is not in reject; step over it,
    // including its continuation bytes
    do {
      s++;
    } while (0x80 == (0xc0 & *s));
    chars++;
  }

  return chars;
}
394
+
395
+ size_t utf8size(const void *str);
396
+
397
+ void *utf8dup(const void *src) {
398
+ const char *s = (const char *)src;
399
+ char *n = utf8_null;
400
+
401
+ // figure out how many bytes (including the terminator) we need to copy first
402
+ size_t bytes = utf8size(src);
403
+
404
+ n = (char *)malloc(bytes);
405
+
406
+ if (utf8_null == n) {
407
+ // out of memory so we bail
408
+ return utf8_null;
409
+ } else {
410
+ bytes = 0;
411
+
412
+ // copy src byte-by-byte into our new utf8 string
413
+ while ('\0' != s[bytes]) {
414
+ n[bytes] = s[bytes];
415
+ bytes++;
416
+ }
417
+
418
+ // append null terminating byte
419
+ n[bytes] = '\0';
420
+ return n;
421
+ }
422
+ }
423
+
424
+ void *utf8fry(const void *str);
425
+
426
// Count the utf8 codepoints in the null-terminated string str, not counting
// the terminator.
//
// The width of each codepoint is taken from its leading byte. A sequence
// that is cut short by the terminator still counts as one codepoint, and
// the scan stops at the terminator instead of stepping over it.
size_t utf8len(const void *str) {
  const unsigned char *s = (const unsigned char *)str;
  size_t length = 0;

  while ('\0' != *s) {
    size_t width;

    if (0xf0 == (0xf8 & *s)) {
      // 4-byte utf8 codepoint (began with 0b11110xxx)
      width = 4;
    } else if (0xe0 == (0xf0 & *s)) {
      // 3-byte utf8 codepoint (began with 0b1110xxxx)
      width = 3;
    } else if (0xc0 == (0xe0 & *s)) {
      // 2-byte utf8 codepoint (began with 0b110xxxxx)
      width = 2;
    } else {
      // 1-byte ascii (or a stray byte, which is counted on its own)
      width = 1;
    }

    // advance over the codepoint one byte at a time so that a truncated
    // sequence cannot carry s beyond the terminating byte
    do {
      s++;
      width--;
    } while ((0 != width) && ('\0' != *s));

    // however many bytes were consumed, it was a single codepoint
    length++;
  }

  return length;
}
452
+
453
+ int utf8ncasecmp(const void *src1, const void *src2, size_t n) {
454
+ utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp;
455
+
456
+ do {
457
+ const unsigned char *const s1 = (const unsigned char *)src1;
458
+ const unsigned char *const s2 = (const unsigned char *)src2;
459
+
460
+ // first check that we have enough bytes left in n to contain an entire
461
+ // codepoint
462
+ if (0 == n) {
463
+ return 0;
464
+ }
465
+
466
+ if ((1 == n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2)))) {
467
+ const utf8_int32_t c1 = (0xe0 & *s1);
468
+ const utf8_int32_t c2 = (0xe0 & *s2);
469
+
470
+ if (c1 < c2) {
471
+ return -1;
472
+ } else if (c1 > c2) {
473
+ return 1;
474
+ } else {
475
+ return 0;
476
+ }
477
+ }
478
+
479
+ if ((2 >= n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2)))) {
480
+ const utf8_int32_t c1 = (0xf0 & *s1);
481
+ const utf8_int32_t c2 = (0xf0 & *s2);
482
+
483
+ if (c1 < c2) {
484
+ return -1;
485
+ } else if (c1 > c2) {
486
+ return 1;
487
+ } else {
488
+ return 0;
489
+ }
490
+ }
491
+
492
+ if ((3 >= n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2)))) {
493
+ const utf8_int32_t c1 = (0xf8 & *s1);
494
+ const utf8_int32_t c2 = (0xf8 & *s2);
495
+
496
+ if (c1 < c2) {
497
+ return -1;
498
+ } else if (c1 > c2) {
499
+ return 1;
500
+ } else {
501
+ return 0;
502
+ }
503
+ }
504
+
505
+ src1 = utf8codepoint(src1, &src1_cp);
506
+ src2 = utf8codepoint(src2, &src2_cp);
507
+ n -= utf8codepointsize(src1_cp);
508
+
509
+ // Take a copy of src1 & src2
510
+ src1_orig_cp = src1_cp;
511
+ src2_orig_cp = src2_cp;
512
+
513
+ // Lower srcs if required
514
+ src1_cp = utf8lwrcodepoint(src1_cp);
515
+ src2_cp = utf8lwrcodepoint(src2_cp);
516
+
517
+ // Check if the lowered codepoints match
518
+ if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
519
+ return 0;
520
+ } else if (src1_cp == src2_cp) {
521
+ continue;
522
+ }
523
+
524
+ // If they don't match, then we return which of the original's are less
525
+ if (src1_orig_cp < src2_orig_cp) {
526
+ return -1;
527
+ } else if (src1_orig_cp > src2_orig_cp) {
528
+ return 1;
529
+ }
530
+ } while (0 < n);
531
+
532
+ // both utf8 strings matched
533
+ return 0;
534
+ }
535
+
536
+ void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src,
537
+ size_t n) {
538
+ char *d = (char *)dst;
539
+ const char *s = (const char *)src;
540
+
541
+ // find the null terminating byte in dst
542
+ while ('\0' != *d) {
543
+ d++;
544
+ }
545
+
546
+ // overwriting the null terminating byte in dst, append src byte-by-byte
547
+ // stopping if we run out of space
548
+ do {
549
+ *d++ = *s++;
550
+ } while (('\0' != *s) && (0 != --n));
551
+
552
+ // write out a new null terminating byte into dst
553
+ *d = '\0';
554
+
555
+ return dst;
556
+ }
557
+
558
// Byte-wise comparison of two utf8 strings, looking at no more than n
// bytes.
//
// Returns -1 if the first differing byte of src1 is smaller (compared as
// unsigned char), 1 if it is larger, and 0 if no difference is found
// before both strings end or n bytes have been checked.
int utf8ncmp(const void *src1, const void *src2, size_t n) {
  const unsigned char *a = (const unsigned char *)src1;
  const unsigned char *b = (const unsigned char *)src2;

  for (;;) {
    // both strings are on their terminator: nothing more to compare
    if (('\0' == *a) && ('\0' == *b)) {
      break;
    }

    // byte budget used up
    if (0 == n--) {
      break;
    }

    if (*a != *b) {
      return (*a < *b) ? -1 : 1;
    }

    a++;
    b++;
  }

  // no byte differed
  return 0;
}
576
+
577
+ void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src,
578
+ size_t n) {
579
+ char *d = (char *)dst;
580
+ const char *s = (const char *)src;
581
+
582
+ // overwriting anything previously in dst, write byte-by-byte
583
+ // from src
584
+ do {
585
+ *d++ = *s++;
586
+ } while (('\0' != *s) && (0 != --n));
587
+
588
+ // append null terminating byte
589
+ while (0 != n) {
590
+ *d++ = '\0';
591
+ n--;
592
+ }
593
+
594
+ return dst;
595
+ }
596
+
597
+ void *utf8ndup(const void *src, size_t n) {
598
+ const char *s = (const char *)src;
599
+ char *c = utf8_null;
600
+ size_t bytes = 0;
601
+
602
+ // Find the end of the string or stop when n is reached
603
+ while ('\0' != s[bytes] && bytes < n) {
604
+ bytes++;
605
+ }
606
+
607
+ // In case bytes is actually less than n, we need to set it
608
+ // to be used later in the copy byte by byte.
609
+ n = bytes;
610
+
611
+ c = (char *)malloc(bytes + 1);
612
+ if (utf8_null == c) {
613
+ // out of memory so we bail
614
+ return utf8_null;
615
+ }
616
+
617
+ bytes = 0;
618
+
619
+ // copy src byte-by-byte into our new utf8 string
620
+ while ('\0' != s[bytes] && bytes < n) {
621
+ c[bytes] = s[bytes];
622
+ bytes++;
623
+ }
624
+
625
+ // append null terminating byte
626
+ c[bytes] = '\0';
627
+ return c;
628
+ }
629
+
630
+ void *utf8rchr(const void *src, int chr) {
631
+ const char *s = (const char *)src;
632
+ const char *match = utf8_null;
633
+ char c[5] = {'\0', '\0', '\0', '\0', '\0'};
634
+
635
+ if (0 == chr) {
636
+ // being asked to return position of null terminating byte, so
637
+ // just run s to the end, and return!
638
+ while ('\0' != *s) {
639
+ s++;
640
+ }
641
+ return (void *)s;
642
+ } else if (0 == ((int)0xffffff80 & chr)) {
643
+ // 1-byte/7-bit ascii
644
+ // (0b0xxxxxxx)
645
+ c[0] = (char)chr;
646
+ } else if (0 == ((int)0xfffff800 & chr)) {
647
+ // 2-byte/11-bit utf8 code point
648
+ // (0b110xxxxx 0b10xxxxxx)
649
+ c[0] = 0xc0 | (char)(chr >> 6);
650
+ c[1] = 0x80 | (char)(chr & 0x3f);
651
+ } else if (0 == ((int)0xffff0000 & chr)) {
652
+ // 3-byte/16-bit utf8 code point
653
+ // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
654
+ c[0] = 0xe0 | (char)(chr >> 12);
655
+ c[1] = 0x80 | (char)((chr >> 6) & 0x3f);
656
+ c[2] = 0x80 | (char)(chr & 0x3f);
657
+ } else { // if (0 == ((int)0xffe00000 & chr)) {
658
+ // 4-byte/21-bit utf8 code point
659
+ // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
660
+ c[0] = 0xf0 | (char)(chr >> 18);
661
+ c[1] = 0x80 | (char)((chr >> 12) & 0x3f);
662
+ c[2] = 0x80 | (char)((chr >> 6) & 0x3f);
663
+ c[3] = 0x80 | (char)(chr & 0x3f);
664
+ }
665
+
666
+ // we've created a 2 utf8 codepoint string in c that is
667
+ // the utf8 character asked for by chr, and a null
668
+ // terminating byte
669
+
670
+ while ('\0' != *s) {
671
+ size_t offset = 0;
672
+
673
+ while (s[offset] == c[offset]) {
674
+ offset++;
675
+ }
676
+
677
+ if ('\0' == c[offset]) {
678
+ // we found a matching utf8 code point
679
+ match = s;
680
+ s += offset;
681
+ } else {
682
+ s += offset;
683
+
684
+ // need to march s along to next utf8 codepoint start
685
+ // (the next byte that doesn't match 0b10xxxxxx)
686
+ if ('\0' != *s) {
687
+ do {
688
+ s++;
689
+ } while (0x80 == (0xc0 & *s));
690
+ }
691
+ }
692
+ }
693
+
694
+ // return the last match we found (or 0 if no match was found)
695
+ return (void *)match;
696
+ }
697
+
698
+ void *utf8pbrk(const void *str, const void *accept) {
699
+ const char *s = (const char *)str;
700
+
701
+ while ('\0' != *s) {
702
+ const char *a = (const char *)accept;
703
+ size_t offset = 0;
704
+
705
+ while ('\0' != *a) {
706
+ // checking that if *a is the start of a utf8 codepoint
707
+ // (it is not 0b10xxxxxx) and we have successfully matched
708
+ // a previous character (0 < offset) - we found a match
709
+ if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
710
+ return (void *)s;
711
+ } else {
712
+ if (*a == s[offset]) {
713
+ // part of a utf8 codepoint matched, so move our checking
714
+ // onwards to the next byte
715
+ offset++;
716
+ a++;
717
+ } else {
718
+ // r could be in the middle of an unmatching utf8 code point,
719
+ // so we need to march it on to the next character beginning,
720
+
721
+ do {
722
+ a++;
723
+ } while (0x80 == (0xc0 & *a));
724
+
725
+ // reset offset too as we found a mismatch
726
+ offset = 0;
727
+ }
728
+ }
729
+ }
730
+
731
+ // we found a match on the last utf8 codepoint
732
+ if (0 < offset) {
733
+ return (void *)s;
734
+ }
735
+
736
+ // the current utf8 codepoint in src did not match accept, but src
737
+ // could have been partway through a utf8 codepoint, so we need to
738
+ // march it onto the next utf8 codepoint starting byte
739
+ do {
740
+ s++;
741
+ } while ((0x80 == (0xc0 & *s)));
742
+ }
743
+
744
+ return utf8_null;
745
+ }
746
+
747
// Number of bytes occupied by the null-terminated utf8 string str,
// counting the terminating byte.
size_t utf8size(const void *str) {
  const char *const begin = (const char *)str;
  const char *end = begin;

  // run to the terminator
  while ('\0' != *end) {
    end++;
  }

  // the extra 1 accounts for the terminating byte itself
  return (size_t)(end - begin) + 1;
}
758
+
759
// Count the utf8 codepoints at the start of src that all appear in the utf8
// string accept.
//
// Returns the number of leading codepoints of src before the first one
// that is not present in accept, or the total number of codepoints in src
// if every one of them is.
size_t utf8spn(const void *src, const void *accept) {
  const char *s = (const char *)src;
  size_t chars = 0;

  while ('\0' != *s) {
    const char *a = (const char *)accept;
    size_t offset = 0;

    while ('\0' != *a) {
      // *a starts a new codepoint (it is not 0b10xxxxxx) and every byte of
      // the previous accept codepoint matched (0 < offset): found a match,
      // no need to look at the rest of accept
      if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
        break;
      }

      if (*a == s[offset]) {
        // part of a codepoint matched, move on to its next byte
        offset++;
        a++;
      } else {
        // a may be in the middle of a non-matching codepoint, so march it
        // on to the start of the next one
        do {
          a++;
        } while (0x80 == (0xc0 & *a));

        // a mismatch discards any partial match
        offset = 0;
      }
    }

    // offset is non-zero exactly when a codepoint of accept matched, either
    // via the break above or because the match was on the LAST codepoint of
    // accept and the loop ended on its terminator. With no match, the run
    // of accepted codepoints ends here.
    if (0 == offset) {
      return chars;
    }

    // count the matched codepoint and step over its bytes
    chars++;
    s += offset;
  }

  return chars;
}
804
+
805
+ void *utf8str(const void *haystack, const void *needle) {
806
+ const char *h = (const char *)haystack;
807
+
808
+ // if needle has no utf8 codepoints before the null terminating
809
+ // byte then return haystack
810
+ if ('\0' == *((const char *)needle)) {
811
+ return (void *)haystack;
812
+ }
813
+
814
+ while ('\0' != *h) {
815
+ const char *maybeMatch = h;
816
+ const char *n = (const char *)needle;
817
+
818
+ while (*h == *n && (*h != '\0' && *n != '\0')) {
819
+ n++;
820
+ h++;
821
+ }
822
+
823
+ if ('\0' == *n) {
824
+ // we found the whole utf8 string for needle in haystack at
825
+ // maybeMatch, so return it
826
+ return (void *)maybeMatch;
827
+ } else {
828
+ // h could be in the middle of an unmatching utf8 codepoint,
829
+ // so we need to march it on to the next character beginning,
830
+ if ('\0' != *h) {
831
+ do {
832
+ h++;
833
+ } while (0x80 == (0xc0 & *h));
834
+ }
835
+ }
836
+ }
837
+
838
+ // no match
839
+ return utf8_null;
840
+ }
841
+
842
+ void *utf8casestr(const void *haystack, const void *needle) {
843
+ const void *h = haystack;
844
+
845
+ // if needle has no utf8 codepoints before the null terminating
846
+ // byte then return haystack
847
+ if ('\0' == *((const char *)needle)) {
848
+ return (void *)haystack;
849
+ }
850
+
851
+ for (;;) {
852
+ const void *maybeMatch = h;
853
+ const void *n = needle;
854
+ utf8_int32_t h_cp, n_cp;
855
+
856
+ h = utf8codepoint(h, &h_cp);
857
+ n = utf8codepoint(n, &n_cp);
858
+
859
+ while ((0 != h_cp) && (0 != n_cp)) {
860
+ h_cp = utf8lwrcodepoint(h_cp);
861
+ n_cp = utf8lwrcodepoint(n_cp);
862
+
863
+ // if we find a mismatch, bail out!
864
+ if (h_cp != n_cp) {
865
+ break;
866
+ }
867
+
868
+ h = utf8codepoint(h, &h_cp);
869
+ n = utf8codepoint(n, &n_cp);
870
+ }
871
+
872
+ if (0 == n_cp) {
873
+ // we found the whole utf8 string for needle in haystack at
874
+ // maybeMatch, so return it
875
+ return (void *)maybeMatch;
876
+ }
877
+
878
+ if (0 == h_cp) {
879
+ // no match
880
+ return utf8_null;
881
+ }
882
+ }
883
+ }
884
+
885
+ void *utf8valid(const void *str) {
886
+ const char *s = (const char *)str;
887
+
888
+ while ('\0' != *s) {
889
+ if (0xf0 == (0xf8 & *s)) {
890
+ // ensure each of the 3 following bytes in this 4-byte
891
+ // utf8 codepoint began with 0b10xxxxxx
892
+ if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
893
+ (0x80 != (0xc0 & s[3]))) {
894
+ return (void *)s;
895
+ }
896
+
897
+ // ensure that our utf8 codepoint ended after 4 bytes
898
+ if (0x80 == (0xc0 & s[4])) {
899
+ return (void *)s;
900
+ }
901
+
902
+ // ensure that the top 5 bits of this 4-byte utf8
903
+ // codepoint were not 0, as then we could have used
904
+ // one of the smaller encodings
905
+ if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
906
+ return (void *)s;
907
+ }
908
+
909
+ // 4-byte utf8 code point (began with 0b11110xxx)
910
+ s += 4;
911
+ } else if (0xe0 == (0xf0 & *s)) {
912
+ // ensure each of the 2 following bytes in this 3-byte
913
+ // utf8 codepoint began with 0b10xxxxxx
914
+ if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
915
+ return (void *)s;
916
+ }
917
+
918
+ // ensure that our utf8 codepoint ended after 3 bytes
919
+ if (0x80 == (0xc0 & s[3])) {
920
+ return (void *)s;
921
+ }
922
+
923
+ // ensure that the top 5 bits of this 3-byte utf8
924
+ // codepoint were not 0, as then we could have used
925
+ // one of the smaller encodings
926
+ if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
927
+ return (void *)s;
928
+ }
929
+
930
+ // 3-byte utf8 code point (began with 0b1110xxxx)
931
+ s += 3;
932
+ } else if (0xc0 == (0xe0 & *s)) {
933
+ // ensure the 1 following byte in this 2-byte
934
+ // utf8 codepoint began with 0b10xxxxxx
935
+ if (0x80 != (0xc0 & s[1])) {
936
+ return (void *)s;
937
+ }
938
+
939
+ // ensure that our utf8 codepoint ended after 2 bytes
940
+ if (0x80 == (0xc0 & s[2])) {
941
+ return (void *)s;
942
+ }
943
+
944
+ // ensure that the top 4 bits of this 2-byte utf8
945
+ // codepoint were not 0, as then we could have used
946
+ // one of the smaller encodings
947
+ if (0 == (0x1e & s[0])) {
948
+ return (void *)s;
949
+ }
950
+
951
+ // 2-byte utf8 code point (began with 0b110xxxxx)
952
+ s += 2;
953
+ } else if (0x00 == (0x80 & *s)) {
954
+ // 1-byte ascii (began with 0b0xxxxxxx)
955
+ s += 1;
956
+ } else {
957
+ // we have an invalid 0b1xxxxxxx utf8 code point entry
958
+ return (void *)s;
959
+ }
960
+ }
961
+
962
+ return utf8_null;
963
+ }
964
+
965
+ void *utf8codepoint(const void *utf8_restrict str,
966
+ utf8_int32_t *utf8_restrict out_codepoint) {
967
+ const char *s = (const char *)str;
968
+
969
+ if (0xf0 == (0xf8 & s[0])) {
970
+ // 4 byte utf8 codepoint
971
+ *out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) |
972
+ ((0x3f & s[2]) << 6) | (0x3f & s[3]);
973
+ s += 4;
974
+ } else if (0xe0 == (0xf0 & s[0])) {
975
+ // 3 byte utf8 codepoint
976
+ *out_codepoint =
977
+ ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
978
+ s += 3;
979
+ } else if (0xc0 == (0xe0 & s[0])) {
980
+ // 2 byte utf8 codepoint
981
+ *out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
982
+ s += 2;
983
+ } else {
984
+ // 1 byte utf8 codepoint otherwise
985
+ *out_codepoint = s[0];
986
+ s += 1;
987
+ }
988
+
989
+ return (void *)s;
990
+ }
991
+
992
+ size_t utf8codepointsize(utf8_int32_t chr) {
993
+ if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
994
+ return 1;
995
+ } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
996
+ return 2;
997
+ } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
998
+ return 3;
999
+ } else { // if (0 == ((int)0xffe00000 & chr)) {
1000
+ return 4;
1001
+ }
1002
+ }
1003
+
1004
+ void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n) {
1005
+ char *s = (char *)str;
1006
+
1007
+ if (0 == ((utf8_int32_t)0xffffff80 & chr)) {
1008
+ // 1-byte/7-bit ascii
1009
+ // (0b0xxxxxxx)
1010
+ if (n < 1) {
1011
+ return utf8_null;
1012
+ }
1013
+ s[0] = (char)chr;
1014
+ s += 1;
1015
+ } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) {
1016
+ // 2-byte/11-bit utf8 code point
1017
+ // (0b110xxxxx 0b10xxxxxx)
1018
+ if (n < 2) {
1019
+ return utf8_null;
1020
+ }
1021
+ s[0] = 0xc0 | (char)(chr >> 6);
1022
+ s[1] = 0x80 | (char)(chr & 0x3f);
1023
+ s += 2;
1024
+ } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) {
1025
+ // 3-byte/16-bit utf8 code point
1026
+ // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx)
1027
+ if (n < 3) {
1028
+ return utf8_null;
1029
+ }
1030
+ s[0] = 0xe0 | (char)(chr >> 12);
1031
+ s[1] = 0x80 | (char)((chr >> 6) & 0x3f);
1032
+ s[2] = 0x80 | (char)(chr & 0x3f);
1033
+ s += 3;
1034
+ } else { // if (0 == ((int)0xffe00000 & chr)) {
1035
+ // 4-byte/21-bit utf8 code point
1036
+ // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
1037
+ if (n < 4) {
1038
+ return utf8_null;
1039
+ }
1040
+ s[0] = 0xf0 | (char)(chr >> 18);
1041
+ s[1] = 0x80 | (char)((chr >> 12) & 0x3f);
1042
+ s[2] = 0x80 | (char)((chr >> 6) & 0x3f);
1043
+ s[3] = 0x80 | (char)(chr & 0x3f);
1044
+ s += 4;
1045
+ }
1046
+
1047
+ return s;
1048
+ }
1049
+
1050
+ int utf8islower(utf8_int32_t chr) { return chr != utf8uprcodepoint(chr); }
1051
+
1052
+ int utf8isupper(utf8_int32_t chr) { return chr != utf8lwrcodepoint(chr); }
1053
+
1054
+ void utf8lwr(void *utf8_restrict str) {
1055
+ void *p, *pn;
1056
+ utf8_int32_t cp;
1057
+
1058
+ p = (char *)str;
1059
+ pn = utf8codepoint(p, &cp);
1060
+
1061
+ while (cp != 0) {
1062
+ const utf8_int32_t lwr_cp = utf8lwrcodepoint(cp);
1063
+ const size_t size = utf8codepointsize(lwr_cp);
1064
+
1065
+ if (lwr_cp != cp) {
1066
+ utf8catcodepoint(p, lwr_cp, size);
1067
+ }
1068
+
1069
+ p = pn;
1070
+ pn = utf8codepoint(p, &cp);
1071
+ }
1072
+ }
1073
+
1074
+ void utf8upr(void *utf8_restrict str) {
1075
+ void *p, *pn;
1076
+ utf8_int32_t cp;
1077
+
1078
+ p = (char *)str;
1079
+ pn = utf8codepoint(p, &cp);
1080
+
1081
+ while (cp != 0) {
1082
+ const utf8_int32_t lwr_cp = utf8uprcodepoint(cp);
1083
+ const size_t size = utf8codepointsize(lwr_cp);
1084
+
1085
+ if (lwr_cp != cp) {
1086
+ utf8catcodepoint(p, lwr_cp, size);
1087
+ }
1088
+
1089
+ p = pn;
1090
+ pn = utf8codepoint(p, &cp);
1091
+ }
1092
+ }
1093
+
1094
+ utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) {
1095
+ if (((0x0041 <= cp) && (0x005a >= cp)) ||
1096
+ ((0x00c0 <= cp) && (0x00d6 >= cp)) ||
1097
+ ((0x00d8 <= cp) && (0x00de >= cp)) ||
1098
+ ((0x0391 <= cp) && (0x03a1 >= cp)) ||
1099
+ ((0x03a3 <= cp) && (0x03ab >= cp))) {
1100
+ cp += 32;
1101
+ } else if (((0x0100 <= cp) && (0x012f >= cp)) ||
1102
+ ((0x0132 <= cp) && (0x0137 >= cp)) ||
1103
+ ((0x014a <= cp) && (0x0177 >= cp)) ||
1104
+ ((0x0182 <= cp) && (0x0185 >= cp)) ||
1105
+ ((0x01a0 <= cp) && (0x01a5 >= cp)) ||
1106
+ ((0x01de <= cp) && (0x01ef >= cp)) ||
1107
+ ((0x01f8 <= cp) && (0x021f >= cp)) ||
1108
+ ((0x0222 <= cp) && (0x0233 >= cp)) ||
1109
+ ((0x0246 <= cp) && (0x024f >= cp)) ||
1110
+ ((0x03d8 <= cp) && (0x03ef >= cp))) {
1111
+ cp |= 0x1;
1112
+ } else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
1113
+ ((0x0179 <= cp) && (0x017e >= cp)) ||
1114
+ ((0x01af <= cp) && (0x01b0 >= cp)) ||
1115
+ ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
1116
+ ((0x01cd <= cp) && (0x01dc >= cp))) {
1117
+ cp += 1;
1118
+ cp &= ~0x1;
1119
+ } else {
1120
+ switch (cp) {
1121
+ default: break;
1122
+ case 0x0178: cp = 0x00ff; break;
1123
+ case 0x0243: cp = 0x0180; break;
1124
+ case 0x018e: cp = 0x01dd; break;
1125
+ case 0x023d: cp = 0x019a; break;
1126
+ case 0x0220: cp = 0x019e; break;
1127
+ case 0x01b7: cp = 0x0292; break;
1128
+ case 0x01c4: cp = 0x01c6; break;
1129
+ case 0x01c7: cp = 0x01c9; break;
1130
+ case 0x01ca: cp = 0x01cc; break;
1131
+ case 0x01f1: cp = 0x01f3; break;
1132
+ case 0x01f7: cp = 0x01bf; break;
1133
+ case 0x0187: cp = 0x0188; break;
1134
+ case 0x018b: cp = 0x018c; break;
1135
+ case 0x0191: cp = 0x0192; break;
1136
+ case 0x0198: cp = 0x0199; break;
1137
+ case 0x01a7: cp = 0x01a8; break;
1138
+ case 0x01ac: cp = 0x01ad; break;
1139
+ case 0x01af: cp = 0x01b0; break;
1140
+ case 0x01b8: cp = 0x01b9; break;
1141
+ case 0x01bc: cp = 0x01bd; break;
1142
+ case 0x01f4: cp = 0x01f5; break;
1143
+ case 0x023b: cp = 0x023c; break;
1144
+ case 0x0241: cp = 0x0242; break;
1145
+ case 0x03fd: cp = 0x037b; break;
1146
+ case 0x03fe: cp = 0x037c; break;
1147
+ case 0x03ff: cp = 0x037d; break;
1148
+ case 0x037f: cp = 0x03f3; break;
1149
+ case 0x0386: cp = 0x03ac; break;
1150
+ case 0x0388: cp = 0x03ad; break;
1151
+ case 0x0389: cp = 0x03ae; break;
1152
+ case 0x038a: cp = 0x03af; break;
1153
+ case 0x038c: cp = 0x03cc; break;
1154
+ case 0x038e: cp = 0x03cd; break;
1155
+ case 0x038f: cp = 0x03ce; break;
1156
+ case 0x0370: cp = 0x0371; break;
1157
+ case 0x0372: cp = 0x0373; break;
1158
+ case 0x0376: cp = 0x0377; break;
1159
+ case 0x03f4: cp = 0x03d1; break;
1160
+ case 0x03cf: cp = 0x03d7; break;
1161
+ case 0x03f9: cp = 0x03f2; break;
1162
+ case 0x03f7: cp = 0x03f8; break;
1163
+ case 0x03fa: cp = 0x03fb; break;
1164
+ };
1165
+ }
1166
+
1167
+ return cp;
1168
+ }
1169
+
1170
+ utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) {
1171
+ if (((0x0061 <= cp) && (0x007a >= cp)) ||
1172
+ ((0x00e0 <= cp) && (0x00f6 >= cp)) ||
1173
+ ((0x00f8 <= cp) && (0x00fe >= cp)) ||
1174
+ ((0x03b1 <= cp) && (0x03c1 >= cp)) ||
1175
+ ((0x03c3 <= cp) && (0x03cb >= cp))) {
1176
+ cp -= 32;
1177
+ } else if (((0x0100 <= cp) && (0x012f >= cp)) ||
1178
+ ((0x0132 <= cp) && (0x0137 >= cp)) ||
1179
+ ((0x014a <= cp) && (0x0177 >= cp)) ||
1180
+ ((0x0182 <= cp) && (0x0185 >= cp)) ||
1181
+ ((0x01a0 <= cp) && (0x01a5 >= cp)) ||
1182
+ ((0x01de <= cp) && (0x01ef >= cp)) ||
1183
+ ((0x01f8 <= cp) && (0x021f >= cp)) ||
1184
+ ((0x0222 <= cp) && (0x0233 >= cp)) ||
1185
+ ((0x0246 <= cp) && (0x024f >= cp)) ||
1186
+ ((0x03d8 <= cp) && (0x03ef >= cp))) {
1187
+ cp &= ~0x1;
1188
+ } else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
1189
+ ((0x0179 <= cp) && (0x017e >= cp)) ||
1190
+ ((0x01af <= cp) && (0x01b0 >= cp)) ||
1191
+ ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
1192
+ ((0x01cd <= cp) && (0x01dc >= cp))) {
1193
+ cp -= 1;
1194
+ cp |= 0x1;
1195
+ } else {
1196
+ switch (cp) {
1197
+ default: break;
1198
+ case 0x00ff: cp = 0x0178; break;
1199
+ case 0x0180: cp = 0x0243; break;
1200
+ case 0x01dd: cp = 0x018e; break;
1201
+ case 0x019a: cp = 0x023d; break;
1202
+ case 0x019e: cp = 0x0220; break;
1203
+ case 0x0292: cp = 0x01b7; break;
1204
+ case 0x01c6: cp = 0x01c4; break;
1205
+ case 0x01c9: cp = 0x01c7; break;
1206
+ case 0x01cc: cp = 0x01ca; break;
1207
+ case 0x01f3: cp = 0x01f1; break;
1208
+ case 0x01bf: cp = 0x01f7; break;
1209
+ case 0x0188: cp = 0x0187; break;
1210
+ case 0x018c: cp = 0x018b; break;
1211
+ case 0x0192: cp = 0x0191; break;
1212
+ case 0x0199: cp = 0x0198; break;
1213
+ case 0x01a8: cp = 0x01a7; break;
1214
+ case 0x01ad: cp = 0x01ac; break;
1215
+ case 0x01b0: cp = 0x01af; break;
1216
+ case 0x01b9: cp = 0x01b8; break;
1217
+ case 0x01bd: cp = 0x01bc; break;
1218
+ case 0x01f5: cp = 0x01f4; break;
1219
+ case 0x023c: cp = 0x023b; break;
1220
+ case 0x0242: cp = 0x0241; break;
1221
+ case 0x037b: cp = 0x03fd; break;
1222
+ case 0x037c: cp = 0x03fe; break;
1223
+ case 0x037d: cp = 0x03ff; break;
1224
+ case 0x03f3: cp = 0x037f; break;
1225
+ case 0x03ac: cp = 0x0386; break;
1226
+ case 0x03ad: cp = 0x0388; break;
1227
+ case 0x03ae: cp = 0x0389; break;
1228
+ case 0x03af: cp = 0x038a; break;
1229
+ case 0x03cc: cp = 0x038c; break;
1230
+ case 0x03cd: cp = 0x038e; break;
1231
+ case 0x03ce: cp = 0x038f; break;
1232
+ case 0x0371: cp = 0x0370; break;
1233
+ case 0x0373: cp = 0x0372; break;
1234
+ case 0x0377: cp = 0x0376; break;
1235
+ case 0x03d1: cp = 0x03f4; break;
1236
+ case 0x03d7: cp = 0x03cf; break;
1237
+ case 0x03f2: cp = 0x03f9; break;
1238
+ case 0x03f8: cp = 0x03f7; break;
1239
+ case 0x03fb: cp = 0x03fa; break;
1240
+ };
1241
+ }
1242
+
1243
+ return cp;
1244
+ }
1245
+
1246
+ #undef utf8_restrict
1247
+ #undef utf8_null
1248
+
1249
+ #ifdef __cplusplus
1250
+ } // extern "C"
1251
+ #endif
1252
+
1253
+ #if defined(__clang__)
1254
+ #pragma clang diagnostic pop
1255
+ #endif
1256
+
1257
+ #endif // SHEREDOM_UTF8_H_INCLUDED