react-native-quick-crypto 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (815):
  1. package/QuickCrypto.podspec +1 -0
  2. package/android/CMakeLists.txt +4 -0
  3. package/android/build.gradle +5 -1
  4. package/cpp/argon2/HybridArgon2.cpp +10 -3
  5. package/cpp/blake3/HybridBlake3.cpp +5 -3
  6. package/cpp/cipher/CCMCipher.cpp +35 -26
  7. package/cpp/cipher/CCMCipher.hpp +2 -4
  8. package/cpp/cipher/ChaCha20Cipher.cpp +19 -27
  9. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  10. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +41 -36
  11. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  12. package/cpp/cipher/GCMCipher.cpp +17 -20
  13. package/cpp/cipher/HybridCipher.cpp +44 -47
  14. package/cpp/cipher/HybridCipher.hpp +17 -1
  15. package/cpp/cipher/HybridRsaCipher.cpp +93 -56
  16. package/cpp/cipher/OCBCipher.cpp +5 -5
  17. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +27 -32
  18. package/cpp/cipher/XSalsa20Cipher.cpp +78 -16
  19. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  20. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +32 -41
  21. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  22. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  23. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  24. package/cpp/ecdh/HybridECDH.cpp +23 -0
  25. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  26. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  27. package/cpp/hash/HybridHash.cpp +5 -7
  28. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  29. package/cpp/hmac/HybridHmac.cpp +4 -6
  30. package/cpp/keys/HybridKeyObjectHandle.cpp +630 -2
  31. package/cpp/keys/HybridKeyObjectHandle.hpp +21 -1
  32. package/cpp/kmac/HybridKmac.cpp +4 -4
  33. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  34. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  35. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  36. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  37. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  38. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  39. package/cpp/sign/HybridSignHandle.cpp +47 -72
  40. package/cpp/sign/HybridVerifyHandle.cpp +47 -67
  41. package/cpp/slhdsa/HybridSlhDsaKeyPair.cpp +245 -0
  42. package/cpp/slhdsa/HybridSlhDsaKeyPair.hpp +48 -0
  43. package/cpp/turboshake/HybridTurboShake.cpp +379 -0
  44. package/cpp/turboshake/HybridTurboShake.hpp +28 -0
  45. package/cpp/utils/HybridUtils.cpp +195 -43
  46. package/cpp/utils/HybridUtils.hpp +9 -2
  47. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  48. package/deps/blake3/README.md +6 -7
  49. package/deps/blake3/c/blake3.c +3 -2
  50. package/deps/blake3/c/blake3.h +2 -2
  51. package/deps/blake3/c/blake3_dispatch.c +2 -2
  52. package/deps/blake3/c/blake3_impl.h +1 -1
  53. package/deps/blake3/c/blake3_neon.c +5 -4
  54. package/deps/ncrypto/include/ncrypto/version.h +2 -2
  55. package/deps/ncrypto/include/ncrypto.h +9 -2
  56. package/deps/ncrypto/src/ncrypto.cpp +130 -35
  57. package/lib/commonjs/argon2.js +51 -2
  58. package/lib/commonjs/argon2.js.map +1 -1
  59. package/lib/commonjs/cipher.js +109 -11
  60. package/lib/commonjs/cipher.js.map +1 -1
  61. package/lib/commonjs/dhKeyPair.js +3 -0
  62. package/lib/commonjs/dhKeyPair.js.map +1 -1
  63. package/lib/commonjs/dsa.js +11 -2
  64. package/lib/commonjs/dsa.js.map +1 -1
  65. package/lib/commonjs/ec.js +37 -30
  66. package/lib/commonjs/ec.js.map +1 -1
  67. package/lib/commonjs/ed.js +60 -6
  68. package/lib/commonjs/ed.js.map +1 -1
  69. package/lib/commonjs/hash.js +67 -10
  70. package/lib/commonjs/hash.js.map +1 -1
  71. package/lib/commonjs/hkdf.js +33 -6
  72. package/lib/commonjs/hkdf.js.map +1 -1
  73. package/lib/commonjs/hmac.js +15 -5
  74. package/lib/commonjs/hmac.js.map +1 -1
  75. package/lib/commonjs/keys/classes.js +33 -7
  76. package/lib/commonjs/keys/classes.js.map +1 -1
  77. package/lib/commonjs/keys/generateKeyPair.js +85 -4
  78. package/lib/commonjs/keys/generateKeyPair.js.map +1 -1
  79. package/lib/commonjs/keys/index.js +50 -2
  80. package/lib/commonjs/keys/index.js.map +1 -1
  81. package/lib/commonjs/keys/publicCipher.js +10 -4
  82. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  83. package/lib/commonjs/keys/signVerify.js +9 -2
  84. package/lib/commonjs/keys/signVerify.js.map +1 -1
  85. package/lib/commonjs/keys/utils.js +59 -1
  86. package/lib/commonjs/keys/utils.js.map +1 -1
  87. package/lib/commonjs/random.js +74 -11
  88. package/lib/commonjs/random.js.map +1 -1
  89. package/lib/commonjs/rsa.js +15 -5
  90. package/lib/commonjs/rsa.js.map +1 -1
  91. package/lib/commonjs/scrypt.js +47 -6
  92. package/lib/commonjs/scrypt.js.map +1 -1
  93. package/lib/commonjs/slhdsa.js +70 -0
  94. package/lib/commonjs/slhdsa.js.map +1 -0
  95. package/lib/commonjs/specs/slhDsaKeyPair.nitro.js +6 -0
  96. package/lib/commonjs/specs/slhDsaKeyPair.nitro.js.map +1 -0
  97. package/lib/commonjs/specs/turboshake.nitro.js +6 -0
  98. package/lib/commonjs/specs/turboshake.nitro.js.map +1 -0
  99. package/lib/commonjs/subtle.js +975 -253
  100. package/lib/commonjs/subtle.js.map +1 -1
  101. package/lib/commonjs/utils/cipher.js +18 -7
  102. package/lib/commonjs/utils/cipher.js.map +1 -1
  103. package/lib/commonjs/utils/conversion.js +86 -28
  104. package/lib/commonjs/utils/conversion.js.map +1 -1
  105. package/lib/commonjs/utils/errors.js +63 -4
  106. package/lib/commonjs/utils/errors.js.map +1 -1
  107. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  108. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  109. package/lib/commonjs/utils/types.js.map +1 -1
  110. package/lib/commonjs/utils/validation.js +46 -0
  111. package/lib/commonjs/utils/validation.js.map +1 -1
  112. package/lib/commonjs/x509certificate.js +6 -6
  113. package/lib/commonjs/x509certificate.js.map +1 -1
  114. package/lib/module/argon2.js +51 -2
  115. package/lib/module/argon2.js.map +1 -1
  116. package/lib/module/cipher.js +109 -11
  117. package/lib/module/cipher.js.map +1 -1
  118. package/lib/module/dhKeyPair.js +3 -0
  119. package/lib/module/dhKeyPair.js.map +1 -1
  120. package/lib/module/dsa.js +11 -2
  121. package/lib/module/dsa.js.map +1 -1
  122. package/lib/module/ec.js +38 -31
  123. package/lib/module/ec.js.map +1 -1
  124. package/lib/module/ed.js +61 -7
  125. package/lib/module/ed.js.map +1 -1
  126. package/lib/module/hash.js +67 -10
  127. package/lib/module/hash.js.map +1 -1
  128. package/lib/module/hkdf.js +33 -6
  129. package/lib/module/hkdf.js.map +1 -1
  130. package/lib/module/hmac.js +15 -5
  131. package/lib/module/hmac.js.map +1 -1
  132. package/lib/module/keys/classes.js +31 -5
  133. package/lib/module/keys/classes.js.map +1 -1
  134. package/lib/module/keys/generateKeyPair.js +86 -5
  135. package/lib/module/keys/generateKeyPair.js.map +1 -1
  136. package/lib/module/keys/index.js +50 -2
  137. package/lib/module/keys/index.js.map +1 -1
  138. package/lib/module/keys/publicCipher.js +10 -4
  139. package/lib/module/keys/publicCipher.js.map +1 -1
  140. package/lib/module/keys/signVerify.js +9 -2
  141. package/lib/module/keys/signVerify.js.map +1 -1
  142. package/lib/module/keys/utils.js +57 -1
  143. package/lib/module/keys/utils.js.map +1 -1
  144. package/lib/module/random.js +74 -12
  145. package/lib/module/random.js.map +1 -1
  146. package/lib/module/rsa.js +14 -4
  147. package/lib/module/rsa.js.map +1 -1
  148. package/lib/module/scrypt.js +47 -6
  149. package/lib/module/scrypt.js.map +1 -1
  150. package/lib/module/slhdsa.js +64 -0
  151. package/lib/module/slhdsa.js.map +1 -0
  152. package/lib/module/specs/slhDsaKeyPair.nitro.js +4 -0
  153. package/lib/module/specs/slhDsaKeyPair.nitro.js.map +1 -0
  154. package/lib/module/specs/turboshake.nitro.js +4 -0
  155. package/lib/module/specs/turboshake.nitro.js.map +1 -0
  156. package/lib/module/subtle.js +976 -254
  157. package/lib/module/subtle.js.map +1 -1
  158. package/lib/module/utils/cipher.js +18 -7
  159. package/lib/module/utils/cipher.js.map +1 -1
  160. package/lib/module/utils/conversion.js +84 -28
  161. package/lib/module/utils/conversion.js.map +1 -1
  162. package/lib/module/utils/errors.js +61 -4
  163. package/lib/module/utils/errors.js.map +1 -1
  164. package/lib/module/utils/timingSafeEqual.js +8 -3
  165. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  166. package/lib/module/utils/types.js.map +1 -1
  167. package/lib/module/utils/validation.js +44 -0
  168. package/lib/module/utils/validation.js.map +1 -1
  169. package/lib/module/x509certificate.js +6 -6
  170. package/lib/module/x509certificate.js.map +1 -1
  171. package/lib/typescript/argon2.d.ts.map +1 -1
  172. package/lib/typescript/cipher.d.ts +2 -2
  173. package/lib/typescript/cipher.d.ts.map +1 -1
  174. package/lib/typescript/dhKeyPair.d.ts.map +1 -1
  175. package/lib/typescript/dsa.d.ts.map +1 -1
  176. package/lib/typescript/ec.d.ts.map +1 -1
  177. package/lib/typescript/ed.d.ts.map +1 -1
  178. package/lib/typescript/hash.d.ts +2 -2
  179. package/lib/typescript/hash.d.ts.map +1 -1
  180. package/lib/typescript/hkdf.d.ts.map +1 -1
  181. package/lib/typescript/hmac.d.ts +2 -2
  182. package/lib/typescript/hmac.d.ts.map +1 -1
  183. package/lib/typescript/index.d.ts +13 -8
  184. package/lib/typescript/index.d.ts.map +1 -1
  185. package/lib/typescript/keys/classes.d.ts +10 -1
  186. package/lib/typescript/keys/classes.d.ts.map +1 -1
  187. package/lib/typescript/keys/generateKeyPair.d.ts +12 -1
  188. package/lib/typescript/keys/generateKeyPair.d.ts.map +1 -1
  189. package/lib/typescript/keys/index.d.ts +3 -1
  190. package/lib/typescript/keys/index.d.ts.map +1 -1
  191. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  192. package/lib/typescript/keys/signVerify.d.ts.map +1 -1
  193. package/lib/typescript/keys/utils.d.ts +21 -4
  194. package/lib/typescript/keys/utils.d.ts.map +1 -1
  195. package/lib/typescript/random.d.ts +5 -1
  196. package/lib/typescript/random.d.ts.map +1 -1
  197. package/lib/typescript/rsa.d.ts.map +1 -1
  198. package/lib/typescript/scrypt.d.ts.map +1 -1
  199. package/lib/typescript/slhdsa.d.ts +19 -0
  200. package/lib/typescript/slhdsa.d.ts.map +1 -0
  201. package/lib/typescript/specs/keyObjectHandle.nitro.d.ts +9 -0
  202. package/lib/typescript/specs/keyObjectHandle.nitro.d.ts.map +1 -1
  203. package/lib/typescript/specs/slhDsaKeyPair.nitro.d.ts +16 -0
  204. package/lib/typescript/specs/slhDsaKeyPair.nitro.d.ts.map +1 -0
  205. package/lib/typescript/specs/turboshake.nitro.d.ts +11 -0
  206. package/lib/typescript/specs/turboshake.nitro.d.ts.map +1 -0
  207. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  208. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  209. package/lib/typescript/subtle.d.ts +3 -2
  210. package/lib/typescript/subtle.d.ts.map +1 -1
  211. package/lib/typescript/utils/cipher.d.ts +13 -1
  212. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  213. package/lib/typescript/utils/conversion.d.ts +13 -9
  214. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  215. package/lib/typescript/utils/errors.d.ts +12 -0
  216. package/lib/typescript/utils/errors.d.ts.map +1 -1
  217. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  218. package/lib/typescript/utils/types.d.ts +32 -15
  219. package/lib/typescript/utils/types.d.ts.map +1 -1
  220. package/lib/typescript/utils/validation.d.ts +3 -1
  221. package/lib/typescript/utils/validation.d.ts.map +1 -1
  222. package/lib/typescript/x509certificate.d.ts.map +1 -1
  223. package/nitrogen/generated/android/QuickCrypto+autolinking.cmake +2 -0
  224. package/nitrogen/generated/android/QuickCryptoOnLoad.cpp +20 -0
  225. package/nitrogen/generated/ios/QuickCryptoAutolinking.mm +20 -0
  226. package/nitrogen/generated/shared/c++/AsymmetricKeyType.hpp +48 -0
  227. package/nitrogen/generated/shared/c++/HybridKeyObjectHandleSpec.cpp +9 -0
  228. package/nitrogen/generated/shared/c++/HybridKeyObjectHandleSpec.hpp +9 -0
  229. package/nitrogen/generated/shared/c++/HybridSlhDsaKeyPairSpec.cpp +29 -0
  230. package/nitrogen/generated/shared/c++/HybridSlhDsaKeyPairSpec.hpp +72 -0
  231. package/nitrogen/generated/shared/c++/HybridTurboShakeSpec.cpp +22 -0
  232. package/nitrogen/generated/shared/c++/HybridTurboShakeSpec.hpp +70 -0
  233. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  234. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  235. package/nitrogen/generated/shared/c++/JWK.hpp +9 -1
  236. package/nitrogen/generated/shared/c++/JWKkty.hpp +4 -0
  237. package/nitrogen/generated/shared/c++/KangarooTwelveVariant.hpp +76 -0
  238. package/nitrogen/generated/shared/c++/TurboShakeVariant.hpp +76 -0
  239. package/package.json +38 -7
  240. package/src/argon2.ts +80 -2
  241. package/src/cipher.ts +139 -15
  242. package/src/dhKeyPair.ts +8 -0
  243. package/src/dsa.ts +19 -2
  244. package/src/ec.ts +52 -29
  245. package/src/ed.ts +95 -16
  246. package/src/hash.ts +125 -12
  247. package/src/hkdf.ts +44 -6
  248. package/src/hmac.ts +17 -7
  249. package/src/keys/classes.ts +46 -5
  250. package/src/keys/generateKeyPair.ts +151 -5
  251. package/src/keys/index.ts +73 -3
  252. package/src/keys/publicCipher.ts +10 -4
  253. package/src/keys/signVerify.ts +13 -2
  254. package/src/keys/utils.ts +78 -5
  255. package/src/random.ts +104 -11
  256. package/src/rsa.ts +26 -4
  257. package/src/scrypt.ts +73 -6
  258. package/src/slhdsa.ts +146 -0
  259. package/src/specs/keyObjectHandle.nitro.ts +17 -0
  260. package/src/specs/slhDsaKeyPair.nitro.ts +29 -0
  261. package/src/specs/turboshake.nitro.ts +21 -0
  262. package/src/specs/utils.nitro.ts +0 -2
  263. package/src/subtle.ts +1246 -333
  264. package/src/utils/cipher.ts +30 -8
  265. package/src/utils/conversion.ts +129 -40
  266. package/src/utils/errors.ts +72 -4
  267. package/src/utils/timingSafeEqual.ts +8 -3
  268. package/src/utils/types.ts +80 -15
  269. package/src/utils/validation.ts +70 -1
  270. package/src/x509certificate.ts +5 -6
  271. package/deps/blake3/.cargo/config.toml +0 -2
  272. package/deps/blake3/.git-blame-ignore-revs +0 -2
  273. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  274. package/deps/blake3/.github/workflows/ci.yml +0 -491
  275. package/deps/blake3/.github/workflows/tag.yml +0 -43
  276. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  277. package/deps/blake3/CONTRIBUTING.md +0 -31
  278. package/deps/blake3/Cargo.toml +0 -135
  279. package/deps/blake3/b3sum/Cargo.lock +0 -513
  280. package/deps/blake3/b3sum/Cargo.toml +0 -26
  281. package/deps/blake3/b3sum/README.md +0 -72
  282. package/deps/blake3/b3sum/src/main.rs +0 -564
  283. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  284. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  285. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  286. package/deps/blake3/benches/bench.rs +0 -623
  287. package/deps/blake3/build.rs +0 -389
  288. package/deps/blake3/c/CMakeLists.txt +0 -383
  289. package/deps/blake3/c/CMakePresets.json +0 -73
  290. package/deps/blake3/c/Makefile.testing +0 -82
  291. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  292. package/deps/blake3/c/blake3_avx2.c +0 -326
  293. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  294. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  295. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  296. package/deps/blake3/c/blake3_avx512.c +0 -1388
  297. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  298. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  299. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  300. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  301. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  302. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  303. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  304. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  305. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  306. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  307. package/deps/blake3/c/blake3_sse2.c +0 -566
  308. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  309. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  310. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  311. package/deps/blake3/c/blake3_sse41.c +0 -560
  312. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  313. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  314. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  315. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  316. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  317. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  318. package/deps/blake3/c/example.c +0 -36
  319. package/deps/blake3/c/example_tbb.c +0 -57
  320. package/deps/blake3/c/libblake3.pc.in +0 -12
  321. package/deps/blake3/c/main.c +0 -166
  322. package/deps/blake3/c/test.py +0 -97
  323. package/deps/blake3/media/B3.svg +0 -70
  324. package/deps/blake3/media/BLAKE3.svg +0 -85
  325. package/deps/blake3/media/speed.svg +0 -1474
  326. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  327. package/deps/blake3/reference_impl/README.md +0 -14
  328. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  329. package/deps/blake3/src/ffi_avx2.rs +0 -65
  330. package/deps/blake3/src/ffi_avx512.rs +0 -169
  331. package/deps/blake3/src/ffi_neon.rs +0 -82
  332. package/deps/blake3/src/ffi_sse2.rs +0 -126
  333. package/deps/blake3/src/ffi_sse41.rs +0 -126
  334. package/deps/blake3/src/guts.rs +0 -60
  335. package/deps/blake3/src/hazmat.rs +0 -704
  336. package/deps/blake3/src/io.rs +0 -64
  337. package/deps/blake3/src/join.rs +0 -92
  338. package/deps/blake3/src/lib.rs +0 -1835
  339. package/deps/blake3/src/platform.rs +0 -587
  340. package/deps/blake3/src/portable.rs +0 -198
  341. package/deps/blake3/src/rust_avx2.rs +0 -474
  342. package/deps/blake3/src/rust_sse2.rs +0 -775
  343. package/deps/blake3/src/rust_sse41.rs +0 -766
  344. package/deps/blake3/src/test.rs +0 -1049
  345. package/deps/blake3/src/traits.rs +0 -227
  346. package/deps/blake3/src/wasm32_simd.rs +0 -794
  347. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  348. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  349. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  350. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  351. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  352. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  353. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  354. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  355. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  356. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  357. package/deps/blake3/tools/release.md +0 -16
  358. package/deps/ncrypto/.bazelignore +0 -4
  359. package/deps/ncrypto/.bazelrc +0 -1
  360. package/deps/ncrypto/.bazelversion +0 -1
  361. package/deps/ncrypto/.clang-format +0 -111
  362. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  363. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  364. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  365. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  366. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  367. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  368. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  369. package/deps/ncrypto/.python-version +0 -1
  370. package/deps/ncrypto/.release-please-manifest.json +0 -3
  371. package/deps/ncrypto/BUILD.bazel +0 -44
  372. package/deps/ncrypto/CHANGELOG.md +0 -37
  373. package/deps/ncrypto/CMakeLists.txt +0 -79
  374. package/deps/ncrypto/MODULE.bazel +0 -16
  375. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  376. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  377. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  378. package/deps/ncrypto/ncrypto.pc.in +0 -10
  379. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  380. package/deps/ncrypto/pyproject.toml +0 -38
  381. package/deps/ncrypto/release-please-config.json +0 -11
  382. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  383. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  384. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  385. package/deps/ncrypto/tests/basic.cpp +0 -856
  386. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  387. package/deps/simdutf/.clang-format +0 -4
  388. package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +0 -62
  389. package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +0 -1
  390. package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +0 -35
  391. package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +0 -29
  392. package/deps/simdutf/.github/pull_request_template.md +0 -51
  393. package/deps/simdutf/.github/workflows/aarch64.yml +0 -39
  394. package/deps/simdutf/.github/workflows/alpine.yml +0 -27
  395. package/deps/simdutf/.github/workflows/amalgamation_demos.yml +0 -34
  396. package/deps/simdutf/.github/workflows/armv7.yml +0 -32
  397. package/deps/simdutf/.github/workflows/atomic_fuzz.yml +0 -25
  398. package/deps/simdutf/.github/workflows/cifuzz.yml +0 -37
  399. package/deps/simdutf/.github/workflows/clangformat.yml +0 -36
  400. package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +0 -40
  401. package/deps/simdutf/.github/workflows/debian.yml +0 -33
  402. package/deps/simdutf/.github/workflows/documentation.yml +0 -36
  403. package/deps/simdutf/.github/workflows/emscripten.yml +0 -19
  404. package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +0 -39
  405. package/deps/simdutf/.github/workflows/macos-latest.yml +0 -29
  406. package/deps/simdutf/.github/workflows/msys2-clang.yml +0 -48
  407. package/deps/simdutf/.github/workflows/msys2.yml +0 -50
  408. package/deps/simdutf/.github/workflows/ppc64le.yml +0 -29
  409. package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +0 -35
  410. package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +0 -35
  411. package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +0 -31
  412. package/deps/simdutf/.github/workflows/s390x.yml +0 -29
  413. package/deps/simdutf/.github/workflows/selective-amalgamation.yml +0 -29
  414. package/deps/simdutf/.github/workflows/typos.yml +0 -19
  415. package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +0 -30
  416. package/deps/simdutf/.github/workflows/ubuntu22.yml +0 -32
  417. package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +0 -27
  418. package/deps/simdutf/.github/workflows/ubuntu22sani.yml +0 -29
  419. package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +0 -34
  420. package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +0 -34
  421. package/deps/simdutf/.github/workflows/ubuntu24.yml +0 -32
  422. package/deps/simdutf/.github/workflows/ubuntu24sani.yml +0 -36
  423. package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +0 -29
  424. package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +0 -21
  425. package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +0 -41
  426. package/deps/simdutf/.github/workflows/vs17-ci.yml +0 -41
  427. package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +0 -41
  428. package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +0 -36
  429. package/deps/simdutf/AI_USAGE_POLICY.md +0 -56
  430. package/deps/simdutf/AUTHORS +0 -6
  431. package/deps/simdutf/CMakeLists.txt +0 -231
  432. package/deps/simdutf/CONTRIBUTING.md +0 -214
  433. package/deps/simdutf/CONTRIBUTORS +0 -1
  434. package/deps/simdutf/Doxyfile +0 -2584
  435. package/deps/simdutf/Makefile.crosscompile +0 -54
  436. package/deps/simdutf/README-RVV.md +0 -16
  437. package/deps/simdutf/SECURITY.md +0 -8
  438. package/deps/simdutf/benchmarks/CMakeLists.txt +0 -101
  439. package/deps/simdutf/benchmarks/alignment.cpp +0 -150
  440. package/deps/simdutf/benchmarks/base64/CMakeLists.txt +0 -30
  441. package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +0 -875
  442. package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +0 -49
  443. package/deps/simdutf/benchmarks/base64/node_base64.h +0 -227
  444. package/deps/simdutf/benchmarks/base64/openssl3_base64.h +0 -334
  445. package/deps/simdutf/benchmarks/benchmark.cpp +0 -65
  446. package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +0 -347
  447. package/deps/simdutf/benchmarks/competition/.clang-format-ignore +0 -5
  448. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +0 -1276
  449. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +0 -595
  450. package/deps/simdutf/benchmarks/competition/README.md +0 -7
  451. package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +0 -91
  452. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +0 -444
  453. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +0 -13183
  454. package/deps/simdutf/benchmarks/competition/inoue2008/script.py +0 -73
  455. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +0 -738
  456. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +0 -293
  457. package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +0 -8
  458. package/deps/simdutf/benchmarks/competition/u8u16/Makefile +0 -44
  459. package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +0 -169
  460. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +0 -148
  461. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +0 -45
  462. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +0 -34
  463. package/deps/simdutf/benchmarks/competition/u8u16/README +0 -56
  464. package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +0 -43
  465. package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +0 -27
  466. package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +0 -16
  467. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +0 -18
  468. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +0 -16
  469. package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +0 -28
  470. package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +0 -20
  471. package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +0 -2
  472. package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +0 -440
  473. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +0 -121
  474. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +0 -158
  475. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +0 -270
  476. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +0 -141
  477. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +0 -216
  478. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +0 -119
  479. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +0 -2430
  480. package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +0 -39
  481. package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +0 -421
  482. package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +0 -836
  483. package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +0 -222
  484. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +0 -4
  485. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +0 -5
  486. package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +0 -390
  487. package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +0 -18
  488. package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +0 -448
  489. package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +0 -284
  490. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +0 -1975
  491. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
  492. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +0 -2263
  493. package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +0 -239
  494. package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +0 -232
  495. package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +0 -194
  496. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +0 -193
  497. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +0 -167
  498. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +0 -288
  499. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +0 -117
  500. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +0 -2
  501. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +0 -2
  502. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +0 -3
  503. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +0 -2
  504. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +0 -2
  505. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +0 -3
  506. package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +0 -2
  507. package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +0 -23
  508. package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +0 -44
  509. package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +0 -106
  510. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +0 -11
  511. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +0 -13
  512. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +0 -13
  513. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +0 -13
  514. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +0 -14
  515. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +0 -11
  516. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +0 -8
  517. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +0 -12
  518. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +0 -13
  519. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +0 -14
  520. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +0 -11
  521. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +0 -8
  522. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +0 -11
  523. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +0 -12
  524. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +0 -13
  525. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +0 -10
  526. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +0 -9
  527. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +0 -25
  528. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +0 -94
  529. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +0 -20
  530. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +0 -2
  531. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +0 -1
  532. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +0 -75
  533. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +0 -47
  534. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +0 -17
  535. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +0 -76
  536. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +0 -35
  537. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +0 -117
  538. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +0 -210
  539. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +0 -158
  540. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +0 -104
  541. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +0 -334
  542. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +0 -186
  543. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +0 -140
  544. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +0 -42
  545. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +0 -100
  546. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +0 -57
  547. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +0 -85
  548. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +0 -27
  549. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +0 -126
  550. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +0 -108
  551. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +0 -139
  552. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +0 -74
  553. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +0 -65
  554. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +0 -91
  555. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +0 -772
  556. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +0 -12
  557. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +0 -486
  558. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +0 -162
  559. package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +0 -15
  560. package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +0 -292
  561. package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +0 -23
  562. package/deps/simdutf/benchmarks/competition/utfcpp/README.md +0 -1503
  563. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +0 -335
  564. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +0 -338
  565. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +0 -103
  566. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +0 -103
  567. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +0 -274
  568. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +0 -34
  569. package/deps/simdutf/benchmarks/dataset/README.md +0 -155
  570. package/deps/simdutf/benchmarks/dataset/emoji.txt +0 -204
  571. package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +0 -40
  572. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +0 -80
  573. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +0 -20
  574. package/deps/simdutf/benchmarks/find/CMakeLists.txt +0 -6
  575. package/deps/simdutf/benchmarks/find/findbenchmark.cpp +0 -63
  576. package/deps/simdutf/benchmarks/find/findbenchmarker.h +0 -46
  577. package/deps/simdutf/benchmarks/shortbench.cpp +0 -555
  578. package/deps/simdutf/benchmarks/src/CMakeLists.txt +0 -52
  579. package/deps/simdutf/benchmarks/src/apple_arm_events.h +0 -1104
  580. package/deps/simdutf/benchmarks/src/benchmark.cpp +0 -3899
  581. package/deps/simdutf/benchmarks/src/benchmark.h +0 -317
  582. package/deps/simdutf/benchmarks/src/benchmark_base.cpp +0 -144
  583. package/deps/simdutf/benchmarks/src/benchmark_base.h +0 -98
  584. package/deps/simdutf/benchmarks/src/cmdline.cpp +0 -176
  585. package/deps/simdutf/benchmarks/src/cmdline.h +0 -35
  586. package/deps/simdutf/benchmarks/src/event_counter.h +0 -162
  587. package/deps/simdutf/benchmarks/src/linux-perf-events.h +0 -104
  588. package/deps/simdutf/benchmarks/stream.cpp +0 -209
  589. package/deps/simdutf/benchmarks/threaded.cpp +0 -123
  590. package/deps/simdutf/cmake/CPM.cmake +0 -1363
  591. package/deps/simdutf/cmake/JoinPaths.cmake +0 -23
  592. package/deps/simdutf/cmake/add_cpp_test.cmake +0 -68
  593. package/deps/simdutf/cmake/simdutf-config.cmake.in +0 -2
  594. package/deps/simdutf/cmake/simdutf-flags.cmake +0 -26
  595. package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +0 -4
  596. package/deps/simdutf/cmake/toolchains-dev/README.md +0 -32
  597. package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +0 -14
  598. package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +0 -22
  599. package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +0 -16
  600. package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +0 -16
  601. package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +0 -16
  602. package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +0 -38
  603. package/deps/simdutf/doc/avx512.png +0 -0
  604. package/deps/simdutf/doc/logo.png +0 -0
  605. package/deps/simdutf/doc/logo.svg +0 -165
  606. package/deps/simdutf/doc/node2023.png +0 -0
  607. package/deps/simdutf/doc/shortinput.md +0 -78
  608. package/deps/simdutf/doc/utf16utf8.png +0 -0
  609. package/deps/simdutf/doc/utf8utf16.png +0 -0
  610. package/deps/simdutf/doc/widelogo.png +0 -0
  611. package/deps/simdutf/doxygen.py +0 -50
  612. package/deps/simdutf/fuzz/.clang-format +0 -9
  613. package/deps/simdutf/fuzz/CMakeLists.txt +0 -45
  614. package/deps/simdutf/fuzz/README.md +0 -168
  615. package/deps/simdutf/fuzz/atomic_base64.cpp +0 -448
  616. package/deps/simdutf/fuzz/base64.cpp +0 -278
  617. package/deps/simdutf/fuzz/build.sh +0 -83
  618. package/deps/simdutf/fuzz/conversion.cpp +0 -669
  619. package/deps/simdutf/fuzz/helpers/.clang-format-ignore +0 -1
  620. package/deps/simdutf/fuzz/helpers/common.h +0 -135
  621. package/deps/simdutf/fuzz/helpers/nameof.hpp +0 -1258
  622. package/deps/simdutf/fuzz/main.cpp +0 -72
  623. package/deps/simdutf/fuzz/minimize_and_cleanse.sh +0 -87
  624. package/deps/simdutf/fuzz/misc.cpp +0 -216
  625. package/deps/simdutf/fuzz/random_fuzz.sh +0 -154
  626. package/deps/simdutf/fuzz/roundtrip.cpp +0 -588
  627. package/deps/simdutf/fuzz/safe_conversion.cpp +0 -104
  628. package/deps/simdutf/riscv/Dockerfile +0 -16
  629. package/deps/simdutf/riscv/README.md +0 -24
  630. package/deps/simdutf/riscv/remove-docker-station +0 -8
  631. package/deps/simdutf/riscv/run-docker-station +0 -31
  632. package/deps/simdutf/scripts/.flake8 +0 -2
  633. package/deps/simdutf/scripts/Makefile +0 -2
  634. package/deps/simdutf/scripts/README_ADD_FUNCTION.md +0 -49
  635. package/deps/simdutf/scripts/add_function.py +0 -330
  636. package/deps/simdutf/scripts/amalgamation_tests.py +0 -156
  637. package/deps/simdutf/scripts/base64/Makefile +0 -2
  638. package/deps/simdutf/scripts/base64/README.md +0 -2
  639. package/deps/simdutf/scripts/base64/avx512.py +0 -76
  640. package/deps/simdutf/scripts/base64/neon_decode.py +0 -143
  641. package/deps/simdutf/scripts/base64/neon_generate_lut.py +0 -101
  642. package/deps/simdutf/scripts/base64/sse.py +0 -252
  643. package/deps/simdutf/scripts/base64/sseregular.py +0 -160
  644. package/deps/simdutf/scripts/base64/sseurl.py +0 -283
  645. package/deps/simdutf/scripts/base64/table.py +0 -59
  646. package/deps/simdutf/scripts/base64bench_print.py +0 -145
  647. package/deps/simdutf/scripts/benchmark-all.py +0 -119
  648. package/deps/simdutf/scripts/benchmark_print.py +0 -324
  649. package/deps/simdutf/scripts/check_feature_macros.py +0 -156
  650. package/deps/simdutf/scripts/check_typos.sh +0 -13
  651. package/deps/simdutf/scripts/clang_format.sh +0 -35
  652. package/deps/simdutf/scripts/clang_format_docker.sh +0 -38
  653. package/deps/simdutf/scripts/common.py +0 -24
  654. package/deps/simdutf/scripts/compilation_benchmark.py +0 -55
  655. package/deps/simdutf/scripts/compile_many_variations.sh +0 -64
  656. package/deps/simdutf/scripts/create_latex_table.py +0 -62
  657. package/deps/simdutf/scripts/docker/Dockerfile +0 -14
  658. package/deps/simdutf/scripts/docker/Makefile +0 -9
  659. package/deps/simdutf/scripts/docker/README.md +0 -30
  660. package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
  661. package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +0 -155
  662. package/deps/simdutf/scripts/prepare_doxygen.sh +0 -21
  663. package/deps/simdutf/scripts/release.py +0 -197
  664. package/deps/simdutf/scripts/shortinputplots.py +0 -97
  665. package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +0 -422
  666. package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +0 -105
  667. package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +0 -186
  668. package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +0 -137
  669. package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +0 -129
  670. package/deps/simdutf/scripts/table.py +0 -207
  671. package/deps/simdutf/scripts/tests/new.txt +0 -33
  672. package/deps/simdutf/scripts/tests/old.txt +0 -33
  673. package/deps/simdutf/scripts/tests/results.txt +0 -272
  674. package/deps/simdutf/simdutf.pc.in +0 -11
  675. package/deps/simdutf/singleheader/.flake8 +0 -2
  676. package/deps/simdutf/singleheader/CMakeLists.txt +0 -64
  677. package/deps/simdutf/singleheader/README-dev.md +0 -81
  678. package/deps/simdutf/singleheader/README.md +0 -19
  679. package/deps/simdutf/singleheader/amalgamate.py +0 -513
  680. package/deps/simdutf/singleheader/amalgamation_demo.c +0 -59
  681. package/deps/simdutf/singleheader/amalgamation_demo.cpp +0 -54
  682. package/deps/simdutf/singleheader/test-features.py +0 -262
  683. package/deps/simdutf/src/CMakeLists.txt +0 -78
  684. package/deps/simdutf/tests/CMakeLists.txt +0 -483
  685. package/deps/simdutf/tests/atomic_base64_tests.cpp +0 -2845
  686. package/deps/simdutf/tests/base64_tests.cpp +0 -3617
  687. package/deps/simdutf/tests/basic_fuzzer.cpp +0 -805
  688. package/deps/simdutf/tests/bele_tests.cpp +0 -182
  689. package/deps/simdutf/tests/constexpr_base64_tests.cpp +0 -387
  690. package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +0 -52
  691. package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +0 -80
  692. package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +0 -66
  693. package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +0 -120
  694. package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +0 -203
  695. package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +0 -276
  696. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +0 -109
  697. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +0 -136
  698. package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +0 -193
  699. package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +0 -381
  700. package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +0 -259
  701. package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +0 -266
  702. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +0 -148
  703. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +0 -176
  704. package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +0 -213
  705. package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +0 -318
  706. package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +0 -343
  707. package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +0 -271
  708. package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +0 -111
  709. package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +0 -96
  710. package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +0 -148
  711. package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +0 -192
  712. package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +0 -166
  713. package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +0 -215
  714. package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +0 -181
  715. package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +0 -261
  716. package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +0 -516
  717. package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +0 -579
  718. package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +0 -412
  719. package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +0 -480
  720. package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +0 -671
  721. package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +0 -455
  722. package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +0 -1204
  723. package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +0 -337
  724. package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +0 -37
  725. package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +0 -97
  726. package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +0 -126
  727. package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +0 -71
  728. package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +0 -122
  729. package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +0 -244
  730. package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +0 -49
  731. package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +0 -92
  732. package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +0 -114
  733. package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +0 -109
  734. package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +0 -84
  735. package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +0 -124
  736. package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +0 -221
  737. package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +0 -155
  738. package/deps/simdutf/tests/count_utf16be.cpp +0 -64
  739. package/deps/simdutf/tests/count_utf16le.cpp +0 -61
  740. package/deps/simdutf/tests/count_utf8.cpp +0 -87
  741. package/deps/simdutf/tests/detect_encodings_tests.cpp +0 -312
  742. package/deps/simdutf/tests/embed/valid_utf8.txt +0 -1
  743. package/deps/simdutf/tests/embed_tests.cpp +0 -22
  744. package/deps/simdutf/tests/find_tests.cpp +0 -77
  745. package/deps/simdutf/tests/fixed_string_tests.cpp +0 -153
  746. package/deps/simdutf/tests/helpers/CMakeLists.txt +0 -25
  747. package/deps/simdutf/tests/helpers/compiletime_conversions.h +0 -222
  748. package/deps/simdutf/tests/helpers/fixed_string.h +0 -267
  749. package/deps/simdutf/tests/helpers/random_int.cpp +0 -30
  750. package/deps/simdutf/tests/helpers/random_int.h +0 -39
  751. package/deps/simdutf/tests/helpers/random_utf16.cpp +0 -123
  752. package/deps/simdutf/tests/helpers/random_utf16.h +0 -52
  753. package/deps/simdutf/tests/helpers/random_utf32.cpp +0 -41
  754. package/deps/simdutf/tests/helpers/random_utf32.h +0 -40
  755. package/deps/simdutf/tests/helpers/random_utf8.cpp +0 -93
  756. package/deps/simdutf/tests/helpers/random_utf8.h +0 -36
  757. package/deps/simdutf/tests/helpers/test.cpp +0 -231
  758. package/deps/simdutf/tests/helpers/test.h +0 -193
  759. package/deps/simdutf/tests/helpers/transcode_test_base.cpp +0 -1257
  760. package/deps/simdutf/tests/helpers/transcode_test_base.h +0 -683
  761. package/deps/simdutf/tests/helpers/utf16.h +0 -27
  762. package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +0 -43
  763. package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +0 -47
  764. package/deps/simdutf/tests/internal_tests.cpp +0 -27
  765. package/deps/simdutf/tests/null_safety_tests.cpp +0 -94
  766. package/deps/simdutf/tests/random_fuzzer.cpp +0 -779
  767. package/deps/simdutf/tests/readme_tests.cpp +0 -274
  768. package/deps/simdutf/tests/reference/CMakeLists.txt +0 -23
  769. package/deps/simdutf/tests/reference/decode_utf16.h +0 -81
  770. package/deps/simdutf/tests/reference/decode_utf32.h +0 -47
  771. package/deps/simdutf/tests/reference/encode_latin1.cpp +0 -1
  772. package/deps/simdutf/tests/reference/encode_latin1.h +0 -32
  773. package/deps/simdutf/tests/reference/encode_utf16.cpp +0 -49
  774. package/deps/simdutf/tests/reference/encode_utf16.h +0 -20
  775. package/deps/simdutf/tests/reference/encode_utf32.cpp +0 -1
  776. package/deps/simdutf/tests/reference/encode_utf32.h +0 -36
  777. package/deps/simdutf/tests/reference/encode_utf8.cpp +0 -1
  778. package/deps/simdutf/tests/reference/encode_utf8.h +0 -40
  779. package/deps/simdutf/tests/reference/validate_utf16.cpp +0 -60
  780. package/deps/simdutf/tests/reference/validate_utf16.h +0 -14
  781. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +0 -35
  782. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +0 -13
  783. package/deps/simdutf/tests/reference/validate_utf32.cpp +0 -27
  784. package/deps/simdutf/tests/reference/validate_utf32.h +0 -12
  785. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +0 -27
  786. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +0 -12
  787. package/deps/simdutf/tests/reference/validate_utf8.cpp +0 -82
  788. package/deps/simdutf/tests/reference/validate_utf8.h +0 -11
  789. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +0 -43
  790. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +0 -12
  791. package/deps/simdutf/tests/select_implementation.cpp +0 -43
  792. package/deps/simdutf/tests/simdutf_c_tests.cpp +0 -244
  793. package/deps/simdutf/tests/span_tests.cpp +0 -401
  794. package/deps/simdutf/tests/special_tests.cpp +0 -559
  795. package/deps/simdutf/tests/straight_c_test.c +0 -187
  796. package/deps/simdutf/tests/text_encoding_tests.cpp +0 -77
  797. package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +0 -377
  798. package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +0 -202
  799. package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +0 -165
  800. package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +0 -77
  801. package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +0 -175
  802. package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +0 -188
  803. package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +0 -268
  804. package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +0 -274
  805. package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +0 -92
  806. package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +0 -114
  807. package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +0 -178
  808. package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +0 -88
  809. package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +0 -33
  810. package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +0 -228
  811. package/deps/simdutf/tools/CMakeLists.txt +0 -85
  812. package/deps/simdutf/tools/fastbase64.cpp +0 -250
  813. package/deps/simdutf/tools/sutf.cpp +0 -556
  814. package/deps/simdutf/tools/sutf.h +0 -40
  815. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -1,3899 +0,0 @@
1
- #include "benchmark.h"
2
- #include "simdutf.h"
3
-
4
- #include <cassert>
5
- #include <array>
6
- #include <iostream>
7
- #include <chrono>
8
- #include <thread>
9
- #include <string>
10
- #include <vector>
11
- #ifdef __x86_64__
12
- /**
13
- * utf8lut: Vectorized UTF-8 converter.
14
- * by stgatilov (2019)
15
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
16
- */
17
- SIMDUTF_TARGET_WESTMERE
18
- namespace {
19
- #include "benchmarks/competition/utf8lut/src/utf8lut.h"
20
- }
21
- SIMDUTF_UNTARGET_REGION
22
-
23
- /**
24
- * Bob Steagall, CppCon2018
25
- * https://github.com/BobSteagall/CppCon2018/
26
- *
27
- * Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics
28
- * https://www.youtube.com/watch?v=5FQ87-Ecb-A
29
- */
30
- #include "benchmarks/competition/CppCon2018/utf_utils.cpp"
31
- #endif
32
-
33
- /**
34
- * Bjoern Hoehrmann
35
- * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
36
- */
37
- #include "benchmarks/competition/hoehrmann/hoehrmann.h"
38
- /**
39
- * LLVM relies on code from the Unicode Consortium
40
- * https://en.wikipedia.org/wiki/Unicode_Consortium
41
- */
42
- #include "benchmarks/competition/llvm/ConvertUTF.cpp"
43
- #ifdef __x86_64__
44
- /**
45
- * Olivier Goffart, UTF-8 processing using SIMD (SSE4), 2012.
46
- * https://woboq.com/blog/utf-8-processing-using-simd.html
47
- */
48
- #include "benchmarks/competition/utf8sse4/fromutf8-sse.cpp"
49
- #endif
50
-
51
- #ifdef __x86_64__
52
- /**
53
- * benchmarks/competition/u8u16 contains an open source version of u8u16,
54
- * referenced in Cameron, Robert D, A case study in SIMD text processing with
55
- * parallel bit streams: UTF-8 to UTF-16 transcoding, Proceedings of the 13th
56
- * ACM SIGPLAN Symposium on Principles and practice of parallel programming,
57
- * 91--98.
58
- */
59
- // It seems that u8u16 is not good at scoping macros.
60
- #undef LITTLE_ENDIAN
61
- #undef BYTE_ORDER
62
- #undef BIG_ENDIAN
63
- #include "benchmarks/competition/u8u16/config/p4_config.h"
64
- #include "benchmarks/competition/u8u16/src/libu8u16.c"
65
- #endif
66
-
67
- /**
68
- * Nemanja Trifunovic, UTF8-CPP: UTF-8 with C++ in a Portable Way
69
- * https://github.com/nemtrif/utfcpp/releases/tag/v3.2.2
70
- */
71
- #include "benchmarks/competition/utfcpp/source/utf8.h"
72
-
73
- namespace simdutf::benchmarks {
74
-
75
- template <typename Fn>
76
- void Benchmark::register_function(std::string name, Fn function,
77
- std::set<simdutf::encoding_type> set) {
78
-
79
- if (name.find('+') == std::string::npos) {
80
- // adding simdutf benchmark, populate for all known architectures
81
- for (const auto &impl : simdutf::get_available_implementations()) {
82
- const auto full_name = name + '+' + impl->name();
83
- benchmarks.insert({full_name, std::make_pair(function, set)});
84
- }
85
- } else {
86
- benchmarks.insert({name, std::make_pair(function, set)});
87
- }
88
- }
89
-
90
- template <typename Fn>
91
- void Benchmark::register_function(std::string name, Fn function,
92
- simdutf::encoding_type enc1) {
93
- std::set<simdutf::encoding_type> set{enc1};
94
- register_function(name, function, set);
95
- }
96
-
97
- template <typename Fn>
98
- void Benchmark::register_function(std::string name, Fn function,
99
- simdutf::encoding_type enc1,
100
- simdutf::encoding_type enc2) {
101
- std::set<simdutf::encoding_type> set{enc1, enc2};
102
- register_function(name, function, set);
103
- }
104
-
105
- template <typename Fn>
106
- void Benchmark::register_function(std::string name, Fn function,
107
- simdutf::encoding_type enc1,
108
- simdutf::encoding_type enc2,
109
- simdutf::encoding_type enc3) {
110
- std::set<simdutf::encoding_type> set{enc1, enc2, enc3};
111
- register_function(name, function, set);
112
- }
113
-
114
- Benchmark::Benchmark(std::vector<input::Testcase> &&testcases)
115
- : BenchmarkBase(std::move(testcases)) {
116
- register_function("to_well_formed_utf16le",
117
- &Benchmark::run_to_well_formed_utf16le,
118
- simdutf::encoding_type::UTF16_LE);
119
- register_function("naive_validate_ascii",
120
- &Benchmark::run_naive_validate_ascii,
121
- simdutf::encoding_type::UTF8);
122
- register_function("validate_ascii", &Benchmark::run_validate_ascii,
123
- simdutf::encoding_type::UTF8);
124
- register_function("validate_ascii_with_errors",
125
- &Benchmark::run_validate_ascii_with_errors,
126
- simdutf::encoding_type::UTF8);
127
- register_function("validate_utf8", &Benchmark::run_validate_utf8,
128
- simdutf::encoding_type::UTF8);
129
- register_function("validate_utf8_with_errors",
130
- &Benchmark::run_validate_utf8_with_errors,
131
- simdutf::encoding_type::UTF8);
132
- register_function("validate_utf16le", &Benchmark::run_validate_utf16le,
133
- simdutf::encoding_type::UTF16_LE);
134
- register_function("validate_utf16le_with_errors",
135
- &Benchmark::run_validate_utf16le_with_errors,
136
- simdutf::encoding_type::UTF16_LE);
137
- register_function("validate_utf32", &Benchmark::run_validate_utf32,
138
- simdutf::encoding_type::UTF32_LE);
139
- register_function("validate_utf32_with_errors",
140
- &Benchmark::run_validate_utf32_with_errors,
141
- simdutf::encoding_type::UTF32_LE);
142
-
143
- register_function("count_utf8", &Benchmark::run_count_utf8,
144
- simdutf::encoding_type::UTF8);
145
- register_function("count_utf16le", &Benchmark::run_count_utf16le,
146
- simdutf::encoding_type::UTF16_LE);
147
-
148
- register_function("utf8_length_from_latin1",
149
- &Benchmark::run_utf8_length_from_latin1,
150
- simdutf::encoding_type::Latin1);
151
- register_function("utf8_length_from_utf16le",
152
- &Benchmark::run_utf8_length_from_utf16le,
153
- simdutf::encoding_type::UTF16_LE);
154
- register_function("utf8_length_from_utf16le_with_replacement",
155
- &Benchmark::run_utf8_length_from_utf16le_with_replacement,
156
- simdutf::encoding_type::UTF16_LE);
157
- register_function("utf8_length_from_utf16be",
158
- &Benchmark::run_utf8_length_from_utf16be,
159
- simdutf::encoding_type::UTF16_BE);
160
- register_function("utf8_length_from_utf16be_with_replacement",
161
- &Benchmark::run_utf8_length_from_utf16be_with_replacement,
162
- simdutf::encoding_type::UTF16_BE);
163
- register_function("utf8_length_from_utf32",
164
- &Benchmark::run_utf8_length_from_utf32,
165
- simdutf::encoding_type::UTF32_LE);
166
- register_function("utf16_length_from_utf8",
167
- &Benchmark::run_utf16_length_from_utf8,
168
- simdutf::encoding_type::UTF8);
169
- register_function("convert_latin1_to_utf8",
170
- &Benchmark::run_convert_latin1_to_utf8,
171
- simdutf::encoding_type::Latin1);
172
- register_function("convert_latin1_to_utf16le",
173
- &Benchmark::run_convert_latin1_to_utf16le,
174
- simdutf::encoding_type::Latin1);
175
- register_function("convert_latin1_to_utf32",
176
- &Benchmark::run_convert_latin1_to_utf32,
177
- simdutf::encoding_type::Latin1);
178
-
179
- register_function("convert_utf8_to_latin1",
180
- &Benchmark::run_convert_utf8_to_latin1,
181
- simdutf::encoding_type::UTF8);
182
- register_function("convert_utf8_to_latin1_with_errors",
183
- &Benchmark::run_convert_utf8_to_latin1_with_errors,
184
- simdutf::encoding_type::UTF8);
185
- register_function("convert_valid_utf8_to_latin1",
186
- &Benchmark::run_convert_valid_utf8_to_latin1,
187
- simdutf::encoding_type::UTF8);
188
-
189
- register_function("convert_utf8_to_utf16le",
190
- &Benchmark::run_convert_utf8_to_utf16le,
191
- simdutf::encoding_type::UTF8);
192
- register_function("convert_utf8_to_utf16le_with_errors",
193
- &Benchmark::run_convert_utf8_to_utf16le_with_errors,
194
- simdutf::encoding_type::UTF8);
195
- register_function(
196
- "convert_utf8_to_utf16le_with_dynamic_allocation",
197
- &Benchmark::run_convert_utf8_to_utf16le_with_dynamic_allocation,
198
- simdutf::encoding_type::UTF8);
199
- register_function("convert_valid_utf8_to_utf16le",
200
- &Benchmark::run_convert_valid_utf8_to_utf16le,
201
- simdutf::encoding_type::UTF8);
202
-
203
- register_function("convert_utf8_to_utf32",
204
- &Benchmark::run_convert_utf8_to_utf32,
205
- simdutf::encoding_type::UTF8);
206
- register_function("convert_utf8_to_utf32_with_errors",
207
- &Benchmark::run_convert_utf8_to_utf32_with_errors,
208
- simdutf::encoding_type::UTF8);
209
- register_function(
210
- "convert_utf8_to_utf32_with_dynamic_allocation",
211
- &Benchmark::run_convert_utf8_to_utf32_with_dynamic_allocation,
212
- simdutf::encoding_type::UTF8);
213
- register_function("convert_valid_utf8_to_utf32",
214
- &Benchmark::run_convert_valid_utf8_to_utf32,
215
- simdutf::encoding_type::UTF8);
216
-
217
- register_function("convert_utf16le_to_latin1",
218
- &Benchmark::run_convert_utf16le_to_latin1,
219
- simdutf::encoding_type::UTF16_LE);
220
- register_function("convert_utf16le_to_latin1_with_errors",
221
- &Benchmark::run_convert_utf16le_to_latin1_with_errors,
222
- simdutf::encoding_type::UTF16_LE);
223
- register_function("convert_valid_utf16le_to_latin1",
224
- &Benchmark::run_convert_valid_utf16le_to_latin1,
225
- simdutf::encoding_type::UTF16_LE);
226
- #if SIMDUTF_IS_BIG_ENDIAN
227
- register_function("convert_utf16_to_utf8_safe",
228
- &Benchmark::run_convert_utf16_to_utf8_safe,
229
- simdutf::encoding_type::UTF16_BE);
230
- #else
231
- register_function("convert_utf16_to_utf8_safe",
232
- &Benchmark::run_convert_utf16_to_utf8_safe,
233
- simdutf::encoding_type::UTF16_LE);
234
- #endif // SIMDUTF_IS_BIG_ENDIAN
235
- register_function("convert_utf16le_to_utf8",
236
- &Benchmark::run_convert_utf16le_to_utf8,
237
- simdutf::encoding_type::UTF16_LE);
238
- register_function("convert_utf16le_to_utf8_with_errors",
239
- &Benchmark::run_convert_utf16le_to_utf8_with_errors,
240
- simdutf::encoding_type::UTF16_LE);
241
- register_function(
242
- "convert_utf16le_to_utf8_with_dynamic_allocation",
243
- &Benchmark::run_convert_utf16le_to_utf8_with_dynamic_allocation,
244
- simdutf::encoding_type::UTF16_LE);
245
- register_function("convert_valid_utf16le_to_utf8",
246
- &Benchmark::run_convert_valid_utf16le_to_utf8,
247
- simdutf::encoding_type::UTF16_LE);
248
-
249
- register_function("convert_utf16le_to_utf32",
250
- &Benchmark::run_convert_utf16le_to_utf32,
251
- simdutf::encoding_type::UTF16_LE);
252
- register_function("convert_utf16le_to_utf32_with_errors",
253
- &Benchmark::run_convert_utf16le_to_utf32_with_errors,
254
- simdutf::encoding_type::UTF16_LE);
255
- register_function(
256
- "convert_utf16le_to_utf32_with_dynamic_allocation",
257
- &Benchmark::run_convert_utf16le_to_utf32_with_dynamic_allocation,
258
- simdutf::encoding_type::UTF16_LE);
259
- register_function("convert_valid_utf16le_to_utf32",
260
- &Benchmark::run_convert_valid_utf16le_to_utf32,
261
- simdutf::encoding_type::UTF16_LE);
262
-
263
- register_function("convert_utf32_to_latin1",
264
- &Benchmark::run_convert_utf32_to_latin1,
265
- simdutf::encoding_type::UTF32_LE);
266
- register_function("convert_utf32_to_latin1_with_errors",
267
- &Benchmark::run_convert_utf32_to_latin1_with_errors,
268
- simdutf::encoding_type::UTF32_LE);
269
- register_function("convert_valid_utf32_to_latin1",
270
- &Benchmark::run_convert_valid_utf32_to_latin1,
271
- simdutf::encoding_type::UTF32_LE);
272
-
273
- register_function("convert_utf32_to_utf8",
274
- &Benchmark::run_convert_utf32_to_utf8,
275
- simdutf::encoding_type::UTF32_LE);
276
- register_function("convert_utf32_to_utf8_with_errors",
277
- &Benchmark::run_convert_utf32_to_utf8_with_errors,
278
- simdutf::encoding_type::UTF32_LE);
279
- register_function("convert_valid_utf32_to_utf8",
280
- &Benchmark::run_convert_valid_utf32_to_utf8,
281
- simdutf::encoding_type::UTF32_LE);
282
-
283
- register_function("convert_utf32_to_utf16le",
284
- &Benchmark::run_convert_utf32_to_utf16<endianness::LITTLE>,
285
- simdutf::encoding_type::UTF32_LE);
286
- register_function("convert_utf32_to_utf16be",
287
- &Benchmark::run_convert_utf32_to_utf16<endianness::BIG>,
288
- simdutf::encoding_type::UTF32_LE);
289
- register_function(
290
- "convert_utf32_to_utf16le_with_errors",
291
- &Benchmark::run_convert_utf32_to_utf16_with_errors<endianness::LITTLE>,
292
- simdutf::encoding_type::UTF32_LE);
293
- register_function(
294
- "convert_utf32_to_utf16be_with_errors",
295
- &Benchmark::run_convert_utf32_to_utf16_with_errors<endianness::BIG>,
296
- simdutf::encoding_type::UTF32_LE);
297
- register_function(
298
- "convert_valid_utf32_to_utf16le",
299
- &Benchmark::run_convert_valid_utf32_to_utf16<endianness::LITTLE>,
300
- simdutf::encoding_type::UTF32_LE);
301
- register_function(
302
- "convert_valid_utf32_to_utf16be",
303
- &Benchmark::run_convert_valid_utf32_to_utf16<endianness::BIG>,
304
- simdutf::encoding_type::UTF32_LE);
305
-
306
- register_function("detect_encodings", &Benchmark::run_detect_encodings,
307
- simdutf::encoding_type::UTF8,
308
- simdutf::encoding_type::UTF16_LE,
309
- simdutf::encoding_type::UTF32_LE);
310
-
311
- #ifdef ICU_AVAILABLE
312
- register_function("convert_latin1_to_utf8+icu",
313
- &Benchmark::run_convert_latin1_to_utf8_icu,
314
- simdutf::encoding_type::Latin1);
315
- register_function("convert_latin1_to_utf16+icu",
316
- &Benchmark::run_convert_latin1_to_utf16_icu,
317
- simdutf::encoding_type::Latin1);
318
- register_function("convert_latin1_to_utf32+icu",
319
- &Benchmark::run_convert_latin1_to_utf32_icu,
320
- simdutf::encoding_type::Latin1);
321
- register_function("convert_utf8_to_latin1+icu",
322
- &Benchmark::run_convert_utf8_to_latin1_icu,
323
- simdutf::encoding_type::UTF8);
324
- register_function("convert_utf8_to_utf16+icu",
325
- &Benchmark::run_convert_utf8_to_utf16_icu,
326
- simdutf::encoding_type::UTF8);
327
- register_function("convert_utf16_to_utf8+icu",
328
- &Benchmark::run_convert_utf16_to_utf8_icu,
329
- simdutf::encoding_type::UTF16_LE);
330
- register_function("convert_utf16_to_latin1+icu",
331
- &Benchmark::run_convert_utf16_to_latin1_icu,
332
- simdutf::encoding_type::UTF16_LE);
333
- register_function("convert_utf32_to_latin1+icu",
334
- &Benchmark::run_convert_utf32_to_latin1_icu,
335
- simdutf::encoding_type::UTF32_LE);
336
- #endif
337
- #ifdef ICONV_AVAILABLE
338
- register_function("convert_latin1_to_utf8+iconv",
339
- &Benchmark::run_convert_latin1_to_utf8_iconv,
340
- simdutf::encoding_type::Latin1);
341
- register_function("convert_latin1_to_utf16+iconv",
342
- &Benchmark::run_convert_latin1_to_utf16_iconv,
343
- simdutf::encoding_type::Latin1);
344
- register_function("convert_latin1_to_utf32+iconv",
345
- &Benchmark::run_convert_latin1_to_utf32_iconv,
346
- simdutf::encoding_type::Latin1);
347
- register_function("convert_utf8_to_latin1+iconv",
348
- &Benchmark::run_convert_utf8_to_latin1_iconv,
349
- simdutf::encoding_type::UTF8);
350
- register_function("convert_utf8_to_utf16+iconv",
351
- &Benchmark::run_convert_utf8_to_utf16_iconv,
352
- simdutf::encoding_type::UTF8);
353
- register_function("convert_utf16_to_utf8+iconv",
354
- &Benchmark::run_convert_utf16_to_utf8_iconv,
355
- simdutf::encoding_type::UTF16_LE);
356
- register_function("convert_utf16_to_latin1+iconv",
357
- &Benchmark::run_convert_utf16_to_latin1_iconv,
358
- simdutf::encoding_type::UTF16_LE);
359
- register_function("convert_utf32_to_latin1+iconv",
360
- &Benchmark::run_convert_utf32_to_latin1_iconv,
361
- simdutf::encoding_type::UTF32_LE);
362
- #endif
363
- #ifdef INOUE2008
364
- register_function("convert_valid_utf8_to_utf16+inoue2008",
365
- &Benchmark::run_convert_valid_utf8_to_utf16_inoue2008,
366
- simdutf::encoding_type::UTF8);
367
- #endif
368
- #ifdef __x86_64__
369
- register_function("convert_utf8_to_utf16+u8u16",
370
- &Benchmark::run_convert_utf8_to_utf16_u8u16,
371
- simdutf::encoding_type::UTF8);
372
- register_function("convert_utf16_to_utf8+utf8lut",
373
- &Benchmark::run_convert_valid_utf8_to_utf16_utf8lut,
374
- simdutf::encoding_type::UTF16_LE);
375
- register_function("convert_valid_utf16_to_utf8+utf8lut",
376
- &Benchmark::run_convert_valid_utf16_to_utf8_utf8lut,
377
- simdutf::encoding_type::UTF16_LE);
378
- register_function("convert_utf8_to_utf16+utf8lut",
379
- &Benchmark::run_convert_valid_utf8_to_utf16_utf8lut,
380
- simdutf::encoding_type::UTF8);
381
- register_function("convert_utf8_to_utf32+utf8lut",
382
- &Benchmark::run_convert_utf8_to_utf32_utf8lut,
383
- simdutf::encoding_type::UTF8);
384
- register_function("convert_valid_utf8_to_utf16+utf8lut",
385
- &Benchmark::run_convert_valid_utf8_to_utf16_utf8lut,
386
- simdutf::encoding_type::UTF8);
387
- register_function("convert_utf32_to_utf8+utf8lut",
388
- &Benchmark::run_convert_valid_utf32_to_utf8_utf8lut,
389
- simdutf::encoding_type::UTF32_LE);
390
- register_function("convert_valid_utf32_to_utf8+utf8lut",
391
- &Benchmark::run_convert_valid_utf32_to_utf8_utf8lut,
392
- simdutf::encoding_type::UTF32_BE);
393
- register_function("convert_valid_utf8_to_utf32+utf8lut",
394
- &Benchmark::run_convert_utf8_to_utf32_utf8lut,
395
- simdutf::encoding_type::UTF8);
396
- register_function("convert_utf8_to_utf16+utf8sse4",
397
- &Benchmark::run_convert_utf8_to_utf16_utf8sse4,
398
- simdutf::encoding_type::UTF8);
399
- register_function("convert_utf8_to_utf16+cppcon2018",
400
- &Benchmark::run_convert_utf8_to_utf16_cppcon2018,
401
- simdutf::encoding_type::UTF8);
402
- register_function("convert_utf8_to_utf32+cppcon2018",
403
- &Benchmark::run_convert_utf8_to_utf32_cppcon2018,
404
- simdutf::encoding_type::UTF8);
405
- #endif
406
- register_function("convert_utf8_to_utf16+hoehrmann",
407
- &Benchmark::run_convert_utf8_to_utf16_hoehrmann,
408
- simdutf::encoding_type::UTF8);
409
- register_function("convert_utf8_to_utf32+hoehrmann",
410
- &Benchmark::run_convert_utf8_to_utf32_hoehrmann,
411
- simdutf::encoding_type::UTF8);
412
-
413
- register_function("convert_utf8_to_utf16+llvm",
414
- &Benchmark::run_convert_utf8_to_utf16_llvm,
415
- simdutf::encoding_type::UTF8);
416
- register_function("convert_utf8_to_utf32+llvm",
417
- &Benchmark::run_convert_utf8_to_utf32_llvm,
418
- simdutf::encoding_type::UTF8);
419
- register_function("convert_utf16_to_utf8+llvm",
420
- &Benchmark::run_convert_utf16_to_utf8_llvm,
421
- simdutf::encoding_type::UTF16_LE);
422
- register_function("convert_utf32_to_utf8+llvm",
423
- &Benchmark::run_convert_utf32_to_utf8_llvm,
424
- simdutf::encoding_type::UTF32_LE);
425
- register_function("convert_utf32_to_utf16+llvm",
426
- &Benchmark::run_convert_utf32_to_utf16_llvm,
427
- simdutf::encoding_type::UTF32_LE);
428
- register_function("convert_utf16_to_utf32+llvm",
429
- &Benchmark::run_convert_utf16_to_utf32_llvm,
430
- simdutf::encoding_type::UTF16_LE);
431
-
432
- register_function("convert_utf8_to_utf16+utfcpp",
433
- &Benchmark::run_convert_utf8_to_utf16_utfcpp,
434
- simdutf::encoding_type::UTF8);
435
- register_function("convert_utf8_to_utf32+utfcpp",
436
- &Benchmark::run_convert_utf8_to_utf32_utfcpp,
437
- simdutf::encoding_type::UTF8);
438
- register_function("convert_utf16_to_utf8+utfcpp",
439
- &Benchmark::run_convert_utf16_to_utf8_utfcpp,
440
- simdutf::encoding_type::UTF16_LE);
441
- register_function("convert_utf32_to_utf8+utfcpp",
442
- &Benchmark::run_convert_utf32_to_utf8_utfcpp,
443
- simdutf::encoding_type::UTF32_LE);
444
-
445
- register_function("utf8_length_from_latin1+node",
446
- &Benchmark::run_utf8_length_from_latin1_node,
447
- simdutf::encoding_type::Latin1);
448
- }
449
-
450
- // static
451
- Benchmark Benchmark::create(const CommandLine &cmdline) {
452
- std::vector<input::Testcase> testcases;
453
-
454
- using input::File;
455
- using input::random_utf8;
456
- using input::Testcase;
457
-
458
- for (const size_t iterations : cmdline.iterations) {
459
- for (const auto &path : cmdline.files) {
460
- testcases.emplace_back(
461
- Testcase{cmdline.procedures, iterations, File{path}});
462
- }
463
-
464
- for (const size_t size : cmdline.random_size) {
465
- testcases.emplace_back(
466
- Testcase{cmdline.procedures, iterations, random_utf8{size}});
467
- }
468
- }
469
-
470
- return Benchmark{std::move(testcases)};
471
- }
472
-
473
- void Benchmark::list_procedures(ListingMode lm) const {
474
- switch (lm) {
475
- case ListingMode::None:
476
- break;
477
-
478
- case ListingMode::HumanReadable: {
479
- const auto &known_procedures = all_procedures();
480
- printf("Available procedures (%zu)\n", size_t(known_procedures.size()));
481
- for (const auto &name : known_procedures) {
482
- printf("- %s\n", name.c_str());
483
- }
484
- } break;
485
-
486
- case ListingMode::PlainLines: {
487
- const auto &known_procedures = all_procedures();
488
- for (const auto &name : known_procedures) {
489
- puts(name.c_str());
490
- }
491
- break;
492
- }
493
-
494
- case ListingMode::Json: {
495
- printf("[\n");
496
- auto first = true;
497
- for (const auto &item : benchmarks) {
498
- const auto &name = item.first;
499
- const auto &entry = item.second;
500
- if (!first) {
501
- putchar(',');
502
- }
503
- first = false;
504
-
505
- printf(" {\n");
506
- printf(" \"name\": \"%s\",\n", name.c_str());
507
- if (std::holds_alternative<thirdparty_fn>(entry.first)) {
508
- printf(" \"simdutf\": false,\n");
509
- } else if (std::holds_alternative<simdutf_fn>(entry.first)) {
510
- printf(" \"simdutf\": true,\n");
511
- }
512
-
513
- {
514
- printf(" \"encodings\": [");
515
- bool first = true;
516
- for (const auto &enc : entry.second) {
517
- if (!first) {
518
- putchar(',');
519
- }
520
- first = false;
521
-
522
- switch (enc) {
523
- case simdutf::UTF8:
524
- printf("\"utf8\"");
525
- break;
526
- case simdutf::UTF16_LE:
527
- printf("\"utf16le\"");
528
- break;
529
- case simdutf::UTF16_BE:
530
- printf("\"utf16be\"");
531
- break;
532
- case simdutf::UTF32_LE:
533
- printf("\"utf32le\"");
534
- break;
535
- case simdutf::UTF32_BE:
536
- printf("\"utf32be\"");
537
- break;
538
- case simdutf::Latin1:
539
- printf("\"latin1\"");
540
- break;
541
- default:
542
- printf("\"unknown\"");
543
- break;
544
- }
545
- }
546
- printf("]\n");
547
- } // encodings
548
- printf(" }");
549
- } // for
550
- printf("]\n");
551
- break;
552
- }
553
- }
554
- }
555
-
556
- void Benchmark::run(const std::string &procedure_name, size_t iterations) {
557
- const auto item = benchmarks.find(procedure_name);
558
- if (item == benchmarks.end()) {
559
- std::cerr << "Unsupported procedure: " << procedure_name << '\n';
560
- std::cerr << "Report the issue.\n";
561
- std::cerr << " Aborting ! " << '\n';
562
- exit(1);
563
- }
564
-
565
- const auto &entry = item->second;
566
- if (std::holds_alternative<thirdparty_fn>(entry.first)) {
567
- const auto fn = std::get<thirdparty_fn>(entry.first);
568
-
569
- (this->*fn)(iterations);
570
- } else if (std::holds_alternative<simdutf_fn>(entry.first)) {
571
- const auto p = procedure_name.find('+');
572
- const std::string name{procedure_name.substr(0, p)};
573
- const std::string impl{procedure_name.substr(p + 1)};
574
-
575
- auto implementation = simdutf::get_available_implementations()[impl];
576
- if (implementation == nullptr) {
577
- throw std::runtime_error("Wrong implementation " + impl);
578
- }
579
- // If you want to skip the CPU feature checks, you can set
580
- // a variable when calling the benchmark program. E.g.,
581
- // SIMDUTF_SKIP_CPU_CHECK=ON benchmark -F myfile.txt
582
- // This might result in a crash (E.g., Illegal instruction).
583
- SIMDUTF_PUSH_DISABLE_WARNINGS
584
- SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC:
585
- // manually verified this is safe
586
- static const char *skip_check = getenv("SIMDUTF_SKIP_CPU_CHECK");
587
- SIMDUTF_POP_DISABLE_WARNINGS
588
- if (!skip_check && !implementation->supported_by_runtime_system()) {
589
- std::cout << procedure_name << ": unsupported by the system\n";
590
- return;
591
- }
592
-
593
- const auto fn = std::get<simdutf_fn>(entry.first);
594
- (this->*fn)(*implementation, iterations);
595
- } else {
596
- throw std::logic_error("The entry for '" + procedure_name +
597
- "' is not valid. Please report an issue.");
598
- }
599
-
600
- // We pause after each call to make sure
601
- // that other benchmarks are not affected by frequency throttling.
602
- // This was initially introduced for AVX-512 only, but it is probably
603
- // wise to have it always.
604
- std::this_thread::sleep_for(std::chrono::milliseconds(10));
605
- }
606
-
607
- void Benchmark::run_validate_utf8(const simdutf::implementation &implementation,
608
- size_t iterations) {
609
- const char *data = reinterpret_cast<const char *>(input_data.data());
610
- const size_t size = input_data.size();
611
- volatile bool sink{false};
612
-
613
- auto proc = [&implementation, data, size, &sink]() {
614
- sink = implementation.validate_utf8(data, size);
615
- };
616
-
617
- count_events(proc, iterations); // warming up!
618
- const auto result = count_events(proc, iterations);
619
- if ((sink == false) && (iterations > 0)) {
620
- std::cerr << "The input was declared invalid.\n";
621
- }
622
- size_t char_count = get_active_implementation()->count_utf8(data, size);
623
- print_summary(result, size, char_count);
624
- }
625
-
626
- void Benchmark::run_validate_utf8_with_errors(
627
- const simdutf::implementation &implementation, size_t iterations) {
628
- const char *data = reinterpret_cast<const char *>(input_data.data());
629
- const size_t size = input_data.size();
630
- volatile bool sink{false};
631
-
632
- auto proc = [&implementation, data, size, &sink]() {
633
- result res = implementation.validate_utf8_with_errors(data, size);
634
- sink = !(res.error);
635
- };
636
-
637
- count_events(proc, iterations); // warming up!
638
- const auto result = count_events(proc, iterations);
639
- if ((sink == false) && (iterations > 0)) {
640
- std::cerr << "The input was declared invalid.\n";
641
- }
642
- size_t char_count = get_active_implementation()->count_utf8(data, size);
643
- print_summary(result, size, char_count);
644
- }
645
-
646
namespace details {
// Returns true when none of the `size` bytes at `data` has its high bit set.
//
// All bytes are OR-ed together without any early exit, so the whole buffer is
// always visited; the accumulated value is then compared against 0x7F.
bool ascii_is_valid(const char *data, size_t size) {
  unsigned char accumulated = 0;
  const char *const end = data + size;
  for (const char *cursor = data; cursor != end; ++cursor) {
    accumulated = static_cast<unsigned char>(
        accumulated | static_cast<unsigned char>(*cursor));
  }
  return accumulated < 0x80;
}
} // namespace details
655
-
656
- void Benchmark::run_naive_validate_ascii(
657
- const simdutf::implementation &implementation, size_t iterations) {
658
- const char *data = reinterpret_cast<const char *>(input_data.data());
659
- const size_t size = input_data.size();
660
- volatile bool sink{false};
661
- auto proc = [&implementation, data, size, &sink]() {
662
- sink = details::ascii_is_valid(data, size);
663
- };
664
-
665
- count_events(proc, iterations); // warming up!
666
- const auto result = count_events(proc, iterations);
667
- if ((sink == false) && (iterations > 0)) {
668
- std::cerr << "The input was declared invalid.\n";
669
- }
670
- size_t char_count = get_active_implementation()->count_utf8(data, size);
671
- print_summary(result, size, char_count);
672
- }
673
-
674
- void Benchmark::run_validate_ascii(
675
- const simdutf::implementation &implementation, size_t iterations) {
676
- const char *data = reinterpret_cast<const char *>(input_data.data());
677
- const size_t size = input_data.size();
678
- volatile bool sink{false};
679
-
680
- auto proc = [&implementation, data, size, &sink]() {
681
- sink = implementation.validate_ascii(data, size);
682
- };
683
-
684
- count_events(proc, iterations); // warming up!
685
- const auto result = count_events(proc, iterations);
686
- if ((sink == false) && (iterations > 0)) {
687
- std::cerr << "The input was declared invalid.\n";
688
- }
689
- size_t char_count = get_active_implementation()->count_utf8(data, size);
690
- print_summary(result, size, char_count);
691
- }
692
-
693
- void Benchmark::run_validate_ascii_with_errors(
694
- const simdutf::implementation &implementation, size_t iterations) {
695
- const char *data = reinterpret_cast<const char *>(input_data.data());
696
- const size_t size = input_data.size();
697
- volatile bool sink{false};
698
-
699
- auto proc = [&implementation, data, size, &sink]() {
700
- result res = implementation.validate_ascii_with_errors(data, size);
701
- sink = !(res.error);
702
- };
703
-
704
- count_events(proc, iterations); // warming up!
705
- const auto result = count_events(proc, iterations);
706
- if ((sink == false) && (iterations > 0)) {
707
- std::cerr << "The input was declared invalid.\n";
708
- }
709
- size_t char_count = get_active_implementation()->count_utf8(data, size);
710
- print_summary(result, size, char_count);
711
- }
712
-
713
- void Benchmark::run_validate_utf16le(
714
- const simdutf::implementation &implementation, size_t iterations) {
715
- const simdutf::encoding_type bom =
716
- BOM::check_bom(input_data.data(), input_data.size());
717
- const char16_t *data = reinterpret_cast<const char16_t *>(
718
- input_data.data() + BOM::bom_byte_size(bom));
719
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
720
- if (size % 2 != 0) {
721
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
722
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
723
- printf(" Running function on truncated input.\n");
724
- }
725
-
726
- size /= 2;
727
-
728
- volatile bool sink{false};
729
-
730
- auto proc = [&implementation, data, size, &sink]() {
731
- sink = implementation.validate_utf16le(data, size);
732
- };
733
- count_events(proc, iterations); // warming up!
734
- const auto result = count_events(proc, iterations);
735
- if ((sink == false) && (iterations > 0)) {
736
- std::cerr << "The input was declared invalid.\n";
737
- }
738
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
739
- print_summary(result, input_data.size(), char_count);
740
- }
741
-
742
- void Benchmark::run_validate_utf16le_with_errors(
743
- const simdutf::implementation &implementation, size_t iterations) {
744
- const simdutf::encoding_type bom =
745
- BOM::check_bom(input_data.data(), input_data.size());
746
- const char16_t *data = reinterpret_cast<const char16_t *>(
747
- input_data.data() + BOM::bom_byte_size(bom));
748
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
749
- if (size % 2 != 0) {
750
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
751
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
752
- printf(" Running function on truncated input.\n");
753
- }
754
-
755
- size /= 2;
756
-
757
- volatile bool sink{false};
758
-
759
- auto proc = [&implementation, data, size, &sink]() {
760
- result res = implementation.validate_utf16le_with_errors(data, size);
761
- sink = !(res.error);
762
- };
763
- count_events(proc, iterations); // warming up!
764
- const auto result = count_events(proc, iterations);
765
- if ((sink == false) && (iterations > 0)) {
766
- std::cerr << "The input was declared invalid.\n";
767
- }
768
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
769
- print_summary(result, input_data.size(), char_count);
770
- }
771
-
772
- void Benchmark::run_validate_utf32(
773
- const simdutf::implementation &implementation, size_t iterations) {
774
- const simdutf::encoding_type bom =
775
- BOM::check_bom(input_data.data(), input_data.size());
776
- const char32_t *data = reinterpret_cast<const char32_t *>(
777
- input_data.data() + BOM::bom_byte_size(bom));
778
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
779
- if (size % 2 != 0) {
780
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
781
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
782
- printf(" Running function on truncated input.\n");
783
- }
784
-
785
- size /= 4;
786
-
787
- volatile bool sink{false};
788
-
789
- auto proc = [&implementation, data, size, &sink]() {
790
- sink = implementation.validate_utf32(data, size);
791
- };
792
- count_events(proc, iterations); // warming up!
793
- const auto result = count_events(proc, iterations);
794
- if ((sink == false) && (iterations > 0)) {
795
- std::cerr << "The input was declared invalid.\n";
796
- }
797
- size_t char_count = size;
798
- print_summary(result, input_data.size(), char_count);
799
- }
800
-
801
- void Benchmark::run_validate_utf32_with_errors(
802
- const simdutf::implementation &implementation, size_t iterations) {
803
- const simdutf::encoding_type bom =
804
- BOM::check_bom(input_data.data(), input_data.size());
805
- const char32_t *data = reinterpret_cast<const char32_t *>(
806
- input_data.data() + BOM::bom_byte_size(bom));
807
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
808
- if (size % 4 != 0) {
809
- printf(
810
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
811
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
812
- printf(" Running function on truncated input.\n");
813
- }
814
-
815
- size /= 4;
816
-
817
- volatile bool sink{false};
818
-
819
- auto proc = [&implementation, data, size, &sink]() {
820
- result res = implementation.validate_utf32_with_errors(data, size);
821
- sink = !(res.error);
822
- };
823
- count_events(proc, iterations); // warming up!
824
- const auto result = count_events(proc, iterations);
825
- if ((sink == false) && (iterations > 0)) {
826
- std::cerr << "The input was declared invalid.\n";
827
- }
828
- size_t char_count = size;
829
- print_summary(result, input_data.size(), char_count);
830
- }
831
-
832
- void Benchmark::run_convert_latin1_to_utf8(
833
- const simdutf::implementation &implementation, size_t iterations) {
834
- const char *data = reinterpret_cast<const char *>(input_data.data());
835
- const size_t size = input_data.size();
836
- std::unique_ptr<char[]> output_buffer{new char[size * 2]};
837
- volatile size_t sink{0};
838
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
839
- sink =
840
- implementation.convert_latin1_to_utf8(data, size, output_buffer.get());
841
- };
842
- count_events(proc, iterations); // warming up!
843
- const auto result = count_events(proc, iterations);
844
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
845
- std::cerr << "The output is zero which might indicate an error.\n";
846
- }
847
- size_t char_count = size;
848
- print_summary(result, size, char_count);
849
- }
850
-
851
- void Benchmark::run_convert_latin1_to_utf16le(
852
- const simdutf::implementation &implementation, size_t iterations) {
853
- const char *data = reinterpret_cast<const char *>(input_data.data());
854
- const size_t size = input_data.size();
855
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
856
- volatile size_t sink{0};
857
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
858
- sink = implementation.convert_latin1_to_utf16le(data, size,
859
- output_buffer.get());
860
- };
861
- count_events(proc, iterations); // warming up!
862
- const auto result = count_events(proc, iterations);
863
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
864
- std::cerr << "The output is zero which might indicate an error.\n";
865
- }
866
- size_t char_count = size;
867
- print_summary(result, size, char_count);
868
- }
869
-
870
- void Benchmark::run_convert_latin1_to_utf32(
871
- const simdutf::implementation &implementation, size_t iterations) {
872
- const char *data = reinterpret_cast<const char *>(input_data.data());
873
- const size_t size = input_data.size();
874
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
875
- volatile size_t sink{0};
876
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
877
- sink =
878
- implementation.convert_latin1_to_utf32(data, size, output_buffer.get());
879
- };
880
- count_events(proc, iterations); // warming up!
881
- const auto result = count_events(proc, iterations);
882
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
883
- std::cerr << "The output is zero which might indicate an error.\n";
884
- }
885
- size_t char_count = size;
886
- print_summary(result, size, char_count);
887
- }
888
-
889
- void Benchmark::run_utf8_length_from_latin1(
890
- const simdutf::implementation &implementation, size_t iterations) {
891
- const char *data = reinterpret_cast<const char *>(input_data.data());
892
- const size_t size = input_data.size();
893
- volatile size_t sink{0};
894
-
895
- auto proc = [&implementation, data, size, &sink]() {
896
- sink = implementation.utf8_length_from_latin1(data, size);
897
- };
898
- count_events(proc, iterations); // warming up!
899
- const auto result = count_events(proc, iterations);
900
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
901
- std::cerr << "The output is zero which might indicate an error.\n";
902
- }
903
- size_t char_count = get_active_implementation()->count_utf8(data, size);
904
- print_summary(result, size, char_count);
905
- }
906
-
907
- void Benchmark::run_utf8_length_from_utf16le(
908
- const simdutf::implementation &implementation, size_t iterations) {
909
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
910
- const size_t size = input_data.size() / 2;
911
- volatile size_t sink{0};
912
-
913
- auto proc = [&implementation, data, size, &sink]() {
914
- sink = implementation.utf8_length_from_utf16le(data, size);
915
- };
916
- count_events(proc, iterations); // warming up!
917
- const auto result = count_events(proc, iterations);
918
- print_summary(result, size, size);
919
- }
920
-
921
- void Benchmark::run_utf8_length_from_utf16be(
922
- const simdutf::implementation &implementation, size_t iterations) {
923
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
924
- const size_t size = input_data.size() / 2;
925
- volatile size_t sink{0};
926
-
927
- auto proc = [&implementation, data, size, &sink]() {
928
- sink = implementation.utf8_length_from_utf16be(data, size);
929
- };
930
- count_events(proc, iterations); // warming up!
931
- const auto result = count_events(proc, iterations);
932
- print_summary(result, size, size);
933
- }
934
-
935
- void Benchmark::run_utf8_length_from_utf16le_with_replacement(
936
- const simdutf::implementation &implementation, size_t iterations) {
937
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
938
- const size_t size = input_data.size() / 2;
939
- volatile size_t sink{0};
940
-
941
- auto proc = [&implementation, data, size, &sink]() {
942
- auto r =
943
- implementation.utf8_length_from_utf16le_with_replacement(data, size);
944
- sink = r.count;
945
- };
946
- count_events(proc, iterations); // warming up!
947
- const auto result = count_events(proc, iterations);
948
- print_summary(result, size, size);
949
- }
950
-
951
- void Benchmark::run_utf8_length_from_utf16be_with_replacement(
952
- const simdutf::implementation &implementation, size_t iterations) {
953
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
954
- const size_t size = input_data.size() / 2;
955
- volatile size_t sink{0};
956
-
957
- auto proc = [&implementation, data, size, &sink]() {
958
- auto r =
959
- implementation.utf8_length_from_utf16be_with_replacement(data, size);
960
- sink = r.count;
961
- };
962
- count_events(proc, iterations); // warming up!
963
- const auto result = count_events(proc, iterations);
964
- print_summary(result, size, size);
965
- }
966
-
967
- void Benchmark::run_utf8_length_from_utf32(
968
- const simdutf::implementation &implementation, size_t iterations) {
969
- const char32_t *data = reinterpret_cast<const char32_t *>(input_data.data());
970
- const size_t size = input_data.size() / 4;
971
- volatile size_t sink{0};
972
-
973
- auto proc = [&implementation, data, size, &sink]() {
974
- sink = implementation.utf8_length_from_utf32(data, size);
975
- };
976
- count_events(proc, iterations); // warming up!
977
- const auto result = count_events(proc, iterations);
978
- print_summary(result, size, size);
979
- }
980
-
981
- void Benchmark::run_to_well_formed_utf16le(
982
- const simdutf::implementation &implementation, size_t iterations) {
983
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
984
- const size_t size = input_data.size() / 2;
985
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
986
- auto proc = [&implementation, data, size, &output_buffer]() {
987
- implementation.to_well_formed_utf16le(data, size, output_buffer.get());
988
- };
989
- count_events(proc, iterations); // warming up!
990
- const auto result = count_events(proc, iterations);
991
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
992
- print_summary(result, input_data.size(), char_count);
993
- }
994
-
995
- void Benchmark::run_utf16_length_from_utf8(
996
- const simdutf::implementation &implementation, size_t iterations) {
997
- const char *data = reinterpret_cast<const char *>(input_data.data());
998
- const size_t size = input_data.size() / 4;
999
- volatile size_t sink{0};
1000
-
1001
- auto proc = [&implementation, data, size, &sink]() {
1002
- sink = implementation.utf16_length_from_utf8(data, size);
1003
- };
1004
- count_events(proc, iterations); // warming up!
1005
- const auto result = count_events(proc, iterations);
1006
- print_summary(result, size, size);
1007
- }
1008
-
1009
// Returns the number of set bits in `v`.
//
// Uses the compiler builtin on GCC-compatible compilers, the __popcnt64
// intrinsic on 64-bit non-ARM MSVC, and otherwise a branch-free bit-parallel
// reduction (pairs -> nibbles -> bytes, then a multiply to sum the bytes).
static inline uint32_t portable_popcount(uint64_t v) {
#if defined(__GNUC__)
  const int set_bits = __builtin_popcountll(v);
  return static_cast<uint32_t>(set_bits);
#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 &&              \
    !defined(_M_ARM64)
  return static_cast<uint32_t>(__popcnt64(static_cast<__int64>(v)));
#else
  const uint64_t every_other_bit = 0x5555555555555555;
  const uint64_t every_other_pair = 0x3333333333333333;
  const uint64_t low_nibbles = 0x0F0F0F0F0F0F0F0F;
  const uint64_t byte_ones = 0x0101010101010101;

  v -= (v >> 1) & every_other_bit;                                // 2-bit sums
  v = (v & every_other_pair) + ((v >> 2) & every_other_pair);     // 4-bit sums
  v = (v + (v >> 4)) & low_nibbles;                               // 8-bit sums
  return static_cast<uint32_t>((v * byte_ones) >> 56);            // total
#endif
}
1022
-
1023
- void Benchmark::run_utf8_length_from_latin1_node(size_t iterations) {
1024
- const char *data = reinterpret_cast<const char *>(input_data.data());
1025
- const size_t size = input_data.size();
1026
- volatile size_t sink{0};
1027
-
1028
- auto proc = [data, size, &sink]() {
1029
- // from https://github.com/nodejs/node/pull/54345
1030
- uint32_t length = size;
1031
- uint32_t result = length;
1032
- uint32_t i = 0;
1033
- const auto length8 = length & ~0x7;
1034
- while (i < length8) {
1035
- // Original PR used std::popcount, but it is not available pre-C++20.
1036
- result += portable_popcount(
1037
- *reinterpret_cast<const uint64_t *>(data + i) & 0x8080808080808080);
1038
- i += 8;
1039
- }
1040
- while (i < length) {
1041
- result += (data[i] >> 7);
1042
- i++;
1043
- }
1044
- sink = result;
1045
- };
1046
- count_events(proc, iterations); // warming up!
1047
- const auto result = count_events(proc, iterations);
1048
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1049
- std::cerr << "The output is zero which might indicate an error.\n";
1050
- }
1051
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1052
- print_summary(result, size, char_count);
1053
- }
1054
-
1055
- void Benchmark::run_convert_utf8_to_latin1(
1056
- const simdutf::implementation &implementation, size_t iterations) {
1057
- const char *data = reinterpret_cast<const char *>(input_data.data());
1058
- const size_t size = input_data.size();
1059
- std::unique_ptr<char[]> output_buffer{new char[size]};
1060
- volatile size_t sink{0};
1061
-
1062
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1063
- sink =
1064
- implementation.convert_utf8_to_latin1(data, size, output_buffer.get());
1065
- };
1066
- count_events(proc, iterations); // warming up!
1067
- const auto result = count_events(proc, iterations);
1068
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1069
- std::cerr << "The output is zero which might indicate an error.\n";
1070
- }
1071
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1072
- print_summary(result, size, char_count);
1073
- }
1074
-
1075
- void Benchmark::run_convert_utf8_to_latin1_with_errors(
1076
- const simdutf::implementation &implementation, size_t iterations) {
1077
- const char *data = reinterpret_cast<const char *>(input_data.data());
1078
- const size_t size = input_data.size();
1079
- std::unique_ptr<char[]> output_buffer{new char[size]};
1080
- volatile bool sink{false};
1081
-
1082
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1083
- result res = implementation.convert_utf8_to_latin1_with_errors(
1084
- data, size, output_buffer.get());
1085
- sink = !(res.error);
1086
- };
1087
- count_events(proc, iterations); // warming up!
1088
- const auto result = count_events(proc, iterations);
1089
- if ((sink == false) && (iterations > 0)) {
1090
- std::cerr << "The input was declared invalid.\n";
1091
- }
1092
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1093
- print_summary(result, size, char_count);
1094
- }
1095
-
1096
- void Benchmark::run_convert_valid_utf8_to_latin1(
1097
- const simdutf::implementation &implementation, size_t iterations) {
1098
- const char *data = reinterpret_cast<const char *>(input_data.data());
1099
- const size_t size = input_data.size();
1100
- std::unique_ptr<char[]> output_buffer{new char[size]};
1101
- volatile size_t sink{0};
1102
-
1103
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1104
- sink = implementation.convert_valid_utf8_to_latin1(data, size,
1105
- output_buffer.get());
1106
- };
1107
- count_events(proc, iterations); // warming up!
1108
- const auto result = count_events(proc, iterations);
1109
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1110
- std::cerr << "The output is zero which might indicate an error.\n";
1111
- }
1112
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1113
- print_summary(result, size, char_count);
1114
- }
1115
-
1116
- void Benchmark::run_convert_utf8_to_utf16le(
1117
- const simdutf::implementation &implementation, size_t iterations) {
1118
- const char *data = reinterpret_cast<const char *>(input_data.data());
1119
- const size_t size = input_data.size();
1120
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1121
- volatile size_t sink{0};
1122
-
1123
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1124
- sink =
1125
- implementation.convert_utf8_to_utf16le(data, size, output_buffer.get());
1126
- };
1127
- count_events(proc, iterations); // warming up!
1128
- const auto result = count_events(proc, iterations);
1129
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1130
- std::cerr << "The output is zero which might indicate an error.\n";
1131
- }
1132
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1133
- print_summary(result, size, char_count);
1134
- }
1135
-
1136
- void Benchmark::run_convert_utf8_to_utf16le_with_errors(
1137
- const simdutf::implementation &implementation, size_t iterations) {
1138
- const char *data = reinterpret_cast<const char *>(input_data.data());
1139
- const size_t size = input_data.size();
1140
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1141
- volatile bool sink{false};
1142
-
1143
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1144
- result res = implementation.convert_utf8_to_utf16le_with_errors(
1145
- data, size, output_buffer.get());
1146
- sink = !(res.error);
1147
- };
1148
- count_events(proc, iterations); // warming up!
1149
- const auto result = count_events(proc, iterations);
1150
- if ((sink == false) && (iterations > 0)) {
1151
- std::cerr << "The input was declared invalid.\n";
1152
- }
1153
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1154
- print_summary(result, size, char_count);
1155
- }
1156
-
1157
- void Benchmark::run_convert_utf8_to_utf32(
1158
- const simdutf::implementation &implementation, size_t iterations) {
1159
- const char *data = reinterpret_cast<const char *>(input_data.data());
1160
- const size_t size = input_data.size();
1161
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1162
- volatile size_t sink{0};
1163
-
1164
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1165
- sink =
1166
- implementation.convert_utf8_to_utf32(data, size, output_buffer.get());
1167
- };
1168
- count_events(proc, iterations); // warming up!
1169
- const auto result = count_events(proc, iterations);
1170
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1171
- std::cerr << "The output is zero which might indicate an error.\n";
1172
- }
1173
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1174
- print_summary(result, size, char_count);
1175
- }
1176
-
1177
- void Benchmark::run_convert_utf8_to_utf32_with_errors(
1178
- const simdutf::implementation &implementation, size_t iterations) {
1179
- const char *data = reinterpret_cast<const char *>(input_data.data());
1180
- const size_t size = input_data.size();
1181
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1182
- volatile bool sink{false};
1183
-
1184
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1185
- result res = implementation.convert_utf8_to_utf32_with_errors(
1186
- data, size, output_buffer.get());
1187
- sink = !(res.error);
1188
- };
1189
- count_events(proc, iterations); // warming up!
1190
- const auto result = count_events(proc, iterations);
1191
- if ((sink == false) && (iterations > 0)) {
1192
- std::cerr << "The input was declared invalid.\n";
1193
- }
1194
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1195
- print_summary(result, size, char_count);
1196
- }
1197
-
1198
- void Benchmark::run_convert_utf8_to_utf16le_with_dynamic_allocation(
1199
- const simdutf::implementation &implementation, size_t iterations) {
1200
- const char *data = reinterpret_cast<const char *>(input_data.data());
1201
- const size_t size = input_data.size();
1202
- volatile size_t sink{0};
1203
- auto proc = [&implementation, data, size, &sink]() {
1204
- auto dyn_size = implementation.utf16_length_from_utf8(data, size);
1205
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[dyn_size]};
1206
- sink =
1207
- implementation.convert_utf8_to_utf16le(data, size, output_buffer.get());
1208
- };
1209
- count_events(proc, iterations); // warming up!
1210
- const auto result = count_events(proc, iterations);
1211
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1212
- std::cerr << "The output is zero which might indicate an error.\n";
1213
- }
1214
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1215
- print_summary(result, size, char_count);
1216
- }
1217
-
1218
- void Benchmark::run_convert_utf8_to_utf32_with_dynamic_allocation(
1219
- const simdutf::implementation &implementation, size_t iterations) {
1220
- const char *data = reinterpret_cast<const char *>(input_data.data());
1221
- const size_t size = input_data.size();
1222
- volatile size_t sink{0};
1223
- auto proc = [&implementation, data, size, &sink]() {
1224
- auto dyn_size = implementation.utf32_length_from_utf8(data, size);
1225
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[dyn_size]};
1226
- sink =
1227
- implementation.convert_utf8_to_utf32(data, size, output_buffer.get());
1228
- };
1229
- count_events(proc, iterations); // warming up!
1230
- const auto result = count_events(proc, iterations);
1231
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1232
- std::cerr << "The output is zero which might indicate an error.\n";
1233
- }
1234
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1235
- print_summary(result, size, char_count);
1236
- }
1237
-
1238
- #ifdef ICU_AVAILABLE
1239
-
1240
- void Benchmark::run_convert_latin1_to_utf8_icu(size_t iterations) {
1241
- const char *data = reinterpret_cast<const char *>(input_data.data());
1242
- const size_t size = input_data.size();
1243
- volatile size_t sink{0};
1244
-
1245
- // Allocate target buffer
1246
- int32_t targetCapacity = size * 2;
1247
- std::unique_ptr<char[]> target(new char[targetCapacity]);
1248
-
1249
- auto proc = [data, size, &sink, &target, targetCapacity]() {
1250
- UErrorCode status = U_ZERO_ERROR;
1251
-
1252
- // Open converters for source and target encodings
1253
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1254
- assert(U_SUCCESS(status));
1255
- UConverter *utf8conv = ucnv_open("UTF-8", &status);
1256
- assert(U_SUCCESS(status));
1257
-
1258
- // Pointers for source and target
1259
- const char *source = data;
1260
- const char *sourceLimit = data + size;
1261
- char *targetStart = target.get();
1262
- char *targetLimit = target.get() + targetCapacity;
1263
-
1264
- // Convert from ISO-8859-1 to UTF-8
1265
- ucnv_convertEx(utf8conv, latin1conv, &targetStart, targetLimit, &source,
1266
- sourceLimit, nullptr, nullptr, nullptr, nullptr, true, true,
1267
- &status);
1268
- assert(U_SUCCESS(status));
1269
-
1270
- // Calculate the output size
1271
- sink = targetStart - target.get();
1272
-
1273
- // Clean up
1274
- ucnv_close(utf8conv);
1275
- ucnv_close(latin1conv);
1276
- };
1277
-
1278
- count_events(proc, iterations); // warming up!
1279
- const auto result = count_events(proc, iterations);
1280
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1281
- std::cerr
1282
- << "The output is zero which might indicate a misconfiguration.\n";
1283
- }
1284
- size_t char_count = size;
1285
- std::unique_ptr<char[]> output_buffer{new char[size * 2]};
1286
- size_t expected = get_active_implementation()->convert_latin1_to_utf8(
1287
- data, size, output_buffer.get());
1288
- if (expected != sink) {
1289
- std::cerr << "The number of characters outputted does not match.\n";
1290
- std::cout << "Expected: " << expected << ", Sink: " << sink
1291
- << std::endl; // print values
1292
- }
1293
-
1294
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1295
- std::cerr << "The output data does not match.\n";
1296
- }
1297
-
1298
- print_summary(result, size, char_count);
1299
- }
1300
-
1301
- void Benchmark::run_convert_latin1_to_utf16_icu(size_t iterations) {
1302
- const char *data = reinterpret_cast<const char *>(input_data.data());
1303
- const size_t size = input_data.size();
1304
- volatile size_t sink{0};
1305
-
1306
- // Allocate target buffer outside lambda
1307
- std::unique_ptr<UChar[]> target(new UChar[size * 2]);
1308
-
1309
- auto proc = [data, size, &sink, &target]() {
1310
- UErrorCode status = U_ZERO_ERROR;
1311
-
1312
- // Open converter for source encoding
1313
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1314
- assert(U_SUCCESS(status));
1315
-
1316
- // Convert from ISO-8859-1 to UTF-16 directly
1317
- int32_t actualTargetSize =
1318
- ucnv_toUChars(latin1conv, target.get(), size * 2, data, size, &status);
1319
- assert(U_SUCCESS(status));
1320
-
1321
- // Calculate the output size in bytes
1322
- sink = actualTargetSize * sizeof(UChar);
1323
-
1324
- // Clean up
1325
- ucnv_close(latin1conv);
1326
- };
1327
-
1328
- count_events(proc, iterations); // warming up!
1329
- const auto result = count_events(proc, iterations);
1330
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1331
- std::cerr
1332
- << "The output is zero which might indicate a misconfiguration.\n";
1333
- }
1334
- size_t char_count = size;
1335
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1336
- size_t expected = get_active_implementation()->convert_latin1_to_utf16le(
1337
- data, size, output_buffer.get()); // expected char16_t units
1338
- if (2 * expected != sink) {
1339
- std::cerr << "The number of utf16le code units does not match.\n";
1340
- std::cerr << "Expected: " << 2 * expected + 1 << ", Sink: " << sink
1341
- << std::endl; // print values
1342
- }
1343
-
1344
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1345
- std::cerr << "The output data does not match.\n";
1346
- // compare first 20 characters and print their hexadecimal values
1347
- std::cout << "First 20 characters of target data: ";
1348
- for (size_t i = 0; i < 20; i++) {
1349
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1350
- }
1351
- std::cout << "\nFirst 20 characters of output buffer: ";
1352
- for (size_t i = 0; i < 20; i++) {
1353
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1354
- }
1355
-
1356
- // compare last 20 characters and print their hexadecimal values
1357
- size_t num_chars = sink / sizeof(UChar);
1358
- size_t start = num_chars < 20 ? 0 : num_chars - 20;
1359
- std::cout << "\nLast 20 characters of target data: ";
1360
- for (size_t i = start; i < num_chars; i++) {
1361
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1362
- }
1363
- std::cout << "\nLast 20 characters of output buffer: ";
1364
- for (size_t i = start; i < num_chars; i++) {
1365
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1366
- }
1367
- }
1368
-
1369
- print_summary(result, size, char_count);
1370
- }
1371
-
1372
- void Benchmark::run_convert_latin1_to_utf32_icu(size_t iterations) {
1373
- const char *data = reinterpret_cast<const char *>(input_data.data());
1374
- const size_t size = input_data.size();
1375
- volatile size_t sink{0};
1376
-
1377
- std::unique_ptr<char[]> target;
1378
-
1379
- auto proc = [&target, data, size, &sink]() {
1380
- UErrorCode status = U_ZERO_ERROR;
1381
-
1382
- // Open converters for source and target encodings
1383
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1384
- assert(U_SUCCESS(status));
1385
- UConverter *utf32conv = ucnv_open("UTF-32LE", &status);
1386
- assert(U_SUCCESS(status));
1387
-
1388
- // Allocate target buffer
1389
- int32_t targetCapacity = size * 4; // UTF-32 takes four bytes.
1390
- target.reset(new char[targetCapacity]);
1391
-
1392
- // Pointers for source and target
1393
- const char *source = data;
1394
- const char *sourceLimit = data + size;
1395
- char *targetStart = target.get();
1396
- char *targetLimit = target.get() + targetCapacity;
1397
-
1398
- // Convert from ISO-8859-1 to UTF-32
1399
- ucnv_convertEx(utf32conv, latin1conv, &targetStart, targetLimit, &source,
1400
- sourceLimit, nullptr, nullptr, nullptr, nullptr, true, true,
1401
- &status);
1402
- assert(U_SUCCESS(status));
1403
-
1404
- // Calculate the output size in bytes
1405
- sink = targetStart - target.get();
1406
-
1407
- // Clean up
1408
- ucnv_close(utf32conv);
1409
- ucnv_close(latin1conv);
1410
- };
1411
-
1412
- count_events(proc, iterations); // warming up!
1413
- const auto result = count_events(proc, iterations);
1414
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1415
- std::cerr
1416
- << "The output is zero which might indicate a misconfiguration.\n";
1417
- }
1418
- size_t char_count = size;
1419
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1420
- size_t expected = get_active_implementation()->convert_latin1_to_utf32(
1421
- data, size, output_buffer.get()); // expected is the # of UTF32 characters
1422
- if (4 * expected != sink) {
1423
- std::cerr
1424
- << "The number of characters outputted does not match.\n"; // each UTF32
1425
- // character
1426
- // takes four
1427
- // bytes
1428
- std::cout << "Expected: " << expected << ", Sink: " << sink
1429
- << std::endl; // print values
1430
- }
1431
-
1432
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1433
- std::cerr << "The output data does not match.\n";
1434
- // compare first 20 characters and print their hexadecimal values
1435
- std::cout << "First 20 characters of target data: ";
1436
- for (size_t i = 0; i < 20; i++) {
1437
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1438
- }
1439
- std::cout << "\nFirst 20 characters of output buffer: ";
1440
- for (size_t i = 0; i < 20; i++) {
1441
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1442
- }
1443
-
1444
- // compare last 20 characters and print their hexadecimal values
1445
- size_t num_chars = sink / sizeof(UChar);
1446
- size_t start = num_chars < 20 ? 0 : num_chars - 20;
1447
- std::cout << "\nLast 20 characters of target data: ";
1448
- for (size_t i = start; i < num_chars; i++) {
1449
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1450
- }
1451
- std::cout << "\nLast 20 characters of output buffer: ";
1452
- for (size_t i = start; i < num_chars; i++) {
1453
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1454
- }
1455
- }
1456
-
1457
- print_summary(result, size, char_count);
1458
- }
1459
-
1460
- void Benchmark::run_convert_utf8_to_latin1_icu(size_t iterations) {
1461
- const char *data = reinterpret_cast<const char *>(input_data.data());
1462
- const size_t size = input_data.size();
1463
- volatile size_t sink{0};
1464
-
1465
- std::unique_ptr<char[]> target;
1466
-
1467
- auto proc = [&target, data, size, &sink]() {
1468
- UErrorCode status = U_ZERO_ERROR;
1469
-
1470
- // Open converters for source and target encodings
1471
- UConverter *utf8conv = ucnv_open("UTF-8", &status);
1472
- assert(U_SUCCESS(status));
1473
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1474
- assert(U_SUCCESS(status));
1475
-
1476
- // Allocate target buffer
1477
- int32_t targetCapacity = size * 2;
1478
- target.reset(new char[targetCapacity]);
1479
-
1480
- // Pointers for source and target
1481
- const char *source = data;
1482
- const char *sourceLimit = data + size;
1483
- char *targetStart = target.get();
1484
- char *targetLimit = target.get() + targetCapacity;
1485
-
1486
- // Convert from ISO-8859-1 to UTF-8
1487
- ucnv_convertEx(latin1conv, utf8conv, &targetStart, targetLimit, &source,
1488
- sourceLimit, nullptr, nullptr, nullptr, nullptr, true, true,
1489
- &status);
1490
- assert(U_SUCCESS(status));
1491
-
1492
- // Calculate the output size
1493
- sink = targetStart - target.get();
1494
-
1495
- // Clean up
1496
- ucnv_close(utf8conv);
1497
- ucnv_close(latin1conv);
1498
- };
1499
-
1500
- count_events(proc, iterations); // warming up!
1501
- const auto result = count_events(proc, iterations);
1502
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1503
- std::cerr
1504
- << "The output is zero which might indicate a misconfiguration.\n";
1505
- }
1506
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1507
- std::unique_ptr<char[]> output_buffer{new char[size]};
1508
- size_t expected = get_active_implementation()->convert_utf8_to_latin1(
1509
- data, size, output_buffer.get());
1510
- if (expected != sink) {
1511
- std::cerr << "The number of latin1 code units does not match.\n";
1512
- }
1513
-
1514
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1515
- std::cerr << "The output data does not match.\n";
1516
- // compare first 20 characters and print their hexadecimal values
1517
- std::cout << "First 20 characters of target data: ";
1518
- for (size_t i = 0; i < 20; i++) {
1519
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1520
- }
1521
- std::cout << "\nFirst 20 characters of output buffer: ";
1522
- for (size_t i = 0; i < 20; i++) {
1523
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1524
- }
1525
- }
1526
-
1527
- print_summary(result, size, char_count);
1528
- }
1529
-
1530
- void Benchmark::run_convert_utf8_to_utf16_icu(size_t iterations) {
1531
- const char *data = reinterpret_cast<const char *>(input_data.data());
1532
- const size_t size = input_data.size();
1533
- volatile size_t sink{0};
1534
- auto proc = [data, size, &sink]() {
1535
- auto str =
1536
- U_ICU_NAMESPACE::UnicodeString::fromUTF8(std::string_view(data, size));
1537
- sink = str.length();
1538
- };
1539
- count_events(proc, iterations); // warming up!
1540
- const auto result = count_events(proc, iterations);
1541
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1542
- std::cerr
1543
- << "The output is zero which might indicate a misconfiguration.\n";
1544
- }
1545
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1546
- // checking
1547
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1548
- size_t expected = convert_utf8_to_utf16le(data, size, output_buffer.get());
1549
- if (expected != sink) {
1550
- std::cerr << "The number of UTF-16 code units does not match.\n";
1551
- }
1552
- print_summary(result, size, char_count);
1553
- }
1554
- void Benchmark::run_convert_utf16_to_utf8_icu(size_t iterations) {
1555
- const simdutf::encoding_type bom =
1556
- BOM::check_bom(input_data.data(), input_data.size());
1557
- const char16_t *data = reinterpret_cast<const char16_t *>(
1558
- input_data.data() + BOM::bom_byte_size(bom));
1559
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1560
- if (size % 2 != 0) {
1561
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1562
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1563
- printf(" Running function on truncated input.\n");
1564
- }
1565
- size /= 2;
1566
- volatile size_t sink{0};
1567
-
1568
- auto proc = [data, size, &sink]() {
1569
- U_ICU_NAMESPACE::UnicodeString str(data, size);
1570
- std::string out;
1571
- out = str.toUTF8String(out);
1572
- sink = out.size();
1573
- };
1574
- count_events(proc, iterations); // warming up!
1575
- const auto result = count_events(proc, iterations);
1576
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1577
- std::cerr << "The output is zero which might indicate an error.\n";
1578
- }
1579
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
1580
- print_summary(result, input_data.size(), char_count);
1581
- }
1582
-
1583
- void Benchmark::run_convert_utf16_to_latin1_icu(size_t iterations) {
1584
- const simdutf::encoding_type bom =
1585
- BOM::check_bom(input_data.data(), input_data.size());
1586
- const char16_t *data = reinterpret_cast<const char16_t *>(
1587
- input_data.data() + BOM::bom_byte_size(bom));
1588
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1589
- if (size % 2 != 0) {
1590
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1591
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1592
- printf(" Running function on truncated input.\n");
1593
- }
1594
- size /= 2;
1595
- volatile size_t sink{0};
1596
-
1597
- std::unique_ptr<char[]> target;
1598
-
1599
- auto proc = [&target, data, size, &sink]() {
1600
- UErrorCode status = U_ZERO_ERROR;
1601
- UConverter *conv =
1602
- ucnv_open("ISO-8859-1", &status); // open a converter for ISO-8859-1
1603
- assert(U_SUCCESS(status));
1604
-
1605
- int32_t targetCapacity = size; // adjust as needed
1606
- target.reset(new char[targetCapacity]);
1607
- char *targetStart = target.get();
1608
-
1609
- sink =
1610
- ucnv_fromUChars(conv, targetStart, targetCapacity,
1611
- reinterpret_cast<const UChar *>(data), size, &status);
1612
- assert(U_SUCCESS(status));
1613
-
1614
- // Clean up
1615
- ucnv_close(conv);
1616
- };
1617
-
1618
- count_events(proc, iterations); // warming up!
1619
- const auto result = count_events(proc, iterations);
1620
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1621
- std::cerr
1622
- << "The output is zero which might indicate a misconfiguration.\n";
1623
- }
1624
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
1625
- std::unique_ptr<char[]> output_buffer{new char[size]};
1626
- size_t expected = get_active_implementation()->convert_utf16le_to_latin1(
1627
- data, size, output_buffer.get());
1628
- if (expected != sink) {
1629
- std::cerr << "The number of expected bytes does not match.\n";
1630
- std::cout << "Expected: " << expected << ", Sink: " << sink
1631
- << std::endl; // print values
1632
- }
1633
-
1634
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1635
- std::cerr << "The output data does not match.\n";
1636
- // compare first 20 characters and print their hexadecimal values
1637
- std::cout << "First 20 characters of target data: ";
1638
- for (size_t i = 0; i < 20; i++) {
1639
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1640
- }
1641
- std::cout << "\nFirst 20 characters of output buffer: ";
1642
- for (size_t i = 0; i < 20; i++) {
1643
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1644
- }
1645
- }
1646
-
1647
- print_summary(result, input_data.size(), char_count);
1648
- }
1649
-
1650
- void Benchmark::run_convert_utf32_to_latin1_icu(size_t iterations) {
1651
- const simdutf::encoding_type bom =
1652
- BOM::check_bom(input_data.data(), input_data.size());
1653
- const char32_t *data = reinterpret_cast<const char32_t *>(
1654
- input_data.data() + BOM::bom_byte_size(bom));
1655
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1656
- if (size % 4 != 0) {
1657
- printf(
1658
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
1659
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1660
- printf(" Running function on truncated input.\n");
1661
- }
1662
-
1663
- size /= 4;
1664
- volatile size_t sink{0};
1665
- std::unique_ptr<char[]> target;
1666
-
1667
- auto proc = [&target, data, size, &sink]() {
1668
- UErrorCode status = U_ZERO_ERROR;
1669
-
1670
- UConverter *utf32conv =
1671
- ucnv_open("UTF-32LE", &status); // create a UTF-32 converter
1672
- assert(U_SUCCESS(status));
1673
-
1674
- UConverter *latin1conv =
1675
- ucnv_open("ISO-8859-1", &status); // create a Latin1 converter
1676
- assert(U_SUCCESS(status));
1677
-
1678
- int32_t targetCapacity = size; // adjust as needed
1679
- target.reset(new char[targetCapacity]);
1680
- char *targetStart = target.get();
1681
-
1682
- const char *sourceStart = reinterpret_cast<const char *>(data);
1683
- const char *sourceEnd = sourceStart + size * sizeof(char32_t);
1684
-
1685
- // Convert from UTF-32 to Latin1
1686
- ucnv_convertEx(latin1conv, utf32conv, &targetStart,
1687
- targetStart + targetCapacity, &sourceStart, sourceEnd,
1688
- nullptr, nullptr, nullptr, nullptr, true, true, &status);
1689
- assert(U_SUCCESS(status));
1690
-
1691
- // Calculate the output size
1692
- sink = targetStart - target.get();
1693
-
1694
- // Clean up
1695
- ucnv_close(utf32conv);
1696
- ucnv_close(latin1conv);
1697
- };
1698
-
1699
- count_events(proc, iterations); // warming up!
1700
- const auto result = count_events(proc, iterations);
1701
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1702
- std::cerr
1703
- << "The output is zero which might indicate a misconfiguration.\n";
1704
- }
1705
- size_t char_count = size;
1706
- std::unique_ptr<char[]> output_buffer{new char[size]};
1707
- size_t expected = get_active_implementation()->convert_utf32_to_latin1(
1708
- data, size, output_buffer.get());
1709
- if (expected != sink) {
1710
- std::cerr << "The number of expected bytes does not match.\n";
1711
- std::cout << "Expected: " << expected << ", Sink: " << sink
1712
- << std::endl; // print values
1713
- }
1714
-
1715
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1716
- std::cerr << "The output data does not match.\n";
1717
- // compare first 20 characters and print their hexadecimal values
1718
- std::cout << "First 20 characters of target data: ";
1719
- for (size_t i = 0; i < 20; i++) {
1720
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1721
- }
1722
- std::cout << "\nFirst 20 characters of output buffer: ";
1723
- for (size_t i = 0; i < 20; i++) {
1724
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1725
- }
1726
- }
1727
-
1728
- print_summary(result, input_data.size(), char_count);
1729
- }
1730
-
1731
- #endif
1732
-
1733
- #ifdef ICONV_AVAILABLE
1734
- void Benchmark::run_convert_latin1_to_utf8_iconv(size_t iterations) {
1735
- iconv_t cv = iconv_open("UTF-8", "ISO-8859-1");
1736
- if (cv == (iconv_t)(-1)) {
1737
- fprintf(stderr,
1738
- "[iconv] cannot initialize ISO-8859-1 to UTF-8 converter\n");
1739
- return;
1740
- }
1741
- char *data = reinterpret_cast<char *>(input_data.data());
1742
- const size_t size = input_data.size();
1743
- std::unique_ptr<char[]> output_buffer{new char[size * 2]}; // 2 for safety
1744
- volatile size_t sink{0};
1745
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1746
- size_t inbytes = size;
1747
- size_t outbytes = sizeof(uint8_t) * size * 2;
1748
- #ifdef WINICONV_CONST
1749
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1750
- #else
1751
- char *inptr = data;
1752
- #endif
1753
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1754
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1755
- if (result == static_cast<size_t>(-1)) {
1756
- sink = 0;
1757
- } else {
1758
- sink = (sizeof(uint8_t) * size - outbytes) / sizeof(char);
1759
- }
1760
- };
1761
- count_events(proc, iterations); // warming up!
1762
- const auto result = count_events(proc, iterations);
1763
- iconv_close(cv);
1764
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1765
- std::cerr << "The output is zero which might indicate an error.\n";
1766
- }
1767
- size_t char_count = size;
1768
- print_summary(result, size, char_count);
1769
- }
1770
-
1771
- void Benchmark::run_convert_latin1_to_utf16_iconv(size_t iterations) {
1772
- iconv_t cv = iconv_open("UTF-16", "ISO-8859-1");
1773
- if (cv == (iconv_t)(-1)) {
1774
- fprintf(stderr,
1775
- "[iconv] cannot initialize ISO-8859-1 to UTF-16 converter\n");
1776
- return;
1777
- }
1778
- char *data = reinterpret_cast<char *>(input_data.data());
1779
- const size_t size = input_data.size();
1780
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1781
- volatile size_t sink{0};
1782
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1783
- size_t inbytes = size;
1784
- size_t outbytes = sizeof(uint16_t) * size;
1785
- #ifdef WINICONV_CONST
1786
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1787
- #else
1788
- char *inptr = data;
1789
- #endif
1790
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1791
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1792
- if (result == static_cast<size_t>(-1)) {
1793
- sink = 0;
1794
- } else {
1795
- sink = (sizeof(uint16_t) * size - outbytes) / sizeof(char);
1796
- }
1797
- };
1798
- count_events(proc, iterations); // warming up!
1799
- const auto result = count_events(proc, iterations);
1800
- iconv_close(cv);
1801
- size_t char_count = size;
1802
- print_summary(result, size, char_count);
1803
- }
1804
-
1805
- void Benchmark::run_convert_latin1_to_utf32_iconv(size_t iterations) {
1806
- iconv_t cv = iconv_open("UTF-32LE", "ISO-8859-1");
1807
- if (cv == (iconv_t)(-1)) {
1808
- fprintf(stderr,
1809
- "[iconv] cannot initialize ISO-8859-1 to UTF-32 converter\n");
1810
- return;
1811
- }
1812
- char *data = reinterpret_cast<char *>(input_data.data());
1813
- const size_t size = input_data.size();
1814
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1815
- volatile size_t sink{0};
1816
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1817
- size_t inbytes = size;
1818
- size_t outbytes = sizeof(uint32_t) * size;
1819
- #ifdef WINICONV_CONST
1820
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1821
- #else
1822
- char *inptr = data;
1823
- #endif
1824
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1825
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1826
- if (result == static_cast<size_t>(-1)) {
1827
- sink = 0;
1828
- } else {
1829
- sink = (sizeof(uint32_t) * size - outbytes) / sizeof(char);
1830
- ;
1831
- }
1832
- };
1833
- count_events(proc, iterations); // warming up!
1834
- const auto result = count_events(proc, iterations);
1835
- iconv_close(cv);
1836
- size_t char_count = size;
1837
- print_summary(result, size, char_count);
1838
- }
1839
-
1840
- void Benchmark::run_convert_utf8_to_latin1_iconv(size_t iterations) {
1841
- iconv_t cv = iconv_open("ISO-8859-1", "UTF-8");
1842
- if (cv == (iconv_t)(-1)) {
1843
- fprintf(stderr, "[iconv] cannot initialize UTF-8 to Latin1 converter\n");
1844
- return;
1845
- }
1846
- char *data = reinterpret_cast<char *>(input_data.data());
1847
- const size_t size = input_data.size();
1848
- std::unique_ptr<char[]> output_buffer{new char[size]};
1849
- volatile size_t sink{0};
1850
-
1851
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1852
- size_t inbytes = size;
1853
- size_t outbytes = sizeof(uint8_t) * size;
1854
- // win-iconv includes WINICONV_CONST in its function signatures
1855
- // https://github.com/simdutf/simdutf/pull/178
1856
- #ifdef WINICONV_CONST
1857
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1858
- #else
1859
- char *inptr = data;
1860
- #endif
1861
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1862
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1863
- if (result == static_cast<size_t>(-1)) {
1864
- sink = 0;
1865
- } else {
1866
- sink = (sizeof(uint8_t) * size - outbytes) / sizeof(char);
1867
- ;
1868
- }
1869
- };
1870
- count_events(proc, iterations); // warming up!
1871
- const auto result = count_events(proc, iterations);
1872
- iconv_close(cv);
1873
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1874
- std::cerr << "The output is zero which might indicate an error.\n";
1875
- }
1876
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1877
- print_summary(result, size, char_count);
1878
- }
1879
-
1880
- void Benchmark::run_convert_utf8_to_utf16_iconv(size_t iterations) {
1881
- iconv_t cv = iconv_open("UTF-16LE", "UTF-8");
1882
- if (cv == (iconv_t)(-1)) {
1883
- fprintf(stderr, "[iconv] cannot initialize UTF-8 to UTF-16LE converter\n");
1884
- return;
1885
- }
1886
- char *data = reinterpret_cast<char *>(input_data.data());
1887
- const size_t size = input_data.size();
1888
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1889
- volatile size_t sink{0};
1890
-
1891
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1892
- size_t inbytes = size;
1893
- size_t outbytes = sizeof(uint16_t) * size;
1894
- // win-iconv includes WINICONV_CONST in its function signatures
1895
- // https://github.com/simdutf/simdutf/pull/178
1896
- #ifdef WINICONV_CONST
1897
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1898
- #else
1899
- char *inptr = data;
1900
- #endif
1901
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1902
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1903
- if (result == static_cast<size_t>(-1)) {
1904
- sink = 0;
1905
- } else {
1906
- sink = (sizeof(uint16_t) * size - outbytes) / sizeof(char);
1907
- ;
1908
- }
1909
- };
1910
- count_events(proc, iterations); // warming up!
1911
- const auto result = count_events(proc, iterations);
1912
- iconv_close(cv);
1913
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1914
- std::cerr << "The output is zero which might indicate an error.\n";
1915
- }
1916
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1917
- print_summary(result, size, char_count);
1918
- }
1919
-
1920
- void Benchmark::run_convert_utf16_to_latin1_iconv(size_t iterations) {
1921
- iconv_t cv = iconv_open("ISO-8859-1", "UTF-16LE");
1922
- if (cv == (iconv_t)(-1)) {
1923
- fprintf(stderr,
1924
- "[iconv] cannot initialize the UTF-16LE to ISO-8859-1 converter\n");
1925
- return;
1926
- }
1927
- const simdutf::encoding_type bom =
1928
- BOM::check_bom(input_data.data(), input_data.size());
1929
- char16_t *data =
1930
- reinterpret_cast<char16_t *>(input_data.data() + BOM::bom_byte_size(bom));
1931
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1932
- if (size % 2 != 0) {
1933
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1934
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1935
- printf(" Running function on truncated input.\n");
1936
- }
1937
-
1938
- size /= 2;
1939
-
1940
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
1941
- // yields 4 bytes,
1942
- // thus we're making safe assumption that each 16-bit word will be
1943
- // expanded to four bytes.
1944
- std::unique_ptr<char[]> output_buffer{new char[size]};
1945
-
1946
- volatile size_t sink{0};
1947
-
1948
- auto proc = [cv, data, size, &output_buffer, &sink]() {
1949
- size_t inbytes = sizeof(uint16_t) * size;
1950
- size_t outbytes = size;
1951
- #ifdef WINICONV_CONST
1952
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1953
- #else
1954
- char *inptr = reinterpret_cast<char *>(data);
1955
- #endif
1956
- char *outptr = output_buffer.get();
1957
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1958
- if (result == static_cast<size_t>(-1)) {
1959
- sink = 0;
1960
- } else {
1961
- sink = (size - outbytes) / sizeof(char16_t);
1962
- }
1963
- };
1964
- count_events(proc, iterations); // warming up!
1965
- const auto result = count_events(proc, iterations);
1966
- iconv_close(cv);
1967
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1968
- std::cerr << "The output is zero which might indicate an error.\n";
1969
- }
1970
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
1971
- print_summary(result, input_data.size(), char_count);
1972
- }
1973
-
1974
- void Benchmark::run_convert_utf16_to_utf8_iconv(size_t iterations) {
1975
- iconv_t cv = iconv_open("UTF-8", "UTF-16LE");
1976
- if (cv == (iconv_t)(-1)) {
1977
- fprintf(stderr,
1978
- "[iconv] cannot initialize the UTF-16LE to UTF-8 converter\n");
1979
- return;
1980
- }
1981
- const simdutf::encoding_type bom =
1982
- BOM::check_bom(input_data.data(), input_data.size());
1983
- char16_t *data =
1984
- reinterpret_cast<char16_t *>(input_data.data() + BOM::bom_byte_size(bom));
1985
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1986
- if (size % 2 != 0) {
1987
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1988
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1989
- printf(" Running function on truncated input.\n");
1990
- }
1991
-
1992
- size /= 2;
1993
-
1994
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
1995
- // yields 4 bytes,
1996
- // thus we're making safe assumption that each 16-bit word will be
1997
- // expanded to four bytes.
1998
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
1999
-
2000
- volatile size_t sink{0};
2001
-
2002
- auto proc = [cv, data, size, &output_buffer, &sink]() {
2003
- size_t inbytes = sizeof(uint16_t) * size;
2004
- size_t outbytes = 4 * size;
2005
- #ifdef WINICONV_CONST
2006
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
2007
- #else
2008
- char *inptr = reinterpret_cast<char *>(data);
2009
- #endif
2010
- char *outptr = output_buffer.get();
2011
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
2012
- if (result == static_cast<size_t>(-1)) {
2013
- sink = 0;
2014
- } else {
2015
- sink = (4 * size - outbytes) / sizeof(char16_t);
2016
- }
2017
- };
2018
- count_events(proc, iterations); // warming up!
2019
- const auto result = count_events(proc, iterations);
2020
- iconv_close(cv);
2021
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2022
- std::cerr << "The output is zero which might indicate an error.\n";
2023
- }
2024
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2025
- print_summary(result, input_data.size(), char_count);
2026
- }
2027
-
2028
- void Benchmark::run_convert_utf32_to_latin1_iconv(size_t iterations) {
2029
- iconv_t cv = iconv_open("ISO-8859-1", "UTF-32LE");
2030
- if (cv == (iconv_t)(-1)) {
2031
- fprintf(stderr,
2032
- "[iconv] cannot initialize the UTF-32 to ISO-8859-1 converter\n");
2033
- return;
2034
- }
2035
- const simdutf::encoding_type bom =
2036
- BOM::check_bom(input_data.data(), input_data.size());
2037
- char32_t *data =
2038
- reinterpret_cast<char32_t *>(input_data.data() + BOM::bom_byte_size(bom));
2039
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2040
- if (size % 4 != 0) {
2041
- printf(
2042
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
2043
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2044
- printf(" Running function on truncated input.\n");
2045
- }
2046
-
2047
- size /= 4;
2048
-
2049
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2050
- // yields 4 bytes,
2051
- // thus we're making safe assumption that each 16-bit word will be
2052
- // expanded to four bytes.
2053
- std::unique_ptr<char[]> output_buffer{new char[size]};
2054
-
2055
- volatile size_t sink{0};
2056
-
2057
- auto proc = [cv, data, size, &output_buffer, &sink]() {
2058
- size_t inbytes = sizeof(uint32_t) * size;
2059
- size_t outbytes = size;
2060
- #ifdef WINICONV_CONST
2061
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
2062
- #else
2063
- char *inptr = reinterpret_cast<char *>(data);
2064
- #endif
2065
- char *outptr = output_buffer.get();
2066
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
2067
- if (result == static_cast<size_t>(-1)) {
2068
- sink = 0;
2069
- abort();
2070
- } else {
2071
- sink = (size - outbytes) / sizeof(char32_t);
2072
- }
2073
- };
2074
- count_events(proc, iterations); // warming up!
2075
- const auto result = count_events(proc, iterations);
2076
- iconv_close(cv);
2077
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2078
- std::cerr << "The output is zero which might indicate an error.\n";
2079
- }
2080
- size_t char_count = size;
2081
- print_summary(result, input_data.size(), char_count);
2082
- }
2083
- #endif
2084
-
2085
- #ifdef INOUE2008
2086
- void Benchmark::run_convert_valid_utf8_to_utf16_inoue2008(size_t iterations) {
2087
- // Inoue2008 is only up to 3-byte UTF8 sequence.
2088
- for (uint8_t c : input_data) {
2089
- if (c >= 0b11110000) {
2090
- std::cerr << "Warning: Inoue 2008 does not support 4-byte inputs!"
2091
- << std::endl;
2092
- break;
2093
- }
2094
- }
2095
- // This is currently minimally tested. It is possible that the transcoding
2096
- // could be wrong. It is also unsafe: it could fail in disastrous ways if the
2097
- // input is adversarial.
2098
- const char *data = reinterpret_cast<const char *>(input_data.data());
2099
- const size_t size = input_data.size();
2100
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2101
- volatile size_t sink{0};
2102
- auto proc = [data, size, &output_buffer, &sink]() {
2103
- sink = inoue2008::convert_valid(data, size, output_buffer.get());
2104
- };
2105
- count_events(proc, iterations); // warming up!
2106
- const auto result = count_events(proc, iterations);
2107
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2108
- std::cerr
2109
- << "The output is zero which might indicate a misconfiguration.\n";
2110
- }
2111
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2112
- print_summary(result, size, char_count);
2113
- }
2114
- #endif
2115
- /**
2116
- * Bjoern Hoehrmann
2117
- * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
2118
- */
2119
- void Benchmark::run_convert_utf8_to_utf16_hoehrmann(size_t iterations) {
2120
- uint8_t const *data = input_data.data();
2121
- const size_t size = input_data.size();
2122
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2123
- volatile size_t sink{0};
2124
- auto proc = [data, size, &output_buffer, &sink]() {
2125
- sink = hoehrmann::toUtf16(data, size, output_buffer.get());
2126
- };
2127
- count_events(proc, iterations); // warming up!
2128
- const auto result = count_events(proc, iterations);
2129
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2130
- std::cerr << "The output is zero which might indicate an error.\n";
2131
- }
2132
- size_t char_count = get_active_implementation()->count_utf8(
2133
- reinterpret_cast<const char *>(data), size);
2134
- print_summary(result, size, char_count);
2135
- }
2136
- /**
2137
- * Bjoern Hoehrmann
2138
- * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
2139
- */
2140
- void Benchmark::run_convert_utf8_to_utf32_hoehrmann(size_t iterations) {
2141
- uint8_t const *data = input_data.data();
2142
- const size_t size = input_data.size();
2143
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
2144
- volatile size_t sink{0};
2145
- auto proc = [data, size, &output_buffer, &sink]() {
2146
- sink = hoehrmann::toUtf32(data, size, output_buffer.get());
2147
- };
2148
- count_events(proc, iterations); // warming up!
2149
- const auto result = count_events(proc, iterations);
2150
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2151
- std::cerr << "The output is zero which might indicate an error.\n";
2152
- }
2153
- size_t char_count = get_active_implementation()->count_utf8(
2154
- reinterpret_cast<const char *>(data), size);
2155
- print_summary(result, size, char_count);
2156
- }
2157
-
2158
- #ifdef __x86_64__
2159
- /**
2160
- * utf8lut: Vectorized UTF-8 converter.
2161
- * by stgatilov (2019)
2162
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2163
- */
2164
- void Benchmark::run_convert_utf16_to_utf8_utf8lut(size_t iterations) {
2165
- const simdutf::encoding_type bom =
2166
- BOM::check_bom(input_data.data(), input_data.size());
2167
- const char16_t *data = reinterpret_cast<const char16_t *>(
2168
- input_data.data() + BOM::bom_byte_size(bom));
2169
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2170
- if (size % 2 != 0) {
2171
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2172
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2173
- printf(" Running function on truncated input.\n");
2174
- }
2175
-
2176
- size /= 2;
2177
-
2178
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2179
- // yields 4 bytes,
2180
- // thus we're making safe assumption that each 16-bit word will be
2181
- // expanded to four bytes.
2182
- // utf8lut requires an extra 16 bytes of padding.
2183
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2184
-
2185
- volatile size_t sink{0};
2186
-
2187
- auto proc = [data, size, &output_buffer, &sink]() {
2188
- std::unique_ptr<BaseBufferProcessor> processor(
2189
- ProcessorSelector<dfUtf16, dfUtf8>::WithOptions<cmValidate>::Create());
2190
- ConversionResult result = ConvertInMemory(
2191
- *processor, reinterpret_cast<const char *>(data), 2 * size,
2192
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2193
- if (result.status != 0) {
2194
- sink = 0;
2195
- } else {
2196
- sink = result.outputSize;
2197
- }
2198
- };
2199
- count_events(proc, iterations); // warming up!
2200
- const auto result = count_events(proc, iterations);
2201
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2202
- std::cerr << "The output is zero which might indicate an error.\n";
2203
- }
2204
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2205
- print_summary(result, input_data.size(), char_count);
2206
- }
2207
- /**
2208
- * utf8lut: Vectorized UTF-8 converter.
2209
- * by stgatilov (2019)
2210
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2211
- */
2212
- void Benchmark::run_convert_valid_utf16_to_utf8_utf8lut(size_t iterations) {
2213
- const simdutf::encoding_type bom =
2214
- BOM::check_bom(input_data.data(), input_data.size());
2215
- const char16_t *data = reinterpret_cast<const char16_t *>(
2216
- input_data.data() + BOM::bom_byte_size(bom));
2217
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2218
- if (size % 2 != 0) {
2219
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2220
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2221
- printf(" Running function on truncated input.\n");
2222
- }
2223
-
2224
- size /= 2;
2225
-
2226
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2227
- // yields 4 bytes,
2228
- // thus we're making safe assumption that each 16-bit word will be
2229
- // expanded to four bytes.
2230
- // utf8lut requires an extra 16 bytes of padding.
2231
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2232
-
2233
- volatile size_t sink{0};
2234
-
2235
- auto proc = [data, size, &output_buffer, &sink]() {
2236
- std::unique_ptr<BaseBufferProcessor> processor(
2237
- ProcessorSelector<dfUtf16, dfUtf8>::WithOptions<cmFull>::Create());
2238
- ConversionResult result = ConvertInMemory(
2239
- *processor, reinterpret_cast<const char *>(data), 2 * size,
2240
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2241
- if (result.status != 0) {
2242
- sink = 0;
2243
- } else {
2244
- sink = result.outputSize;
2245
- }
2246
- };
2247
- count_events(proc, iterations); // warming up!
2248
- const auto result = count_events(proc, iterations);
2249
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2250
- std::cerr << "The output is zero which might indicate an error.\n";
2251
- }
2252
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2253
- print_summary(result, input_data.size(), char_count);
2254
- }
2255
- /**
2256
- * utf8lut: Vectorized UTF-8 converter.
2257
- * by stgatilov (2019)
2258
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2259
- */
2260
- void Benchmark::run_convert_utf8_to_utf16_utf8lut(size_t iterations) {
2261
- const char *data = reinterpret_cast<const char *>(input_data.data());
2262
- const size_t size = input_data.size();
2263
- // utf8lut requires an extra 8 bytes of padding.
2264
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2 + 8]};
2265
- volatile size_t sink{0};
2266
- auto proc = [data, size, &output_buffer, &sink]() {
2267
- std::unique_ptr<BaseBufferProcessor> processor(
2268
- ProcessorSelector<dfUtf8, dfUtf16>::WithOptions<cmValidate>::Create());
2269
- ConversionResult result = ConvertInMemory(
2270
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2271
- size * 2 + 16);
2272
- if (result.status != 0) {
2273
- sink = 0;
2274
- } else {
2275
- sink = result.outputSize / 2;
2276
- }
2277
- };
2278
- count_events(proc, iterations); // warming up!
2279
- const auto result = count_events(proc, iterations);
2280
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2281
- std::cerr
2282
- << "The output is zero which might indicate a misconfiguration.\n";
2283
- }
2284
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2285
- print_summary(result, size, char_count);
2286
- }
2287
- /**
2288
- * utf8lut: Vectorized UTF-8 converter.
2289
- * by stgatilov (2019)
2290
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2291
- */
2292
- void Benchmark::run_convert_utf8_to_utf32_utf8lut(size_t iterations) {
2293
- const char *data = reinterpret_cast<const char *>(input_data.data());
2294
- const size_t size = input_data.size();
2295
-
2296
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size + 4]};
2297
- volatile size_t sink{0};
2298
- auto proc = [data, size, &output_buffer, &sink]() {
2299
- std::unique_ptr<BaseBufferProcessor> processor(
2300
- ProcessorSelector<dfUtf8, dfUtf32>::WithOptions<cmValidate>::Create());
2301
- ConversionResult result = ConvertInMemory(
2302
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2303
- size * 4 + 16);
2304
- if (result.status != 0) {
2305
- sink = 0;
2306
- } else {
2307
- sink = result.outputSize / 2;
2308
- }
2309
- };
2310
- count_events(proc, iterations); // warming up!
2311
- const auto result = count_events(proc, iterations);
2312
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2313
- std::cerr
2314
- << "The output is zero which might indicate a misconfiguration.\n";
2315
- }
2316
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2317
- print_summary(result, size, char_count);
2318
- }
2319
- /**
2320
- * utf8lut: Vectorized UTF-8 converter.
2321
- * by stgatilov (2019)
2322
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2323
- */
2324
- void Benchmark::run_convert_valid_utf8_to_utf16_utf8lut(size_t iterations) {
2325
- const char *data = reinterpret_cast<const char *>(input_data.data());
2326
- const size_t size = input_data.size();
2327
- // utf8lut requires an extra 8 bytes of padding.
2328
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2 + 8]};
2329
- volatile size_t sink{0};
2330
- auto proc = [data, size, &output_buffer, &sink]() {
2331
- std::unique_ptr<BaseBufferProcessor> processor(
2332
- ProcessorSelector<dfUtf8, dfUtf16>::WithOptions<cmFull>::Create());
2333
- ConversionResult result = ConvertInMemory(
2334
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2335
- size * 2 + 16);
2336
- if (result.status != 0) {
2337
- sink = 0;
2338
- } else {
2339
- sink = result.outputSize / 2;
2340
- }
2341
- };
2342
- count_events(proc, iterations); // warming up!
2343
- const auto result = count_events(proc, iterations);
2344
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2345
- std::cerr
2346
- << "The output is zero which might indicate a misconfiguration.\n";
2347
- }
2348
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2349
- print_summary(result, size, char_count);
2350
- }
2351
-
2352
- /**
2353
- * utf8lut: Vectorized UTF-8 converter.
2354
- * by stgatilov (2019)
2355
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2356
- */
2357
- void Benchmark::run_convert_utf32_to_utf8_utf8lut(size_t iterations) {
2358
- const simdutf::encoding_type bom =
2359
- BOM::check_bom(input_data.data(), input_data.size());
2360
- const char32_t *data = reinterpret_cast<const char32_t *>(
2361
- input_data.data() + BOM::bom_byte_size(bom));
2362
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2363
- if (size % 4 != 0) {
2364
- printf(
2365
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
2366
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2367
- printf(" Running function on truncated input.\n");
2368
- }
2369
-
2370
- size /= 4;
2371
-
2372
- // Note: a single 32-bit word can yield up to four UTF-8 bytes. We are
2373
- // making a safe assumption that each 32-bit word will yield four
2374
- // UTF-8 bytes.
2375
- // utf8lut requires an extra 16 bytes of padding.
2376
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2377
-
2378
- volatile size_t sink{0};
2379
-
2380
- auto proc = [data, size, &output_buffer, &sink]() {
2381
- std::unique_ptr<BaseBufferProcessor> processor(
2382
- ProcessorSelector<dfUtf32, dfUtf8>::WithOptions<cmValidate>::Create());
2383
- ConversionResult result = ConvertInMemory(
2384
- *processor, reinterpret_cast<const char *>(data), 4 * size,
2385
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2386
- if (result.status != 0) {
2387
- sink = 0;
2388
- } else {
2389
- sink = result.outputSize;
2390
- }
2391
- };
2392
- count_events(proc, iterations); // warming up!
2393
- const auto result = count_events(proc, iterations);
2394
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2395
- std::cerr << "The output is zero which might indicate an error.\n";
2396
- }
2397
- size_t char_count = size;
2398
- print_summary(result, input_data.size(), char_count);
2399
- }
2400
-
2401
- /**
2402
- * utf8lut: Vectorized UTF-8 converter.
2403
- * by stgatilov (2019)
2404
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2405
- */
2406
- void Benchmark::run_convert_valid_utf8_to_utf32_utf8lut(size_t iterations) {
2407
- const char *data = reinterpret_cast<const char *>(input_data.data());
2408
- const size_t size = input_data.size();
2409
-
2410
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size + 4]};
2411
- volatile size_t sink{0};
2412
- auto proc = [data, size, &output_buffer, &sink]() {
2413
- std::unique_ptr<BaseBufferProcessor> processor(
2414
- ProcessorSelector<dfUtf8, dfUtf32>::WithOptions<cmFull>::Create());
2415
- ConversionResult result = ConvertInMemory(
2416
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2417
- size * 4 + 16);
2418
- if (result.status != 0) {
2419
- sink = 0;
2420
- } else {
2421
- sink = result.outputSize / 2;
2422
- }
2423
- };
2424
- count_events(proc, iterations); // warming up!
2425
- const auto result = count_events(proc, iterations);
2426
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2427
- std::cerr << "The output is zero which might indicate an error.\n";
2428
- }
2429
- size_t char_count = size;
2430
- print_summary(result, input_data.size(), char_count);
2431
- }
2432
- /**
2433
- * utf8lut: Vectorized UTF-8 converter.
2434
- * by stgatilov (2019)
2435
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2436
- */
2437
- /**
2438
- * utf8lut: Vectorized UTF-8 converter.
2439
- * by stgatilov (2019)
2440
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2441
- */
2442
- void Benchmark::run_convert_valid_utf32_to_utf8_utf8lut(size_t iterations) {
2443
- const simdutf::encoding_type bom =
2444
- BOM::check_bom(input_data.data(), input_data.size());
2445
- const char32_t *data = reinterpret_cast<const char32_t *>(
2446
- input_data.data() + BOM::bom_byte_size(bom));
2447
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2448
- if (size % 4 != 0) {
2449
- printf(
2450
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
2451
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2452
- printf(" Running function on truncated input.\n");
2453
- }
2454
-
2455
- size /= 4;
2456
-
2457
- // Note: a single 32-bit word can yield up to four UTF-8 bytes. We are
2458
- // making a safe assumption that each 32-bit word will yield four
2459
- // UTF-8 bytes.
2460
- // utf8lut requires an extra 16 bytes of padding.
2461
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2462
-
2463
- volatile size_t sink{0};
2464
-
2465
- auto proc = [data, size, &output_buffer, &sink]() {
2466
- std::unique_ptr<BaseBufferProcessor> processor(
2467
- ProcessorSelector<dfUtf32, dfUtf8>::WithOptions<cmFull>::Create());
2468
- ConversionResult result = ConvertInMemory(
2469
- *processor, reinterpret_cast<const char *>(data), 4 * size,
2470
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2471
- if (result.status != 0) {
2472
- sink = 0;
2473
- } else {
2474
- sink = result.outputSize;
2475
- }
2476
- };
2477
- count_events(proc, iterations); // warming up!
2478
- const auto result = count_events(proc, iterations);
2479
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2480
- std::cerr << "The output is zero which might indicate an error.\n";
2481
- }
2482
- size_t char_count = size;
2483
- print_summary(result, input_data.size(), char_count);
2484
- }
2485
- /**
2486
- * Bob Steagall, CppCon2018
2487
- * https://github.com/BobSteagall/CppCon2018/
2488
- *
2489
- * Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics
2490
- * https://www.youtube.com/watch?v=5FQ87-Ecb-A
2491
- */
2492
- void Benchmark::run_convert_utf8_to_utf16_cppcon2018(size_t iterations) {
2493
- using char8_t = unsigned char;
2494
- const char8_t *data = reinterpret_cast<const char8_t *>(input_data.data());
2495
- const size_t size = input_data.size();
2496
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2497
- volatile size_t sink{0};
2498
- auto proc = [data, size, &output_buffer, &sink]() {
2499
- sink = uu::UtfUtils::SseConvert(data, data + size, output_buffer.get());
2500
- };
2501
- count_events(proc, iterations); // warming up!
2502
- const auto result = count_events(proc, iterations);
2503
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2504
- std::cerr
2505
- << "The output is zero which might indicate a misconfiguration.\n";
2506
- }
2507
- size_t char_count = get_active_implementation()->count_utf8(
2508
- reinterpret_cast<const char *>(data), size);
2509
- print_summary(result, size, char_count);
2510
- }
2511
- /**
2512
- * Bob Steagall, CppCon2018
2513
- * https://github.com/BobSteagall/CppCon2018/
2514
- *
2515
- * Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics
2516
- * https://www.youtube.com/watch?v=5FQ87-Ecb-A
2517
- */
2518
- void Benchmark::run_convert_utf8_to_utf32_cppcon2018(size_t iterations) {
2519
- using char8_t = unsigned char;
2520
- const char8_t *data = reinterpret_cast<const char8_t *>(input_data.data());
2521
- const size_t size = input_data.size();
2522
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
2523
- volatile size_t sink{0};
2524
- auto proc = [data, size, &output_buffer, &sink]() {
2525
- sink = uu::UtfUtils::SseConvert(data, data + size, output_buffer.get());
2526
- };
2527
- count_events(proc, iterations); // warming up!
2528
- const auto result = count_events(proc, iterations);
2529
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2530
- std::cerr
2531
- << "The output is zero which might indicate a misconfiguration.\n";
2532
- }
2533
- size_t char_count = get_active_implementation()->count_utf8(
2534
- reinterpret_cast<const char *>(data), size);
2535
- print_summary(result, size, char_count);
2536
- }
2537
- /**
2538
- * Cameron, Robert D, A case study in SIMD text processing with parallel bit
2539
- * streams: UTF-8 to UTF-16 transcoding, Proceedings of the 13th ACM SIGPLAN
2540
- * Symposium on Principles and practice of parallel programming, 91--98.
2541
- */
2542
- void Benchmark::run_convert_utf8_to_utf16_u8u16(size_t iterations) {
2543
- // u8u16 wants to take mutable chars, let us hope it does not actually mutate
2544
- // anything!
2545
- //
2546
- // This is currently untested. At a glance it looks fine, but
2547
- // it is possible that the transcoding could be wrong.
2548
- char *data = reinterpret_cast<char *>(input_data.data());
2549
- const size_t size = input_data.size();
2550
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2551
- volatile size_t sink{0};
2552
- auto proc = [data, size, &output_buffer, &sink]() {
2553
- char *srcbuf_ptr = data;
2554
- size_t inbytes_left = size;
2555
- char *trgtbuf_ptr = reinterpret_cast<char *>(output_buffer.get());
2556
- size_t outbytes_left = size * sizeof(char16_t);
2557
- size_t result_code =
2558
- u8u16(&srcbuf_ptr, &inbytes_left, &trgtbuf_ptr, &outbytes_left);
2559
- bool is_ok = (result_code != size_t(-1));
2560
- if (is_ok) {
2561
- sink = (reinterpret_cast<char16_t *>(trgtbuf_ptr) - output_buffer.get());
2562
- } else {
2563
- sink = 0;
2564
- }
2565
- };
2566
- count_events(proc, iterations); // warming up!
2567
- const auto result = count_events(proc, iterations);
2568
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2569
- std::cerr
2570
- << "The output is zero which might indicate a misconfiguration.\n";
2571
- }
2572
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2573
- print_summary(result, size, char_count);
2574
- }
2575
-
2576
- /**
2577
- * Olivier Goffart, UTF-8 processing using SIMD (SSE4), 2012.
2578
- * https://woboq.com/blog/utf-8-processing-using-simd.html
2579
- */
2580
- void Benchmark::run_convert_utf8_to_utf16_utf8sse4(size_t iterations) {
2581
- const char *data = reinterpret_cast<const char *>(input_data.data());
2582
- const size_t size = input_data.size();
2583
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2584
- volatile size_t sink{0};
2585
- auto proc = [data, size, &output_buffer, &sink]() {
2586
- const char *srcbuf_ptr = data;
2587
- size_t inbytes_left = size;
2588
- char *trgtbuf_ptr = reinterpret_cast<char *>(output_buffer.get());
2589
- size_t outbytes_left = size * sizeof(char16_t);
2590
- size_t result_code = utf8sse4::fromUtf8(&srcbuf_ptr, &inbytes_left,
2591
- &trgtbuf_ptr, &outbytes_left);
2592
- bool is_ok = (result_code != size_t(-1));
2593
- if (is_ok) {
2594
- sink = (reinterpret_cast<char16_t *>(trgtbuf_ptr) - output_buffer.get());
2595
- } else {
2596
- sink = 0;
2597
- }
2598
- };
2599
- count_events(proc, iterations); // warming up!
2600
- const auto result = count_events(proc, iterations);
2601
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2602
- std::cerr
2603
- << "The output is zero which might indicate a misconfiguration.\n";
2604
- }
2605
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2606
- print_summary(result, size, char_count);
2607
- }
2608
- #endif
2609
-
2610
- void Benchmark::run_convert_valid_utf8_to_utf16le(
2611
- const simdutf::implementation &implementation, size_t iterations) {
2612
- const char *data = reinterpret_cast<const char *>(input_data.data());
2613
- const size_t size = input_data.size();
2614
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2615
- volatile size_t sink{0};
2616
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2617
- sink = implementation.convert_valid_utf8_to_utf16le(data, size,
2618
- output_buffer.get());
2619
- };
2620
- count_events(proc, iterations); // warming up!
2621
- const auto result = count_events(proc, iterations);
2622
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2623
- std::cerr
2624
- << "The output is zero which might indicate a misconfiguration.\n";
2625
- }
2626
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2627
- print_summary(result, size, char_count);
2628
- }
2629
-
2630
- void Benchmark::run_convert_valid_utf8_to_utf32(
2631
- const simdutf::implementation &implementation, size_t iterations) {
2632
- const char *data = reinterpret_cast<const char *>(input_data.data());
2633
- const size_t size = input_data.size();
2634
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
2635
- volatile size_t sink{0};
2636
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2637
- sink = implementation.convert_valid_utf8_to_utf32(data, size,
2638
- output_buffer.get());
2639
- };
2640
- count_events(proc, iterations); // warming up!
2641
- const auto result = count_events(proc, iterations);
2642
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2643
- std::cerr
2644
- << "The output is zero which might indicate a misconfiguration.\n";
2645
- }
2646
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2647
- print_summary(result, size, char_count);
2648
- }
2649
-
2650
- void Benchmark::run_convert_utf16le_to_latin1(
2651
- const simdutf::implementation &implementation, size_t iterations) {
2652
- const simdutf::encoding_type bom =
2653
- BOM::check_bom(input_data.data(), input_data.size());
2654
- const char16_t *data = reinterpret_cast<const char16_t *>(
2655
- input_data.data() + BOM::bom_byte_size(bom));
2656
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2657
- if (size % 2 != 0) {
2658
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2659
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2660
- printf(" Running function on truncated input.\n");
2661
- }
2662
-
2663
- size /= 2;
2664
- std::unique_ptr<char[]> output_buffer{new char[size]};
2665
- volatile size_t sink{0};
2666
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2667
- sink = implementation.convert_utf16le_to_latin1(data, size,
2668
- output_buffer.get());
2669
- };
2670
- count_events(proc, iterations); // warming up!
2671
- const auto result = count_events(proc, iterations);
2672
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2673
- std::cerr << "The output is zero which might indicate an error.\n";
2674
- }
2675
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2676
- print_summary(result, input_data.size(), char_count);
2677
- }
2678
-
2679
- void Benchmark::run_convert_utf16le_to_latin1_with_errors(
2680
- const simdutf::implementation &implementation, size_t iterations) {
2681
- const simdutf::encoding_type bom =
2682
- BOM::check_bom(input_data.data(), input_data.size());
2683
- const char16_t *data = reinterpret_cast<const char16_t *>(
2684
- input_data.data() + BOM::bom_byte_size(bom));
2685
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2686
- if (size % 2 != 0) {
2687
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2688
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2689
- printf(" Running function on truncated input.\n");
2690
- }
2691
-
2692
- size /= 2;
2693
- std::unique_ptr<char[]> output_buffer{new char[size]};
2694
- volatile bool sink{false};
2695
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2696
- result res = implementation.convert_utf16le_to_latin1_with_errors(
2697
- data, size, output_buffer.get());
2698
- sink = !(res.error);
2699
- };
2700
- count_events(proc, iterations); // warming up!
2701
- const auto result = count_events(proc, iterations);
2702
- if ((sink == false) && (iterations > 0)) {
2703
- std::cerr << "The input was declared invalid.\n";
2704
- }
2705
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2706
- print_summary(result, input_data.size(), char_count);
2707
- }
2708
-
2709
- void Benchmark::run_convert_valid_utf16le_to_latin1(
2710
- const simdutf::implementation &implementation, size_t iterations) {
2711
- const simdutf::encoding_type bom =
2712
- BOM::check_bom(input_data.data(), input_data.size());
2713
- const char16_t *data = reinterpret_cast<const char16_t *>(
2714
- input_data.data() + BOM::bom_byte_size(bom));
2715
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2716
- if (size % 2 != 0) {
2717
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2718
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2719
- printf(" Running function on truncated input.\n");
2720
- }
2721
-
2722
- size /= 2;
2723
- std::unique_ptr<char[]> output_buffer{new char[size]};
2724
- volatile size_t sink{0};
2725
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2726
- sink = implementation.convert_valid_utf16le_to_latin1(data, size,
2727
- output_buffer.get());
2728
- };
2729
- count_events(proc, iterations); // warming up!
2730
- const auto result = count_events(proc, iterations);
2731
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2732
- std::cerr << "The output is zero which might indicate an error.\n";
2733
- }
2734
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2735
- print_summary(result, input_data.size(), char_count);
2736
- }
2737
-
2738
- void Benchmark::run_convert_utf16_to_utf8_safe(
2739
- const simdutf::implementation &implementation, size_t iterations) {
2740
- const simdutf::implementation *active_implementation =
2741
- simdutf::get_active_implementation();
2742
- simdutf::get_active_implementation() =
2743
- &implementation; // set the active implementation
2744
- const simdutf::encoding_type bom =
2745
- BOM::check_bom(input_data.data(), input_data.size());
2746
- const char16_t *data = reinterpret_cast<const char16_t *>(
2747
- input_data.data() + BOM::bom_byte_size(bom));
2748
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2749
- if (size % 2 != 0) {
2750
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2751
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2752
- printf(" Running function on truncated input.\n");
2753
- }
2754
-
2755
- size /= 2;
2756
-
2757
- size_t budget = simdutf::utf8_length_from_utf16(data, size);
2758
-
2759
- std::unique_ptr<char[]> output_buffer{new char[budget]};
2760
-
2761
- volatile size_t sink{0};
2762
-
2763
- auto proc = [&implementation, data, size, &output_buffer, &sink, &budget]() {
2764
- sink = simdutf::convert_utf16_to_utf8_safe(data, size, output_buffer.get(),
2765
- budget);
2766
- };
2767
- count_events(proc, iterations); // warming up!
2768
- const auto result = count_events(proc, iterations);
2769
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2770
- std::cerr << "The output is zero which might indicate an error.\n";
2771
- }
2772
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2773
- print_summary(result, input_data.size(), char_count);
2774
- simdutf::get_active_implementation() =
2775
- active_implementation; // restore the active implementation
2776
- }
2777
-
2778
- void Benchmark::run_convert_utf16le_to_utf8(
2779
- const simdutf::implementation &implementation, size_t iterations) {
2780
- const simdutf::encoding_type bom =
2781
- BOM::check_bom(input_data.data(), input_data.size());
2782
- const char16_t *data = reinterpret_cast<const char16_t *>(
2783
- input_data.data() + BOM::bom_byte_size(bom));
2784
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2785
- if (size % 2 != 0) {
2786
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2787
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2788
- printf(" Running function on truncated input.\n");
2789
- }
2790
-
2791
- size /= 2;
2792
-
2793
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2794
- // yields 4 bytes,
2795
- // thus we're making safe assumption that each 16-bit word will be
2796
- // expanded to four bytes.
2797
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
2798
-
2799
- volatile size_t sink{0};
2800
-
2801
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2802
- sink =
2803
- implementation.convert_utf16le_to_utf8(data, size, output_buffer.get());
2804
- };
2805
- count_events(proc, iterations); // warming up!
2806
- const auto result = count_events(proc, iterations);
2807
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2808
- std::cerr << "The output is zero which might indicate an error.\n";
2809
- }
2810
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2811
- print_summary(result, input_data.size(), char_count);
2812
- }
2813
-
2814
- void Benchmark::run_convert_utf16le_to_utf8_with_errors(
2815
- const simdutf::implementation &implementation, size_t iterations) {
2816
- const simdutf::encoding_type bom =
2817
- BOM::check_bom(input_data.data(), input_data.size());
2818
- const char16_t *data = reinterpret_cast<const char16_t *>(
2819
- input_data.data() + BOM::bom_byte_size(bom));
2820
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2821
- if (size % 2 != 0) {
2822
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2823
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2824
- printf(" Running function on truncated input.\n");
2825
- }
2826
-
2827
- size /= 2;
2828
-
2829
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2830
- // yields 4 bytes,
2831
- // thus we're making safe assumption that each 16-bit word will be
2832
- // expanded to four bytes.
2833
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
2834
-
2835
- volatile bool sink{false};
2836
-
2837
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2838
- result res = implementation.convert_utf16le_to_utf8_with_errors(
2839
- data, size, output_buffer.get());
2840
- sink = !(res.error);
2841
- };
2842
- count_events(proc, iterations); // warming up!
2843
- const auto result = count_events(proc, iterations);
2844
- if ((sink == false) && (iterations > 0)) {
2845
- std::cerr << "The input was declared invalid.\n";
2846
- }
2847
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2848
- print_summary(result, input_data.size(), char_count);
2849
- }
2850
-
2851
- void Benchmark::run_convert_utf16le_to_utf32(
2852
- const simdutf::implementation &implementation, size_t iterations) {
2853
- const simdutf::encoding_type bom =
2854
- BOM::check_bom(input_data.data(), input_data.size());
2855
- const char16_t *data = reinterpret_cast<const char16_t *>(
2856
- input_data.data() + BOM::bom_byte_size(bom));
2857
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2858
- if (size % 2 != 0) {
2859
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2860
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2861
- printf(" Running function on truncated input.\n");
2862
- }
2863
-
2864
- size /= 2;
2865
-
2866
- // Note: all code units yield 4 bytes. We are making a safe assumption that
2867
- // all code units will be non-surrogate code units so the size would get
2868
- // doubled (16 bits -> 32 bits).
2869
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 2]};
2870
-
2871
- volatile size_t sink{0};
2872
-
2873
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2874
- sink = implementation.convert_utf16le_to_utf32(data, size,
2875
- output_buffer.get());
2876
- };
2877
- count_events(proc, iterations); // warming up!
2878
- const auto result = count_events(proc, iterations);
2879
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2880
- std::cerr << "The output is zero which might indicate an error.\n";
2881
- }
2882
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2883
- print_summary(result, input_data.size(), char_count);
2884
- }
2885
-
2886
- void Benchmark::run_convert_utf16le_to_utf32_with_errors(
2887
- const simdutf::implementation &implementation, size_t iterations) {
2888
- const simdutf::encoding_type bom =
2889
- BOM::check_bom(input_data.data(), input_data.size());
2890
- const char16_t *data = reinterpret_cast<const char16_t *>(
2891
- input_data.data() + BOM::bom_byte_size(bom));
2892
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2893
- if (size % 2 != 0) {
2894
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2895
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2896
- printf(" Running function on truncated input.\n");
2897
- }
2898
-
2899
- size /= 2;
2900
-
2901
- // Note: all code units yield 4 bytes. We are making a safe assumption that
2902
- // all code units will be non-surrogate code units so the size would get
2903
- // doubled (16 bits -> 32 bits).
2904
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 2]};
2905
-
2906
- volatile bool sink{false};
2907
-
2908
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2909
- result res = implementation.convert_utf16le_to_utf32_with_errors(
2910
- data, size, output_buffer.get());
2911
- sink = !(res.error);
2912
- };
2913
- count_events(proc, iterations); // warming up!
2914
- const auto result = count_events(proc, iterations);
2915
- if ((sink == false) && (iterations > 0)) {
2916
- std::cerr << "The input was declared invalid.\n";
2917
- }
2918
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2919
- print_summary(result, input_data.size(), char_count);
2920
- }
2921
-
2922
- void Benchmark::run_convert_utf16le_to_utf8_with_dynamic_allocation(
2923
- const simdutf::implementation &implementation, size_t iterations) {
2924
- const simdutf::encoding_type bom =
2925
- BOM::check_bom(input_data.data(), input_data.size());
2926
- const char16_t *data = reinterpret_cast<const char16_t *>(
2927
- input_data.data() + BOM::bom_byte_size(bom));
2928
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2929
- if (size % 2 != 0) {
2930
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2931
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2932
- printf(" Running function on truncated input.\n");
2933
- }
2934
-
2935
- size /= 2;
2936
-
2937
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2938
- // yields 4 bytes,
2939
- // thus we're making safe assumption that each 16-bit word will be
2940
- // expanded to four bytes.
2941
-
2942
- volatile size_t sink{0};
2943
-
2944
- auto proc = [&implementation, data, size, &sink]() {
2945
- auto dyn_size = implementation.utf8_length_from_utf16le(data, size);
2946
- std::unique_ptr<char[]> output_buffer{new char[dyn_size]};
2947
- sink =
2948
- implementation.convert_utf16le_to_utf8(data, size, output_buffer.get());
2949
- };
2950
- count_events(proc, iterations); // warming up!
2951
- const auto result = count_events(proc, iterations);
2952
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2953
- std::cerr << "The output is zero which might indicate an error.\n";
2954
- }
2955
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2956
- print_summary(result, input_data.size(), char_count);
2957
- }
2958
-
2959
- void Benchmark::run_convert_utf16le_to_utf32_with_dynamic_allocation(
2960
- const simdutf::implementation &implementation, size_t iterations) {
2961
- const simdutf::encoding_type bom =
2962
- BOM::check_bom(input_data.data(), input_data.size());
2963
- const char16_t *data = reinterpret_cast<const char16_t *>(
2964
- input_data.data() + BOM::bom_byte_size(bom));
2965
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2966
- if (size % 2 != 0) {
2967
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2968
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2969
- printf(" Running function on truncated input.\n");
2970
- }
2971
-
2972
- size /= 2;
2973
-
2974
- // Note: all code units yield 4 bytes. We are making a safe assumption that
2975
- // all code units will be non-surrogate code units so the size would get
2976
- // doubled (16 bits -> 32 bits).
2977
-
2978
- volatile size_t sink{0};
2979
-
2980
- auto proc = [&implementation, data, size, &sink]() {
2981
- auto dyn_size = implementation.utf32_length_from_utf16le(data, size);
2982
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[dyn_size]};
2983
- sink = implementation.convert_utf16le_to_utf32(data, size,
2984
- output_buffer.get());
2985
- };
2986
- count_events(proc, iterations); // warming up!
2987
- const auto result = count_events(proc, iterations);
2988
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2989
- std::cerr << "The output is zero which might indicate an error.\n";
2990
- }
2991
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2992
- print_summary(result, input_data.size(), char_count);
2993
- }
2994
-
2995
- void Benchmark::run_convert_valid_utf16le_to_utf8(
2996
- const simdutf::implementation &implementation, size_t iterations) {
2997
- const simdutf::encoding_type bom =
2998
- BOM::check_bom(input_data.data(), input_data.size());
2999
- const char16_t *data = reinterpret_cast<const char16_t *>(
3000
- input_data.data() + BOM::bom_byte_size(bom));
3001
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3002
- if (size % 2 != 0) {
3003
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3004
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3005
- printf(" Running function on truncated input.\n");
3006
- }
3007
-
3008
- size /= 2;
3009
-
3010
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
3011
- // yields 4 bytes,
3012
- // thus we're making safe assumption that each 16-bit word will be
3013
- // expanded to four bytes.
3014
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3015
-
3016
- volatile size_t sink{0};
3017
-
3018
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3019
- sink = implementation.convert_valid_utf16le_to_utf8(data, size,
3020
- output_buffer.get());
3021
- };
3022
- count_events(proc, iterations); // warming up!
3023
- const auto result = count_events(proc, iterations);
3024
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3025
- std::cerr << "The output is zero which might indicate an error.\n";
3026
- }
3027
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3028
- print_summary(result, input_data.size(), char_count);
3029
- }
3030
-
3031
- void Benchmark::run_convert_utf32_to_latin1(
3032
- const simdutf::implementation &implementation, size_t iterations) {
3033
- const simdutf::encoding_type bom =
3034
- BOM::check_bom(input_data.data(), input_data.size());
3035
- const char32_t *data = reinterpret_cast<const char32_t *>(
3036
- input_data.data() + BOM::bom_byte_size(bom));
3037
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3038
- if (size % 4 != 0) {
3039
- printf(
3040
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3041
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3042
- printf(" Running function on truncated input.\n");
3043
- }
3044
-
3045
- size /= 4;
3046
-
3047
- std::unique_ptr<char[]> output_buffer{new char[size]};
3048
- volatile size_t sink{0};
3049
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3050
- sink =
3051
- implementation.convert_utf32_to_latin1(data, size, output_buffer.get());
3052
- };
3053
- count_events(proc, iterations); // warming up!
3054
- const auto result = count_events(proc, iterations);
3055
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3056
- std::cerr << "The output is zero which might indicate an error.\n";
3057
- }
3058
- size_t char_count = size;
3059
- print_summary(result, input_data.size(), char_count);
3060
- }
3061
- void Benchmark::run_convert_utf32_to_latin1_with_errors(
3062
- const simdutf::implementation &implementation, size_t iterations) {
3063
- const simdutf::encoding_type bom =
3064
- BOM::check_bom(input_data.data(), input_data.size());
3065
- const char32_t *data = reinterpret_cast<const char32_t *>(
3066
- input_data.data() + BOM::bom_byte_size(bom));
3067
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3068
- if (size % 4 != 0) {
3069
- printf(
3070
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3071
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3072
- printf(" Running function on truncated input.\n");
3073
- }
3074
-
3075
- size /= 4;
3076
-
3077
- std::unique_ptr<char[]> output_buffer{new char[size]};
3078
- volatile bool sink{false};
3079
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3080
- result res = implementation.convert_utf32_to_latin1_with_errors(
3081
- data, size, output_buffer.get());
3082
- sink = !(res.error);
3083
- };
3084
- count_events(proc, iterations); // warming up!
3085
- const auto result = count_events(proc, iterations);
3086
- if ((sink == false) && (iterations > 0)) {
3087
- std::cerr << "The input was declared invalid.\n";
3088
- }
3089
- size_t char_count = size;
3090
- print_summary(result, input_data.size(), char_count);
3091
- }
3092
- void Benchmark::run_convert_valid_utf32_to_latin1(
3093
- const simdutf::implementation &implementation, size_t iterations) {
3094
- const simdutf::encoding_type bom =
3095
- BOM::check_bom(input_data.data(), input_data.size());
3096
- const char32_t *data = reinterpret_cast<const char32_t *>(
3097
- input_data.data() + BOM::bom_byte_size(bom));
3098
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3099
- if (size % 4 != 0) {
3100
- printf(
3101
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3102
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3103
- printf(" Running function on truncated input.\n");
3104
- }
3105
-
3106
- size /= 4;
3107
-
3108
- std::unique_ptr<char[]> output_buffer{new char[size]};
3109
- volatile size_t sink{0};
3110
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3111
- sink = implementation.convert_valid_utf32_to_latin1(data, size,
3112
- output_buffer.get());
3113
- };
3114
- count_events(proc, iterations); // warming up!
3115
- const auto result = count_events(proc, iterations);
3116
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3117
- std::cerr << "The output is zero which might indicate an error.\n";
3118
- }
3119
- size_t char_count = size;
3120
- print_summary(result, input_data.size(), char_count);
3121
- }
3122
-
3123
- void Benchmark::run_convert_utf32_to_utf8(
3124
- const simdutf::implementation &implementation, size_t iterations) {
3125
- const simdutf::encoding_type bom =
3126
- BOM::check_bom(input_data.data(), input_data.size());
3127
- const char32_t *data = reinterpret_cast<const char32_t *>(
3128
- input_data.data() + BOM::bom_byte_size(bom));
3129
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3130
- if (size % 4 != 0) {
3131
- printf(
3132
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3133
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3134
- printf(" Running function on truncated input.\n");
3135
- }
3136
-
3137
- size /= 4;
3138
-
3139
- // Note: In the "worst" case, a 32-bit word will yield 4 UTF-8 bytes. So, we
3140
- // are making a safe assumption that each word will produce 4 bytes.
3141
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3142
-
3143
- volatile size_t sink{0};
3144
-
3145
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3146
- sink =
3147
- implementation.convert_utf32_to_utf8(data, size, output_buffer.get());
3148
- };
3149
- count_events(proc, iterations); // warming up!
3150
- const auto result = count_events(proc, iterations);
3151
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3152
- std::cerr << "The output is zero which might indicate an error.\n";
3153
- }
3154
- size_t char_count = size;
3155
- print_summary(result, input_data.size(), char_count);
3156
- }
3157
-
3158
- void Benchmark::run_convert_utf32_to_utf8_with_errors(
3159
- const simdutf::implementation &implementation, size_t iterations) {
3160
- const simdutf::encoding_type bom =
3161
- BOM::check_bom(input_data.data(), input_data.size());
3162
- const char32_t *data = reinterpret_cast<const char32_t *>(
3163
- input_data.data() + BOM::bom_byte_size(bom));
3164
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3165
- if (size % 4 != 0) {
3166
- printf(
3167
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3168
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3169
- printf(" Running function on truncated input.\n");
3170
- }
3171
-
3172
- size /= 4;
3173
-
3174
- // Note: In the "worst" case, a 32-bit word will yield 4 UTF-8 bytes. So, we
3175
- // are making a safe assumption that each word will produce 4 bytes.
3176
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3177
-
3178
- volatile bool sink{false};
3179
-
3180
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3181
- result res = implementation.convert_utf32_to_utf8_with_errors(
3182
- data, size, output_buffer.get());
3183
- sink = !(res.error);
3184
- };
3185
- count_events(proc, iterations); // warming up!
3186
- const auto result = count_events(proc, iterations);
3187
- if ((sink == false) && (iterations > 0)) {
3188
- std::cerr << "The input was declared invalid.\n";
3189
- }
3190
- size_t char_count = size;
3191
- print_summary(result, input_data.size(), char_count);
3192
- }
3193
-
3194
- void Benchmark::run_convert_valid_utf32_to_utf8(
3195
- const simdutf::implementation &implementation, size_t iterations) {
3196
- const simdutf::encoding_type bom =
3197
- BOM::check_bom(input_data.data(), input_data.size());
3198
- const char32_t *data = reinterpret_cast<const char32_t *>(
3199
- input_data.data() + BOM::bom_byte_size(bom));
3200
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3201
- if (size % 4 != 0) {
3202
- printf(
3203
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3204
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3205
- printf(" Running function on truncated input.\n");
3206
- }
3207
-
3208
- size /= 4;
3209
-
3210
- // Note: In the "worst" case, a 32-bit word will yield 4 UTF-8 bytes. So, we
3211
- // are making a safe assumption that each word will produce 4 bytes.
3212
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3213
-
3214
- volatile size_t sink{0};
3215
-
3216
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3217
- sink = implementation.convert_valid_utf32_to_utf8(data, size,
3218
- output_buffer.get());
3219
- };
3220
- count_events(proc, iterations); // warming up!
3221
- const auto result = count_events(proc, iterations);
3222
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3223
- std::cerr << "The output is zero which might indicate an error.\n";
3224
- }
3225
- size_t char_count = size;
3226
- print_summary(result, input_data.size(), char_count);
3227
- }
3228
-
3229
- void Benchmark::run_convert_valid_utf16le_to_utf32(
3230
- const simdutf::implementation &implementation, size_t iterations) {
3231
- const simdutf::encoding_type bom =
3232
- BOM::check_bom(input_data.data(), input_data.size());
3233
- const char16_t *data = reinterpret_cast<const char16_t *>(
3234
- input_data.data() + BOM::bom_byte_size(bom));
3235
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3236
- if (size % 2 != 0) {
3237
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3238
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3239
- printf(" Running function on truncated input.\n");
3240
- }
3241
-
3242
- size /= 2;
3243
-
3244
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
3245
- // yields 4 bytes,
3246
- // thus we're making safe assumption that each 16-bit word will be
3247
- // expanded to four bytes.
3248
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 4]};
3249
-
3250
- volatile size_t sink{0};
3251
-
3252
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3253
- sink = implementation.convert_valid_utf16le_to_utf32(data, size,
3254
- output_buffer.get());
3255
- };
3256
- count_events(proc, iterations); // warming up!
3257
- const auto result = count_events(proc, iterations);
3258
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3259
- std::cerr << "The output is zero which might indicate an error.\n";
3260
- }
3261
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3262
- print_summary(result, input_data.size(), char_count);
3263
- }
3264
-
3265
- template <endianness byte_order>
3266
- void Benchmark::run_convert_utf32_to_utf16(
3267
- const simdutf::implementation &implementation, size_t iterations) {
3268
- const simdutf::encoding_type bom =
3269
- BOM::check_bom(input_data.data(), input_data.size());
3270
- const char32_t *data = reinterpret_cast<const char32_t *>(
3271
- input_data.data() + BOM::bom_byte_size(bom));
3272
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3273
- if (size % 4 != 0) {
3274
- printf(
3275
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3276
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3277
- printf(" Running function on truncated input.\n");
3278
- }
3279
-
3280
- size /= 4;
3281
-
3282
- // Note: In the "worst" case, a 32-bit word will yield two 16-bit code units.
3283
- // So, we are making a safe assumption that each word will produce 2 bytes.
3284
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
3285
-
3286
- volatile size_t sink{0};
3287
-
3288
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3289
- if (byte_order == endianness::LITTLE) {
3290
- sink = implementation.convert_utf32_to_utf16le(data, size,
3291
- output_buffer.get());
3292
- } else {
3293
- sink = implementation.convert_utf32_to_utf16be(data, size,
3294
- output_buffer.get());
3295
- }
3296
- };
3297
- count_events(proc, iterations); // warming up!
3298
- const auto result = count_events(proc, iterations);
3299
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3300
- std::cerr << "The output is zero which might indicate an error.\n";
3301
- }
3302
- size_t char_count = size;
3303
- print_summary(result, input_data.size(), char_count);
3304
- }
3305
-
3306
- template <endianness byte_order>
3307
- void Benchmark::run_convert_utf32_to_utf16_with_errors(
3308
- const simdutf::implementation &implementation, size_t iterations) {
3309
- const simdutf::encoding_type bom =
3310
- BOM::check_bom(input_data.data(), input_data.size());
3311
- const char32_t *data = reinterpret_cast<const char32_t *>(
3312
- input_data.data() + BOM::bom_byte_size(bom));
3313
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3314
- if (size % 4 != 0) {
3315
- printf(
3316
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3317
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3318
- printf(" Running function on truncated input.\n");
3319
- }
3320
-
3321
- size /= 4;
3322
-
3323
- // Note: In the "worst" case, a 32-bit word will yield two 16-bit code units.
3324
- // So, we are making a safe assumption that each word will produce 2 bytes.
3325
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
3326
-
3327
- volatile bool sink{false};
3328
-
3329
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3330
- if (byte_order == endianness::LITTLE) {
3331
- result res = implementation.convert_utf32_to_utf16le_with_errors(
3332
- data, size, output_buffer.get());
3333
- sink = !(res.error);
3334
- } else {
3335
- result res = implementation.convert_utf32_to_utf16be_with_errors(
3336
- data, size, output_buffer.get());
3337
- sink = !(res.error);
3338
- }
3339
- };
3340
- count_events(proc, iterations); // warming up!
3341
- const auto result = count_events(proc, iterations);
3342
- if ((sink == false) && (iterations > 0)) {
3343
- std::cerr << "The input was declared invalid.\n";
3344
- }
3345
- size_t char_count = size;
3346
- print_summary(result, input_data.size(), char_count);
3347
- }
3348
-
3349
- template <endianness byte_order>
3350
- void Benchmark::run_convert_valid_utf32_to_utf16(
3351
- const simdutf::implementation &implementation, size_t iterations) {
3352
- const simdutf::encoding_type bom =
3353
- BOM::check_bom(input_data.data(), input_data.size());
3354
- const char32_t *data = reinterpret_cast<const char32_t *>(
3355
- input_data.data() + BOM::bom_byte_size(bom));
3356
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3357
- if (size % 4 != 0) {
3358
- printf(
3359
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3360
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3361
- printf(" Running function on truncated input.\n");
3362
- }
3363
-
3364
- size /= 4;
3365
-
3366
- // Note: In the "worst" case, a 32-bit word will yield two 16-bit code units.
3367
- // So, we are making a safe assumption that each word will produce 2 bytes.
3368
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
3369
-
3370
- volatile size_t sink{0};
3371
-
3372
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3373
- if (byte_order == endianness::LITTLE) {
3374
- sink = implementation.convert_valid_utf32_to_utf16le(data, size,
3375
- output_buffer.get());
3376
- } else {
3377
- sink = implementation.convert_valid_utf32_to_utf16be(data, size,
3378
- output_buffer.get());
3379
- }
3380
- };
3381
- count_events(proc, iterations); // warming up!
3382
- const auto result = count_events(proc, iterations);
3383
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3384
- std::cerr << "The output is zero which might indicate an error.\n";
3385
- }
3386
- size_t char_count = size;
3387
- print_summary(result, input_data.size(), char_count);
3388
- }
3389
-
3390
- void Benchmark::run_count_utf8(const simdutf::implementation &implementation,
3391
- size_t iterations) {
3392
- const char *data = reinterpret_cast<const char *>(input_data.data());
3393
- const size_t size = input_data.size();
3394
- volatile size_t sink{0};
3395
-
3396
- auto proc = [&implementation, data, size, &sink]() {
3397
- sink = implementation.count_utf8(data, size);
3398
- };
3399
- count_events(proc, iterations); // warming up!
3400
- const auto result = count_events(proc, iterations);
3401
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3402
- std::cerr << "The output is zero which might indicate an error.\n";
3403
- }
3404
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3405
- print_summary(result, size, char_count);
3406
- }
3407
-
3408
- void Benchmark::run_count_utf16le(const simdutf::implementation &implementation,
3409
- size_t iterations) {
3410
- const simdutf::encoding_type bom =
3411
- BOM::check_bom(input_data.data(), input_data.size());
3412
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
3413
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3414
- if (size % 2 != 0) {
3415
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3416
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3417
- printf(" Running function on truncated input.\n");
3418
- }
3419
- size /= 2;
3420
- volatile size_t sink{0};
3421
- auto proc = [&implementation, data, size, &sink]() {
3422
- sink = implementation.count_utf16le(data, size);
3423
- };
3424
- count_events(proc, iterations); // warming up!
3425
- const auto result = count_events(proc, iterations);
3426
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3427
- std::cerr << "The output is zero which might indicate an error.\n";
3428
- }
3429
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3430
- print_summary(result, input_data.size(), char_count);
3431
- }
3432
-
3433
- void Benchmark::run_detect_encodings(
3434
- const simdutf::implementation &implementation, size_t iterations) {
3435
- const simdutf::encoding_type bom =
3436
- BOM::check_bom(input_data.data(), input_data.size());
3437
- const char *data = reinterpret_cast<const char *>(input_data.data() +
3438
- BOM::bom_byte_size(bom));
3439
- const size_t size = input_data.size() - BOM::bom_byte_size(bom);
3440
- volatile size_t sink{0};
3441
- auto proc = [&implementation, data, size, &sink]() {
3442
- sink = implementation.detect_encodings(data, size);
3443
- };
3444
- count_events(proc, iterations); // warming up!
3445
- const auto result = count_events(proc, iterations);
3446
- size_t char_count = size;
3447
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3448
- std::cerr << "The output is zero which might indicate an error.\n";
3449
- } else {
3450
- std::cout << "Detected format: ";
3451
- if (sink & simdutf::encoding_type::UTF8) {
3452
- char_count = get_active_implementation()->count_utf8(data, size);
3453
- std::cout << " UTF8";
3454
- }
3455
- if (sink & simdutf::encoding_type::UTF16_LE) {
3456
- std::cout << " UTF16LE";
3457
- char_count = get_active_implementation()->count_utf16le(
3458
- reinterpret_cast<const char16_t *>(data), size / 2);
3459
- }
3460
- if (sink & simdutf::encoding_type::UTF32_LE) {
3461
- std::cout << " UTF32LE";
3462
- char_count = size / 4;
3463
- }
3464
- std::cout << std::endl;
3465
- }
3466
- if ((bom) && (bom & ~sink)) {
3467
- std::cerr << "[Error] BOM format : ";
3468
- if (bom & simdutf::encoding_type::UTF8) {
3469
- std::cerr << " UTF8";
3470
- } else if (bom & simdutf::encoding_type::UTF16_LE) {
3471
- std::cerr << " UTF16LE";
3472
- } else if (bom & simdutf::encoding_type::UTF32_LE) {
3473
- std::cerr << " UTF32LE";
3474
- }
3475
- std::cerr << std::endl;
3476
- }
3477
- if ((sink & (sink - 1)) != 0) {
3478
- std::cout << "More than one format possible, character count is ambiguous."
3479
- << std::endl;
3480
- }
3481
- print_summary(result, size, char_count);
3482
- }
3483
-
3484
- const std::set<std::string> Benchmark::all_procedures() const {
3485
- std::set<std::string> result;
3486
- for (const auto &item : benchmarks) {
3487
- result.insert(item.first);
3488
- }
3489
-
3490
- return result;
3491
- }
3492
-
3493
- std::set<simdutf::encoding_type>
3494
- Benchmark::expected_encodings(const std::string &procedure) {
3495
- return benchmarks[procedure].second;
3496
- }
3497
-
3498
- /**
3499
- * LLVM relies on code from the Unicode Consortium
3500
- * https://en.wikipedia.org/wiki/Unicode_Consortium
3501
- */
3502
- void Benchmark::run_convert_utf8_to_utf16_llvm(size_t iterations) {
3503
- const char *data = reinterpret_cast<const char *>(input_data.data());
3504
- const size_t size = input_data.size();
3505
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
3506
- volatile size_t sink{0};
3507
- auto proc = [data, size, &output_buffer, &sink]() {
3508
- const unsigned char *sourceStart =
3509
- reinterpret_cast<const unsigned char *>(data);
3510
- const unsigned char *sourceEnd = sourceStart + size;
3511
- short unsigned int *targetStart =
3512
- reinterpret_cast<short unsigned int *>(output_buffer.get());
3513
- short unsigned int *targetEnd = targetStart + size;
3514
- bool is_ok = (llvm::conversionOK ==
3515
- llvm::ConvertUTF8toUTF16(
3516
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3517
- llvm::ConversionFlags::strictConversion));
3518
- if (is_ok) {
3519
- sink = (targetStart -
3520
- reinterpret_cast<short unsigned int *>(output_buffer.get()));
3521
- } else {
3522
- sink = 0;
3523
- }
3524
- };
3525
- count_events(proc, iterations); // warming up!
3526
- const auto result = count_events(proc, iterations);
3527
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3528
- std::cerr
3529
- << "The output is zero which might indicate a misconfiguration.\n";
3530
- }
3531
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3532
- print_summary(result, size, char_count);
3533
- }
3534
-
3535
- void Benchmark::run_convert_utf8_to_utf32_llvm(size_t iterations) {
3536
- const char *data = reinterpret_cast<const char *>(input_data.data());
3537
- const size_t size = input_data.size();
3538
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
3539
- volatile size_t sink{0};
3540
- auto proc = [data, size, &output_buffer, &sink]() {
3541
- const unsigned char *sourceStart =
3542
- reinterpret_cast<const unsigned char *>(data);
3543
- const unsigned char *sourceEnd = sourceStart + size;
3544
- unsigned int *targetStart =
3545
- reinterpret_cast<unsigned int *>(output_buffer.get());
3546
- unsigned int *targetEnd = targetStart + size;
3547
- bool is_ok = (llvm::conversionOK ==
3548
- llvm::ConvertUTF8toUTF32(
3549
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3550
- llvm::ConversionFlags::strictConversion));
3551
- if (is_ok) {
3552
- sink =
3553
- (targetStart - reinterpret_cast<unsigned int *>(output_buffer.get()));
3554
- } else {
3555
- sink = 0;
3556
- }
3557
- };
3558
- count_events(proc, iterations); // warming up!
3559
- const auto result = count_events(proc, iterations);
3560
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3561
- std::cerr
3562
- << "The output is zero which might indicate a misconfiguration.\n";
3563
- }
3564
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3565
- print_summary(result, size, char_count);
3566
- }
3567
-
3568
- void Benchmark::run_convert_utf16_to_utf8_llvm(size_t iterations) {
3569
- const simdutf::encoding_type bom =
3570
- BOM::check_bom(input_data.data(), input_data.size());
3571
- const char16_t *data = reinterpret_cast<const char16_t *>(
3572
- input_data.data() + BOM::bom_byte_size(bom));
3573
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3574
- if (size % 2 != 0) {
3575
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3576
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3577
- printf(" Running function on truncated input.\n");
3578
- }
3579
-
3580
- size /= 2;
3581
-
3582
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
3583
- // yields 4 bytes,
3584
- // thus we're making safe assumption that each 16-bit word will be
3585
- // expanded to four bytes.
3586
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3587
-
3588
- volatile size_t sink{0};
3589
-
3590
- auto proc = [data, size, &output_buffer, &sink]() {
3591
- const short unsigned int *sourceStart =
3592
- reinterpret_cast<const short unsigned int *>(data);
3593
- const short unsigned int *sourceEnd = sourceStart + size;
3594
- unsigned char *targetStart =
3595
- reinterpret_cast<unsigned char *>(output_buffer.get());
3596
- unsigned char *targetEnd = targetStart + size * 4;
3597
- bool is_ok = (llvm::conversionOK ==
3598
- llvm::ConvertUTF16toUTF8(
3599
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3600
- llvm::ConversionFlags::strictConversion));
3601
- if (is_ok) {
3602
- sink = (targetStart -
3603
- reinterpret_cast<unsigned char *>(output_buffer.get()));
3604
- } else {
3605
- sink = 0;
3606
- }
3607
- };
3608
- count_events(proc, iterations); // warming up!
3609
- const auto result = count_events(proc, iterations);
3610
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3611
- std::cerr << "The output is zero which might indicate an error.\n";
3612
- }
3613
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3614
- print_summary(result, input_data.size(), char_count);
3615
- }
3616
-
3617
- void Benchmark::run_convert_utf32_to_utf8_llvm(size_t iterations) {
3618
- const simdutf::encoding_type bom =
3619
- BOM::check_bom(input_data.data(), input_data.size());
3620
- const char32_t *data = reinterpret_cast<const char32_t *>(
3621
- input_data.data() + BOM::bom_byte_size(bom));
3622
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3623
- if (size % 4 != 0) {
3624
- printf(
3625
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3626
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3627
- printf(" Running function on truncated input.\n");
3628
- }
3629
-
3630
- size /= 4;
3631
-
3632
- // Note: a single 32-bit word can yield up to four UTF-8 bytes. We are
3633
- // making a safe assumption that each 32-bit word will yield four
3634
- // UTF-8 bytes.
3635
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3636
-
3637
- volatile size_t sink{0};
3638
-
3639
- auto proc = [data, size, &output_buffer, &sink]() {
3640
- const unsigned int *sourceStart =
3641
- reinterpret_cast<const unsigned int *>(data);
3642
- const unsigned int *sourceEnd = sourceStart + size;
3643
- unsigned char *targetStart =
3644
- reinterpret_cast<unsigned char *>(output_buffer.get());
3645
- unsigned char *targetEnd = targetStart + size * 4;
3646
- bool is_ok = (llvm::conversionOK ==
3647
- llvm::ConvertUTF32toUTF8(
3648
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3649
- llvm::ConversionFlags::strictConversion));
3650
- if (is_ok) {
3651
- sink = (targetStart -
3652
- reinterpret_cast<unsigned char *>(output_buffer.get()));
3653
- } else {
3654
- sink = 0;
3655
- }
3656
- };
3657
- count_events(proc, iterations); // warming up!
3658
- const auto result = count_events(proc, iterations);
3659
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3660
- std::cerr << "The output is zero which might indicate an error.\n";
3661
- }
3662
- size_t char_count = size;
3663
- print_summary(result, input_data.size(), char_count);
3664
- }
3665
-
3666
- void Benchmark::run_convert_utf16_to_utf32_llvm(size_t iterations) {
3667
- const simdutf::encoding_type bom =
3668
- BOM::check_bom(input_data.data(), input_data.size());
3669
- const char16_t *data = reinterpret_cast<const char16_t *>(
3670
- input_data.data() + BOM::bom_byte_size(bom));
3671
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3672
- if (size % 2 != 0) {
3673
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3674
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3675
- printf(" Running function on truncated input.\n");
3676
- }
3677
-
3678
- size /= 2;
3679
-
3680
- // Note: all code units yield four bytes. We make the safe assumption that all
3681
- // code units will be non surrogate code units so the size will double (16
3682
- // bits -> 32 bits).
3683
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 2]};
3684
-
3685
- volatile size_t sink{0};
3686
-
3687
- auto proc = [data, size, &output_buffer, &sink]() {
3688
- const short unsigned int *sourceStart =
3689
- reinterpret_cast<const short unsigned int *>(data);
3690
- const short unsigned int *sourceEnd = sourceStart + size;
3691
- unsigned int *targetStart =
3692
- reinterpret_cast<unsigned int *>(output_buffer.get());
3693
- unsigned int *targetEnd = targetStart + 2 * size;
3694
- bool is_ok = (llvm::conversionOK ==
3695
- llvm::ConvertUTF16toUTF32(
3696
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3697
- llvm::ConversionFlags::strictConversion));
3698
- if (is_ok) {
3699
- sink =
3700
- (targetStart - reinterpret_cast<unsigned int *>(output_buffer.get()));
3701
- } else {
3702
- sink = 0;
3703
- }
3704
- };
3705
- count_events(proc, iterations); // warming up!
3706
- const auto result = count_events(proc, iterations);
3707
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3708
- std::cerr << "The output is zero which might indicate an error.\n";
3709
- }
3710
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3711
- print_summary(result, input_data.size(), char_count);
3712
- }
3713
-
3714
- void Benchmark::run_convert_utf32_to_utf16_llvm(size_t iterations) {
3715
- const simdutf::encoding_type bom =
3716
- BOM::check_bom(input_data.data(), input_data.size());
3717
- const char32_t *data = reinterpret_cast<const char32_t *>(
3718
- input_data.data() + BOM::bom_byte_size(bom));
3719
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3720
- if (size % 4 != 0) {
3721
- printf(
3722
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3723
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3724
- printf(" Running function on truncated input.\n");
3725
- }
3726
-
3727
- size /= 4;
3728
-
3729
- // Note: a single 32-bit word can produce a surrogate pair, i.e. two
3730
- // 16-bit code units. We are making a safe assumption that each 32-
3731
- // bit word will yield two 16-bit code units.
3732
- std::unique_ptr<char[]> output_buffer{new char[size * 2]};
3733
-
3734
- volatile size_t sink{0};
3735
-
3736
- auto proc = [data, size, &output_buffer, &sink]() {
3737
- const unsigned int *sourceStart =
3738
- reinterpret_cast<const unsigned int *>(data);
3739
- const unsigned int *sourceEnd = sourceStart + size;
3740
- short unsigned int *targetStart =
3741
- reinterpret_cast<short unsigned int *>(output_buffer.get());
3742
- short unsigned int *targetEnd = targetStart + size * 2;
3743
- bool is_ok = (llvm::conversionOK ==
3744
- llvm::ConvertUTF32toUTF16(
3745
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3746
- llvm::ConversionFlags::strictConversion));
3747
- if (is_ok) {
3748
- sink = (targetStart -
3749
- reinterpret_cast<short unsigned int *>(output_buffer.get()));
3750
- } else {
3751
- sink = 0;
3752
- }
3753
- };
3754
- count_events(proc, iterations); // warming up!
3755
- const auto result = count_events(proc, iterations);
3756
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3757
- std::cerr << "The output is zero which might indicate an error.\n";
3758
- }
3759
- size_t char_count = size;
3760
- print_summary(result, input_data.size(), char_count);
3761
- }
3762
-
3763
- /**
3764
- * Nemanja Trifunovic, UTF8-CPP: UTF-8 with C++ in a Portable Way
3765
- * https://github.com/nemtrif/utfcpp/releases/tag/v3.2.2
3766
- */
3767
- void Benchmark::run_convert_utf8_to_utf16_utfcpp(size_t iterations) {
3768
- const char *data = reinterpret_cast<const char *>(input_data.data());
3769
- const size_t size = input_data.size();
3770
- volatile size_t sink{0};
3771
-
3772
- auto proc = [data, size, &sink]() {
3773
- try {
3774
- std::vector<unsigned short> str;
3775
- utf8::utf8to16(data, data + size, std::back_inserter(str));
3776
- sink = str.size();
3777
- } catch (const char *msg) {
3778
- std::cout << msg << std::endl;
3779
- sink = 0;
3780
- } catch (...) {
3781
- sink = 0;
3782
- }
3783
- };
3784
- count_events(proc, iterations); // warming up!
3785
- const auto result = count_events(proc, iterations);
3786
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3787
- std::cerr
3788
- << "The output is zero which might indicate a misconfiguration.\n";
3789
- }
3790
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3791
- // checking
3792
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
3793
- size_t expected = convert_utf8_to_utf16le(data, size, output_buffer.get());
3794
- if (expected != sink) {
3795
- std::cerr << "The number of UTF-16 code units does not match.\n";
3796
- }
3797
- print_summary(result, size, char_count);
3798
- }
3799
-
3800
- void Benchmark::run_convert_utf16_to_utf8_utfcpp(size_t iterations) {
3801
- const simdutf::encoding_type bom =
3802
- BOM::check_bom(input_data.data(), input_data.size());
3803
- const char16_t *data = reinterpret_cast<const char16_t *>(
3804
- input_data.data() + BOM::bom_byte_size(bom));
3805
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3806
- if (size % 2 != 0) {
3807
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3808
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3809
- printf(" Running function on truncated input.\n");
3810
- }
3811
-
3812
- volatile size_t sink{0};
3813
- auto proc = [data, size, &sink]() {
3814
- try {
3815
- std::string str;
3816
- utf8::utf16to8(data, data + size, std::back_inserter(str));
3817
- sink = str.size();
3818
- } catch (const char *msg) {
3819
- std::cout << msg << std::endl;
3820
- sink = 0;
3821
- } catch (...) {
3822
- sink = 0;
3823
- }
3824
- };
3825
- count_events(proc, iterations); // warming up!
3826
- const auto result = count_events(proc, iterations);
3827
- size /= 2;
3828
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3829
- std::cerr << "The output is zero which might indicate an error.\n";
3830
- }
3831
-
3832
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3833
- print_summary(result, input_data.size(), char_count);
3834
- }
3835
-
3836
- void Benchmark::run_convert_utf8_to_utf32_utfcpp(size_t iterations) {
3837
- const char *data = reinterpret_cast<const char *>(input_data.data());
3838
- const size_t size = input_data.size();
3839
- volatile size_t sink{0};
3840
-
3841
- auto proc = [data, size, &sink]() {
3842
- try {
3843
- std::vector<int> str;
3844
- utf8::utf8to32(data, data + size, std::back_inserter(str));
3845
- sink = str.size();
3846
- } catch (const char *msg) {
3847
- std::cout << msg << std::endl;
3848
- sink = 0;
3849
- } catch (...) {
3850
- sink = 0;
3851
- }
3852
- };
3853
- count_events(proc, iterations); // warming up!
3854
- const auto result = count_events(proc, iterations);
3855
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3856
- std::cerr
3857
- << "The output is zero which might indicate a misconfiguration.\n";
3858
- }
3859
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3860
- print_summary(result, size, char_count);
3861
- }
3862
-
3863
- void Benchmark::run_convert_utf32_to_utf8_utfcpp(size_t iterations) {
3864
- const simdutf::encoding_type bom =
3865
- BOM::check_bom(input_data.data(), input_data.size());
3866
- const char32_t *data = reinterpret_cast<const char32_t *>(
3867
- input_data.data() + BOM::bom_byte_size(bom));
3868
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3869
- if (size % 4 != 0) {
3870
- printf(
3871
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3872
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3873
- printf(" Running function on truncated input.\n");
3874
- }
3875
-
3876
- volatile size_t sink{0};
3877
-
3878
- auto proc = [data, size, &sink]() {
3879
- try {
3880
- std::string str;
3881
- utf8::utf16to8(data, data + size, std::back_inserter(str));
3882
- sink = str.size();
3883
- } catch (const char *msg) {
3884
- std::cout << msg << std::endl;
3885
- sink = 0;
3886
- } catch (...) {
3887
- sink = 0;
3888
- }
3889
- };
3890
- count_events(proc, iterations); // warming up!
3891
- const auto result = count_events(proc, iterations);
3892
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3893
- std::cerr << "The output is zero which might indicate an error.\n";
3894
- }
3895
- size_t char_count = size / 4;
3896
- print_summary(result, input_data.size(), char_count);
3897
- }
3898
-
3899
- } // namespace simdutf::benchmarks