react-native-quick-crypto 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (687)
  1. package/android/build.gradle +5 -1
  2. package/cpp/argon2/HybridArgon2.cpp +10 -3
  3. package/cpp/blake3/HybridBlake3.cpp +5 -3
  4. package/cpp/cipher/CCMCipher.cpp +29 -16
  5. package/cpp/cipher/CCMCipher.hpp +2 -4
  6. package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
  7. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  8. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
  9. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  10. package/cpp/cipher/GCMCipher.cpp +14 -15
  11. package/cpp/cipher/HybridCipher.cpp +39 -36
  12. package/cpp/cipher/HybridCipher.hpp +17 -1
  13. package/cpp/cipher/HybridRsaCipher.cpp +74 -29
  14. package/cpp/cipher/OCBCipher.cpp +4 -3
  15. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
  16. package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
  17. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  18. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
  19. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  20. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  21. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  22. package/cpp/ecdh/HybridECDH.cpp +23 -0
  23. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  24. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  25. package/cpp/hash/HybridHash.cpp +5 -7
  26. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  27. package/cpp/hmac/HybridHmac.cpp +4 -6
  28. package/cpp/kmac/HybridKmac.cpp +4 -4
  29. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  30. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  31. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  32. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  33. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  34. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  35. package/cpp/sign/HybridSignHandle.cpp +25 -68
  36. package/cpp/sign/HybridVerifyHandle.cpp +23 -60
  37. package/cpp/utils/HybridUtils.cpp +183 -43
  38. package/cpp/utils/HybridUtils.hpp +9 -2
  39. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  40. package/lib/commonjs/argon2.js +51 -2
  41. package/lib/commonjs/argon2.js.map +1 -1
  42. package/lib/commonjs/cipher.js +109 -11
  43. package/lib/commonjs/cipher.js.map +1 -1
  44. package/lib/commonjs/dsa.js +8 -2
  45. package/lib/commonjs/dsa.js.map +1 -1
  46. package/lib/commonjs/hash.js +15 -5
  47. package/lib/commonjs/hash.js.map +1 -1
  48. package/lib/commonjs/hkdf.js +33 -6
  49. package/lib/commonjs/hkdf.js.map +1 -1
  50. package/lib/commonjs/hmac.js +15 -5
  51. package/lib/commonjs/hmac.js.map +1 -1
  52. package/lib/commonjs/keys/publicCipher.js +10 -4
  53. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  54. package/lib/commonjs/random.js +11 -2
  55. package/lib/commonjs/random.js.map +1 -1
  56. package/lib/commonjs/rsa.js +12 -5
  57. package/lib/commonjs/rsa.js.map +1 -1
  58. package/lib/commonjs/scrypt.js +47 -6
  59. package/lib/commonjs/scrypt.js.map +1 -1
  60. package/lib/commonjs/subtle.js +76 -5
  61. package/lib/commonjs/subtle.js.map +1 -1
  62. package/lib/commonjs/utils/cipher.js +18 -7
  63. package/lib/commonjs/utils/cipher.js.map +1 -1
  64. package/lib/commonjs/utils/conversion.js +33 -9
  65. package/lib/commonjs/utils/conversion.js.map +1 -1
  66. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  67. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  68. package/lib/commonjs/x509certificate.js +6 -6
  69. package/lib/commonjs/x509certificate.js.map +1 -1
  70. package/lib/module/argon2.js +51 -2
  71. package/lib/module/argon2.js.map +1 -1
  72. package/lib/module/cipher.js +109 -11
  73. package/lib/module/cipher.js.map +1 -1
  74. package/lib/module/dsa.js +8 -2
  75. package/lib/module/dsa.js.map +1 -1
  76. package/lib/module/hash.js +15 -5
  77. package/lib/module/hash.js.map +1 -1
  78. package/lib/module/hkdf.js +33 -6
  79. package/lib/module/hkdf.js.map +1 -1
  80. package/lib/module/hmac.js +15 -5
  81. package/lib/module/hmac.js.map +1 -1
  82. package/lib/module/keys/publicCipher.js +10 -4
  83. package/lib/module/keys/publicCipher.js.map +1 -1
  84. package/lib/module/random.js +11 -2
  85. package/lib/module/random.js.map +1 -1
  86. package/lib/module/rsa.js +11 -4
  87. package/lib/module/rsa.js.map +1 -1
  88. package/lib/module/scrypt.js +47 -6
  89. package/lib/module/scrypt.js.map +1 -1
  90. package/lib/module/subtle.js +76 -5
  91. package/lib/module/subtle.js.map +1 -1
  92. package/lib/module/utils/cipher.js +18 -7
  93. package/lib/module/utils/cipher.js.map +1 -1
  94. package/lib/module/utils/conversion.js +33 -9
  95. package/lib/module/utils/conversion.js.map +1 -1
  96. package/lib/module/utils/timingSafeEqual.js +8 -3
  97. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  98. package/lib/module/x509certificate.js +6 -6
  99. package/lib/module/x509certificate.js.map +1 -1
  100. package/lib/typescript/argon2.d.ts.map +1 -1
  101. package/lib/typescript/cipher.d.ts +2 -2
  102. package/lib/typescript/cipher.d.ts.map +1 -1
  103. package/lib/typescript/dsa.d.ts.map +1 -1
  104. package/lib/typescript/hash.d.ts +2 -2
  105. package/lib/typescript/hash.d.ts.map +1 -1
  106. package/lib/typescript/hkdf.d.ts.map +1 -1
  107. package/lib/typescript/hmac.d.ts +2 -2
  108. package/lib/typescript/hmac.d.ts.map +1 -1
  109. package/lib/typescript/index.d.ts +1 -1
  110. package/lib/typescript/index.d.ts.map +1 -1
  111. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  112. package/lib/typescript/random.d.ts.map +1 -1
  113. package/lib/typescript/rsa.d.ts.map +1 -1
  114. package/lib/typescript/scrypt.d.ts.map +1 -1
  115. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  116. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  117. package/lib/typescript/subtle.d.ts.map +1 -1
  118. package/lib/typescript/utils/cipher.d.ts +13 -1
  119. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  120. package/lib/typescript/utils/conversion.d.ts +9 -6
  121. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  122. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  123. package/lib/typescript/x509certificate.d.ts.map +1 -1
  124. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  125. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  126. package/package.json +37 -5
  127. package/src/argon2.ts +80 -2
  128. package/src/cipher.ts +139 -15
  129. package/src/dsa.ts +11 -2
  130. package/src/hash.ts +17 -7
  131. package/src/hkdf.ts +44 -6
  132. package/src/hmac.ts +17 -7
  133. package/src/keys/publicCipher.ts +10 -4
  134. package/src/random.ts +11 -2
  135. package/src/rsa.ts +18 -4
  136. package/src/scrypt.ts +73 -6
  137. package/src/specs/utils.nitro.ts +0 -2
  138. package/src/subtle.ts +90 -8
  139. package/src/utils/cipher.ts +30 -8
  140. package/src/utils/conversion.ts +58 -20
  141. package/src/utils/timingSafeEqual.ts +8 -3
  142. package/src/x509certificate.ts +5 -6
  143. package/deps/blake3/.cargo/config.toml +0 -2
  144. package/deps/blake3/.git-blame-ignore-revs +0 -2
  145. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  146. package/deps/blake3/.github/workflows/ci.yml +0 -491
  147. package/deps/blake3/.github/workflows/tag.yml +0 -43
  148. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  149. package/deps/blake3/CONTRIBUTING.md +0 -31
  150. package/deps/blake3/Cargo.toml +0 -135
  151. package/deps/blake3/b3sum/Cargo.lock +0 -513
  152. package/deps/blake3/b3sum/Cargo.toml +0 -26
  153. package/deps/blake3/b3sum/README.md +0 -72
  154. package/deps/blake3/b3sum/src/main.rs +0 -564
  155. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  156. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  157. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  158. package/deps/blake3/benches/bench.rs +0 -623
  159. package/deps/blake3/build.rs +0 -389
  160. package/deps/blake3/c/CMakeLists.txt +0 -383
  161. package/deps/blake3/c/CMakePresets.json +0 -73
  162. package/deps/blake3/c/Makefile.testing +0 -82
  163. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  164. package/deps/blake3/c/blake3_avx2.c +0 -326
  165. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  166. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  167. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  168. package/deps/blake3/c/blake3_avx512.c +0 -1388
  169. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  170. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  171. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  172. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  173. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  174. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  175. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  176. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  177. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  178. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  179. package/deps/blake3/c/blake3_sse2.c +0 -566
  180. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  181. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  182. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  183. package/deps/blake3/c/blake3_sse41.c +0 -560
  184. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  185. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  186. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  187. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  188. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  189. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  190. package/deps/blake3/c/example.c +0 -36
  191. package/deps/blake3/c/example_tbb.c +0 -57
  192. package/deps/blake3/c/libblake3.pc.in +0 -12
  193. package/deps/blake3/c/main.c +0 -166
  194. package/deps/blake3/c/test.py +0 -97
  195. package/deps/blake3/media/B3.svg +0 -70
  196. package/deps/blake3/media/BLAKE3.svg +0 -85
  197. package/deps/blake3/media/speed.svg +0 -1474
  198. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  199. package/deps/blake3/reference_impl/README.md +0 -14
  200. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  201. package/deps/blake3/src/ffi_avx2.rs +0 -65
  202. package/deps/blake3/src/ffi_avx512.rs +0 -169
  203. package/deps/blake3/src/ffi_neon.rs +0 -82
  204. package/deps/blake3/src/ffi_sse2.rs +0 -126
  205. package/deps/blake3/src/ffi_sse41.rs +0 -126
  206. package/deps/blake3/src/guts.rs +0 -60
  207. package/deps/blake3/src/hazmat.rs +0 -704
  208. package/deps/blake3/src/io.rs +0 -64
  209. package/deps/blake3/src/join.rs +0 -92
  210. package/deps/blake3/src/lib.rs +0 -1835
  211. package/deps/blake3/src/platform.rs +0 -587
  212. package/deps/blake3/src/portable.rs +0 -198
  213. package/deps/blake3/src/rust_avx2.rs +0 -474
  214. package/deps/blake3/src/rust_sse2.rs +0 -775
  215. package/deps/blake3/src/rust_sse41.rs +0 -766
  216. package/deps/blake3/src/test.rs +0 -1049
  217. package/deps/blake3/src/traits.rs +0 -227
  218. package/deps/blake3/src/wasm32_simd.rs +0 -794
  219. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  220. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  221. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  222. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  223. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  224. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  225. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  226. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  227. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  228. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  229. package/deps/blake3/tools/release.md +0 -16
  230. package/deps/ncrypto/.bazelignore +0 -4
  231. package/deps/ncrypto/.bazelrc +0 -1
  232. package/deps/ncrypto/.bazelversion +0 -1
  233. package/deps/ncrypto/.clang-format +0 -111
  234. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  235. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  236. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  237. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  238. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  239. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  240. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  241. package/deps/ncrypto/.python-version +0 -1
  242. package/deps/ncrypto/.release-please-manifest.json +0 -3
  243. package/deps/ncrypto/BUILD.bazel +0 -44
  244. package/deps/ncrypto/CHANGELOG.md +0 -37
  245. package/deps/ncrypto/CMakeLists.txt +0 -79
  246. package/deps/ncrypto/MODULE.bazel +0 -16
  247. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  248. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  249. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  250. package/deps/ncrypto/ncrypto.pc.in +0 -10
  251. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  252. package/deps/ncrypto/pyproject.toml +0 -38
  253. package/deps/ncrypto/release-please-config.json +0 -11
  254. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  255. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  256. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  257. package/deps/ncrypto/tests/basic.cpp +0 -856
  258. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  259. package/deps/simdutf/.clang-format +0 -4
  260. package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +0 -62
  261. package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +0 -1
  262. package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +0 -35
  263. package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +0 -29
  264. package/deps/simdutf/.github/pull_request_template.md +0 -51
  265. package/deps/simdutf/.github/workflows/aarch64.yml +0 -39
  266. package/deps/simdutf/.github/workflows/alpine.yml +0 -27
  267. package/deps/simdutf/.github/workflows/amalgamation_demos.yml +0 -34
  268. package/deps/simdutf/.github/workflows/armv7.yml +0 -32
  269. package/deps/simdutf/.github/workflows/atomic_fuzz.yml +0 -25
  270. package/deps/simdutf/.github/workflows/cifuzz.yml +0 -37
  271. package/deps/simdutf/.github/workflows/clangformat.yml +0 -36
  272. package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +0 -40
  273. package/deps/simdutf/.github/workflows/debian.yml +0 -33
  274. package/deps/simdutf/.github/workflows/documentation.yml +0 -36
  275. package/deps/simdutf/.github/workflows/emscripten.yml +0 -19
  276. package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +0 -39
  277. package/deps/simdutf/.github/workflows/macos-latest.yml +0 -29
  278. package/deps/simdutf/.github/workflows/msys2-clang.yml +0 -48
  279. package/deps/simdutf/.github/workflows/msys2.yml +0 -50
  280. package/deps/simdutf/.github/workflows/ppc64le.yml +0 -29
  281. package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +0 -35
  282. package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +0 -35
  283. package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +0 -31
  284. package/deps/simdutf/.github/workflows/s390x.yml +0 -29
  285. package/deps/simdutf/.github/workflows/selective-amalgamation.yml +0 -29
  286. package/deps/simdutf/.github/workflows/typos.yml +0 -19
  287. package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +0 -30
  288. package/deps/simdutf/.github/workflows/ubuntu22.yml +0 -32
  289. package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +0 -27
  290. package/deps/simdutf/.github/workflows/ubuntu22sani.yml +0 -29
  291. package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +0 -34
  292. package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +0 -34
  293. package/deps/simdutf/.github/workflows/ubuntu24.yml +0 -32
  294. package/deps/simdutf/.github/workflows/ubuntu24sani.yml +0 -36
  295. package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +0 -29
  296. package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +0 -21
  297. package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +0 -41
  298. package/deps/simdutf/.github/workflows/vs17-ci.yml +0 -41
  299. package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +0 -41
  300. package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +0 -36
  301. package/deps/simdutf/AI_USAGE_POLICY.md +0 -56
  302. package/deps/simdutf/AUTHORS +0 -6
  303. package/deps/simdutf/CMakeLists.txt +0 -231
  304. package/deps/simdutf/CONTRIBUTING.md +0 -214
  305. package/deps/simdutf/CONTRIBUTORS +0 -1
  306. package/deps/simdutf/Doxyfile +0 -2584
  307. package/deps/simdutf/Makefile.crosscompile +0 -54
  308. package/deps/simdutf/README-RVV.md +0 -16
  309. package/deps/simdutf/SECURITY.md +0 -8
  310. package/deps/simdutf/benchmarks/CMakeLists.txt +0 -101
  311. package/deps/simdutf/benchmarks/alignment.cpp +0 -150
  312. package/deps/simdutf/benchmarks/base64/CMakeLists.txt +0 -30
  313. package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +0 -875
  314. package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +0 -49
  315. package/deps/simdutf/benchmarks/base64/node_base64.h +0 -227
  316. package/deps/simdutf/benchmarks/base64/openssl3_base64.h +0 -334
  317. package/deps/simdutf/benchmarks/benchmark.cpp +0 -65
  318. package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +0 -347
  319. package/deps/simdutf/benchmarks/competition/.clang-format-ignore +0 -5
  320. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +0 -1276
  321. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +0 -595
  322. package/deps/simdutf/benchmarks/competition/README.md +0 -7
  323. package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +0 -91
  324. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +0 -444
  325. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +0 -13183
  326. package/deps/simdutf/benchmarks/competition/inoue2008/script.py +0 -73
  327. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +0 -738
  328. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +0 -293
  329. package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +0 -8
  330. package/deps/simdutf/benchmarks/competition/u8u16/Makefile +0 -44
  331. package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +0 -169
  332. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +0 -148
  333. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +0 -45
  334. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +0 -34
  335. package/deps/simdutf/benchmarks/competition/u8u16/README +0 -56
  336. package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +0 -43
  337. package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +0 -27
  338. package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +0 -16
  339. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +0 -18
  340. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +0 -16
  341. package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +0 -28
  342. package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +0 -20
  343. package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +0 -2
  344. package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +0 -440
  345. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +0 -121
  346. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +0 -158
  347. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +0 -270
  348. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +0 -141
  349. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +0 -216
  350. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +0 -119
  351. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +0 -2430
  352. package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +0 -39
  353. package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +0 -421
  354. package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +0 -836
  355. package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +0 -222
  356. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +0 -4
  357. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +0 -5
  358. package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +0 -390
  359. package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +0 -18
  360. package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +0 -448
  361. package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +0 -284
  362. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +0 -1975
  363. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
  364. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +0 -2263
  365. package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +0 -239
  366. package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +0 -232
  367. package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +0 -194
  368. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +0 -193
  369. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +0 -167
  370. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +0 -288
  371. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +0 -117
  372. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +0 -2
  373. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +0 -2
  374. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +0 -3
  375. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +0 -2
  376. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +0 -2
  377. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +0 -3
  378. package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +0 -2
  379. package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +0 -23
  380. package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +0 -44
  381. package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +0 -106
  382. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +0 -11
  383. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +0 -13
  384. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +0 -13
  385. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +0 -13
  386. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +0 -14
  387. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +0 -11
  388. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +0 -8
  389. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +0 -12
  390. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +0 -13
  391. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +0 -14
  392. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +0 -11
  393. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +0 -8
  394. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +0 -11
  395. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +0 -12
  396. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +0 -13
  397. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +0 -10
  398. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +0 -9
  399. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +0 -25
  400. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +0 -94
  401. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +0 -20
  402. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +0 -2
  403. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +0 -1
  404. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +0 -75
  405. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +0 -47
  406. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +0 -17
  407. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +0 -76
  408. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +0 -35
  409. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +0 -117
  410. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +0 -210
  411. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +0 -158
  412. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +0 -104
  413. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +0 -334
  414. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +0 -186
  415. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +0 -140
  416. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +0 -42
  417. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +0 -100
  418. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +0 -57
  419. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +0 -85
  420. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +0 -27
  421. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +0 -126
  422. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +0 -108
  423. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +0 -139
  424. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +0 -74
  425. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +0 -65
  426. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +0 -91
  427. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +0 -772
  428. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +0 -12
  429. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +0 -486
  430. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +0 -162
  431. package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +0 -15
  432. package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +0 -292
  433. package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +0 -23
  434. package/deps/simdutf/benchmarks/competition/utfcpp/README.md +0 -1503
  435. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +0 -335
  436. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +0 -338
  437. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +0 -103
  438. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +0 -103
  439. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +0 -274
  440. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +0 -34
  441. package/deps/simdutf/benchmarks/dataset/README.md +0 -155
  442. package/deps/simdutf/benchmarks/dataset/emoji.txt +0 -204
  443. package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +0 -40
  444. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +0 -80
  445. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +0 -20
  446. package/deps/simdutf/benchmarks/find/CMakeLists.txt +0 -6
  447. package/deps/simdutf/benchmarks/find/findbenchmark.cpp +0 -63
  448. package/deps/simdutf/benchmarks/find/findbenchmarker.h +0 -46
  449. package/deps/simdutf/benchmarks/shortbench.cpp +0 -555
  450. package/deps/simdutf/benchmarks/src/CMakeLists.txt +0 -52
  451. package/deps/simdutf/benchmarks/src/apple_arm_events.h +0 -1104
  452. package/deps/simdutf/benchmarks/src/benchmark.cpp +0 -3899
  453. package/deps/simdutf/benchmarks/src/benchmark.h +0 -317
  454. package/deps/simdutf/benchmarks/src/benchmark_base.cpp +0 -144
  455. package/deps/simdutf/benchmarks/src/benchmark_base.h +0 -98
  456. package/deps/simdutf/benchmarks/src/cmdline.cpp +0 -176
  457. package/deps/simdutf/benchmarks/src/cmdline.h +0 -35
  458. package/deps/simdutf/benchmarks/src/event_counter.h +0 -162
  459. package/deps/simdutf/benchmarks/src/linux-perf-events.h +0 -104
  460. package/deps/simdutf/benchmarks/stream.cpp +0 -209
  461. package/deps/simdutf/benchmarks/threaded.cpp +0 -123
  462. package/deps/simdutf/cmake/CPM.cmake +0 -1363
  463. package/deps/simdutf/cmake/JoinPaths.cmake +0 -23
  464. package/deps/simdutf/cmake/add_cpp_test.cmake +0 -68
  465. package/deps/simdutf/cmake/simdutf-config.cmake.in +0 -2
  466. package/deps/simdutf/cmake/simdutf-flags.cmake +0 -26
  467. package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +0 -4
  468. package/deps/simdutf/cmake/toolchains-dev/README.md +0 -32
  469. package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +0 -14
  470. package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +0 -22
  471. package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +0 -16
  472. package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +0 -16
  473. package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +0 -16
  474. package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +0 -38
  475. package/deps/simdutf/doc/avx512.png +0 -0
  476. package/deps/simdutf/doc/logo.png +0 -0
  477. package/deps/simdutf/doc/logo.svg +0 -165
  478. package/deps/simdutf/doc/node2023.png +0 -0
  479. package/deps/simdutf/doc/shortinput.md +0 -78
  480. package/deps/simdutf/doc/utf16utf8.png +0 -0
  481. package/deps/simdutf/doc/utf8utf16.png +0 -0
  482. package/deps/simdutf/doc/widelogo.png +0 -0
  483. package/deps/simdutf/doxygen.py +0 -50
  484. package/deps/simdutf/fuzz/.clang-format +0 -9
  485. package/deps/simdutf/fuzz/CMakeLists.txt +0 -45
  486. package/deps/simdutf/fuzz/README.md +0 -168
  487. package/deps/simdutf/fuzz/atomic_base64.cpp +0 -448
  488. package/deps/simdutf/fuzz/base64.cpp +0 -278
  489. package/deps/simdutf/fuzz/build.sh +0 -83
  490. package/deps/simdutf/fuzz/conversion.cpp +0 -669
  491. package/deps/simdutf/fuzz/helpers/.clang-format-ignore +0 -1
  492. package/deps/simdutf/fuzz/helpers/common.h +0 -135
  493. package/deps/simdutf/fuzz/helpers/nameof.hpp +0 -1258
  494. package/deps/simdutf/fuzz/main.cpp +0 -72
  495. package/deps/simdutf/fuzz/minimize_and_cleanse.sh +0 -87
  496. package/deps/simdutf/fuzz/misc.cpp +0 -216
  497. package/deps/simdutf/fuzz/random_fuzz.sh +0 -154
  498. package/deps/simdutf/fuzz/roundtrip.cpp +0 -588
  499. package/deps/simdutf/fuzz/safe_conversion.cpp +0 -104
  500. package/deps/simdutf/riscv/Dockerfile +0 -16
  501. package/deps/simdutf/riscv/README.md +0 -24
  502. package/deps/simdutf/riscv/remove-docker-station +0 -8
  503. package/deps/simdutf/riscv/run-docker-station +0 -31
  504. package/deps/simdutf/scripts/.flake8 +0 -2
  505. package/deps/simdutf/scripts/Makefile +0 -2
  506. package/deps/simdutf/scripts/README_ADD_FUNCTION.md +0 -49
  507. package/deps/simdutf/scripts/add_function.py +0 -330
  508. package/deps/simdutf/scripts/amalgamation_tests.py +0 -156
  509. package/deps/simdutf/scripts/base64/Makefile +0 -2
  510. package/deps/simdutf/scripts/base64/README.md +0 -2
  511. package/deps/simdutf/scripts/base64/avx512.py +0 -76
  512. package/deps/simdutf/scripts/base64/neon_decode.py +0 -143
  513. package/deps/simdutf/scripts/base64/neon_generate_lut.py +0 -101
  514. package/deps/simdutf/scripts/base64/sse.py +0 -252
  515. package/deps/simdutf/scripts/base64/sseregular.py +0 -160
  516. package/deps/simdutf/scripts/base64/sseurl.py +0 -283
  517. package/deps/simdutf/scripts/base64/table.py +0 -59
  518. package/deps/simdutf/scripts/base64bench_print.py +0 -145
  519. package/deps/simdutf/scripts/benchmark-all.py +0 -119
  520. package/deps/simdutf/scripts/benchmark_print.py +0 -324
  521. package/deps/simdutf/scripts/check_feature_macros.py +0 -156
  522. package/deps/simdutf/scripts/check_typos.sh +0 -13
  523. package/deps/simdutf/scripts/clang_format.sh +0 -35
  524. package/deps/simdutf/scripts/clang_format_docker.sh +0 -38
  525. package/deps/simdutf/scripts/common.py +0 -24
  526. package/deps/simdutf/scripts/compilation_benchmark.py +0 -55
  527. package/deps/simdutf/scripts/compile_many_variations.sh +0 -64
  528. package/deps/simdutf/scripts/create_latex_table.py +0 -62
  529. package/deps/simdutf/scripts/docker/Dockerfile +0 -14
  530. package/deps/simdutf/scripts/docker/Makefile +0 -9
  531. package/deps/simdutf/scripts/docker/README.md +0 -30
  532. package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
  533. package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +0 -155
  534. package/deps/simdutf/scripts/prepare_doxygen.sh +0 -21
  535. package/deps/simdutf/scripts/release.py +0 -197
  536. package/deps/simdutf/scripts/shortinputplots.py +0 -97
  537. package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +0 -422
  538. package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +0 -105
  539. package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +0 -186
  540. package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +0 -137
  541. package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +0 -129
  542. package/deps/simdutf/scripts/table.py +0 -207
  543. package/deps/simdutf/scripts/tests/new.txt +0 -33
  544. package/deps/simdutf/scripts/tests/old.txt +0 -33
  545. package/deps/simdutf/scripts/tests/results.txt +0 -272
  546. package/deps/simdutf/simdutf.pc.in +0 -11
  547. package/deps/simdutf/singleheader/.flake8 +0 -2
  548. package/deps/simdutf/singleheader/CMakeLists.txt +0 -64
  549. package/deps/simdutf/singleheader/README-dev.md +0 -81
  550. package/deps/simdutf/singleheader/README.md +0 -19
  551. package/deps/simdutf/singleheader/amalgamate.py +0 -513
  552. package/deps/simdutf/singleheader/amalgamation_demo.c +0 -59
  553. package/deps/simdutf/singleheader/amalgamation_demo.cpp +0 -54
  554. package/deps/simdutf/singleheader/test-features.py +0 -262
  555. package/deps/simdutf/src/CMakeLists.txt +0 -78
  556. package/deps/simdutf/tests/CMakeLists.txt +0 -483
  557. package/deps/simdutf/tests/atomic_base64_tests.cpp +0 -2845
  558. package/deps/simdutf/tests/base64_tests.cpp +0 -3617
  559. package/deps/simdutf/tests/basic_fuzzer.cpp +0 -805
  560. package/deps/simdutf/tests/bele_tests.cpp +0 -182
  561. package/deps/simdutf/tests/constexpr_base64_tests.cpp +0 -387
  562. package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +0 -52
  563. package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +0 -80
  564. package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +0 -66
  565. package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +0 -120
  566. package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +0 -203
  567. package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +0 -276
  568. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +0 -109
  569. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +0 -136
  570. package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +0 -193
  571. package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +0 -381
  572. package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +0 -259
  573. package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +0 -266
  574. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +0 -148
  575. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +0 -176
  576. package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +0 -213
  577. package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +0 -318
  578. package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +0 -343
  579. package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +0 -271
  580. package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +0 -111
  581. package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +0 -96
  582. package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +0 -148
  583. package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +0 -192
  584. package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +0 -166
  585. package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +0 -215
  586. package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +0 -181
  587. package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +0 -261
  588. package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +0 -516
  589. package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +0 -579
  590. package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +0 -412
  591. package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +0 -480
  592. package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +0 -671
  593. package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +0 -455
  594. package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +0 -1204
  595. package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +0 -337
  596. package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +0 -37
  597. package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +0 -97
  598. package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +0 -126
  599. package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +0 -71
  600. package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +0 -122
  601. package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +0 -244
  602. package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +0 -49
  603. package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +0 -92
  604. package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +0 -114
  605. package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +0 -109
  606. package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +0 -84
  607. package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +0 -124
  608. package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +0 -221
  609. package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +0 -155
  610. package/deps/simdutf/tests/count_utf16be.cpp +0 -64
  611. package/deps/simdutf/tests/count_utf16le.cpp +0 -61
  612. package/deps/simdutf/tests/count_utf8.cpp +0 -87
  613. package/deps/simdutf/tests/detect_encodings_tests.cpp +0 -312
  614. package/deps/simdutf/tests/embed/valid_utf8.txt +0 -1
  615. package/deps/simdutf/tests/embed_tests.cpp +0 -22
  616. package/deps/simdutf/tests/find_tests.cpp +0 -77
  617. package/deps/simdutf/tests/fixed_string_tests.cpp +0 -153
  618. package/deps/simdutf/tests/helpers/CMakeLists.txt +0 -25
  619. package/deps/simdutf/tests/helpers/compiletime_conversions.h +0 -222
  620. package/deps/simdutf/tests/helpers/fixed_string.h +0 -267
  621. package/deps/simdutf/tests/helpers/random_int.cpp +0 -30
  622. package/deps/simdutf/tests/helpers/random_int.h +0 -39
  623. package/deps/simdutf/tests/helpers/random_utf16.cpp +0 -123
  624. package/deps/simdutf/tests/helpers/random_utf16.h +0 -52
  625. package/deps/simdutf/tests/helpers/random_utf32.cpp +0 -41
  626. package/deps/simdutf/tests/helpers/random_utf32.h +0 -40
  627. package/deps/simdutf/tests/helpers/random_utf8.cpp +0 -93
  628. package/deps/simdutf/tests/helpers/random_utf8.h +0 -36
  629. package/deps/simdutf/tests/helpers/test.cpp +0 -231
  630. package/deps/simdutf/tests/helpers/test.h +0 -193
  631. package/deps/simdutf/tests/helpers/transcode_test_base.cpp +0 -1257
  632. package/deps/simdutf/tests/helpers/transcode_test_base.h +0 -683
  633. package/deps/simdutf/tests/helpers/utf16.h +0 -27
  634. package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +0 -43
  635. package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +0 -47
  636. package/deps/simdutf/tests/internal_tests.cpp +0 -27
  637. package/deps/simdutf/tests/null_safety_tests.cpp +0 -94
  638. package/deps/simdutf/tests/random_fuzzer.cpp +0 -779
  639. package/deps/simdutf/tests/readme_tests.cpp +0 -274
  640. package/deps/simdutf/tests/reference/CMakeLists.txt +0 -23
  641. package/deps/simdutf/tests/reference/decode_utf16.h +0 -81
  642. package/deps/simdutf/tests/reference/decode_utf32.h +0 -47
  643. package/deps/simdutf/tests/reference/encode_latin1.cpp +0 -1
  644. package/deps/simdutf/tests/reference/encode_latin1.h +0 -32
  645. package/deps/simdutf/tests/reference/encode_utf16.cpp +0 -49
  646. package/deps/simdutf/tests/reference/encode_utf16.h +0 -20
  647. package/deps/simdutf/tests/reference/encode_utf32.cpp +0 -1
  648. package/deps/simdutf/tests/reference/encode_utf32.h +0 -36
  649. package/deps/simdutf/tests/reference/encode_utf8.cpp +0 -1
  650. package/deps/simdutf/tests/reference/encode_utf8.h +0 -40
  651. package/deps/simdutf/tests/reference/validate_utf16.cpp +0 -60
  652. package/deps/simdutf/tests/reference/validate_utf16.h +0 -14
  653. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +0 -35
  654. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +0 -13
  655. package/deps/simdutf/tests/reference/validate_utf32.cpp +0 -27
  656. package/deps/simdutf/tests/reference/validate_utf32.h +0 -12
  657. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +0 -27
  658. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +0 -12
  659. package/deps/simdutf/tests/reference/validate_utf8.cpp +0 -82
  660. package/deps/simdutf/tests/reference/validate_utf8.h +0 -11
  661. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +0 -43
  662. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +0 -12
  663. package/deps/simdutf/tests/select_implementation.cpp +0 -43
  664. package/deps/simdutf/tests/simdutf_c_tests.cpp +0 -244
  665. package/deps/simdutf/tests/span_tests.cpp +0 -401
  666. package/deps/simdutf/tests/special_tests.cpp +0 -559
  667. package/deps/simdutf/tests/straight_c_test.c +0 -187
  668. package/deps/simdutf/tests/text_encoding_tests.cpp +0 -77
  669. package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +0 -377
  670. package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +0 -202
  671. package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +0 -165
  672. package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +0 -77
  673. package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +0 -175
  674. package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +0 -188
  675. package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +0 -268
  676. package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +0 -274
  677. package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +0 -92
  678. package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +0 -114
  679. package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +0 -178
  680. package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +0 -88
  681. package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +0 -33
  682. package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +0 -228
  683. package/deps/simdutf/tools/CMakeLists.txt +0 -85
  684. package/deps/simdutf/tools/fastbase64.cpp +0 -250
  685. package/deps/simdutf/tools/sutf.cpp +0 -556
  686. package/deps/simdutf/tools/sutf.h +0 -40
  687. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -1,3899 +0,0 @@
1
- #include "benchmark.h"
2
- #include "simdutf.h"
3
-
4
- #include <cassert>
5
- #include <array>
6
- #include <iostream>
7
- #include <chrono>
8
- #include <thread>
9
- #include <string>
10
- #include <vector>
11
- #ifdef __x86_64__
12
- /**
13
- * utf8lut: Vectorized UTF-8 converter.
14
- * by stgatilov (2019)
15
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
16
- */
17
- SIMDUTF_TARGET_WESTMERE
18
- namespace {
19
- #include "benchmarks/competition/utf8lut/src/utf8lut.h"
20
- }
21
- SIMDUTF_UNTARGET_REGION
22
-
23
- /**
24
- * Bob Steagall, CppCon2018
25
- * https://github.com/BobSteagall/CppCon2018/
26
- *
27
- * Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics
28
- * https://www.youtube.com/watch?v=5FQ87-Ecb-A
29
- */
30
- #include "benchmarks/competition/CppCon2018/utf_utils.cpp"
31
- #endif
32
-
33
- /**
34
- * Bjoern Hoehrmann
35
- * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
36
- */
37
- #include "benchmarks/competition/hoehrmann/hoehrmann.h"
38
- /**
39
- * LLVM relies on code from the Unicode Consortium
40
- * https://en.wikipedia.org/wiki/Unicode_Consortium
41
- */
42
- #include "benchmarks/competition/llvm/ConvertUTF.cpp"
43
- #ifdef __x86_64__
44
- /**
45
- * Olivier Goffart, UTF-8 processing using SIMD (SSE4), 2012.
46
- * https://woboq.com/blog/utf-8-processing-using-simd.html
47
- */
48
- #include "benchmarks/competition/utf8sse4/fromutf8-sse.cpp"
49
- #endif
50
-
51
- #ifdef __x86_64__
52
- /**
53
- * benchmarks/competition/u8u16 contains an open source version of u8u16,
54
- * referenced in Cameron, Robert D, A case study in SIMD text processing with
55
- * parallel bit streams: UTF-8 to UTF-16 transcoding, Proceedings of the 13th
56
- * ACM SIGPLAN Symposium on Principles and practice of parallel programming,
57
- * 91--98.
58
- */
59
- // It seems that u8u16 is not good at scoping macros.
60
- #undef LITTLE_ENDIAN
61
- #undef BYTE_ORDER
62
- #undef BIG_ENDIAN
63
- #include "benchmarks/competition/u8u16/config/p4_config.h"
64
- #include "benchmarks/competition/u8u16/src/libu8u16.c"
65
- #endif
66
-
67
- /**
68
- * Nemanja Trifunovic, UTF8-CPP: UTF-8 with C++ in a Portable Way
69
- * https://github.com/nemtrif/utfcpp/releases/tag/v3.2.2
70
- */
71
- #include "benchmarks/competition/utfcpp/source/utf8.h"
72
-
73
- namespace simdutf::benchmarks {
74
-
75
- template <typename Fn>
76
- void Benchmark::register_function(std::string name, Fn function,
77
- std::set<simdutf::encoding_type> set) {
78
-
79
- if (name.find('+') == std::string::npos) {
80
- // adding simdutf benchmark, populate for all known architectures
81
- for (const auto &impl : simdutf::get_available_implementations()) {
82
- const auto full_name = name + '+' + impl->name();
83
- benchmarks.insert({full_name, std::make_pair(function, set)});
84
- }
85
- } else {
86
- benchmarks.insert({name, std::make_pair(function, set)});
87
- }
88
- }
89
-
90
- template <typename Fn>
91
- void Benchmark::register_function(std::string name, Fn function,
92
- simdutf::encoding_type enc1) {
93
- std::set<simdutf::encoding_type> set{enc1};
94
- register_function(name, function, set);
95
- }
96
-
97
- template <typename Fn>
98
- void Benchmark::register_function(std::string name, Fn function,
99
- simdutf::encoding_type enc1,
100
- simdutf::encoding_type enc2) {
101
- std::set<simdutf::encoding_type> set{enc1, enc2};
102
- register_function(name, function, set);
103
- }
104
-
105
- template <typename Fn>
106
- void Benchmark::register_function(std::string name, Fn function,
107
- simdutf::encoding_type enc1,
108
- simdutf::encoding_type enc2,
109
- simdutf::encoding_type enc3) {
110
- std::set<simdutf::encoding_type> set{enc1, enc2, enc3};
111
- register_function(name, function, set);
112
- }
113
-
114
- Benchmark::Benchmark(std::vector<input::Testcase> &&testcases)
115
- : BenchmarkBase(std::move(testcases)) {
116
- register_function("to_well_formed_utf16le",
117
- &Benchmark::run_to_well_formed_utf16le,
118
- simdutf::encoding_type::UTF16_LE);
119
- register_function("naive_validate_ascii",
120
- &Benchmark::run_naive_validate_ascii,
121
- simdutf::encoding_type::UTF8);
122
- register_function("validate_ascii", &Benchmark::run_validate_ascii,
123
- simdutf::encoding_type::UTF8);
124
- register_function("validate_ascii_with_errors",
125
- &Benchmark::run_validate_ascii_with_errors,
126
- simdutf::encoding_type::UTF8);
127
- register_function("validate_utf8", &Benchmark::run_validate_utf8,
128
- simdutf::encoding_type::UTF8);
129
- register_function("validate_utf8_with_errors",
130
- &Benchmark::run_validate_utf8_with_errors,
131
- simdutf::encoding_type::UTF8);
132
- register_function("validate_utf16le", &Benchmark::run_validate_utf16le,
133
- simdutf::encoding_type::UTF16_LE);
134
- register_function("validate_utf16le_with_errors",
135
- &Benchmark::run_validate_utf16le_with_errors,
136
- simdutf::encoding_type::UTF16_LE);
137
- register_function("validate_utf32", &Benchmark::run_validate_utf32,
138
- simdutf::encoding_type::UTF32_LE);
139
- register_function("validate_utf32_with_errors",
140
- &Benchmark::run_validate_utf32_with_errors,
141
- simdutf::encoding_type::UTF32_LE);
142
-
143
- register_function("count_utf8", &Benchmark::run_count_utf8,
144
- simdutf::encoding_type::UTF8);
145
- register_function("count_utf16le", &Benchmark::run_count_utf16le,
146
- simdutf::encoding_type::UTF16_LE);
147
-
148
- register_function("utf8_length_from_latin1",
149
- &Benchmark::run_utf8_length_from_latin1,
150
- simdutf::encoding_type::Latin1);
151
- register_function("utf8_length_from_utf16le",
152
- &Benchmark::run_utf8_length_from_utf16le,
153
- simdutf::encoding_type::UTF16_LE);
154
- register_function("utf8_length_from_utf16le_with_replacement",
155
- &Benchmark::run_utf8_length_from_utf16le_with_replacement,
156
- simdutf::encoding_type::UTF16_LE);
157
- register_function("utf8_length_from_utf16be",
158
- &Benchmark::run_utf8_length_from_utf16be,
159
- simdutf::encoding_type::UTF16_BE);
160
- register_function("utf8_length_from_utf16be_with_replacement",
161
- &Benchmark::run_utf8_length_from_utf16be_with_replacement,
162
- simdutf::encoding_type::UTF16_BE);
163
- register_function("utf8_length_from_utf32",
164
- &Benchmark::run_utf8_length_from_utf32,
165
- simdutf::encoding_type::UTF32_LE);
166
- register_function("utf16_length_from_utf8",
167
- &Benchmark::run_utf16_length_from_utf8,
168
- simdutf::encoding_type::UTF8);
169
- register_function("convert_latin1_to_utf8",
170
- &Benchmark::run_convert_latin1_to_utf8,
171
- simdutf::encoding_type::Latin1);
172
- register_function("convert_latin1_to_utf16le",
173
- &Benchmark::run_convert_latin1_to_utf16le,
174
- simdutf::encoding_type::Latin1);
175
- register_function("convert_latin1_to_utf32",
176
- &Benchmark::run_convert_latin1_to_utf32,
177
- simdutf::encoding_type::Latin1);
178
-
179
- register_function("convert_utf8_to_latin1",
180
- &Benchmark::run_convert_utf8_to_latin1,
181
- simdutf::encoding_type::UTF8);
182
- register_function("convert_utf8_to_latin1_with_errors",
183
- &Benchmark::run_convert_utf8_to_latin1_with_errors,
184
- simdutf::encoding_type::UTF8);
185
- register_function("convert_valid_utf8_to_latin1",
186
- &Benchmark::run_convert_valid_utf8_to_latin1,
187
- simdutf::encoding_type::UTF8);
188
-
189
- register_function("convert_utf8_to_utf16le",
190
- &Benchmark::run_convert_utf8_to_utf16le,
191
- simdutf::encoding_type::UTF8);
192
- register_function("convert_utf8_to_utf16le_with_errors",
193
- &Benchmark::run_convert_utf8_to_utf16le_with_errors,
194
- simdutf::encoding_type::UTF8);
195
- register_function(
196
- "convert_utf8_to_utf16le_with_dynamic_allocation",
197
- &Benchmark::run_convert_utf8_to_utf16le_with_dynamic_allocation,
198
- simdutf::encoding_type::UTF8);
199
- register_function("convert_valid_utf8_to_utf16le",
200
- &Benchmark::run_convert_valid_utf8_to_utf16le,
201
- simdutf::encoding_type::UTF8);
202
-
203
- register_function("convert_utf8_to_utf32",
204
- &Benchmark::run_convert_utf8_to_utf32,
205
- simdutf::encoding_type::UTF8);
206
- register_function("convert_utf8_to_utf32_with_errors",
207
- &Benchmark::run_convert_utf8_to_utf32_with_errors,
208
- simdutf::encoding_type::UTF8);
209
- register_function(
210
- "convert_utf8_to_utf32_with_dynamic_allocation",
211
- &Benchmark::run_convert_utf8_to_utf32_with_dynamic_allocation,
212
- simdutf::encoding_type::UTF8);
213
- register_function("convert_valid_utf8_to_utf32",
214
- &Benchmark::run_convert_valid_utf8_to_utf32,
215
- simdutf::encoding_type::UTF8);
216
-
217
- register_function("convert_utf16le_to_latin1",
218
- &Benchmark::run_convert_utf16le_to_latin1,
219
- simdutf::encoding_type::UTF16_LE);
220
- register_function("convert_utf16le_to_latin1_with_errors",
221
- &Benchmark::run_convert_utf16le_to_latin1_with_errors,
222
- simdutf::encoding_type::UTF16_LE);
223
- register_function("convert_valid_utf16le_to_latin1",
224
- &Benchmark::run_convert_valid_utf16le_to_latin1,
225
- simdutf::encoding_type::UTF16_LE);
226
- #if SIMDUTF_IS_BIG_ENDIAN
227
- register_function("convert_utf16_to_utf8_safe",
228
- &Benchmark::run_convert_utf16_to_utf8_safe,
229
- simdutf::encoding_type::UTF16_BE);
230
- #else
231
- register_function("convert_utf16_to_utf8_safe",
232
- &Benchmark::run_convert_utf16_to_utf8_safe,
233
- simdutf::encoding_type::UTF16_LE);
234
- #endif // SIMDUTF_IS_BIG_ENDIAN
235
- register_function("convert_utf16le_to_utf8",
236
- &Benchmark::run_convert_utf16le_to_utf8,
237
- simdutf::encoding_type::UTF16_LE);
238
- register_function("convert_utf16le_to_utf8_with_errors",
239
- &Benchmark::run_convert_utf16le_to_utf8_with_errors,
240
- simdutf::encoding_type::UTF16_LE);
241
- register_function(
242
- "convert_utf16le_to_utf8_with_dynamic_allocation",
243
- &Benchmark::run_convert_utf16le_to_utf8_with_dynamic_allocation,
244
- simdutf::encoding_type::UTF16_LE);
245
- register_function("convert_valid_utf16le_to_utf8",
246
- &Benchmark::run_convert_valid_utf16le_to_utf8,
247
- simdutf::encoding_type::UTF16_LE);
248
-
249
- register_function("convert_utf16le_to_utf32",
250
- &Benchmark::run_convert_utf16le_to_utf32,
251
- simdutf::encoding_type::UTF16_LE);
252
- register_function("convert_utf16le_to_utf32_with_errors",
253
- &Benchmark::run_convert_utf16le_to_utf32_with_errors,
254
- simdutf::encoding_type::UTF16_LE);
255
- register_function(
256
- "convert_utf16le_to_utf32_with_dynamic_allocation",
257
- &Benchmark::run_convert_utf16le_to_utf32_with_dynamic_allocation,
258
- simdutf::encoding_type::UTF16_LE);
259
- register_function("convert_valid_utf16le_to_utf32",
260
- &Benchmark::run_convert_valid_utf16le_to_utf32,
261
- simdutf::encoding_type::UTF16_LE);
262
-
263
- register_function("convert_utf32_to_latin1",
264
- &Benchmark::run_convert_utf32_to_latin1,
265
- simdutf::encoding_type::UTF32_LE);
266
- register_function("convert_utf32_to_latin1_with_errors",
267
- &Benchmark::run_convert_utf32_to_latin1_with_errors,
268
- simdutf::encoding_type::UTF32_LE);
269
- register_function("convert_valid_utf32_to_latin1",
270
- &Benchmark::run_convert_valid_utf32_to_latin1,
271
- simdutf::encoding_type::UTF32_LE);
272
-
273
- register_function("convert_utf32_to_utf8",
274
- &Benchmark::run_convert_utf32_to_utf8,
275
- simdutf::encoding_type::UTF32_LE);
276
- register_function("convert_utf32_to_utf8_with_errors",
277
- &Benchmark::run_convert_utf32_to_utf8_with_errors,
278
- simdutf::encoding_type::UTF32_LE);
279
- register_function("convert_valid_utf32_to_utf8",
280
- &Benchmark::run_convert_valid_utf32_to_utf8,
281
- simdutf::encoding_type::UTF32_LE);
282
-
283
- register_function("convert_utf32_to_utf16le",
284
- &Benchmark::run_convert_utf32_to_utf16<endianness::LITTLE>,
285
- simdutf::encoding_type::UTF32_LE);
286
- register_function("convert_utf32_to_utf16be",
287
- &Benchmark::run_convert_utf32_to_utf16<endianness::BIG>,
288
- simdutf::encoding_type::UTF32_LE);
289
- register_function(
290
- "convert_utf32_to_utf16le_with_errors",
291
- &Benchmark::run_convert_utf32_to_utf16_with_errors<endianness::LITTLE>,
292
- simdutf::encoding_type::UTF32_LE);
293
- register_function(
294
- "convert_utf32_to_utf16be_with_errors",
295
- &Benchmark::run_convert_utf32_to_utf16_with_errors<endianness::BIG>,
296
- simdutf::encoding_type::UTF32_LE);
297
- register_function(
298
- "convert_valid_utf32_to_utf16le",
299
- &Benchmark::run_convert_valid_utf32_to_utf16<endianness::LITTLE>,
300
- simdutf::encoding_type::UTF32_LE);
301
- register_function(
302
- "convert_valid_utf32_to_utf16be",
303
- &Benchmark::run_convert_valid_utf32_to_utf16<endianness::BIG>,
304
- simdutf::encoding_type::UTF32_LE);
305
-
306
- register_function("detect_encodings", &Benchmark::run_detect_encodings,
307
- simdutf::encoding_type::UTF8,
308
- simdutf::encoding_type::UTF16_LE,
309
- simdutf::encoding_type::UTF32_LE);
310
-
311
- #ifdef ICU_AVAILABLE
312
- register_function("convert_latin1_to_utf8+icu",
313
- &Benchmark::run_convert_latin1_to_utf8_icu,
314
- simdutf::encoding_type::Latin1);
315
- register_function("convert_latin1_to_utf16+icu",
316
- &Benchmark::run_convert_latin1_to_utf16_icu,
317
- simdutf::encoding_type::Latin1);
318
- register_function("convert_latin1_to_utf32+icu",
319
- &Benchmark::run_convert_latin1_to_utf32_icu,
320
- simdutf::encoding_type::Latin1);
321
- register_function("convert_utf8_to_latin1+icu",
322
- &Benchmark::run_convert_utf8_to_latin1_icu,
323
- simdutf::encoding_type::UTF8);
324
- register_function("convert_utf8_to_utf16+icu",
325
- &Benchmark::run_convert_utf8_to_utf16_icu,
326
- simdutf::encoding_type::UTF8);
327
- register_function("convert_utf16_to_utf8+icu",
328
- &Benchmark::run_convert_utf16_to_utf8_icu,
329
- simdutf::encoding_type::UTF16_LE);
330
- register_function("convert_utf16_to_latin1+icu",
331
- &Benchmark::run_convert_utf16_to_latin1_icu,
332
- simdutf::encoding_type::UTF16_LE);
333
- register_function("convert_utf32_to_latin1+icu",
334
- &Benchmark::run_convert_utf32_to_latin1_icu,
335
- simdutf::encoding_type::UTF32_LE);
336
- #endif
337
- #ifdef ICONV_AVAILABLE
338
- register_function("convert_latin1_to_utf8+iconv",
339
- &Benchmark::run_convert_latin1_to_utf8_iconv,
340
- simdutf::encoding_type::Latin1);
341
- register_function("convert_latin1_to_utf16+iconv",
342
- &Benchmark::run_convert_latin1_to_utf16_iconv,
343
- simdutf::encoding_type::Latin1);
344
- register_function("convert_latin1_to_utf32+iconv",
345
- &Benchmark::run_convert_latin1_to_utf32_iconv,
346
- simdutf::encoding_type::Latin1);
347
- register_function("convert_utf8_to_latin1+iconv",
348
- &Benchmark::run_convert_utf8_to_latin1_iconv,
349
- simdutf::encoding_type::UTF8);
350
- register_function("convert_utf8_to_utf16+iconv",
351
- &Benchmark::run_convert_utf8_to_utf16_iconv,
352
- simdutf::encoding_type::UTF8);
353
- register_function("convert_utf16_to_utf8+iconv",
354
- &Benchmark::run_convert_utf16_to_utf8_iconv,
355
- simdutf::encoding_type::UTF16_LE);
356
- register_function("convert_utf16_to_latin1+iconv",
357
- &Benchmark::run_convert_utf16_to_latin1_iconv,
358
- simdutf::encoding_type::UTF16_LE);
359
- register_function("convert_utf32_to_latin1+iconv",
360
- &Benchmark::run_convert_utf32_to_latin1_iconv,
361
- simdutf::encoding_type::UTF32_LE);
362
- #endif
363
- #ifdef INOUE2008
364
- register_function("convert_valid_utf8_to_utf16+inoue2008",
365
- &Benchmark::run_convert_valid_utf8_to_utf16_inoue2008,
366
- simdutf::encoding_type::UTF8);
367
- #endif
368
- #ifdef __x86_64__
369
- register_function("convert_utf8_to_utf16+u8u16",
370
- &Benchmark::run_convert_utf8_to_utf16_u8u16,
371
- simdutf::encoding_type::UTF8);
372
- register_function("convert_utf16_to_utf8+utf8lut",
373
- &Benchmark::run_convert_valid_utf8_to_utf16_utf8lut,
374
- simdutf::encoding_type::UTF16_LE);
375
- register_function("convert_valid_utf16_to_utf8+utf8lut",
376
- &Benchmark::run_convert_valid_utf16_to_utf8_utf8lut,
377
- simdutf::encoding_type::UTF16_LE);
378
- register_function("convert_utf8_to_utf16+utf8lut",
379
- &Benchmark::run_convert_valid_utf8_to_utf16_utf8lut,
380
- simdutf::encoding_type::UTF8);
381
- register_function("convert_utf8_to_utf32+utf8lut",
382
- &Benchmark::run_convert_utf8_to_utf32_utf8lut,
383
- simdutf::encoding_type::UTF8);
384
- register_function("convert_valid_utf8_to_utf16+utf8lut",
385
- &Benchmark::run_convert_valid_utf8_to_utf16_utf8lut,
386
- simdutf::encoding_type::UTF8);
387
- register_function("convert_utf32_to_utf8+utf8lut",
388
- &Benchmark::run_convert_valid_utf32_to_utf8_utf8lut,
389
- simdutf::encoding_type::UTF32_LE);
390
- register_function("convert_valid_utf32_to_utf8+utf8lut",
391
- &Benchmark::run_convert_valid_utf32_to_utf8_utf8lut,
392
- simdutf::encoding_type::UTF32_BE);
393
- register_function("convert_valid_utf8_to_utf32+utf8lut",
394
- &Benchmark::run_convert_utf8_to_utf32_utf8lut,
395
- simdutf::encoding_type::UTF8);
396
- register_function("convert_utf8_to_utf16+utf8sse4",
397
- &Benchmark::run_convert_utf8_to_utf16_utf8sse4,
398
- simdutf::encoding_type::UTF8);
399
- register_function("convert_utf8_to_utf16+cppcon2018",
400
- &Benchmark::run_convert_utf8_to_utf16_cppcon2018,
401
- simdutf::encoding_type::UTF8);
402
- register_function("convert_utf8_to_utf32+cppcon2018",
403
- &Benchmark::run_convert_utf8_to_utf32_cppcon2018,
404
- simdutf::encoding_type::UTF8);
405
- #endif
406
- register_function("convert_utf8_to_utf16+hoehrmann",
407
- &Benchmark::run_convert_utf8_to_utf16_hoehrmann,
408
- simdutf::encoding_type::UTF8);
409
- register_function("convert_utf8_to_utf32+hoehrmann",
410
- &Benchmark::run_convert_utf8_to_utf32_hoehrmann,
411
- simdutf::encoding_type::UTF8);
412
-
413
- register_function("convert_utf8_to_utf16+llvm",
414
- &Benchmark::run_convert_utf8_to_utf16_llvm,
415
- simdutf::encoding_type::UTF8);
416
- register_function("convert_utf8_to_utf32+llvm",
417
- &Benchmark::run_convert_utf8_to_utf32_llvm,
418
- simdutf::encoding_type::UTF8);
419
- register_function("convert_utf16_to_utf8+llvm",
420
- &Benchmark::run_convert_utf16_to_utf8_llvm,
421
- simdutf::encoding_type::UTF16_LE);
422
- register_function("convert_utf32_to_utf8+llvm",
423
- &Benchmark::run_convert_utf32_to_utf8_llvm,
424
- simdutf::encoding_type::UTF32_LE);
425
- register_function("convert_utf32_to_utf16+llvm",
426
- &Benchmark::run_convert_utf32_to_utf16_llvm,
427
- simdutf::encoding_type::UTF32_LE);
428
- register_function("convert_utf16_to_utf32+llvm",
429
- &Benchmark::run_convert_utf16_to_utf32_llvm,
430
- simdutf::encoding_type::UTF16_LE);
431
-
432
- register_function("convert_utf8_to_utf16+utfcpp",
433
- &Benchmark::run_convert_utf8_to_utf16_utfcpp,
434
- simdutf::encoding_type::UTF8);
435
- register_function("convert_utf8_to_utf32+utfcpp",
436
- &Benchmark::run_convert_utf8_to_utf32_utfcpp,
437
- simdutf::encoding_type::UTF8);
438
- register_function("convert_utf16_to_utf8+utfcpp",
439
- &Benchmark::run_convert_utf16_to_utf8_utfcpp,
440
- simdutf::encoding_type::UTF16_LE);
441
- register_function("convert_utf32_to_utf8+utfcpp",
442
- &Benchmark::run_convert_utf32_to_utf8_utfcpp,
443
- simdutf::encoding_type::UTF32_LE);
444
-
445
- register_function("utf8_length_from_latin1+node",
446
- &Benchmark::run_utf8_length_from_latin1_node,
447
- simdutf::encoding_type::Latin1);
448
- }
449
-
450
- // static
451
- Benchmark Benchmark::create(const CommandLine &cmdline) {
452
- std::vector<input::Testcase> testcases;
453
-
454
- using input::File;
455
- using input::random_utf8;
456
- using input::Testcase;
457
-
458
- for (const size_t iterations : cmdline.iterations) {
459
- for (const auto &path : cmdline.files) {
460
- testcases.emplace_back(
461
- Testcase{cmdline.procedures, iterations, File{path}});
462
- }
463
-
464
- for (const size_t size : cmdline.random_size) {
465
- testcases.emplace_back(
466
- Testcase{cmdline.procedures, iterations, random_utf8{size}});
467
- }
468
- }
469
-
470
- return Benchmark{std::move(testcases)};
471
- }
472
-
473
- void Benchmark::list_procedures(ListingMode lm) const {
474
- switch (lm) {
475
- case ListingMode::None:
476
- break;
477
-
478
- case ListingMode::HumanReadable: {
479
- const auto &known_procedures = all_procedures();
480
- printf("Available procedures (%zu)\n", size_t(known_procedures.size()));
481
- for (const auto &name : known_procedures) {
482
- printf("- %s\n", name.c_str());
483
- }
484
- } break;
485
-
486
- case ListingMode::PlainLines: {
487
- const auto &known_procedures = all_procedures();
488
- for (const auto &name : known_procedures) {
489
- puts(name.c_str());
490
- }
491
- break;
492
- }
493
-
494
- case ListingMode::Json: {
495
- printf("[\n");
496
- auto first = true;
497
- for (const auto &item : benchmarks) {
498
- const auto &name = item.first;
499
- const auto &entry = item.second;
500
- if (!first) {
501
- putchar(',');
502
- }
503
- first = false;
504
-
505
- printf(" {\n");
506
- printf(" \"name\": \"%s\",\n", name.c_str());
507
- if (std::holds_alternative<thirdparty_fn>(entry.first)) {
508
- printf(" \"simdutf\": false,\n");
509
- } else if (std::holds_alternative<simdutf_fn>(entry.first)) {
510
- printf(" \"simdutf\": true,\n");
511
- }
512
-
513
- {
514
- printf(" \"encodings\": [");
515
- bool first = true;
516
- for (const auto &enc : entry.second) {
517
- if (!first) {
518
- putchar(',');
519
- }
520
- first = false;
521
-
522
- switch (enc) {
523
- case simdutf::UTF8:
524
- printf("\"utf8\"");
525
- break;
526
- case simdutf::UTF16_LE:
527
- printf("\"utf16le\"");
528
- break;
529
- case simdutf::UTF16_BE:
530
- printf("\"utf16be\"");
531
- break;
532
- case simdutf::UTF32_LE:
533
- printf("\"utf32le\"");
534
- break;
535
- case simdutf::UTF32_BE:
536
- printf("\"utf32be\"");
537
- break;
538
- case simdutf::Latin1:
539
- printf("\"latin1\"");
540
- break;
541
- default:
542
- printf("\"unknown\"");
543
- break;
544
- }
545
- }
546
- printf("]\n");
547
- } // encodings
548
- printf(" }");
549
- } // for
550
- printf("]\n");
551
- break;
552
- }
553
- }
554
- }
555
-
556
- void Benchmark::run(const std::string &procedure_name, size_t iterations) {
557
- const auto item = benchmarks.find(procedure_name);
558
- if (item == benchmarks.end()) {
559
- std::cerr << "Unsupported procedure: " << procedure_name << '\n';
560
- std::cerr << "Report the issue.\n";
561
- std::cerr << " Aborting ! " << '\n';
562
- exit(1);
563
- }
564
-
565
- const auto &entry = item->second;
566
- if (std::holds_alternative<thirdparty_fn>(entry.first)) {
567
- const auto fn = std::get<thirdparty_fn>(entry.first);
568
-
569
- (this->*fn)(iterations);
570
- } else if (std::holds_alternative<simdutf_fn>(entry.first)) {
571
- const auto p = procedure_name.find('+');
572
- const std::string name{procedure_name.substr(0, p)};
573
- const std::string impl{procedure_name.substr(p + 1)};
574
-
575
- auto implementation = simdutf::get_available_implementations()[impl];
576
- if (implementation == nullptr) {
577
- throw std::runtime_error("Wrong implementation " + impl);
578
- }
579
- // If you want to skip the CPU feature checks, you can set
580
- // a variable when calling the benchmark program. E.g.,
581
- // SIMDUTF_SKIP_CPU_CHECK=ON benchmark -F myfile.txt
582
- // This might result in a crash (E.g., Illegal instruction).
583
- SIMDUTF_PUSH_DISABLE_WARNINGS
584
- SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC:
585
- // manually verified this is safe
586
- static const char *skip_check = getenv("SIMDUTF_SKIP_CPU_CHECK");
587
- SIMDUTF_POP_DISABLE_WARNINGS
588
- if (!skip_check && !implementation->supported_by_runtime_system()) {
589
- std::cout << procedure_name << ": unsupported by the system\n";
590
- return;
591
- }
592
-
593
- const auto fn = std::get<simdutf_fn>(entry.first);
594
- (this->*fn)(*implementation, iterations);
595
- } else {
596
- throw std::logic_error("The entry for '" + procedure_name +
597
- "' is not valid. Please report an issue.");
598
- }
599
-
600
- // We pause after each call to make sure
601
- // that other benchmarks are not affected by frequency throttling.
602
- // This was initially introduced for AVX-512 only, but it is probably
603
- // wise to have it always.
604
- std::this_thread::sleep_for(std::chrono::milliseconds(10));
605
- }
606
-
607
- void Benchmark::run_validate_utf8(const simdutf::implementation &implementation,
608
- size_t iterations) {
609
- const char *data = reinterpret_cast<const char *>(input_data.data());
610
- const size_t size = input_data.size();
611
- volatile bool sink{false};
612
-
613
- auto proc = [&implementation, data, size, &sink]() {
614
- sink = implementation.validate_utf8(data, size);
615
- };
616
-
617
- count_events(proc, iterations); // warming up!
618
- const auto result = count_events(proc, iterations);
619
- if ((sink == false) && (iterations > 0)) {
620
- std::cerr << "The input was declared invalid.\n";
621
- }
622
- size_t char_count = get_active_implementation()->count_utf8(data, size);
623
- print_summary(result, size, char_count);
624
- }
625
-
626
- void Benchmark::run_validate_utf8_with_errors(
627
- const simdutf::implementation &implementation, size_t iterations) {
628
- const char *data = reinterpret_cast<const char *>(input_data.data());
629
- const size_t size = input_data.size();
630
- volatile bool sink{false};
631
-
632
- auto proc = [&implementation, data, size, &sink]() {
633
- result res = implementation.validate_utf8_with_errors(data, size);
634
- sink = !(res.error);
635
- };
636
-
637
- count_events(proc, iterations); // warming up!
638
- const auto result = count_events(proc, iterations);
639
- if ((sink == false) && (iterations > 0)) {
640
- std::cerr << "The input was declared invalid.\n";
641
- }
642
- size_t char_count = get_active_implementation()->count_utf8(data, size);
643
- print_summary(result, size, char_count);
644
- }
645
-
646
namespace details {
/**
 * Reference ASCII check: ORs every byte together and tests the high bit once.
 *
 * The whole buffer is always scanned; there is no early exit.
 *
 * @param data pointer to the first byte.
 * @param size number of bytes to inspect.
 * @return true when no byte in [data, data + size) exceeds 0x7F.
 */
bool ascii_is_valid(const char *data, size_t size) {
  unsigned char combined = 0;
  const char *const end = data + size;
  for (const char *cursor = data; cursor != end; ++cursor) {
    combined |= static_cast<unsigned char>(*cursor);
  }
  return combined < 0x80;
}
} // namespace details
655
-
656
- void Benchmark::run_naive_validate_ascii(
657
- const simdutf::implementation &implementation, size_t iterations) {
658
- const char *data = reinterpret_cast<const char *>(input_data.data());
659
- const size_t size = input_data.size();
660
- volatile bool sink{false};
661
- auto proc = [&implementation, data, size, &sink]() {
662
- sink = details::ascii_is_valid(data, size);
663
- };
664
-
665
- count_events(proc, iterations); // warming up!
666
- const auto result = count_events(proc, iterations);
667
- if ((sink == false) && (iterations > 0)) {
668
- std::cerr << "The input was declared invalid.\n";
669
- }
670
- size_t char_count = get_active_implementation()->count_utf8(data, size);
671
- print_summary(result, size, char_count);
672
- }
673
-
674
- void Benchmark::run_validate_ascii(
675
- const simdutf::implementation &implementation, size_t iterations) {
676
- const char *data = reinterpret_cast<const char *>(input_data.data());
677
- const size_t size = input_data.size();
678
- volatile bool sink{false};
679
-
680
- auto proc = [&implementation, data, size, &sink]() {
681
- sink = implementation.validate_ascii(data, size);
682
- };
683
-
684
- count_events(proc, iterations); // warming up!
685
- const auto result = count_events(proc, iterations);
686
- if ((sink == false) && (iterations > 0)) {
687
- std::cerr << "The input was declared invalid.\n";
688
- }
689
- size_t char_count = get_active_implementation()->count_utf8(data, size);
690
- print_summary(result, size, char_count);
691
- }
692
-
693
- void Benchmark::run_validate_ascii_with_errors(
694
- const simdutf::implementation &implementation, size_t iterations) {
695
- const char *data = reinterpret_cast<const char *>(input_data.data());
696
- const size_t size = input_data.size();
697
- volatile bool sink{false};
698
-
699
- auto proc = [&implementation, data, size, &sink]() {
700
- result res = implementation.validate_ascii_with_errors(data, size);
701
- sink = !(res.error);
702
- };
703
-
704
- count_events(proc, iterations); // warming up!
705
- const auto result = count_events(proc, iterations);
706
- if ((sink == false) && (iterations > 0)) {
707
- std::cerr << "The input was declared invalid.\n";
708
- }
709
- size_t char_count = get_active_implementation()->count_utf8(data, size);
710
- print_summary(result, size, char_count);
711
- }
712
-
713
- void Benchmark::run_validate_utf16le(
714
- const simdutf::implementation &implementation, size_t iterations) {
715
- const simdutf::encoding_type bom =
716
- BOM::check_bom(input_data.data(), input_data.size());
717
- const char16_t *data = reinterpret_cast<const char16_t *>(
718
- input_data.data() + BOM::bom_byte_size(bom));
719
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
720
- if (size % 2 != 0) {
721
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
722
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
723
- printf(" Running function on truncated input.\n");
724
- }
725
-
726
- size /= 2;
727
-
728
- volatile bool sink{false};
729
-
730
- auto proc = [&implementation, data, size, &sink]() {
731
- sink = implementation.validate_utf16le(data, size);
732
- };
733
- count_events(proc, iterations); // warming up!
734
- const auto result = count_events(proc, iterations);
735
- if ((sink == false) && (iterations > 0)) {
736
- std::cerr << "The input was declared invalid.\n";
737
- }
738
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
739
- print_summary(result, input_data.size(), char_count);
740
- }
741
-
742
- void Benchmark::run_validate_utf16le_with_errors(
743
- const simdutf::implementation &implementation, size_t iterations) {
744
- const simdutf::encoding_type bom =
745
- BOM::check_bom(input_data.data(), input_data.size());
746
- const char16_t *data = reinterpret_cast<const char16_t *>(
747
- input_data.data() + BOM::bom_byte_size(bom));
748
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
749
- if (size % 2 != 0) {
750
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
751
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
752
- printf(" Running function on truncated input.\n");
753
- }
754
-
755
- size /= 2;
756
-
757
- volatile bool sink{false};
758
-
759
- auto proc = [&implementation, data, size, &sink]() {
760
- result res = implementation.validate_utf16le_with_errors(data, size);
761
- sink = !(res.error);
762
- };
763
- count_events(proc, iterations); // warming up!
764
- const auto result = count_events(proc, iterations);
765
- if ((sink == false) && (iterations > 0)) {
766
- std::cerr << "The input was declared invalid.\n";
767
- }
768
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
769
- print_summary(result, input_data.size(), char_count);
770
- }
771
-
772
- void Benchmark::run_validate_utf32(
773
- const simdutf::implementation &implementation, size_t iterations) {
774
- const simdutf::encoding_type bom =
775
- BOM::check_bom(input_data.data(), input_data.size());
776
- const char32_t *data = reinterpret_cast<const char32_t *>(
777
- input_data.data() + BOM::bom_byte_size(bom));
778
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
779
- if (size % 2 != 0) {
780
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
781
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
782
- printf(" Running function on truncated input.\n");
783
- }
784
-
785
- size /= 4;
786
-
787
- volatile bool sink{false};
788
-
789
- auto proc = [&implementation, data, size, &sink]() {
790
- sink = implementation.validate_utf32(data, size);
791
- };
792
- count_events(proc, iterations); // warming up!
793
- const auto result = count_events(proc, iterations);
794
- if ((sink == false) && (iterations > 0)) {
795
- std::cerr << "The input was declared invalid.\n";
796
- }
797
- size_t char_count = size;
798
- print_summary(result, input_data.size(), char_count);
799
- }
800
-
801
- void Benchmark::run_validate_utf32_with_errors(
802
- const simdutf::implementation &implementation, size_t iterations) {
803
- const simdutf::encoding_type bom =
804
- BOM::check_bom(input_data.data(), input_data.size());
805
- const char32_t *data = reinterpret_cast<const char32_t *>(
806
- input_data.data() + BOM::bom_byte_size(bom));
807
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
808
- if (size % 4 != 0) {
809
- printf(
810
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
811
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
812
- printf(" Running function on truncated input.\n");
813
- }
814
-
815
- size /= 4;
816
-
817
- volatile bool sink{false};
818
-
819
- auto proc = [&implementation, data, size, &sink]() {
820
- result res = implementation.validate_utf32_with_errors(data, size);
821
- sink = !(res.error);
822
- };
823
- count_events(proc, iterations); // warming up!
824
- const auto result = count_events(proc, iterations);
825
- if ((sink == false) && (iterations > 0)) {
826
- std::cerr << "The input was declared invalid.\n";
827
- }
828
- size_t char_count = size;
829
- print_summary(result, input_data.size(), char_count);
830
- }
831
-
832
- void Benchmark::run_convert_latin1_to_utf8(
833
- const simdutf::implementation &implementation, size_t iterations) {
834
- const char *data = reinterpret_cast<const char *>(input_data.data());
835
- const size_t size = input_data.size();
836
- std::unique_ptr<char[]> output_buffer{new char[size * 2]};
837
- volatile size_t sink{0};
838
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
839
- sink =
840
- implementation.convert_latin1_to_utf8(data, size, output_buffer.get());
841
- };
842
- count_events(proc, iterations); // warming up!
843
- const auto result = count_events(proc, iterations);
844
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
845
- std::cerr << "The output is zero which might indicate an error.\n";
846
- }
847
- size_t char_count = size;
848
- print_summary(result, size, char_count);
849
- }
850
-
851
- void Benchmark::run_convert_latin1_to_utf16le(
852
- const simdutf::implementation &implementation, size_t iterations) {
853
- const char *data = reinterpret_cast<const char *>(input_data.data());
854
- const size_t size = input_data.size();
855
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
856
- volatile size_t sink{0};
857
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
858
- sink = implementation.convert_latin1_to_utf16le(data, size,
859
- output_buffer.get());
860
- };
861
- count_events(proc, iterations); // warming up!
862
- const auto result = count_events(proc, iterations);
863
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
864
- std::cerr << "The output is zero which might indicate an error.\n";
865
- }
866
- size_t char_count = size;
867
- print_summary(result, size, char_count);
868
- }
869
-
870
- void Benchmark::run_convert_latin1_to_utf32(
871
- const simdutf::implementation &implementation, size_t iterations) {
872
- const char *data = reinterpret_cast<const char *>(input_data.data());
873
- const size_t size = input_data.size();
874
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
875
- volatile size_t sink{0};
876
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
877
- sink =
878
- implementation.convert_latin1_to_utf32(data, size, output_buffer.get());
879
- };
880
- count_events(proc, iterations); // warming up!
881
- const auto result = count_events(proc, iterations);
882
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
883
- std::cerr << "The output is zero which might indicate an error.\n";
884
- }
885
- size_t char_count = size;
886
- print_summary(result, size, char_count);
887
- }
888
-
889
- void Benchmark::run_utf8_length_from_latin1(
890
- const simdutf::implementation &implementation, size_t iterations) {
891
- const char *data = reinterpret_cast<const char *>(input_data.data());
892
- const size_t size = input_data.size();
893
- volatile size_t sink{0};
894
-
895
- auto proc = [&implementation, data, size, &sink]() {
896
- sink = implementation.utf8_length_from_latin1(data, size);
897
- };
898
- count_events(proc, iterations); // warming up!
899
- const auto result = count_events(proc, iterations);
900
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
901
- std::cerr << "The output is zero which might indicate an error.\n";
902
- }
903
- size_t char_count = get_active_implementation()->count_utf8(data, size);
904
- print_summary(result, size, char_count);
905
- }
906
-
907
- void Benchmark::run_utf8_length_from_utf16le(
908
- const simdutf::implementation &implementation, size_t iterations) {
909
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
910
- const size_t size = input_data.size() / 2;
911
- volatile size_t sink{0};
912
-
913
- auto proc = [&implementation, data, size, &sink]() {
914
- sink = implementation.utf8_length_from_utf16le(data, size);
915
- };
916
- count_events(proc, iterations); // warming up!
917
- const auto result = count_events(proc, iterations);
918
- print_summary(result, size, size);
919
- }
920
-
921
- void Benchmark::run_utf8_length_from_utf16be(
922
- const simdutf::implementation &implementation, size_t iterations) {
923
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
924
- const size_t size = input_data.size() / 2;
925
- volatile size_t sink{0};
926
-
927
- auto proc = [&implementation, data, size, &sink]() {
928
- sink = implementation.utf8_length_from_utf16be(data, size);
929
- };
930
- count_events(proc, iterations); // warming up!
931
- const auto result = count_events(proc, iterations);
932
- print_summary(result, size, size);
933
- }
934
-
935
- void Benchmark::run_utf8_length_from_utf16le_with_replacement(
936
- const simdutf::implementation &implementation, size_t iterations) {
937
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
938
- const size_t size = input_data.size() / 2;
939
- volatile size_t sink{0};
940
-
941
- auto proc = [&implementation, data, size, &sink]() {
942
- auto r =
943
- implementation.utf8_length_from_utf16le_with_replacement(data, size);
944
- sink = r.count;
945
- };
946
- count_events(proc, iterations); // warming up!
947
- const auto result = count_events(proc, iterations);
948
- print_summary(result, size, size);
949
- }
950
-
951
- void Benchmark::run_utf8_length_from_utf16be_with_replacement(
952
- const simdutf::implementation &implementation, size_t iterations) {
953
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
954
- const size_t size = input_data.size() / 2;
955
- volatile size_t sink{0};
956
-
957
- auto proc = [&implementation, data, size, &sink]() {
958
- auto r =
959
- implementation.utf8_length_from_utf16be_with_replacement(data, size);
960
- sink = r.count;
961
- };
962
- count_events(proc, iterations); // warming up!
963
- const auto result = count_events(proc, iterations);
964
- print_summary(result, size, size);
965
- }
966
-
967
- void Benchmark::run_utf8_length_from_utf32(
968
- const simdutf::implementation &implementation, size_t iterations) {
969
- const char32_t *data = reinterpret_cast<const char32_t *>(input_data.data());
970
- const size_t size = input_data.size() / 4;
971
- volatile size_t sink{0};
972
-
973
- auto proc = [&implementation, data, size, &sink]() {
974
- sink = implementation.utf8_length_from_utf32(data, size);
975
- };
976
- count_events(proc, iterations); // warming up!
977
- const auto result = count_events(proc, iterations);
978
- print_summary(result, size, size);
979
- }
980
-
981
- void Benchmark::run_to_well_formed_utf16le(
982
- const simdutf::implementation &implementation, size_t iterations) {
983
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
984
- const size_t size = input_data.size() / 2;
985
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
986
- auto proc = [&implementation, data, size, &output_buffer]() {
987
- implementation.to_well_formed_utf16le(data, size, output_buffer.get());
988
- };
989
- count_events(proc, iterations); // warming up!
990
- const auto result = count_events(proc, iterations);
991
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
992
- print_summary(result, input_data.size(), char_count);
993
- }
994
-
995
- void Benchmark::run_utf16_length_from_utf8(
996
- const simdutf::implementation &implementation, size_t iterations) {
997
- const char *data = reinterpret_cast<const char *>(input_data.data());
998
- const size_t size = input_data.size() / 4;
999
- volatile size_t sink{0};
1000
-
1001
- auto proc = [&implementation, data, size, &sink]() {
1002
- sink = implementation.utf16_length_from_utf8(data, size);
1003
- };
1004
- count_events(proc, iterations); // warming up!
1005
- const auto result = count_events(proc, iterations);
1006
- print_summary(result, size, size);
1007
- }
1008
-
1009
/**
 * Counts the set bits of a 64-bit value.
 *
 * Uses the compiler builtin on GCC-compatible compilers, the __popcnt64
 * intrinsic on 64-bit non-ARM MSVC, and a bit-parallel fallback otherwise.
 *
 * @param v value to inspect.
 * @return number of bits set in v.
 */
static inline uint32_t portable_popcount(uint64_t v) {
#ifdef __GNUC__
  const auto ones = __builtin_popcountll(v);
  return static_cast<uint32_t>(ones);
#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 &&              \
    !defined(_M_ARM64)
  const auto ones = __popcnt64(static_cast<__int64>(v));
  return static_cast<uint32_t>(ones);
#else
  // Sum bits in progressively wider fields: pairs, nibbles, then bytes.
  const uint64_t pairs = v - ((v >> 1) & 0x5555555555555555);
  const uint64_t nibbles =
      (pairs & 0x3333333333333333) + ((pairs >> 2) & 0x3333333333333333);
  const uint64_t bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0F;
  // The multiply accumulates all byte sums into the top byte.
  return static_cast<uint32_t>((bytes * (0x0101010101010101)) >> 56);
#endif
}
1022
-
1023
- void Benchmark::run_utf8_length_from_latin1_node(size_t iterations) {
1024
- const char *data = reinterpret_cast<const char *>(input_data.data());
1025
- const size_t size = input_data.size();
1026
- volatile size_t sink{0};
1027
-
1028
- auto proc = [data, size, &sink]() {
1029
- // from https://github.com/nodejs/node/pull/54345
1030
- uint32_t length = size;
1031
- uint32_t result = length;
1032
- uint32_t i = 0;
1033
- const auto length8 = length & ~0x7;
1034
- while (i < length8) {
1035
- // Original PR used std::popcount, but it is not available pre-C++20.
1036
- result += portable_popcount(
1037
- *reinterpret_cast<const uint64_t *>(data + i) & 0x8080808080808080);
1038
- i += 8;
1039
- }
1040
- while (i < length) {
1041
- result += (data[i] >> 7);
1042
- i++;
1043
- }
1044
- sink = result;
1045
- };
1046
- count_events(proc, iterations); // warming up!
1047
- const auto result = count_events(proc, iterations);
1048
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1049
- std::cerr << "The output is zero which might indicate an error.\n";
1050
- }
1051
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1052
- print_summary(result, size, char_count);
1053
- }
1054
-
1055
- void Benchmark::run_convert_utf8_to_latin1(
1056
- const simdutf::implementation &implementation, size_t iterations) {
1057
- const char *data = reinterpret_cast<const char *>(input_data.data());
1058
- const size_t size = input_data.size();
1059
- std::unique_ptr<char[]> output_buffer{new char[size]};
1060
- volatile size_t sink{0};
1061
-
1062
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1063
- sink =
1064
- implementation.convert_utf8_to_latin1(data, size, output_buffer.get());
1065
- };
1066
- count_events(proc, iterations); // warming up!
1067
- const auto result = count_events(proc, iterations);
1068
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1069
- std::cerr << "The output is zero which might indicate an error.\n";
1070
- }
1071
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1072
- print_summary(result, size, char_count);
1073
- }
1074
-
1075
- void Benchmark::run_convert_utf8_to_latin1_with_errors(
1076
- const simdutf::implementation &implementation, size_t iterations) {
1077
- const char *data = reinterpret_cast<const char *>(input_data.data());
1078
- const size_t size = input_data.size();
1079
- std::unique_ptr<char[]> output_buffer{new char[size]};
1080
- volatile bool sink{false};
1081
-
1082
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1083
- result res = implementation.convert_utf8_to_latin1_with_errors(
1084
- data, size, output_buffer.get());
1085
- sink = !(res.error);
1086
- };
1087
- count_events(proc, iterations); // warming up!
1088
- const auto result = count_events(proc, iterations);
1089
- if ((sink == false) && (iterations > 0)) {
1090
- std::cerr << "The input was declared invalid.\n";
1091
- }
1092
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1093
- print_summary(result, size, char_count);
1094
- }
1095
-
1096
- void Benchmark::run_convert_valid_utf8_to_latin1(
1097
- const simdutf::implementation &implementation, size_t iterations) {
1098
- const char *data = reinterpret_cast<const char *>(input_data.data());
1099
- const size_t size = input_data.size();
1100
- std::unique_ptr<char[]> output_buffer{new char[size]};
1101
- volatile size_t sink{0};
1102
-
1103
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1104
- sink = implementation.convert_valid_utf8_to_latin1(data, size,
1105
- output_buffer.get());
1106
- };
1107
- count_events(proc, iterations); // warming up!
1108
- const auto result = count_events(proc, iterations);
1109
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1110
- std::cerr << "The output is zero which might indicate an error.\n";
1111
- }
1112
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1113
- print_summary(result, size, char_count);
1114
- }
1115
-
1116
- void Benchmark::run_convert_utf8_to_utf16le(
1117
- const simdutf::implementation &implementation, size_t iterations) {
1118
- const char *data = reinterpret_cast<const char *>(input_data.data());
1119
- const size_t size = input_data.size();
1120
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1121
- volatile size_t sink{0};
1122
-
1123
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1124
- sink =
1125
- implementation.convert_utf8_to_utf16le(data, size, output_buffer.get());
1126
- };
1127
- count_events(proc, iterations); // warming up!
1128
- const auto result = count_events(proc, iterations);
1129
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1130
- std::cerr << "The output is zero which might indicate an error.\n";
1131
- }
1132
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1133
- print_summary(result, size, char_count);
1134
- }
1135
-
1136
- void Benchmark::run_convert_utf8_to_utf16le_with_errors(
1137
- const simdutf::implementation &implementation, size_t iterations) {
1138
- const char *data = reinterpret_cast<const char *>(input_data.data());
1139
- const size_t size = input_data.size();
1140
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1141
- volatile bool sink{false};
1142
-
1143
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1144
- result res = implementation.convert_utf8_to_utf16le_with_errors(
1145
- data, size, output_buffer.get());
1146
- sink = !(res.error);
1147
- };
1148
- count_events(proc, iterations); // warming up!
1149
- const auto result = count_events(proc, iterations);
1150
- if ((sink == false) && (iterations > 0)) {
1151
- std::cerr << "The input was declared invalid.\n";
1152
- }
1153
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1154
- print_summary(result, size, char_count);
1155
- }
1156
-
1157
- void Benchmark::run_convert_utf8_to_utf32(
1158
- const simdutf::implementation &implementation, size_t iterations) {
1159
- const char *data = reinterpret_cast<const char *>(input_data.data());
1160
- const size_t size = input_data.size();
1161
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1162
- volatile size_t sink{0};
1163
-
1164
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1165
- sink =
1166
- implementation.convert_utf8_to_utf32(data, size, output_buffer.get());
1167
- };
1168
- count_events(proc, iterations); // warming up!
1169
- const auto result = count_events(proc, iterations);
1170
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1171
- std::cerr << "The output is zero which might indicate an error.\n";
1172
- }
1173
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1174
- print_summary(result, size, char_count);
1175
- }
1176
-
1177
- void Benchmark::run_convert_utf8_to_utf32_with_errors(
1178
- const simdutf::implementation &implementation, size_t iterations) {
1179
- const char *data = reinterpret_cast<const char *>(input_data.data());
1180
- const size_t size = input_data.size();
1181
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1182
- volatile bool sink{false};
1183
-
1184
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
1185
- result res = implementation.convert_utf8_to_utf32_with_errors(
1186
- data, size, output_buffer.get());
1187
- sink = !(res.error);
1188
- };
1189
- count_events(proc, iterations); // warming up!
1190
- const auto result = count_events(proc, iterations);
1191
- if ((sink == false) && (iterations > 0)) {
1192
- std::cerr << "The input was declared invalid.\n";
1193
- }
1194
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1195
- print_summary(result, size, char_count);
1196
- }
1197
-
1198
- void Benchmark::run_convert_utf8_to_utf16le_with_dynamic_allocation(
1199
- const simdutf::implementation &implementation, size_t iterations) {
1200
- const char *data = reinterpret_cast<const char *>(input_data.data());
1201
- const size_t size = input_data.size();
1202
- volatile size_t sink{0};
1203
- auto proc = [&implementation, data, size, &sink]() {
1204
- auto dyn_size = implementation.utf16_length_from_utf8(data, size);
1205
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[dyn_size]};
1206
- sink =
1207
- implementation.convert_utf8_to_utf16le(data, size, output_buffer.get());
1208
- };
1209
- count_events(proc, iterations); // warming up!
1210
- const auto result = count_events(proc, iterations);
1211
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1212
- std::cerr << "The output is zero which might indicate an error.\n";
1213
- }
1214
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1215
- print_summary(result, size, char_count);
1216
- }
1217
-
1218
- void Benchmark::run_convert_utf8_to_utf32_with_dynamic_allocation(
1219
- const simdutf::implementation &implementation, size_t iterations) {
1220
- const char *data = reinterpret_cast<const char *>(input_data.data());
1221
- const size_t size = input_data.size();
1222
- volatile size_t sink{0};
1223
- auto proc = [&implementation, data, size, &sink]() {
1224
- auto dyn_size = implementation.utf32_length_from_utf8(data, size);
1225
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[dyn_size]};
1226
- sink =
1227
- implementation.convert_utf8_to_utf32(data, size, output_buffer.get());
1228
- };
1229
- count_events(proc, iterations); // warming up!
1230
- const auto result = count_events(proc, iterations);
1231
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1232
- std::cerr << "The output is zero which might indicate an error.\n";
1233
- }
1234
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1235
- print_summary(result, size, char_count);
1236
- }
1237
-
1238
- #ifdef ICU_AVAILABLE
1239
-
1240
- void Benchmark::run_convert_latin1_to_utf8_icu(size_t iterations) {
1241
- const char *data = reinterpret_cast<const char *>(input_data.data());
1242
- const size_t size = input_data.size();
1243
- volatile size_t sink{0};
1244
-
1245
- // Allocate target buffer
1246
- int32_t targetCapacity = size * 2;
1247
- std::unique_ptr<char[]> target(new char[targetCapacity]);
1248
-
1249
- auto proc = [data, size, &sink, &target, targetCapacity]() {
1250
- UErrorCode status = U_ZERO_ERROR;
1251
-
1252
- // Open converters for source and target encodings
1253
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1254
- assert(U_SUCCESS(status));
1255
- UConverter *utf8conv = ucnv_open("UTF-8", &status);
1256
- assert(U_SUCCESS(status));
1257
-
1258
- // Pointers for source and target
1259
- const char *source = data;
1260
- const char *sourceLimit = data + size;
1261
- char *targetStart = target.get();
1262
- char *targetLimit = target.get() + targetCapacity;
1263
-
1264
- // Convert from ISO-8859-1 to UTF-8
1265
- ucnv_convertEx(utf8conv, latin1conv, &targetStart, targetLimit, &source,
1266
- sourceLimit, nullptr, nullptr, nullptr, nullptr, true, true,
1267
- &status);
1268
- assert(U_SUCCESS(status));
1269
-
1270
- // Calculate the output size
1271
- sink = targetStart - target.get();
1272
-
1273
- // Clean up
1274
- ucnv_close(utf8conv);
1275
- ucnv_close(latin1conv);
1276
- };
1277
-
1278
- count_events(proc, iterations); // warming up!
1279
- const auto result = count_events(proc, iterations);
1280
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1281
- std::cerr
1282
- << "The output is zero which might indicate a misconfiguration.\n";
1283
- }
1284
- size_t char_count = size;
1285
- std::unique_ptr<char[]> output_buffer{new char[size * 2]};
1286
- size_t expected = get_active_implementation()->convert_latin1_to_utf8(
1287
- data, size, output_buffer.get());
1288
- if (expected != sink) {
1289
- std::cerr << "The number of characters outputted does not match.\n";
1290
- std::cout << "Expected: " << expected << ", Sink: " << sink
1291
- << std::endl; // print values
1292
- }
1293
-
1294
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1295
- std::cerr << "The output data does not match.\n";
1296
- }
1297
-
1298
- print_summary(result, size, char_count);
1299
- }
1300
-
1301
- void Benchmark::run_convert_latin1_to_utf16_icu(size_t iterations) {
1302
- const char *data = reinterpret_cast<const char *>(input_data.data());
1303
- const size_t size = input_data.size();
1304
- volatile size_t sink{0};
1305
-
1306
- // Allocate target buffer outside lambda
1307
- std::unique_ptr<UChar[]> target(new UChar[size * 2]);
1308
-
1309
- auto proc = [data, size, &sink, &target]() {
1310
- UErrorCode status = U_ZERO_ERROR;
1311
-
1312
- // Open converter for source encoding
1313
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1314
- assert(U_SUCCESS(status));
1315
-
1316
- // Convert from ISO-8859-1 to UTF-16 directly
1317
- int32_t actualTargetSize =
1318
- ucnv_toUChars(latin1conv, target.get(), size * 2, data, size, &status);
1319
- assert(U_SUCCESS(status));
1320
-
1321
- // Calculate the output size in bytes
1322
- sink = actualTargetSize * sizeof(UChar);
1323
-
1324
- // Clean up
1325
- ucnv_close(latin1conv);
1326
- };
1327
-
1328
- count_events(proc, iterations); // warming up!
1329
- const auto result = count_events(proc, iterations);
1330
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1331
- std::cerr
1332
- << "The output is zero which might indicate a misconfiguration.\n";
1333
- }
1334
- size_t char_count = size;
1335
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1336
- size_t expected = get_active_implementation()->convert_latin1_to_utf16le(
1337
- data, size, output_buffer.get()); // expected char16_t units
1338
- if (2 * expected != sink) {
1339
- std::cerr << "The number of utf16le code units does not match.\n";
1340
- std::cerr << "Expected: " << 2 * expected + 1 << ", Sink: " << sink
1341
- << std::endl; // print values
1342
- }
1343
-
1344
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1345
- std::cerr << "The output data does not match.\n";
1346
- // compare first 20 characters and print their hexadecimal values
1347
- std::cout << "First 20 characters of target data: ";
1348
- for (size_t i = 0; i < 20; i++) {
1349
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1350
- }
1351
- std::cout << "\nFirst 20 characters of output buffer: ";
1352
- for (size_t i = 0; i < 20; i++) {
1353
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1354
- }
1355
-
1356
- // compare last 20 characters and print their hexadecimal values
1357
- size_t num_chars = sink / sizeof(UChar);
1358
- size_t start = num_chars < 20 ? 0 : num_chars - 20;
1359
- std::cout << "\nLast 20 characters of target data: ";
1360
- for (size_t i = start; i < num_chars; i++) {
1361
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1362
- }
1363
- std::cout << "\nLast 20 characters of output buffer: ";
1364
- for (size_t i = start; i < num_chars; i++) {
1365
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1366
- }
1367
- }
1368
-
1369
- print_summary(result, size, char_count);
1370
- }
1371
-
1372
- void Benchmark::run_convert_latin1_to_utf32_icu(size_t iterations) {
1373
- const char *data = reinterpret_cast<const char *>(input_data.data());
1374
- const size_t size = input_data.size();
1375
- volatile size_t sink{0};
1376
-
1377
- std::unique_ptr<char[]> target;
1378
-
1379
- auto proc = [&target, data, size, &sink]() {
1380
- UErrorCode status = U_ZERO_ERROR;
1381
-
1382
- // Open converters for source and target encodings
1383
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1384
- assert(U_SUCCESS(status));
1385
- UConverter *utf32conv = ucnv_open("UTF-32LE", &status);
1386
- assert(U_SUCCESS(status));
1387
-
1388
- // Allocate target buffer
1389
- int32_t targetCapacity = size * 4; // UTF-32 takes four bytes.
1390
- target.reset(new char[targetCapacity]);
1391
-
1392
- // Pointers for source and target
1393
- const char *source = data;
1394
- const char *sourceLimit = data + size;
1395
- char *targetStart = target.get();
1396
- char *targetLimit = target.get() + targetCapacity;
1397
-
1398
- // Convert from ISO-8859-1 to UTF-32
1399
- ucnv_convertEx(utf32conv, latin1conv, &targetStart, targetLimit, &source,
1400
- sourceLimit, nullptr, nullptr, nullptr, nullptr, true, true,
1401
- &status);
1402
- assert(U_SUCCESS(status));
1403
-
1404
- // Calculate the output size in bytes
1405
- sink = targetStart - target.get();
1406
-
1407
- // Clean up
1408
- ucnv_close(utf32conv);
1409
- ucnv_close(latin1conv);
1410
- };
1411
-
1412
- count_events(proc, iterations); // warming up!
1413
- const auto result = count_events(proc, iterations);
1414
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1415
- std::cerr
1416
- << "The output is zero which might indicate a misconfiguration.\n";
1417
- }
1418
- size_t char_count = size;
1419
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1420
- size_t expected = get_active_implementation()->convert_latin1_to_utf32(
1421
- data, size, output_buffer.get()); // expected is the # of UTF32 characters
1422
- if (4 * expected != sink) {
1423
- std::cerr
1424
- << "The number of characters outputted does not match.\n"; // each UTF32
1425
- // character
1426
- // takes four
1427
- // bytes
1428
- std::cout << "Expected: " << expected << ", Sink: " << sink
1429
- << std::endl; // print values
1430
- }
1431
-
1432
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1433
- std::cerr << "The output data does not match.\n";
1434
- // compare first 20 characters and print their hexadecimal values
1435
- std::cout << "First 20 characters of target data: ";
1436
- for (size_t i = 0; i < 20; i++) {
1437
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1438
- }
1439
- std::cout << "\nFirst 20 characters of output buffer: ";
1440
- for (size_t i = 0; i < 20; i++) {
1441
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1442
- }
1443
-
1444
- // compare last 20 characters and print their hexadecimal values
1445
- size_t num_chars = sink / sizeof(UChar);
1446
- size_t start = num_chars < 20 ? 0 : num_chars - 20;
1447
- std::cout << "\nLast 20 characters of target data: ";
1448
- for (size_t i = start; i < num_chars; i++) {
1449
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1450
- }
1451
- std::cout << "\nLast 20 characters of output buffer: ";
1452
- for (size_t i = start; i < num_chars; i++) {
1453
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1454
- }
1455
- }
1456
-
1457
- print_summary(result, size, char_count);
1458
- }
1459
-
1460
- void Benchmark::run_convert_utf8_to_latin1_icu(size_t iterations) {
1461
- const char *data = reinterpret_cast<const char *>(input_data.data());
1462
- const size_t size = input_data.size();
1463
- volatile size_t sink{0};
1464
-
1465
- std::unique_ptr<char[]> target;
1466
-
1467
- auto proc = [&target, data, size, &sink]() {
1468
- UErrorCode status = U_ZERO_ERROR;
1469
-
1470
- // Open converters for source and target encodings
1471
- UConverter *utf8conv = ucnv_open("UTF-8", &status);
1472
- assert(U_SUCCESS(status));
1473
- UConverter *latin1conv = ucnv_open("ISO-8859-1", &status);
1474
- assert(U_SUCCESS(status));
1475
-
1476
- // Allocate target buffer
1477
- int32_t targetCapacity = size * 2;
1478
- target.reset(new char[targetCapacity]);
1479
-
1480
- // Pointers for source and target
1481
- const char *source = data;
1482
- const char *sourceLimit = data + size;
1483
- char *targetStart = target.get();
1484
- char *targetLimit = target.get() + targetCapacity;
1485
-
1486
- // Convert from ISO-8859-1 to UTF-8
1487
- ucnv_convertEx(latin1conv, utf8conv, &targetStart, targetLimit, &source,
1488
- sourceLimit, nullptr, nullptr, nullptr, nullptr, true, true,
1489
- &status);
1490
- assert(U_SUCCESS(status));
1491
-
1492
- // Calculate the output size
1493
- sink = targetStart - target.get();
1494
-
1495
- // Clean up
1496
- ucnv_close(utf8conv);
1497
- ucnv_close(latin1conv);
1498
- };
1499
-
1500
- count_events(proc, iterations); // warming up!
1501
- const auto result = count_events(proc, iterations);
1502
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1503
- std::cerr
1504
- << "The output is zero which might indicate a misconfiguration.\n";
1505
- }
1506
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1507
- std::unique_ptr<char[]> output_buffer{new char[size]};
1508
- size_t expected = get_active_implementation()->convert_utf8_to_latin1(
1509
- data, size, output_buffer.get());
1510
- if (expected != sink) {
1511
- std::cerr << "The number of latin1 code units does not match.\n";
1512
- }
1513
-
1514
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1515
- std::cerr << "The output data does not match.\n";
1516
- // compare first 20 characters and print their hexadecimal values
1517
- std::cout << "First 20 characters of target data: ";
1518
- for (size_t i = 0; i < 20; i++) {
1519
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1520
- }
1521
- std::cout << "\nFirst 20 characters of output buffer: ";
1522
- for (size_t i = 0; i < 20; i++) {
1523
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1524
- }
1525
- }
1526
-
1527
- print_summary(result, size, char_count);
1528
- }
1529
-
1530
- void Benchmark::run_convert_utf8_to_utf16_icu(size_t iterations) {
1531
- const char *data = reinterpret_cast<const char *>(input_data.data());
1532
- const size_t size = input_data.size();
1533
- volatile size_t sink{0};
1534
- auto proc = [data, size, &sink]() {
1535
- auto str =
1536
- U_ICU_NAMESPACE::UnicodeString::fromUTF8(std::string_view(data, size));
1537
- sink = str.length();
1538
- };
1539
- count_events(proc, iterations); // warming up!
1540
- const auto result = count_events(proc, iterations);
1541
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1542
- std::cerr
1543
- << "The output is zero which might indicate a misconfiguration.\n";
1544
- }
1545
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1546
- // checking
1547
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1548
- size_t expected = convert_utf8_to_utf16le(data, size, output_buffer.get());
1549
- if (expected != sink) {
1550
- std::cerr << "The number of UTF-16 code units does not match.\n";
1551
- }
1552
- print_summary(result, size, char_count);
1553
- }
1554
- void Benchmark::run_convert_utf16_to_utf8_icu(size_t iterations) {
1555
- const simdutf::encoding_type bom =
1556
- BOM::check_bom(input_data.data(), input_data.size());
1557
- const char16_t *data = reinterpret_cast<const char16_t *>(
1558
- input_data.data() + BOM::bom_byte_size(bom));
1559
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1560
- if (size % 2 != 0) {
1561
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1562
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1563
- printf(" Running function on truncated input.\n");
1564
- }
1565
- size /= 2;
1566
- volatile size_t sink{0};
1567
-
1568
- auto proc = [data, size, &sink]() {
1569
- U_ICU_NAMESPACE::UnicodeString str(data, size);
1570
- std::string out;
1571
- out = str.toUTF8String(out);
1572
- sink = out.size();
1573
- };
1574
- count_events(proc, iterations); // warming up!
1575
- const auto result = count_events(proc, iterations);
1576
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1577
- std::cerr << "The output is zero which might indicate an error.\n";
1578
- }
1579
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
1580
- print_summary(result, input_data.size(), char_count);
1581
- }
1582
-
1583
- void Benchmark::run_convert_utf16_to_latin1_icu(size_t iterations) {
1584
- const simdutf::encoding_type bom =
1585
- BOM::check_bom(input_data.data(), input_data.size());
1586
- const char16_t *data = reinterpret_cast<const char16_t *>(
1587
- input_data.data() + BOM::bom_byte_size(bom));
1588
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1589
- if (size % 2 != 0) {
1590
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1591
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1592
- printf(" Running function on truncated input.\n");
1593
- }
1594
- size /= 2;
1595
- volatile size_t sink{0};
1596
-
1597
- std::unique_ptr<char[]> target;
1598
-
1599
- auto proc = [&target, data, size, &sink]() {
1600
- UErrorCode status = U_ZERO_ERROR;
1601
- UConverter *conv =
1602
- ucnv_open("ISO-8859-1", &status); // open a converter for ISO-8859-1
1603
- assert(U_SUCCESS(status));
1604
-
1605
- int32_t targetCapacity = size; // adjust as needed
1606
- target.reset(new char[targetCapacity]);
1607
- char *targetStart = target.get();
1608
-
1609
- sink =
1610
- ucnv_fromUChars(conv, targetStart, targetCapacity,
1611
- reinterpret_cast<const UChar *>(data), size, &status);
1612
- assert(U_SUCCESS(status));
1613
-
1614
- // Clean up
1615
- ucnv_close(conv);
1616
- };
1617
-
1618
- count_events(proc, iterations); // warming up!
1619
- const auto result = count_events(proc, iterations);
1620
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1621
- std::cerr
1622
- << "The output is zero which might indicate a misconfiguration.\n";
1623
- }
1624
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
1625
- std::unique_ptr<char[]> output_buffer{new char[size]};
1626
- size_t expected = get_active_implementation()->convert_utf16le_to_latin1(
1627
- data, size, output_buffer.get());
1628
- if (expected != sink) {
1629
- std::cerr << "The number of expected bytes does not match.\n";
1630
- std::cout << "Expected: " << expected << ", Sink: " << sink
1631
- << std::endl; // print values
1632
- }
1633
-
1634
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1635
- std::cerr << "The output data does not match.\n";
1636
- // compare first 20 characters and print their hexadecimal values
1637
- std::cout << "First 20 characters of target data: ";
1638
- for (size_t i = 0; i < 20; i++) {
1639
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1640
- }
1641
- std::cout << "\nFirst 20 characters of output buffer: ";
1642
- for (size_t i = 0; i < 20; i++) {
1643
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1644
- }
1645
- }
1646
-
1647
- print_summary(result, input_data.size(), char_count);
1648
- }
1649
-
1650
- void Benchmark::run_convert_utf32_to_latin1_icu(size_t iterations) {
1651
- const simdutf::encoding_type bom =
1652
- BOM::check_bom(input_data.data(), input_data.size());
1653
- const char32_t *data = reinterpret_cast<const char32_t *>(
1654
- input_data.data() + BOM::bom_byte_size(bom));
1655
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1656
- if (size % 4 != 0) {
1657
- printf(
1658
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
1659
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1660
- printf(" Running function on truncated input.\n");
1661
- }
1662
-
1663
- size /= 4;
1664
- volatile size_t sink{0};
1665
- std::unique_ptr<char[]> target;
1666
-
1667
- auto proc = [&target, data, size, &sink]() {
1668
- UErrorCode status = U_ZERO_ERROR;
1669
-
1670
- UConverter *utf32conv =
1671
- ucnv_open("UTF-32LE", &status); // create a UTF-32 converter
1672
- assert(U_SUCCESS(status));
1673
-
1674
- UConverter *latin1conv =
1675
- ucnv_open("ISO-8859-1", &status); // create a Latin1 converter
1676
- assert(U_SUCCESS(status));
1677
-
1678
- int32_t targetCapacity = size; // adjust as needed
1679
- target.reset(new char[targetCapacity]);
1680
- char *targetStart = target.get();
1681
-
1682
- const char *sourceStart = reinterpret_cast<const char *>(data);
1683
- const char *sourceEnd = sourceStart + size * sizeof(char32_t);
1684
-
1685
- // Convert from UTF-32 to Latin1
1686
- ucnv_convertEx(latin1conv, utf32conv, &targetStart,
1687
- targetStart + targetCapacity, &sourceStart, sourceEnd,
1688
- nullptr, nullptr, nullptr, nullptr, true, true, &status);
1689
- assert(U_SUCCESS(status));
1690
-
1691
- // Calculate the output size
1692
- sink = targetStart - target.get();
1693
-
1694
- // Clean up
1695
- ucnv_close(utf32conv);
1696
- ucnv_close(latin1conv);
1697
- };
1698
-
1699
- count_events(proc, iterations); // warming up!
1700
- const auto result = count_events(proc, iterations);
1701
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1702
- std::cerr
1703
- << "The output is zero which might indicate a misconfiguration.\n";
1704
- }
1705
- size_t char_count = size;
1706
- std::unique_ptr<char[]> output_buffer{new char[size]};
1707
- size_t expected = get_active_implementation()->convert_utf32_to_latin1(
1708
- data, size, output_buffer.get());
1709
- if (expected != sink) {
1710
- std::cerr << "The number of expected bytes does not match.\n";
1711
- std::cout << "Expected: " << expected << ", Sink: " << sink
1712
- << std::endl; // print values
1713
- }
1714
-
1715
- if (memcmp(target.get(), output_buffer.get(), sink) != 0) {
1716
- std::cerr << "The output data does not match.\n";
1717
- // compare first 20 characters and print their hexadecimal values
1718
- std::cout << "First 20 characters of target data: ";
1719
- for (size_t i = 0; i < 20; i++) {
1720
- std::cout << std::hex << static_cast<int>(target.get()[i]) << " ";
1721
- }
1722
- std::cout << "\nFirst 20 characters of output buffer: ";
1723
- for (size_t i = 0; i < 20; i++) {
1724
- std::cout << std::hex << static_cast<int>(output_buffer[i]) << " ";
1725
- }
1726
- }
1727
-
1728
- print_summary(result, input_data.size(), char_count);
1729
- }
1730
-
1731
- #endif
1732
-
1733
- #ifdef ICONV_AVAILABLE
1734
- void Benchmark::run_convert_latin1_to_utf8_iconv(size_t iterations) {
1735
- iconv_t cv = iconv_open("UTF-8", "ISO-8859-1");
1736
- if (cv == (iconv_t)(-1)) {
1737
- fprintf(stderr,
1738
- "[iconv] cannot initialize ISO-8859-1 to UTF-8 converter\n");
1739
- return;
1740
- }
1741
- char *data = reinterpret_cast<char *>(input_data.data());
1742
- const size_t size = input_data.size();
1743
- std::unique_ptr<char[]> output_buffer{new char[size * 2]}; // 2 for safety
1744
- volatile size_t sink{0};
1745
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1746
- size_t inbytes = size;
1747
- size_t outbytes = sizeof(uint8_t) * size * 2;
1748
- #ifdef WINICONV_CONST
1749
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1750
- #else
1751
- char *inptr = data;
1752
- #endif
1753
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1754
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1755
- if (result == static_cast<size_t>(-1)) {
1756
- sink = 0;
1757
- } else {
1758
- sink = (sizeof(uint8_t) * size - outbytes) / sizeof(char);
1759
- }
1760
- };
1761
- count_events(proc, iterations); // warming up!
1762
- const auto result = count_events(proc, iterations);
1763
- iconv_close(cv);
1764
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1765
- std::cerr << "The output is zero which might indicate an error.\n";
1766
- }
1767
- size_t char_count = size;
1768
- print_summary(result, size, char_count);
1769
- }
1770
-
1771
- void Benchmark::run_convert_latin1_to_utf16_iconv(size_t iterations) {
1772
- iconv_t cv = iconv_open("UTF-16", "ISO-8859-1");
1773
- if (cv == (iconv_t)(-1)) {
1774
- fprintf(stderr,
1775
- "[iconv] cannot initialize ISO-8859-1 to UTF-16 converter\n");
1776
- return;
1777
- }
1778
- char *data = reinterpret_cast<char *>(input_data.data());
1779
- const size_t size = input_data.size();
1780
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1781
- volatile size_t sink{0};
1782
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1783
- size_t inbytes = size;
1784
- size_t outbytes = sizeof(uint16_t) * size;
1785
- #ifdef WINICONV_CONST
1786
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1787
- #else
1788
- char *inptr = data;
1789
- #endif
1790
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1791
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1792
- if (result == static_cast<size_t>(-1)) {
1793
- sink = 0;
1794
- } else {
1795
- sink = (sizeof(uint16_t) * size - outbytes) / sizeof(char);
1796
- }
1797
- };
1798
- count_events(proc, iterations); // warming up!
1799
- const auto result = count_events(proc, iterations);
1800
- iconv_close(cv);
1801
- size_t char_count = size;
1802
- print_summary(result, size, char_count);
1803
- }
1804
-
1805
- void Benchmark::run_convert_latin1_to_utf32_iconv(size_t iterations) {
1806
- iconv_t cv = iconv_open("UTF-32LE", "ISO-8859-1");
1807
- if (cv == (iconv_t)(-1)) {
1808
- fprintf(stderr,
1809
- "[iconv] cannot initialize ISO-8859-1 to UTF-32 converter\n");
1810
- return;
1811
- }
1812
- char *data = reinterpret_cast<char *>(input_data.data());
1813
- const size_t size = input_data.size();
1814
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
1815
- volatile size_t sink{0};
1816
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1817
- size_t inbytes = size;
1818
- size_t outbytes = sizeof(uint32_t) * size;
1819
- #ifdef WINICONV_CONST
1820
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1821
- #else
1822
- char *inptr = data;
1823
- #endif
1824
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1825
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1826
- if (result == static_cast<size_t>(-1)) {
1827
- sink = 0;
1828
- } else {
1829
- sink = (sizeof(uint32_t) * size - outbytes) / sizeof(char);
1830
- ;
1831
- }
1832
- };
1833
- count_events(proc, iterations); // warming up!
1834
- const auto result = count_events(proc, iterations);
1835
- iconv_close(cv);
1836
- size_t char_count = size;
1837
- print_summary(result, size, char_count);
1838
- }
1839
-
1840
- void Benchmark::run_convert_utf8_to_latin1_iconv(size_t iterations) {
1841
- iconv_t cv = iconv_open("ISO-8859-1", "UTF-8");
1842
- if (cv == (iconv_t)(-1)) {
1843
- fprintf(stderr, "[iconv] cannot initialize UTF-8 to Latin1 converter\n");
1844
- return;
1845
- }
1846
- char *data = reinterpret_cast<char *>(input_data.data());
1847
- const size_t size = input_data.size();
1848
- std::unique_ptr<char[]> output_buffer{new char[size]};
1849
- volatile size_t sink{0};
1850
-
1851
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1852
- size_t inbytes = size;
1853
- size_t outbytes = sizeof(uint8_t) * size;
1854
- // win-iconv includes WINICONV_CONST in its function signatures
1855
- // https://github.com/simdutf/simdutf/pull/178
1856
- #ifdef WINICONV_CONST
1857
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1858
- #else
1859
- char *inptr = data;
1860
- #endif
1861
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1862
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1863
- if (result == static_cast<size_t>(-1)) {
1864
- sink = 0;
1865
- } else {
1866
- sink = (sizeof(uint8_t) * size - outbytes) / sizeof(char);
1867
- ;
1868
- }
1869
- };
1870
- count_events(proc, iterations); // warming up!
1871
- const auto result = count_events(proc, iterations);
1872
- iconv_close(cv);
1873
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1874
- std::cerr << "The output is zero which might indicate an error.\n";
1875
- }
1876
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1877
- print_summary(result, size, char_count);
1878
- }
1879
-
1880
- void Benchmark::run_convert_utf8_to_utf16_iconv(size_t iterations) {
1881
- iconv_t cv = iconv_open("UTF-16LE", "UTF-8");
1882
- if (cv == (iconv_t)(-1)) {
1883
- fprintf(stderr, "[iconv] cannot initialize UTF-8 to UTF-16LE converter\n");
1884
- return;
1885
- }
1886
- char *data = reinterpret_cast<char *>(input_data.data());
1887
- const size_t size = input_data.size();
1888
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
1889
- volatile size_t sink{0};
1890
-
1891
- auto proc = [&cv, data, size, &output_buffer, &sink]() {
1892
- size_t inbytes = size;
1893
- size_t outbytes = sizeof(uint16_t) * size;
1894
- // win-iconv includes WINICONV_CONST in its function signatures
1895
- // https://github.com/simdutf/simdutf/pull/178
1896
- #ifdef WINICONV_CONST
1897
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1898
- #else
1899
- char *inptr = data;
1900
- #endif
1901
- char *outptr = reinterpret_cast<char *>(output_buffer.get());
1902
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1903
- if (result == static_cast<size_t>(-1)) {
1904
- sink = 0;
1905
- } else {
1906
- sink = (sizeof(uint16_t) * size - outbytes) / sizeof(char);
1907
- ;
1908
- }
1909
- };
1910
- count_events(proc, iterations); // warming up!
1911
- const auto result = count_events(proc, iterations);
1912
- iconv_close(cv);
1913
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1914
- std::cerr << "The output is zero which might indicate an error.\n";
1915
- }
1916
- size_t char_count = get_active_implementation()->count_utf8(data, size);
1917
- print_summary(result, size, char_count);
1918
- }
1919
-
1920
- void Benchmark::run_convert_utf16_to_latin1_iconv(size_t iterations) {
1921
- iconv_t cv = iconv_open("ISO-8859-1", "UTF-16LE");
1922
- if (cv == (iconv_t)(-1)) {
1923
- fprintf(stderr,
1924
- "[iconv] cannot initialize the UTF-16LE to ISO-8859-1 converter\n");
1925
- return;
1926
- }
1927
- const simdutf::encoding_type bom =
1928
- BOM::check_bom(input_data.data(), input_data.size());
1929
- char16_t *data =
1930
- reinterpret_cast<char16_t *>(input_data.data() + BOM::bom_byte_size(bom));
1931
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1932
- if (size % 2 != 0) {
1933
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1934
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1935
- printf(" Running function on truncated input.\n");
1936
- }
1937
-
1938
- size /= 2;
1939
-
1940
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
1941
- // yields 4 bytes,
1942
- // thus we're making safe assumption that each 16-bit word will be
1943
- // expanded to four bytes.
1944
- std::unique_ptr<char[]> output_buffer{new char[size]};
1945
-
1946
- volatile size_t sink{0};
1947
-
1948
- auto proc = [cv, data, size, &output_buffer, &sink]() {
1949
- size_t inbytes = sizeof(uint16_t) * size;
1950
- size_t outbytes = size;
1951
- #ifdef WINICONV_CONST
1952
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
1953
- #else
1954
- char *inptr = reinterpret_cast<char *>(data);
1955
- #endif
1956
- char *outptr = output_buffer.get();
1957
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
1958
- if (result == static_cast<size_t>(-1)) {
1959
- sink = 0;
1960
- } else {
1961
- sink = (size - outbytes) / sizeof(char16_t);
1962
- }
1963
- };
1964
- count_events(proc, iterations); // warming up!
1965
- const auto result = count_events(proc, iterations);
1966
- iconv_close(cv);
1967
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
1968
- std::cerr << "The output is zero which might indicate an error.\n";
1969
- }
1970
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
1971
- print_summary(result, input_data.size(), char_count);
1972
- }
1973
-
1974
- void Benchmark::run_convert_utf16_to_utf8_iconv(size_t iterations) {
1975
- iconv_t cv = iconv_open("UTF-8", "UTF-16LE");
1976
- if (cv == (iconv_t)(-1)) {
1977
- fprintf(stderr,
1978
- "[iconv] cannot initialize the UTF-16LE to UTF-8 converter\n");
1979
- return;
1980
- }
1981
- const simdutf::encoding_type bom =
1982
- BOM::check_bom(input_data.data(), input_data.size());
1983
- char16_t *data =
1984
- reinterpret_cast<char16_t *>(input_data.data() + BOM::bom_byte_size(bom));
1985
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
1986
- if (size % 2 != 0) {
1987
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
1988
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
1989
- printf(" Running function on truncated input.\n");
1990
- }
1991
-
1992
- size /= 2;
1993
-
1994
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
1995
- // yields 4 bytes,
1996
- // thus we're making safe assumption that each 16-bit word will be
1997
- // expanded to four bytes.
1998
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
1999
-
2000
- volatile size_t sink{0};
2001
-
2002
- auto proc = [cv, data, size, &output_buffer, &sink]() {
2003
- size_t inbytes = sizeof(uint16_t) * size;
2004
- size_t outbytes = 4 * size;
2005
- #ifdef WINICONV_CONST
2006
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
2007
- #else
2008
- char *inptr = reinterpret_cast<char *>(data);
2009
- #endif
2010
- char *outptr = output_buffer.get();
2011
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
2012
- if (result == static_cast<size_t>(-1)) {
2013
- sink = 0;
2014
- } else {
2015
- sink = (4 * size - outbytes) / sizeof(char16_t);
2016
- }
2017
- };
2018
- count_events(proc, iterations); // warming up!
2019
- const auto result = count_events(proc, iterations);
2020
- iconv_close(cv);
2021
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2022
- std::cerr << "The output is zero which might indicate an error.\n";
2023
- }
2024
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2025
- print_summary(result, input_data.size(), char_count);
2026
- }
2027
-
2028
- void Benchmark::run_convert_utf32_to_latin1_iconv(size_t iterations) {
2029
- iconv_t cv = iconv_open("ISO-8859-1", "UTF-32LE");
2030
- if (cv == (iconv_t)(-1)) {
2031
- fprintf(stderr,
2032
- "[iconv] cannot initialize the UTF-32 to ISO-8859-1 converter\n");
2033
- return;
2034
- }
2035
- const simdutf::encoding_type bom =
2036
- BOM::check_bom(input_data.data(), input_data.size());
2037
- char32_t *data =
2038
- reinterpret_cast<char32_t *>(input_data.data() + BOM::bom_byte_size(bom));
2039
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2040
- if (size % 4 != 0) {
2041
- printf(
2042
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
2043
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2044
- printf(" Running function on truncated input.\n");
2045
- }
2046
-
2047
- size /= 4;
2048
-
2049
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2050
- // yields 4 bytes,
2051
- // thus we're making safe assumption that each 16-bit word will be
2052
- // expanded to four bytes.
2053
- std::unique_ptr<char[]> output_buffer{new char[size]};
2054
-
2055
- volatile size_t sink{0};
2056
-
2057
- auto proc = [cv, data, size, &output_buffer, &sink]() {
2058
- size_t inbytes = sizeof(uint32_t) * size;
2059
- size_t outbytes = size;
2060
- #ifdef WINICONV_CONST
2061
- WINICONV_CONST char *inptr = reinterpret_cast<WINICONV_CONST char *>(data);
2062
- #else
2063
- char *inptr = reinterpret_cast<char *>(data);
2064
- #endif
2065
- char *outptr = output_buffer.get();
2066
- size_t result = iconv(cv, &inptr, &inbytes, &outptr, &outbytes);
2067
- if (result == static_cast<size_t>(-1)) {
2068
- sink = 0;
2069
- abort();
2070
- } else {
2071
- sink = (size - outbytes) / sizeof(char32_t);
2072
- }
2073
- };
2074
- count_events(proc, iterations); // warming up!
2075
- const auto result = count_events(proc, iterations);
2076
- iconv_close(cv);
2077
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2078
- std::cerr << "The output is zero which might indicate an error.\n";
2079
- }
2080
- size_t char_count = size;
2081
- print_summary(result, input_data.size(), char_count);
2082
- }
2083
- #endif
2084
-
2085
- #ifdef INOUE2008
2086
- void Benchmark::run_convert_valid_utf8_to_utf16_inoue2008(size_t iterations) {
2087
- // Inoue2008 is only up to 3-byte UTF8 sequence.
2088
- for (uint8_t c : input_data) {
2089
- if (c >= 0b11110000) {
2090
- std::cerr << "Warning: Inoue 2008 does not support 4-byte inputs!"
2091
- << std::endl;
2092
- break;
2093
- }
2094
- }
2095
- // This is currently minimally tested. It is possible that the transcoding
2096
- // could be wrong. It is also unsafe: it could fail in disastrous ways if the
2097
- // input is adversarial.
2098
- const char *data = reinterpret_cast<const char *>(input_data.data());
2099
- const size_t size = input_data.size();
2100
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2101
- volatile size_t sink{0};
2102
- auto proc = [data, size, &output_buffer, &sink]() {
2103
- sink = inoue2008::convert_valid(data, size, output_buffer.get());
2104
- };
2105
- count_events(proc, iterations); // warming up!
2106
- const auto result = count_events(proc, iterations);
2107
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2108
- std::cerr
2109
- << "The output is zero which might indicate a misconfiguration.\n";
2110
- }
2111
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2112
- print_summary(result, size, char_count);
2113
- }
2114
- #endif
2115
- /**
2116
- * Bjoern Hoehrmann
2117
- * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
2118
- */
2119
- void Benchmark::run_convert_utf8_to_utf16_hoehrmann(size_t iterations) {
2120
- uint8_t const *data = input_data.data();
2121
- const size_t size = input_data.size();
2122
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2123
- volatile size_t sink{0};
2124
- auto proc = [data, size, &output_buffer, &sink]() {
2125
- sink = hoehrmann::toUtf16(data, size, output_buffer.get());
2126
- };
2127
- count_events(proc, iterations); // warming up!
2128
- const auto result = count_events(proc, iterations);
2129
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2130
- std::cerr << "The output is zero which might indicate an error.\n";
2131
- }
2132
- size_t char_count = get_active_implementation()->count_utf8(
2133
- reinterpret_cast<const char *>(data), size);
2134
- print_summary(result, size, char_count);
2135
- }
2136
- /**
2137
- * Bjoern Hoehrmann
2138
- * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
2139
- */
2140
- void Benchmark::run_convert_utf8_to_utf32_hoehrmann(size_t iterations) {
2141
- uint8_t const *data = input_data.data();
2142
- const size_t size = input_data.size();
2143
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
2144
- volatile size_t sink{0};
2145
- auto proc = [data, size, &output_buffer, &sink]() {
2146
- sink = hoehrmann::toUtf32(data, size, output_buffer.get());
2147
- };
2148
- count_events(proc, iterations); // warming up!
2149
- const auto result = count_events(proc, iterations);
2150
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2151
- std::cerr << "The output is zero which might indicate an error.\n";
2152
- }
2153
- size_t char_count = get_active_implementation()->count_utf8(
2154
- reinterpret_cast<const char *>(data), size);
2155
- print_summary(result, size, char_count);
2156
- }
2157
-
2158
- #ifdef __x86_64__
2159
- /**
2160
- * utf8lut: Vectorized UTF-8 converter.
2161
- * by stgatilov (2019)
2162
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2163
- */
2164
- void Benchmark::run_convert_utf16_to_utf8_utf8lut(size_t iterations) {
2165
- const simdutf::encoding_type bom =
2166
- BOM::check_bom(input_data.data(), input_data.size());
2167
- const char16_t *data = reinterpret_cast<const char16_t *>(
2168
- input_data.data() + BOM::bom_byte_size(bom));
2169
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2170
- if (size % 2 != 0) {
2171
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2172
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2173
- printf(" Running function on truncated input.\n");
2174
- }
2175
-
2176
- size /= 2;
2177
-
2178
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2179
- // yields 4 bytes,
2180
- // thus we're making safe assumption that each 16-bit word will be
2181
- // expanded to four bytes.
2182
- // utf8lut requires an extra 16 bytes of padding.
2183
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2184
-
2185
- volatile size_t sink{0};
2186
-
2187
- auto proc = [data, size, &output_buffer, &sink]() {
2188
- std::unique_ptr<BaseBufferProcessor> processor(
2189
- ProcessorSelector<dfUtf16, dfUtf8>::WithOptions<cmValidate>::Create());
2190
- ConversionResult result = ConvertInMemory(
2191
- *processor, reinterpret_cast<const char *>(data), 2 * size,
2192
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2193
- if (result.status != 0) {
2194
- sink = 0;
2195
- } else {
2196
- sink = result.outputSize;
2197
- }
2198
- };
2199
- count_events(proc, iterations); // warming up!
2200
- const auto result = count_events(proc, iterations);
2201
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2202
- std::cerr << "The output is zero which might indicate an error.\n";
2203
- }
2204
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2205
- print_summary(result, input_data.size(), char_count);
2206
- }
2207
- /**
2208
- * utf8lut: Vectorized UTF-8 converter.
2209
- * by stgatilov (2019)
2210
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2211
- */
2212
- void Benchmark::run_convert_valid_utf16_to_utf8_utf8lut(size_t iterations) {
2213
- const simdutf::encoding_type bom =
2214
- BOM::check_bom(input_data.data(), input_data.size());
2215
- const char16_t *data = reinterpret_cast<const char16_t *>(
2216
- input_data.data() + BOM::bom_byte_size(bom));
2217
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2218
- if (size % 2 != 0) {
2219
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2220
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2221
- printf(" Running function on truncated input.\n");
2222
- }
2223
-
2224
- size /= 2;
2225
-
2226
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2227
- // yields 4 bytes,
2228
- // thus we're making safe assumption that each 16-bit word will be
2229
- // expanded to four bytes.
2230
- // utf8lut requires an extra 16 bytes of padding.
2231
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2232
-
2233
- volatile size_t sink{0};
2234
-
2235
- auto proc = [data, size, &output_buffer, &sink]() {
2236
- std::unique_ptr<BaseBufferProcessor> processor(
2237
- ProcessorSelector<dfUtf16, dfUtf8>::WithOptions<cmFull>::Create());
2238
- ConversionResult result = ConvertInMemory(
2239
- *processor, reinterpret_cast<const char *>(data), 2 * size,
2240
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2241
- if (result.status != 0) {
2242
- sink = 0;
2243
- } else {
2244
- sink = result.outputSize;
2245
- }
2246
- };
2247
- count_events(proc, iterations); // warming up!
2248
- const auto result = count_events(proc, iterations);
2249
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2250
- std::cerr << "The output is zero which might indicate an error.\n";
2251
- }
2252
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2253
- print_summary(result, input_data.size(), char_count);
2254
- }
2255
- /**
2256
- * utf8lut: Vectorized UTF-8 converter.
2257
- * by stgatilov (2019)
2258
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2259
- */
2260
- void Benchmark::run_convert_utf8_to_utf16_utf8lut(size_t iterations) {
2261
- const char *data = reinterpret_cast<const char *>(input_data.data());
2262
- const size_t size = input_data.size();
2263
- // utf8lut requires an extra 8 bytes of padding.
2264
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2 + 8]};
2265
- volatile size_t sink{0};
2266
- auto proc = [data, size, &output_buffer, &sink]() {
2267
- std::unique_ptr<BaseBufferProcessor> processor(
2268
- ProcessorSelector<dfUtf8, dfUtf16>::WithOptions<cmValidate>::Create());
2269
- ConversionResult result = ConvertInMemory(
2270
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2271
- size * 2 + 16);
2272
- if (result.status != 0) {
2273
- sink = 0;
2274
- } else {
2275
- sink = result.outputSize / 2;
2276
- }
2277
- };
2278
- count_events(proc, iterations); // warming up!
2279
- const auto result = count_events(proc, iterations);
2280
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2281
- std::cerr
2282
- << "The output is zero which might indicate a misconfiguration.\n";
2283
- }
2284
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2285
- print_summary(result, size, char_count);
2286
- }
2287
- /**
2288
- * utf8lut: Vectorized UTF-8 converter.
2289
- * by stgatilov (2019)
2290
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2291
- */
2292
- void Benchmark::run_convert_utf8_to_utf32_utf8lut(size_t iterations) {
2293
- const char *data = reinterpret_cast<const char *>(input_data.data());
2294
- const size_t size = input_data.size();
2295
-
2296
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size + 4]};
2297
- volatile size_t sink{0};
2298
- auto proc = [data, size, &output_buffer, &sink]() {
2299
- std::unique_ptr<BaseBufferProcessor> processor(
2300
- ProcessorSelector<dfUtf8, dfUtf32>::WithOptions<cmValidate>::Create());
2301
- ConversionResult result = ConvertInMemory(
2302
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2303
- size * 4 + 16);
2304
- if (result.status != 0) {
2305
- sink = 0;
2306
- } else {
2307
- sink = result.outputSize / 2;
2308
- }
2309
- };
2310
- count_events(proc, iterations); // warming up!
2311
- const auto result = count_events(proc, iterations);
2312
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2313
- std::cerr
2314
- << "The output is zero which might indicate a misconfiguration.\n";
2315
- }
2316
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2317
- print_summary(result, size, char_count);
2318
- }
2319
- /**
2320
- * utf8lut: Vectorized UTF-8 converter.
2321
- * by stgatilov (2019)
2322
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2323
- */
2324
- void Benchmark::run_convert_valid_utf8_to_utf16_utf8lut(size_t iterations) {
2325
- const char *data = reinterpret_cast<const char *>(input_data.data());
2326
- const size_t size = input_data.size();
2327
- // utf8lut requires an extra 8 bytes of padding.
2328
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2 + 8]};
2329
- volatile size_t sink{0};
2330
- auto proc = [data, size, &output_buffer, &sink]() {
2331
- std::unique_ptr<BaseBufferProcessor> processor(
2332
- ProcessorSelector<dfUtf8, dfUtf16>::WithOptions<cmFull>::Create());
2333
- ConversionResult result = ConvertInMemory(
2334
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2335
- size * 2 + 16);
2336
- if (result.status != 0) {
2337
- sink = 0;
2338
- } else {
2339
- sink = result.outputSize / 2;
2340
- }
2341
- };
2342
- count_events(proc, iterations); // warming up!
2343
- const auto result = count_events(proc, iterations);
2344
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2345
- std::cerr
2346
- << "The output is zero which might indicate a misconfiguration.\n";
2347
- }
2348
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2349
- print_summary(result, size, char_count);
2350
- }
2351
-
2352
- /**
2353
- * utf8lut: Vectorized UTF-8 converter.
2354
- * by stgatilov (2019)
2355
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2356
- */
2357
- void Benchmark::run_convert_utf32_to_utf8_utf8lut(size_t iterations) {
2358
- const simdutf::encoding_type bom =
2359
- BOM::check_bom(input_data.data(), input_data.size());
2360
- const char32_t *data = reinterpret_cast<const char32_t *>(
2361
- input_data.data() + BOM::bom_byte_size(bom));
2362
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2363
- if (size % 4 != 0) {
2364
- printf(
2365
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
2366
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2367
- printf(" Running function on truncated input.\n");
2368
- }
2369
-
2370
- size /= 4;
2371
-
2372
- // Note: a single 32-bit word can yield up to four UTF-8 bytes. We are
2373
- // making a safe assumption that each 32-bit word will yield four
2374
- // UTF-8 bytes.
2375
- // utf8lut requires an extra 16 bytes of padding.
2376
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2377
-
2378
- volatile size_t sink{0};
2379
-
2380
- auto proc = [data, size, &output_buffer, &sink]() {
2381
- std::unique_ptr<BaseBufferProcessor> processor(
2382
- ProcessorSelector<dfUtf32, dfUtf8>::WithOptions<cmValidate>::Create());
2383
- ConversionResult result = ConvertInMemory(
2384
- *processor, reinterpret_cast<const char *>(data), 4 * size,
2385
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2386
- if (result.status != 0) {
2387
- sink = 0;
2388
- } else {
2389
- sink = result.outputSize;
2390
- }
2391
- };
2392
- count_events(proc, iterations); // warming up!
2393
- const auto result = count_events(proc, iterations);
2394
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2395
- std::cerr << "The output is zero which might indicate an error.\n";
2396
- }
2397
- size_t char_count = size;
2398
- print_summary(result, input_data.size(), char_count);
2399
- }
2400
-
2401
- /**
2402
- * utf8lut: Vectorized UTF-8 converter.
2403
- * by stgatilov (2019)
2404
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2405
- */
2406
- void Benchmark::run_convert_valid_utf8_to_utf32_utf8lut(size_t iterations) {
2407
- const char *data = reinterpret_cast<const char *>(input_data.data());
2408
- const size_t size = input_data.size();
2409
-
2410
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size + 4]};
2411
- volatile size_t sink{0};
2412
- auto proc = [data, size, &output_buffer, &sink]() {
2413
- std::unique_ptr<BaseBufferProcessor> processor(
2414
- ProcessorSelector<dfUtf8, dfUtf32>::WithOptions<cmFull>::Create());
2415
- ConversionResult result = ConvertInMemory(
2416
- *processor, data, size, reinterpret_cast<char *>(output_buffer.get()),
2417
- size * 4 + 16);
2418
- if (result.status != 0) {
2419
- sink = 0;
2420
- } else {
2421
- sink = result.outputSize / 2;
2422
- }
2423
- };
2424
- count_events(proc, iterations); // warming up!
2425
- const auto result = count_events(proc, iterations);
2426
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2427
- std::cerr << "The output is zero which might indicate an error.\n";
2428
- }
2429
- size_t char_count = size;
2430
- print_summary(result, input_data.size(), char_count);
2431
- }
2432
- /**
2433
- * utf8lut: Vectorized UTF-8 converter.
2434
- * by stgatilov (2019)
2435
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2436
- */
2437
- /**
2438
- * utf8lut: Vectorized UTF-8 converter.
2439
- * by stgatilov (2019)
2440
- * https://dirtyhandscoding.github.io/posts/utf8lut-vectorized-utf-8-converter-introduction.html
2441
- */
2442
- void Benchmark::run_convert_valid_utf32_to_utf8_utf8lut(size_t iterations) {
2443
- const simdutf::encoding_type bom =
2444
- BOM::check_bom(input_data.data(), input_data.size());
2445
- const char32_t *data = reinterpret_cast<const char32_t *>(
2446
- input_data.data() + BOM::bom_byte_size(bom));
2447
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2448
- if (size % 4 != 0) {
2449
- printf(
2450
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
2451
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2452
- printf(" Running function on truncated input.\n");
2453
- }
2454
-
2455
- size /= 4;
2456
-
2457
- // Note: a single 32-bit word can yield up to four UTF-8 bytes. We are
2458
- // making a safe assumption that each 32-bit word will yield four
2459
- // UTF-8 bytes.
2460
- // utf8lut requires an extra 16 bytes of padding.
2461
- std::unique_ptr<char[]> output_buffer{new char[size * 4 + 16]};
2462
-
2463
- volatile size_t sink{0};
2464
-
2465
- auto proc = [data, size, &output_buffer, &sink]() {
2466
- std::unique_ptr<BaseBufferProcessor> processor(
2467
- ProcessorSelector<dfUtf32, dfUtf8>::WithOptions<cmFull>::Create());
2468
- ConversionResult result = ConvertInMemory(
2469
- *processor, reinterpret_cast<const char *>(data), 4 * size,
2470
- reinterpret_cast<char *>(output_buffer.get()), size * 4 + 16);
2471
- if (result.status != 0) {
2472
- sink = 0;
2473
- } else {
2474
- sink = result.outputSize;
2475
- }
2476
- };
2477
- count_events(proc, iterations); // warming up!
2478
- const auto result = count_events(proc, iterations);
2479
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2480
- std::cerr << "The output is zero which might indicate an error.\n";
2481
- }
2482
- size_t char_count = size;
2483
- print_summary(result, input_data.size(), char_count);
2484
- }
2485
- /**
2486
- * Bob Steagall, CppCon2018
2487
- * https://github.com/BobSteagall/CppCon2018/
2488
- *
2489
- * Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics
2490
- * https://www.youtube.com/watch?v=5FQ87-Ecb-A
2491
- */
2492
- void Benchmark::run_convert_utf8_to_utf16_cppcon2018(size_t iterations) {
2493
- using char8_t = unsigned char;
2494
- const char8_t *data = reinterpret_cast<const char8_t *>(input_data.data());
2495
- const size_t size = input_data.size();
2496
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2497
- volatile size_t sink{0};
2498
- auto proc = [data, size, &output_buffer, &sink]() {
2499
- sink = uu::UtfUtils::SseConvert(data, data + size, output_buffer.get());
2500
- };
2501
- count_events(proc, iterations); // warming up!
2502
- const auto result = count_events(proc, iterations);
2503
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2504
- std::cerr
2505
- << "The output is zero which might indicate a misconfiguration.\n";
2506
- }
2507
- size_t char_count = get_active_implementation()->count_utf8(
2508
- reinterpret_cast<const char *>(data), size);
2509
- print_summary(result, size, char_count);
2510
- }
2511
- /**
2512
- * Bob Steagall, CppCon2018
2513
- * https://github.com/BobSteagall/CppCon2018/
2514
- *
2515
- * Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics
2516
- * https://www.youtube.com/watch?v=5FQ87-Ecb-A
2517
- */
2518
- void Benchmark::run_convert_utf8_to_utf32_cppcon2018(size_t iterations) {
2519
- using char8_t = unsigned char;
2520
- const char8_t *data = reinterpret_cast<const char8_t *>(input_data.data());
2521
- const size_t size = input_data.size();
2522
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
2523
- volatile size_t sink{0};
2524
- auto proc = [data, size, &output_buffer, &sink]() {
2525
- sink = uu::UtfUtils::SseConvert(data, data + size, output_buffer.get());
2526
- };
2527
- count_events(proc, iterations); // warming up!
2528
- const auto result = count_events(proc, iterations);
2529
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2530
- std::cerr
2531
- << "The output is zero which might indicate a misconfiguration.\n";
2532
- }
2533
- size_t char_count = get_active_implementation()->count_utf8(
2534
- reinterpret_cast<const char *>(data), size);
2535
- print_summary(result, size, char_count);
2536
- }
2537
- /**
2538
- * Cameron, Robert D, A case study in SIMD text processing with parallel bit
2539
- * streams: UTF-8 to UTF-16 transcoding, Proceedings of the 13th ACM SIGPLAN
2540
- * Symposium on Principles and practice of parallel programming, 91--98.
2541
- */
2542
- void Benchmark::run_convert_utf8_to_utf16_u8u16(size_t iterations) {
2543
- // u8u16 wants to take mutable chars, let us hope it does not actually mutate
2544
- // anything!
2545
- //
2546
- // This is currently untested. At a glance it looks fine, but
2547
- // it is possible that the transcoding could be wrong.
2548
- char *data = reinterpret_cast<char *>(input_data.data());
2549
- const size_t size = input_data.size();
2550
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2551
- volatile size_t sink{0};
2552
- auto proc = [data, size, &output_buffer, &sink]() {
2553
- char *srcbuf_ptr = data;
2554
- size_t inbytes_left = size;
2555
- char *trgtbuf_ptr = reinterpret_cast<char *>(output_buffer.get());
2556
- size_t outbytes_left = size * sizeof(char16_t);
2557
- size_t result_code =
2558
- u8u16(&srcbuf_ptr, &inbytes_left, &trgtbuf_ptr, &outbytes_left);
2559
- bool is_ok = (result_code != size_t(-1));
2560
- if (is_ok) {
2561
- sink = (reinterpret_cast<char16_t *>(trgtbuf_ptr) - output_buffer.get());
2562
- } else {
2563
- sink = 0;
2564
- }
2565
- };
2566
- count_events(proc, iterations); // warming up!
2567
- const auto result = count_events(proc, iterations);
2568
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2569
- std::cerr
2570
- << "The output is zero which might indicate a misconfiguration.\n";
2571
- }
2572
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2573
- print_summary(result, size, char_count);
2574
- }
2575
-
2576
- /**
2577
- * Olivier Goffart, UTF-8 processing using SIMD (SSE4), 2012.
2578
- * https://woboq.com/blog/utf-8-processing-using-simd.html
2579
- */
2580
- void Benchmark::run_convert_utf8_to_utf16_utf8sse4(size_t iterations) {
2581
- const char *data = reinterpret_cast<const char *>(input_data.data());
2582
- const size_t size = input_data.size();
2583
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2584
- volatile size_t sink{0};
2585
- auto proc = [data, size, &output_buffer, &sink]() {
2586
- const char *srcbuf_ptr = data;
2587
- size_t inbytes_left = size;
2588
- char *trgtbuf_ptr = reinterpret_cast<char *>(output_buffer.get());
2589
- size_t outbytes_left = size * sizeof(char16_t);
2590
- size_t result_code = utf8sse4::fromUtf8(&srcbuf_ptr, &inbytes_left,
2591
- &trgtbuf_ptr, &outbytes_left);
2592
- bool is_ok = (result_code != size_t(-1));
2593
- if (is_ok) {
2594
- sink = (reinterpret_cast<char16_t *>(trgtbuf_ptr) - output_buffer.get());
2595
- } else {
2596
- sink = 0;
2597
- }
2598
- };
2599
- count_events(proc, iterations); // warming up!
2600
- const auto result = count_events(proc, iterations);
2601
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2602
- std::cerr
2603
- << "The output is zero which might indicate a misconfiguration.\n";
2604
- }
2605
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2606
- print_summary(result, size, char_count);
2607
- }
2608
- #endif
2609
-
2610
- void Benchmark::run_convert_valid_utf8_to_utf16le(
2611
- const simdutf::implementation &implementation, size_t iterations) {
2612
- const char *data = reinterpret_cast<const char *>(input_data.data());
2613
- const size_t size = input_data.size();
2614
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
2615
- volatile size_t sink{0};
2616
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2617
- sink = implementation.convert_valid_utf8_to_utf16le(data, size,
2618
- output_buffer.get());
2619
- };
2620
- count_events(proc, iterations); // warming up!
2621
- const auto result = count_events(proc, iterations);
2622
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2623
- std::cerr
2624
- << "The output is zero which might indicate a misconfiguration.\n";
2625
- }
2626
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2627
- print_summary(result, size, char_count);
2628
- }
2629
-
2630
- void Benchmark::run_convert_valid_utf8_to_utf32(
2631
- const simdutf::implementation &implementation, size_t iterations) {
2632
- const char *data = reinterpret_cast<const char *>(input_data.data());
2633
- const size_t size = input_data.size();
2634
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
2635
- volatile size_t sink{0};
2636
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2637
- sink = implementation.convert_valid_utf8_to_utf32(data, size,
2638
- output_buffer.get());
2639
- };
2640
- count_events(proc, iterations); // warming up!
2641
- const auto result = count_events(proc, iterations);
2642
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2643
- std::cerr
2644
- << "The output is zero which might indicate a misconfiguration.\n";
2645
- }
2646
- size_t char_count = get_active_implementation()->count_utf8(data, size);
2647
- print_summary(result, size, char_count);
2648
- }
2649
-
2650
- void Benchmark::run_convert_utf16le_to_latin1(
2651
- const simdutf::implementation &implementation, size_t iterations) {
2652
- const simdutf::encoding_type bom =
2653
- BOM::check_bom(input_data.data(), input_data.size());
2654
- const char16_t *data = reinterpret_cast<const char16_t *>(
2655
- input_data.data() + BOM::bom_byte_size(bom));
2656
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2657
- if (size % 2 != 0) {
2658
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2659
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2660
- printf(" Running function on truncated input.\n");
2661
- }
2662
-
2663
- size /= 2;
2664
- std::unique_ptr<char[]> output_buffer{new char[size]};
2665
- volatile size_t sink{0};
2666
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2667
- sink = implementation.convert_utf16le_to_latin1(data, size,
2668
- output_buffer.get());
2669
- };
2670
- count_events(proc, iterations); // warming up!
2671
- const auto result = count_events(proc, iterations);
2672
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2673
- std::cerr << "The output is zero which might indicate an error.\n";
2674
- }
2675
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2676
- print_summary(result, input_data.size(), char_count);
2677
- }
2678
-
2679
- void Benchmark::run_convert_utf16le_to_latin1_with_errors(
2680
- const simdutf::implementation &implementation, size_t iterations) {
2681
- const simdutf::encoding_type bom =
2682
- BOM::check_bom(input_data.data(), input_data.size());
2683
- const char16_t *data = reinterpret_cast<const char16_t *>(
2684
- input_data.data() + BOM::bom_byte_size(bom));
2685
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2686
- if (size % 2 != 0) {
2687
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2688
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2689
- printf(" Running function on truncated input.\n");
2690
- }
2691
-
2692
- size /= 2;
2693
- std::unique_ptr<char[]> output_buffer{new char[size]};
2694
- volatile bool sink{false};
2695
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2696
- result res = implementation.convert_utf16le_to_latin1_with_errors(
2697
- data, size, output_buffer.get());
2698
- sink = !(res.error);
2699
- };
2700
- count_events(proc, iterations); // warming up!
2701
- const auto result = count_events(proc, iterations);
2702
- if ((sink == false) && (iterations > 0)) {
2703
- std::cerr << "The input was declared invalid.\n";
2704
- }
2705
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2706
- print_summary(result, input_data.size(), char_count);
2707
- }
2708
-
2709
- void Benchmark::run_convert_valid_utf16le_to_latin1(
2710
- const simdutf::implementation &implementation, size_t iterations) {
2711
- const simdutf::encoding_type bom =
2712
- BOM::check_bom(input_data.data(), input_data.size());
2713
- const char16_t *data = reinterpret_cast<const char16_t *>(
2714
- input_data.data() + BOM::bom_byte_size(bom));
2715
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2716
- if (size % 2 != 0) {
2717
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2718
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2719
- printf(" Running function on truncated input.\n");
2720
- }
2721
-
2722
- size /= 2;
2723
- std::unique_ptr<char[]> output_buffer{new char[size]};
2724
- volatile size_t sink{0};
2725
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2726
- sink = implementation.convert_valid_utf16le_to_latin1(data, size,
2727
- output_buffer.get());
2728
- };
2729
- count_events(proc, iterations); // warming up!
2730
- const auto result = count_events(proc, iterations);
2731
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2732
- std::cerr << "The output is zero which might indicate an error.\n";
2733
- }
2734
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2735
- print_summary(result, input_data.size(), char_count);
2736
- }
2737
-
2738
- void Benchmark::run_convert_utf16_to_utf8_safe(
2739
- const simdutf::implementation &implementation, size_t iterations) {
2740
- const simdutf::implementation *active_implementation =
2741
- simdutf::get_active_implementation();
2742
- simdutf::get_active_implementation() =
2743
- &implementation; // set the active implementation
2744
- const simdutf::encoding_type bom =
2745
- BOM::check_bom(input_data.data(), input_data.size());
2746
- const char16_t *data = reinterpret_cast<const char16_t *>(
2747
- input_data.data() + BOM::bom_byte_size(bom));
2748
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2749
- if (size % 2 != 0) {
2750
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2751
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2752
- printf(" Running function on truncated input.\n");
2753
- }
2754
-
2755
- size /= 2;
2756
-
2757
- size_t budget = simdutf::utf8_length_from_utf16(data, size);
2758
-
2759
- std::unique_ptr<char[]> output_buffer{new char[budget]};
2760
-
2761
- volatile size_t sink{0};
2762
-
2763
- auto proc = [&implementation, data, size, &output_buffer, &sink, &budget]() {
2764
- sink = simdutf::convert_utf16_to_utf8_safe(data, size, output_buffer.get(),
2765
- budget);
2766
- };
2767
- count_events(proc, iterations); // warming up!
2768
- const auto result = count_events(proc, iterations);
2769
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2770
- std::cerr << "The output is zero which might indicate an error.\n";
2771
- }
2772
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2773
- print_summary(result, input_data.size(), char_count);
2774
- simdutf::get_active_implementation() =
2775
- active_implementation; // restore the active implementation
2776
- }
2777
-
2778
- void Benchmark::run_convert_utf16le_to_utf8(
2779
- const simdutf::implementation &implementation, size_t iterations) {
2780
- const simdutf::encoding_type bom =
2781
- BOM::check_bom(input_data.data(), input_data.size());
2782
- const char16_t *data = reinterpret_cast<const char16_t *>(
2783
- input_data.data() + BOM::bom_byte_size(bom));
2784
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2785
- if (size % 2 != 0) {
2786
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2787
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2788
- printf(" Running function on truncated input.\n");
2789
- }
2790
-
2791
- size /= 2;
2792
-
2793
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2794
- // yields 4 bytes,
2795
- // thus we're making safe assumption that each 16-bit word will be
2796
- // expanded to four bytes.
2797
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
2798
-
2799
- volatile size_t sink{0};
2800
-
2801
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2802
- sink =
2803
- implementation.convert_utf16le_to_utf8(data, size, output_buffer.get());
2804
- };
2805
- count_events(proc, iterations); // warming up!
2806
- const auto result = count_events(proc, iterations);
2807
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2808
- std::cerr << "The output is zero which might indicate an error.\n";
2809
- }
2810
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2811
- print_summary(result, input_data.size(), char_count);
2812
- }
2813
-
2814
- void Benchmark::run_convert_utf16le_to_utf8_with_errors(
2815
- const simdutf::implementation &implementation, size_t iterations) {
2816
- const simdutf::encoding_type bom =
2817
- BOM::check_bom(input_data.data(), input_data.size());
2818
- const char16_t *data = reinterpret_cast<const char16_t *>(
2819
- input_data.data() + BOM::bom_byte_size(bom));
2820
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2821
- if (size % 2 != 0) {
2822
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2823
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2824
- printf(" Running function on truncated input.\n");
2825
- }
2826
-
2827
- size /= 2;
2828
-
2829
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2830
- // yields 4 bytes,
2831
- // thus we're making safe assumption that each 16-bit word will be
2832
- // expanded to four bytes.
2833
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
2834
-
2835
- volatile bool sink{false};
2836
-
2837
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2838
- result res = implementation.convert_utf16le_to_utf8_with_errors(
2839
- data, size, output_buffer.get());
2840
- sink = !(res.error);
2841
- };
2842
- count_events(proc, iterations); // warming up!
2843
- const auto result = count_events(proc, iterations);
2844
- if ((sink == false) && (iterations > 0)) {
2845
- std::cerr << "The input was declared invalid.\n";
2846
- }
2847
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2848
- print_summary(result, input_data.size(), char_count);
2849
- }
2850
-
2851
- void Benchmark::run_convert_utf16le_to_utf32(
2852
- const simdutf::implementation &implementation, size_t iterations) {
2853
- const simdutf::encoding_type bom =
2854
- BOM::check_bom(input_data.data(), input_data.size());
2855
- const char16_t *data = reinterpret_cast<const char16_t *>(
2856
- input_data.data() + BOM::bom_byte_size(bom));
2857
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2858
- if (size % 2 != 0) {
2859
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2860
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2861
- printf(" Running function on truncated input.\n");
2862
- }
2863
-
2864
- size /= 2;
2865
-
2866
- // Note: all code units yield 4 bytes. We are making a safe assumption that
2867
- // all code units will be non-surrogate code units so the size would get
2868
- // doubled (16 bits -> 32 bits).
2869
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 2]};
2870
-
2871
- volatile size_t sink{0};
2872
-
2873
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2874
- sink = implementation.convert_utf16le_to_utf32(data, size,
2875
- output_buffer.get());
2876
- };
2877
- count_events(proc, iterations); // warming up!
2878
- const auto result = count_events(proc, iterations);
2879
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2880
- std::cerr << "The output is zero which might indicate an error.\n";
2881
- }
2882
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2883
- print_summary(result, input_data.size(), char_count);
2884
- }
2885
-
2886
- void Benchmark::run_convert_utf16le_to_utf32_with_errors(
2887
- const simdutf::implementation &implementation, size_t iterations) {
2888
- const simdutf::encoding_type bom =
2889
- BOM::check_bom(input_data.data(), input_data.size());
2890
- const char16_t *data = reinterpret_cast<const char16_t *>(
2891
- input_data.data() + BOM::bom_byte_size(bom));
2892
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2893
- if (size % 2 != 0) {
2894
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2895
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2896
- printf(" Running function on truncated input.\n");
2897
- }
2898
-
2899
- size /= 2;
2900
-
2901
- // Note: all code units yield 4 bytes. We are making a safe assumption that
2902
- // all code units will be non-surrogate code units so the size would get
2903
- // doubled (16 bits -> 32 bits).
2904
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 2]};
2905
-
2906
- volatile bool sink{false};
2907
-
2908
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
2909
- result res = implementation.convert_utf16le_to_utf32_with_errors(
2910
- data, size, output_buffer.get());
2911
- sink = !(res.error);
2912
- };
2913
- count_events(proc, iterations); // warming up!
2914
- const auto result = count_events(proc, iterations);
2915
- if ((sink == false) && (iterations > 0)) {
2916
- std::cerr << "The input was declared invalid.\n";
2917
- }
2918
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2919
- print_summary(result, input_data.size(), char_count);
2920
- }
2921
-
2922
- void Benchmark::run_convert_utf16le_to_utf8_with_dynamic_allocation(
2923
- const simdutf::implementation &implementation, size_t iterations) {
2924
- const simdutf::encoding_type bom =
2925
- BOM::check_bom(input_data.data(), input_data.size());
2926
- const char16_t *data = reinterpret_cast<const char16_t *>(
2927
- input_data.data() + BOM::bom_byte_size(bom));
2928
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2929
- if (size % 2 != 0) {
2930
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2931
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2932
- printf(" Running function on truncated input.\n");
2933
- }
2934
-
2935
- size /= 2;
2936
-
2937
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
2938
- // yields 4 bytes,
2939
- // thus we're making safe assumption that each 16-bit word will be
2940
- // expanded to four bytes.
2941
-
2942
- volatile size_t sink{0};
2943
-
2944
- auto proc = [&implementation, data, size, &sink]() {
2945
- auto dyn_size = implementation.utf8_length_from_utf16le(data, size);
2946
- std::unique_ptr<char[]> output_buffer{new char[dyn_size]};
2947
- sink =
2948
- implementation.convert_utf16le_to_utf8(data, size, output_buffer.get());
2949
- };
2950
- count_events(proc, iterations); // warming up!
2951
- const auto result = count_events(proc, iterations);
2952
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2953
- std::cerr << "The output is zero which might indicate an error.\n";
2954
- }
2955
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2956
- print_summary(result, input_data.size(), char_count);
2957
- }
2958
-
2959
- void Benchmark::run_convert_utf16le_to_utf32_with_dynamic_allocation(
2960
- const simdutf::implementation &implementation, size_t iterations) {
2961
- const simdutf::encoding_type bom =
2962
- BOM::check_bom(input_data.data(), input_data.size());
2963
- const char16_t *data = reinterpret_cast<const char16_t *>(
2964
- input_data.data() + BOM::bom_byte_size(bom));
2965
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
2966
- if (size % 2 != 0) {
2967
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
2968
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
2969
- printf(" Running function on truncated input.\n");
2970
- }
2971
-
2972
- size /= 2;
2973
-
2974
- // Note: all code units yield 4 bytes. We are making a safe assumption that
2975
- // all code units will be non-surrogate code units so the size would get
2976
- // doubled (16 bits -> 32 bits).
2977
-
2978
- volatile size_t sink{0};
2979
-
2980
- auto proc = [&implementation, data, size, &sink]() {
2981
- auto dyn_size = implementation.utf32_length_from_utf16le(data, size);
2982
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[dyn_size]};
2983
- sink = implementation.convert_utf16le_to_utf32(data, size,
2984
- output_buffer.get());
2985
- };
2986
- count_events(proc, iterations); // warming up!
2987
- const auto result = count_events(proc, iterations);
2988
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
2989
- std::cerr << "The output is zero which might indicate an error.\n";
2990
- }
2991
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
2992
- print_summary(result, input_data.size(), char_count);
2993
- }
2994
-
2995
- void Benchmark::run_convert_valid_utf16le_to_utf8(
2996
- const simdutf::implementation &implementation, size_t iterations) {
2997
- const simdutf::encoding_type bom =
2998
- BOM::check_bom(input_data.data(), input_data.size());
2999
- const char16_t *data = reinterpret_cast<const char16_t *>(
3000
- input_data.data() + BOM::bom_byte_size(bom));
3001
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3002
- if (size % 2 != 0) {
3003
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3004
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3005
- printf(" Running function on truncated input.\n");
3006
- }
3007
-
3008
- size /= 2;
3009
-
3010
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
3011
- // yields 4 bytes,
3012
- // thus we're making safe assumption that each 16-bit word will be
3013
- // expanded to four bytes.
3014
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3015
-
3016
- volatile size_t sink{0};
3017
-
3018
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3019
- sink = implementation.convert_valid_utf16le_to_utf8(data, size,
3020
- output_buffer.get());
3021
- };
3022
- count_events(proc, iterations); // warming up!
3023
- const auto result = count_events(proc, iterations);
3024
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3025
- std::cerr << "The output is zero which might indicate an error.\n";
3026
- }
3027
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3028
- print_summary(result, input_data.size(), char_count);
3029
- }
3030
-
3031
- void Benchmark::run_convert_utf32_to_latin1(
3032
- const simdutf::implementation &implementation, size_t iterations) {
3033
- const simdutf::encoding_type bom =
3034
- BOM::check_bom(input_data.data(), input_data.size());
3035
- const char32_t *data = reinterpret_cast<const char32_t *>(
3036
- input_data.data() + BOM::bom_byte_size(bom));
3037
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3038
- if (size % 4 != 0) {
3039
- printf(
3040
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3041
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3042
- printf(" Running function on truncated input.\n");
3043
- }
3044
-
3045
- size /= 4;
3046
-
3047
- std::unique_ptr<char[]> output_buffer{new char[size]};
3048
- volatile size_t sink{0};
3049
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3050
- sink =
3051
- implementation.convert_utf32_to_latin1(data, size, output_buffer.get());
3052
- };
3053
- count_events(proc, iterations); // warming up!
3054
- const auto result = count_events(proc, iterations);
3055
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3056
- std::cerr << "The output is zero which might indicate an error.\n";
3057
- }
3058
- size_t char_count = size;
3059
- print_summary(result, input_data.size(), char_count);
3060
- }
3061
- void Benchmark::run_convert_utf32_to_latin1_with_errors(
3062
- const simdutf::implementation &implementation, size_t iterations) {
3063
- const simdutf::encoding_type bom =
3064
- BOM::check_bom(input_data.data(), input_data.size());
3065
- const char32_t *data = reinterpret_cast<const char32_t *>(
3066
- input_data.data() + BOM::bom_byte_size(bom));
3067
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3068
- if (size % 4 != 0) {
3069
- printf(
3070
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3071
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3072
- printf(" Running function on truncated input.\n");
3073
- }
3074
-
3075
- size /= 4;
3076
-
3077
- std::unique_ptr<char[]> output_buffer{new char[size]};
3078
- volatile bool sink{false};
3079
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3080
- result res = implementation.convert_utf32_to_latin1_with_errors(
3081
- data, size, output_buffer.get());
3082
- sink = !(res.error);
3083
- };
3084
- count_events(proc, iterations); // warming up!
3085
- const auto result = count_events(proc, iterations);
3086
- if ((sink == false) && (iterations > 0)) {
3087
- std::cerr << "The input was declared invalid.\n";
3088
- }
3089
- size_t char_count = size;
3090
- print_summary(result, input_data.size(), char_count);
3091
- }
3092
- void Benchmark::run_convert_valid_utf32_to_latin1(
3093
- const simdutf::implementation &implementation, size_t iterations) {
3094
- const simdutf::encoding_type bom =
3095
- BOM::check_bom(input_data.data(), input_data.size());
3096
- const char32_t *data = reinterpret_cast<const char32_t *>(
3097
- input_data.data() + BOM::bom_byte_size(bom));
3098
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3099
- if (size % 4 != 0) {
3100
- printf(
3101
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3102
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3103
- printf(" Running function on truncated input.\n");
3104
- }
3105
-
3106
- size /= 4;
3107
-
3108
- std::unique_ptr<char[]> output_buffer{new char[size]};
3109
- volatile size_t sink{0};
3110
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3111
- sink = implementation.convert_valid_utf32_to_latin1(data, size,
3112
- output_buffer.get());
3113
- };
3114
- count_events(proc, iterations); // warming up!
3115
- const auto result = count_events(proc, iterations);
3116
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3117
- std::cerr << "The output is zero which might indicate an error.\n";
3118
- }
3119
- size_t char_count = size;
3120
- print_summary(result, input_data.size(), char_count);
3121
- }
3122
-
3123
- void Benchmark::run_convert_utf32_to_utf8(
3124
- const simdutf::implementation &implementation, size_t iterations) {
3125
- const simdutf::encoding_type bom =
3126
- BOM::check_bom(input_data.data(), input_data.size());
3127
- const char32_t *data = reinterpret_cast<const char32_t *>(
3128
- input_data.data() + BOM::bom_byte_size(bom));
3129
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3130
- if (size % 4 != 0) {
3131
- printf(
3132
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3133
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3134
- printf(" Running function on truncated input.\n");
3135
- }
3136
-
3137
- size /= 4;
3138
-
3139
- // Note: In the "worst" case, a 32-bit word will yield 4 UTF-8 bytes. So, we
3140
- // are making a safe assumption that each word will produce 4 bytes.
3141
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3142
-
3143
- volatile size_t sink{0};
3144
-
3145
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3146
- sink =
3147
- implementation.convert_utf32_to_utf8(data, size, output_buffer.get());
3148
- };
3149
- count_events(proc, iterations); // warming up!
3150
- const auto result = count_events(proc, iterations);
3151
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3152
- std::cerr << "The output is zero which might indicate an error.\n";
3153
- }
3154
- size_t char_count = size;
3155
- print_summary(result, input_data.size(), char_count);
3156
- }
3157
-
3158
- void Benchmark::run_convert_utf32_to_utf8_with_errors(
3159
- const simdutf::implementation &implementation, size_t iterations) {
3160
- const simdutf::encoding_type bom =
3161
- BOM::check_bom(input_data.data(), input_data.size());
3162
- const char32_t *data = reinterpret_cast<const char32_t *>(
3163
- input_data.data() + BOM::bom_byte_size(bom));
3164
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3165
- if (size % 4 != 0) {
3166
- printf(
3167
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3168
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3169
- printf(" Running function on truncated input.\n");
3170
- }
3171
-
3172
- size /= 4;
3173
-
3174
- // Note: In the "worst" case, a 32-bit word will yield 4 UTF-8 bytes. So, we
3175
- // are making a safe assumption that each word will produce 4 bytes.
3176
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3177
-
3178
- volatile bool sink{false};
3179
-
3180
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3181
- result res = implementation.convert_utf32_to_utf8_with_errors(
3182
- data, size, output_buffer.get());
3183
- sink = !(res.error);
3184
- };
3185
- count_events(proc, iterations); // warming up!
3186
- const auto result = count_events(proc, iterations);
3187
- if ((sink == false) && (iterations > 0)) {
3188
- std::cerr << "The input was declared invalid.\n";
3189
- }
3190
- size_t char_count = size;
3191
- print_summary(result, input_data.size(), char_count);
3192
- }
3193
-
3194
- void Benchmark::run_convert_valid_utf32_to_utf8(
3195
- const simdutf::implementation &implementation, size_t iterations) {
3196
- const simdutf::encoding_type bom =
3197
- BOM::check_bom(input_data.data(), input_data.size());
3198
- const char32_t *data = reinterpret_cast<const char32_t *>(
3199
- input_data.data() + BOM::bom_byte_size(bom));
3200
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3201
- if (size % 4 != 0) {
3202
- printf(
3203
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3204
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3205
- printf(" Running function on truncated input.\n");
3206
- }
3207
-
3208
- size /= 4;
3209
-
3210
- // Note: In the "worst" case, a 32-bit word will yield 4 UTF-8 bytes. So, we
3211
- // are making a safe assumption that each word will produce 4 bytes.
3212
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3213
-
3214
- volatile size_t sink{0};
3215
-
3216
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3217
- sink = implementation.convert_valid_utf32_to_utf8(data, size,
3218
- output_buffer.get());
3219
- };
3220
- count_events(proc, iterations); // warming up!
3221
- const auto result = count_events(proc, iterations);
3222
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3223
- std::cerr << "The output is zero which might indicate an error.\n";
3224
- }
3225
- size_t char_count = size;
3226
- print_summary(result, input_data.size(), char_count);
3227
- }
3228
-
3229
- void Benchmark::run_convert_valid_utf16le_to_utf32(
3230
- const simdutf::implementation &implementation, size_t iterations) {
3231
- const simdutf::encoding_type bom =
3232
- BOM::check_bom(input_data.data(), input_data.size());
3233
- const char16_t *data = reinterpret_cast<const char16_t *>(
3234
- input_data.data() + BOM::bom_byte_size(bom));
3235
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3236
- if (size % 2 != 0) {
3237
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3238
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3239
- printf(" Running function on truncated input.\n");
3240
- }
3241
-
3242
- size /= 2;
3243
-
3244
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
3245
- // yields 4 bytes,
3246
- // thus we're making safe assumption that each 16-bit word will be
3247
- // expanded to four bytes.
3248
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 4]};
3249
-
3250
- volatile size_t sink{0};
3251
-
3252
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3253
- sink = implementation.convert_valid_utf16le_to_utf32(data, size,
3254
- output_buffer.get());
3255
- };
3256
- count_events(proc, iterations); // warming up!
3257
- const auto result = count_events(proc, iterations);
3258
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3259
- std::cerr << "The output is zero which might indicate an error.\n";
3260
- }
3261
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3262
- print_summary(result, input_data.size(), char_count);
3263
- }
3264
-
3265
- template <endianness byte_order>
3266
- void Benchmark::run_convert_utf32_to_utf16(
3267
- const simdutf::implementation &implementation, size_t iterations) {
3268
- const simdutf::encoding_type bom =
3269
- BOM::check_bom(input_data.data(), input_data.size());
3270
- const char32_t *data = reinterpret_cast<const char32_t *>(
3271
- input_data.data() + BOM::bom_byte_size(bom));
3272
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3273
- if (size % 4 != 0) {
3274
- printf(
3275
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3276
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3277
- printf(" Running function on truncated input.\n");
3278
- }
3279
-
3280
- size /= 4;
3281
-
3282
- // Note: In the "worst" case, a 32-bit word will yield two 16-bit code units.
3283
- // So, we are making a safe assumption that each word will produce 2 bytes.
3284
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
3285
-
3286
- volatile size_t sink{0};
3287
-
3288
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3289
- if (byte_order == endianness::LITTLE) {
3290
- sink = implementation.convert_utf32_to_utf16le(data, size,
3291
- output_buffer.get());
3292
- } else {
3293
- sink = implementation.convert_utf32_to_utf16be(data, size,
3294
- output_buffer.get());
3295
- }
3296
- };
3297
- count_events(proc, iterations); // warming up!
3298
- const auto result = count_events(proc, iterations);
3299
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3300
- std::cerr << "The output is zero which might indicate an error.\n";
3301
- }
3302
- size_t char_count = size;
3303
- print_summary(result, input_data.size(), char_count);
3304
- }
3305
-
3306
- template <endianness byte_order>
3307
- void Benchmark::run_convert_utf32_to_utf16_with_errors(
3308
- const simdutf::implementation &implementation, size_t iterations) {
3309
- const simdutf::encoding_type bom =
3310
- BOM::check_bom(input_data.data(), input_data.size());
3311
- const char32_t *data = reinterpret_cast<const char32_t *>(
3312
- input_data.data() + BOM::bom_byte_size(bom));
3313
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3314
- if (size % 4 != 0) {
3315
- printf(
3316
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3317
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3318
- printf(" Running function on truncated input.\n");
3319
- }
3320
-
3321
- size /= 4;
3322
-
3323
- // Note: In the "worst" case, a 32-bit word will yield two 16-bit code units.
3324
- // So, we are making a safe assumption that each word will produce 2 bytes.
3325
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
3326
-
3327
- volatile bool sink{false};
3328
-
3329
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3330
- if (byte_order == endianness::LITTLE) {
3331
- result res = implementation.convert_utf32_to_utf16le_with_errors(
3332
- data, size, output_buffer.get());
3333
- sink = !(res.error);
3334
- } else {
3335
- result res = implementation.convert_utf32_to_utf16be_with_errors(
3336
- data, size, output_buffer.get());
3337
- sink = !(res.error);
3338
- }
3339
- };
3340
- count_events(proc, iterations); // warming up!
3341
- const auto result = count_events(proc, iterations);
3342
- if ((sink == false) && (iterations > 0)) {
3343
- std::cerr << "The input was declared invalid.\n";
3344
- }
3345
- size_t char_count = size;
3346
- print_summary(result, input_data.size(), char_count);
3347
- }
3348
-
3349
- template <endianness byte_order>
3350
- void Benchmark::run_convert_valid_utf32_to_utf16(
3351
- const simdutf::implementation &implementation, size_t iterations) {
3352
- const simdutf::encoding_type bom =
3353
- BOM::check_bom(input_data.data(), input_data.size());
3354
- const char32_t *data = reinterpret_cast<const char32_t *>(
3355
- input_data.data() + BOM::bom_byte_size(bom));
3356
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3357
- if (size % 4 != 0) {
3358
- printf(
3359
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3360
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3361
- printf(" Running function on truncated input.\n");
3362
- }
3363
-
3364
- size /= 4;
3365
-
3366
- // Note: In the "worst" case, a 32-bit word will yield two 16-bit code units.
3367
- // So, we are making a safe assumption that each word will produce 2 bytes.
3368
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size * 2]};
3369
-
3370
- volatile size_t sink{0};
3371
-
3372
- auto proc = [&implementation, data, size, &output_buffer, &sink]() {
3373
- if (byte_order == endianness::LITTLE) {
3374
- sink = implementation.convert_valid_utf32_to_utf16le(data, size,
3375
- output_buffer.get());
3376
- } else {
3377
- sink = implementation.convert_valid_utf32_to_utf16be(data, size,
3378
- output_buffer.get());
3379
- }
3380
- };
3381
- count_events(proc, iterations); // warming up!
3382
- const auto result = count_events(proc, iterations);
3383
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3384
- std::cerr << "The output is zero which might indicate an error.\n";
3385
- }
3386
- size_t char_count = size;
3387
- print_summary(result, input_data.size(), char_count);
3388
- }
3389
-
3390
- void Benchmark::run_count_utf8(const simdutf::implementation &implementation,
3391
- size_t iterations) {
3392
- const char *data = reinterpret_cast<const char *>(input_data.data());
3393
- const size_t size = input_data.size();
3394
- volatile size_t sink{0};
3395
-
3396
- auto proc = [&implementation, data, size, &sink]() {
3397
- sink = implementation.count_utf8(data, size);
3398
- };
3399
- count_events(proc, iterations); // warming up!
3400
- const auto result = count_events(proc, iterations);
3401
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3402
- std::cerr << "The output is zero which might indicate an error.\n";
3403
- }
3404
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3405
- print_summary(result, size, char_count);
3406
- }
3407
-
3408
- void Benchmark::run_count_utf16le(const simdutf::implementation &implementation,
3409
- size_t iterations) {
3410
- const simdutf::encoding_type bom =
3411
- BOM::check_bom(input_data.data(), input_data.size());
3412
- const char16_t *data = reinterpret_cast<const char16_t *>(input_data.data());
3413
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3414
- if (size % 2 != 0) {
3415
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3416
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3417
- printf(" Running function on truncated input.\n");
3418
- }
3419
- size /= 2;
3420
- volatile size_t sink{0};
3421
- auto proc = [&implementation, data, size, &sink]() {
3422
- sink = implementation.count_utf16le(data, size);
3423
- };
3424
- count_events(proc, iterations); // warming up!
3425
- const auto result = count_events(proc, iterations);
3426
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3427
- std::cerr << "The output is zero which might indicate an error.\n";
3428
- }
3429
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3430
- print_summary(result, input_data.size(), char_count);
3431
- }
3432
-
3433
- void Benchmark::run_detect_encodings(
3434
- const simdutf::implementation &implementation, size_t iterations) {
3435
- const simdutf::encoding_type bom =
3436
- BOM::check_bom(input_data.data(), input_data.size());
3437
- const char *data = reinterpret_cast<const char *>(input_data.data() +
3438
- BOM::bom_byte_size(bom));
3439
- const size_t size = input_data.size() - BOM::bom_byte_size(bom);
3440
- volatile size_t sink{0};
3441
- auto proc = [&implementation, data, size, &sink]() {
3442
- sink = implementation.detect_encodings(data, size);
3443
- };
3444
- count_events(proc, iterations); // warming up!
3445
- const auto result = count_events(proc, iterations);
3446
- size_t char_count = size;
3447
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3448
- std::cerr << "The output is zero which might indicate an error.\n";
3449
- } else {
3450
- std::cout << "Detected format: ";
3451
- if (sink & simdutf::encoding_type::UTF8) {
3452
- char_count = get_active_implementation()->count_utf8(data, size);
3453
- std::cout << " UTF8";
3454
- }
3455
- if (sink & simdutf::encoding_type::UTF16_LE) {
3456
- std::cout << " UTF16LE";
3457
- char_count = get_active_implementation()->count_utf16le(
3458
- reinterpret_cast<const char16_t *>(data), size / 2);
3459
- }
3460
- if (sink & simdutf::encoding_type::UTF32_LE) {
3461
- std::cout << " UTF32LE";
3462
- char_count = size / 4;
3463
- }
3464
- std::cout << std::endl;
3465
- }
3466
- if ((bom) && (bom & ~sink)) {
3467
- std::cerr << "[Error] BOM format : ";
3468
- if (bom & simdutf::encoding_type::UTF8) {
3469
- std::cerr << " UTF8";
3470
- } else if (bom & simdutf::encoding_type::UTF16_LE) {
3471
- std::cerr << " UTF16LE";
3472
- } else if (bom & simdutf::encoding_type::UTF32_LE) {
3473
- std::cerr << " UTF32LE";
3474
- }
3475
- std::cerr << std::endl;
3476
- }
3477
- if ((sink & (sink - 1)) != 0) {
3478
- std::cout << "More than one format possible, character count is ambiguous."
3479
- << std::endl;
3480
- }
3481
- print_summary(result, size, char_count);
3482
- }
3483
-
3484
- const std::set<std::string> Benchmark::all_procedures() const {
3485
- std::set<std::string> result;
3486
- for (const auto &item : benchmarks) {
3487
- result.insert(item.first);
3488
- }
3489
-
3490
- return result;
3491
- }
3492
-
3493
- std::set<simdutf::encoding_type>
3494
- Benchmark::expected_encodings(const std::string &procedure) {
3495
- return benchmarks[procedure].second;
3496
- }
3497
-
3498
- /**
3499
- * LLVM relies on code from the Unicode Consortium
3500
- * https://en.wikipedia.org/wiki/Unicode_Consortium
3501
- */
3502
- void Benchmark::run_convert_utf8_to_utf16_llvm(size_t iterations) {
3503
- const char *data = reinterpret_cast<const char *>(input_data.data());
3504
- const size_t size = input_data.size();
3505
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
3506
- volatile size_t sink{0};
3507
- auto proc = [data, size, &output_buffer, &sink]() {
3508
- const unsigned char *sourceStart =
3509
- reinterpret_cast<const unsigned char *>(data);
3510
- const unsigned char *sourceEnd = sourceStart + size;
3511
- short unsigned int *targetStart =
3512
- reinterpret_cast<short unsigned int *>(output_buffer.get());
3513
- short unsigned int *targetEnd = targetStart + size;
3514
- bool is_ok = (llvm::conversionOK ==
3515
- llvm::ConvertUTF8toUTF16(
3516
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3517
- llvm::ConversionFlags::strictConversion));
3518
- if (is_ok) {
3519
- sink = (targetStart -
3520
- reinterpret_cast<short unsigned int *>(output_buffer.get()));
3521
- } else {
3522
- sink = 0;
3523
- }
3524
- };
3525
- count_events(proc, iterations); // warming up!
3526
- const auto result = count_events(proc, iterations);
3527
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3528
- std::cerr
3529
- << "The output is zero which might indicate a misconfiguration.\n";
3530
- }
3531
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3532
- print_summary(result, size, char_count);
3533
- }
3534
-
3535
- void Benchmark::run_convert_utf8_to_utf32_llvm(size_t iterations) {
3536
- const char *data = reinterpret_cast<const char *>(input_data.data());
3537
- const size_t size = input_data.size();
3538
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size]};
3539
- volatile size_t sink{0};
3540
- auto proc = [data, size, &output_buffer, &sink]() {
3541
- const unsigned char *sourceStart =
3542
- reinterpret_cast<const unsigned char *>(data);
3543
- const unsigned char *sourceEnd = sourceStart + size;
3544
- unsigned int *targetStart =
3545
- reinterpret_cast<unsigned int *>(output_buffer.get());
3546
- unsigned int *targetEnd = targetStart + size;
3547
- bool is_ok = (llvm::conversionOK ==
3548
- llvm::ConvertUTF8toUTF32(
3549
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3550
- llvm::ConversionFlags::strictConversion));
3551
- if (is_ok) {
3552
- sink =
3553
- (targetStart - reinterpret_cast<unsigned int *>(output_buffer.get()));
3554
- } else {
3555
- sink = 0;
3556
- }
3557
- };
3558
- count_events(proc, iterations); // warming up!
3559
- const auto result = count_events(proc, iterations);
3560
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3561
- std::cerr
3562
- << "The output is zero which might indicate a misconfiguration.\n";
3563
- }
3564
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3565
- print_summary(result, size, char_count);
3566
- }
3567
-
3568
- void Benchmark::run_convert_utf16_to_utf8_llvm(size_t iterations) {
3569
- const simdutf::encoding_type bom =
3570
- BOM::check_bom(input_data.data(), input_data.size());
3571
- const char16_t *data = reinterpret_cast<const char16_t *>(
3572
- input_data.data() + BOM::bom_byte_size(bom));
3573
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3574
- if (size % 2 != 0) {
3575
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3576
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3577
- printf(" Running function on truncated input.\n");
3578
- }
3579
-
3580
- size /= 2;
3581
-
3582
- // Note: non-surrogate code units can yield up to 3 bytes, a surrogate pair
3583
- // yields 4 bytes,
3584
- // thus we're making safe assumption that each 16-bit word will be
3585
- // expanded to four bytes.
3586
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3587
-
3588
- volatile size_t sink{0};
3589
-
3590
- auto proc = [data, size, &output_buffer, &sink]() {
3591
- const short unsigned int *sourceStart =
3592
- reinterpret_cast<const short unsigned int *>(data);
3593
- const short unsigned int *sourceEnd = sourceStart + size;
3594
- unsigned char *targetStart =
3595
- reinterpret_cast<unsigned char *>(output_buffer.get());
3596
- unsigned char *targetEnd = targetStart + size * 4;
3597
- bool is_ok = (llvm::conversionOK ==
3598
- llvm::ConvertUTF16toUTF8(
3599
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3600
- llvm::ConversionFlags::strictConversion));
3601
- if (is_ok) {
3602
- sink = (targetStart -
3603
- reinterpret_cast<unsigned char *>(output_buffer.get()));
3604
- } else {
3605
- sink = 0;
3606
- }
3607
- };
3608
- count_events(proc, iterations); // warming up!
3609
- const auto result = count_events(proc, iterations);
3610
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3611
- std::cerr << "The output is zero which might indicate an error.\n";
3612
- }
3613
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3614
- print_summary(result, input_data.size(), char_count);
3615
- }
3616
-
3617
- void Benchmark::run_convert_utf32_to_utf8_llvm(size_t iterations) {
3618
- const simdutf::encoding_type bom =
3619
- BOM::check_bom(input_data.data(), input_data.size());
3620
- const char32_t *data = reinterpret_cast<const char32_t *>(
3621
- input_data.data() + BOM::bom_byte_size(bom));
3622
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3623
- if (size % 4 != 0) {
3624
- printf(
3625
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3626
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3627
- printf(" Running function on truncated input.\n");
3628
- }
3629
-
3630
- size /= 4;
3631
-
3632
- // Note: a single 32-bit word can yield up to four UTF-8 bytes. We are
3633
- // making a safe assumption that each 32-bit word will yield four
3634
- // UTF-8 bytes.
3635
- std::unique_ptr<char[]> output_buffer{new char[size * 4]};
3636
-
3637
- volatile size_t sink{0};
3638
-
3639
- auto proc = [data, size, &output_buffer, &sink]() {
3640
- const unsigned int *sourceStart =
3641
- reinterpret_cast<const unsigned int *>(data);
3642
- const unsigned int *sourceEnd = sourceStart + size;
3643
- unsigned char *targetStart =
3644
- reinterpret_cast<unsigned char *>(output_buffer.get());
3645
- unsigned char *targetEnd = targetStart + size * 4;
3646
- bool is_ok = (llvm::conversionOK ==
3647
- llvm::ConvertUTF32toUTF8(
3648
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3649
- llvm::ConversionFlags::strictConversion));
3650
- if (is_ok) {
3651
- sink = (targetStart -
3652
- reinterpret_cast<unsigned char *>(output_buffer.get()));
3653
- } else {
3654
- sink = 0;
3655
- }
3656
- };
3657
- count_events(proc, iterations); // warming up!
3658
- const auto result = count_events(proc, iterations);
3659
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3660
- std::cerr << "The output is zero which might indicate an error.\n";
3661
- }
3662
- size_t char_count = size;
3663
- print_summary(result, input_data.size(), char_count);
3664
- }
3665
-
3666
- void Benchmark::run_convert_utf16_to_utf32_llvm(size_t iterations) {
3667
- const simdutf::encoding_type bom =
3668
- BOM::check_bom(input_data.data(), input_data.size());
3669
- const char16_t *data = reinterpret_cast<const char16_t *>(
3670
- input_data.data() + BOM::bom_byte_size(bom));
3671
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3672
- if (size % 2 != 0) {
3673
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3674
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3675
- printf(" Running function on truncated input.\n");
3676
- }
3677
-
3678
- size /= 2;
3679
-
3680
- // Note: all code units yield four bytes. We make the safe assumption that all
3681
- // code units will be non surrogate code units so the size will double (16
3682
- // bits -> 32 bits).
3683
- std::unique_ptr<char32_t[]> output_buffer{new char32_t[size * 2]};
3684
-
3685
- volatile size_t sink{0};
3686
-
3687
- auto proc = [data, size, &output_buffer, &sink]() {
3688
- const short unsigned int *sourceStart =
3689
- reinterpret_cast<const short unsigned int *>(data);
3690
- const short unsigned int *sourceEnd = sourceStart + size;
3691
- unsigned int *targetStart =
3692
- reinterpret_cast<unsigned int *>(output_buffer.get());
3693
- unsigned int *targetEnd = targetStart + 2 * size;
3694
- bool is_ok = (llvm::conversionOK ==
3695
- llvm::ConvertUTF16toUTF32(
3696
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3697
- llvm::ConversionFlags::strictConversion));
3698
- if (is_ok) {
3699
- sink =
3700
- (targetStart - reinterpret_cast<unsigned int *>(output_buffer.get()));
3701
- } else {
3702
- sink = 0;
3703
- }
3704
- };
3705
- count_events(proc, iterations); // warming up!
3706
- const auto result = count_events(proc, iterations);
3707
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3708
- std::cerr << "The output is zero which might indicate an error.\n";
3709
- }
3710
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3711
- print_summary(result, input_data.size(), char_count);
3712
- }
3713
-
3714
- void Benchmark::run_convert_utf32_to_utf16_llvm(size_t iterations) {
3715
- const simdutf::encoding_type bom =
3716
- BOM::check_bom(input_data.data(), input_data.size());
3717
- const char32_t *data = reinterpret_cast<const char32_t *>(
3718
- input_data.data() + BOM::bom_byte_size(bom));
3719
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3720
- if (size % 4 != 0) {
3721
- printf(
3722
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3723
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3724
- printf(" Running function on truncated input.\n");
3725
- }
3726
-
3727
- size /= 4;
3728
-
3729
- // Note: a single 32-bit word can produce a surrogate pair, i.e. two
3730
- // 16-bit code units. We are making a safe assumption that each 32-
3731
- // bit word will yield two 16-bit code units.
3732
- std::unique_ptr<char[]> output_buffer{new char[size * 2]};
3733
-
3734
- volatile size_t sink{0};
3735
-
3736
- auto proc = [data, size, &output_buffer, &sink]() {
3737
- const unsigned int *sourceStart =
3738
- reinterpret_cast<const unsigned int *>(data);
3739
- const unsigned int *sourceEnd = sourceStart + size;
3740
- short unsigned int *targetStart =
3741
- reinterpret_cast<short unsigned int *>(output_buffer.get());
3742
- short unsigned int *targetEnd = targetStart + size * 2;
3743
- bool is_ok = (llvm::conversionOK ==
3744
- llvm::ConvertUTF32toUTF16(
3745
- &sourceStart, sourceEnd, &targetStart, targetEnd,
3746
- llvm::ConversionFlags::strictConversion));
3747
- if (is_ok) {
3748
- sink = (targetStart -
3749
- reinterpret_cast<short unsigned int *>(output_buffer.get()));
3750
- } else {
3751
- sink = 0;
3752
- }
3753
- };
3754
- count_events(proc, iterations); // warming up!
3755
- const auto result = count_events(proc, iterations);
3756
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3757
- std::cerr << "The output is zero which might indicate an error.\n";
3758
- }
3759
- size_t char_count = size;
3760
- print_summary(result, input_data.size(), char_count);
3761
- }
3762
-
3763
- /**
3764
- * Nemanja Trifunovic, UTF8-CPP: UTF-8 with C++ in a Portable Way
3765
- * https://github.com/nemtrif/utfcpp/releases/tag/v3.2.2
3766
- */
3767
- void Benchmark::run_convert_utf8_to_utf16_utfcpp(size_t iterations) {
3768
- const char *data = reinterpret_cast<const char *>(input_data.data());
3769
- const size_t size = input_data.size();
3770
- volatile size_t sink{0};
3771
-
3772
- auto proc = [data, size, &sink]() {
3773
- try {
3774
- std::vector<unsigned short> str;
3775
- utf8::utf8to16(data, data + size, std::back_inserter(str));
3776
- sink = str.size();
3777
- } catch (const char *msg) {
3778
- std::cout << msg << std::endl;
3779
- sink = 0;
3780
- } catch (...) {
3781
- sink = 0;
3782
- }
3783
- };
3784
- count_events(proc, iterations); // warming up!
3785
- const auto result = count_events(proc, iterations);
3786
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3787
- std::cerr
3788
- << "The output is zero which might indicate a misconfiguration.\n";
3789
- }
3790
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3791
- // checking
3792
- std::unique_ptr<char16_t[]> output_buffer{new char16_t[size]};
3793
- size_t expected = convert_utf8_to_utf16le(data, size, output_buffer.get());
3794
- if (expected != sink) {
3795
- std::cerr << "The number of UTF-16 code units does not match.\n";
3796
- }
3797
- print_summary(result, size, char_count);
3798
- }
3799
-
3800
- void Benchmark::run_convert_utf16_to_utf8_utfcpp(size_t iterations) {
3801
- const simdutf::encoding_type bom =
3802
- BOM::check_bom(input_data.data(), input_data.size());
3803
- const char16_t *data = reinterpret_cast<const char16_t *>(
3804
- input_data.data() + BOM::bom_byte_size(bom));
3805
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3806
- if (size % 2 != 0) {
3807
- printf("# The input size is not divisible by two (it is %zu + %zu for BOM)",
3808
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3809
- printf(" Running function on truncated input.\n");
3810
- }
3811
-
3812
- volatile size_t sink{0};
3813
- auto proc = [data, size, &sink]() {
3814
- try {
3815
- std::string str;
3816
- utf8::utf16to8(data, data + size, std::back_inserter(str));
3817
- sink = str.size();
3818
- } catch (const char *msg) {
3819
- std::cout << msg << std::endl;
3820
- sink = 0;
3821
- } catch (...) {
3822
- sink = 0;
3823
- }
3824
- };
3825
- count_events(proc, iterations); // warming up!
3826
- const auto result = count_events(proc, iterations);
3827
- size /= 2;
3828
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3829
- std::cerr << "The output is zero which might indicate an error.\n";
3830
- }
3831
-
3832
- size_t char_count = get_active_implementation()->count_utf16le(data, size);
3833
- print_summary(result, input_data.size(), char_count);
3834
- }
3835
-
3836
- void Benchmark::run_convert_utf8_to_utf32_utfcpp(size_t iterations) {
3837
- const char *data = reinterpret_cast<const char *>(input_data.data());
3838
- const size_t size = input_data.size();
3839
- volatile size_t sink{0};
3840
-
3841
- auto proc = [data, size, &sink]() {
3842
- try {
3843
- std::vector<int> str;
3844
- utf8::utf8to32(data, data + size, std::back_inserter(str));
3845
- sink = str.size();
3846
- } catch (const char *msg) {
3847
- std::cout << msg << std::endl;
3848
- sink = 0;
3849
- } catch (...) {
3850
- sink = 0;
3851
- }
3852
- };
3853
- count_events(proc, iterations); // warming up!
3854
- const auto result = count_events(proc, iterations);
3855
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3856
- std::cerr
3857
- << "The output is zero which might indicate a misconfiguration.\n";
3858
- }
3859
- size_t char_count = get_active_implementation()->count_utf8(data, size);
3860
- print_summary(result, size, char_count);
3861
- }
3862
-
3863
- void Benchmark::run_convert_utf32_to_utf8_utfcpp(size_t iterations) {
3864
- const simdutf::encoding_type bom =
3865
- BOM::check_bom(input_data.data(), input_data.size());
3866
- const char32_t *data = reinterpret_cast<const char32_t *>(
3867
- input_data.data() + BOM::bom_byte_size(bom));
3868
- size_t size = input_data.size() - BOM::bom_byte_size(bom);
3869
- if (size % 4 != 0) {
3870
- printf(
3871
- "# The input size is not divisible by four (it is %zu + %zu for BOM)",
3872
- size_t(input_data.size()), size_t(BOM::bom_byte_size(bom)));
3873
- printf(" Running function on truncated input.\n");
3874
- }
3875
-
3876
- volatile size_t sink{0};
3877
-
3878
- auto proc = [data, size, &sink]() {
3879
- try {
3880
- std::string str;
3881
- utf8::utf16to8(data, data + size, std::back_inserter(str));
3882
- sink = str.size();
3883
- } catch (const char *msg) {
3884
- std::cout << msg << std::endl;
3885
- sink = 0;
3886
- } catch (...) {
3887
- sink = 0;
3888
- }
3889
- };
3890
- count_events(proc, iterations); // warming up!
3891
- const auto result = count_events(proc, iterations);
3892
- if ((sink == 0) && (size != 0) && (iterations > 0)) {
3893
- std::cerr << "The output is zero which might indicate an error.\n";
3894
- }
3895
- size_t char_count = size / 4;
3896
- print_summary(result, input_data.size(), char_count);
3897
- }
3898
-
3899
- } // namespace simdutf::benchmarks