react-native-quick-crypto 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (687)
  1. package/android/build.gradle +5 -1
  2. package/cpp/argon2/HybridArgon2.cpp +10 -3
  3. package/cpp/blake3/HybridBlake3.cpp +5 -3
  4. package/cpp/cipher/CCMCipher.cpp +29 -16
  5. package/cpp/cipher/CCMCipher.hpp +2 -4
  6. package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
  7. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  8. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
  9. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  10. package/cpp/cipher/GCMCipher.cpp +14 -15
  11. package/cpp/cipher/HybridCipher.cpp +39 -36
  12. package/cpp/cipher/HybridCipher.hpp +17 -1
  13. package/cpp/cipher/HybridRsaCipher.cpp +74 -29
  14. package/cpp/cipher/OCBCipher.cpp +4 -3
  15. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
  16. package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
  17. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  18. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
  19. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  20. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  21. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  22. package/cpp/ecdh/HybridECDH.cpp +23 -0
  23. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  24. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  25. package/cpp/hash/HybridHash.cpp +5 -7
  26. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  27. package/cpp/hmac/HybridHmac.cpp +4 -6
  28. package/cpp/kmac/HybridKmac.cpp +4 -4
  29. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  30. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  31. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  32. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  33. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  34. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  35. package/cpp/sign/HybridSignHandle.cpp +25 -68
  36. package/cpp/sign/HybridVerifyHandle.cpp +23 -60
  37. package/cpp/utils/HybridUtils.cpp +183 -43
  38. package/cpp/utils/HybridUtils.hpp +9 -2
  39. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  40. package/lib/commonjs/argon2.js +51 -2
  41. package/lib/commonjs/argon2.js.map +1 -1
  42. package/lib/commonjs/cipher.js +109 -11
  43. package/lib/commonjs/cipher.js.map +1 -1
  44. package/lib/commonjs/dsa.js +8 -2
  45. package/lib/commonjs/dsa.js.map +1 -1
  46. package/lib/commonjs/hash.js +15 -5
  47. package/lib/commonjs/hash.js.map +1 -1
  48. package/lib/commonjs/hkdf.js +33 -6
  49. package/lib/commonjs/hkdf.js.map +1 -1
  50. package/lib/commonjs/hmac.js +15 -5
  51. package/lib/commonjs/hmac.js.map +1 -1
  52. package/lib/commonjs/keys/publicCipher.js +10 -4
  53. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  54. package/lib/commonjs/random.js +11 -2
  55. package/lib/commonjs/random.js.map +1 -1
  56. package/lib/commonjs/rsa.js +12 -5
  57. package/lib/commonjs/rsa.js.map +1 -1
  58. package/lib/commonjs/scrypt.js +47 -6
  59. package/lib/commonjs/scrypt.js.map +1 -1
  60. package/lib/commonjs/subtle.js +76 -5
  61. package/lib/commonjs/subtle.js.map +1 -1
  62. package/lib/commonjs/utils/cipher.js +18 -7
  63. package/lib/commonjs/utils/cipher.js.map +1 -1
  64. package/lib/commonjs/utils/conversion.js +33 -9
  65. package/lib/commonjs/utils/conversion.js.map +1 -1
  66. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  67. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  68. package/lib/commonjs/x509certificate.js +6 -6
  69. package/lib/commonjs/x509certificate.js.map +1 -1
  70. package/lib/module/argon2.js +51 -2
  71. package/lib/module/argon2.js.map +1 -1
  72. package/lib/module/cipher.js +109 -11
  73. package/lib/module/cipher.js.map +1 -1
  74. package/lib/module/dsa.js +8 -2
  75. package/lib/module/dsa.js.map +1 -1
  76. package/lib/module/hash.js +15 -5
  77. package/lib/module/hash.js.map +1 -1
  78. package/lib/module/hkdf.js +33 -6
  79. package/lib/module/hkdf.js.map +1 -1
  80. package/lib/module/hmac.js +15 -5
  81. package/lib/module/hmac.js.map +1 -1
  82. package/lib/module/keys/publicCipher.js +10 -4
  83. package/lib/module/keys/publicCipher.js.map +1 -1
  84. package/lib/module/random.js +11 -2
  85. package/lib/module/random.js.map +1 -1
  86. package/lib/module/rsa.js +11 -4
  87. package/lib/module/rsa.js.map +1 -1
  88. package/lib/module/scrypt.js +47 -6
  89. package/lib/module/scrypt.js.map +1 -1
  90. package/lib/module/subtle.js +76 -5
  91. package/lib/module/subtle.js.map +1 -1
  92. package/lib/module/utils/cipher.js +18 -7
  93. package/lib/module/utils/cipher.js.map +1 -1
  94. package/lib/module/utils/conversion.js +33 -9
  95. package/lib/module/utils/conversion.js.map +1 -1
  96. package/lib/module/utils/timingSafeEqual.js +8 -3
  97. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  98. package/lib/module/x509certificate.js +6 -6
  99. package/lib/module/x509certificate.js.map +1 -1
  100. package/lib/typescript/argon2.d.ts.map +1 -1
  101. package/lib/typescript/cipher.d.ts +2 -2
  102. package/lib/typescript/cipher.d.ts.map +1 -1
  103. package/lib/typescript/dsa.d.ts.map +1 -1
  104. package/lib/typescript/hash.d.ts +2 -2
  105. package/lib/typescript/hash.d.ts.map +1 -1
  106. package/lib/typescript/hkdf.d.ts.map +1 -1
  107. package/lib/typescript/hmac.d.ts +2 -2
  108. package/lib/typescript/hmac.d.ts.map +1 -1
  109. package/lib/typescript/index.d.ts +1 -1
  110. package/lib/typescript/index.d.ts.map +1 -1
  111. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  112. package/lib/typescript/random.d.ts.map +1 -1
  113. package/lib/typescript/rsa.d.ts.map +1 -1
  114. package/lib/typescript/scrypt.d.ts.map +1 -1
  115. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  116. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  117. package/lib/typescript/subtle.d.ts.map +1 -1
  118. package/lib/typescript/utils/cipher.d.ts +13 -1
  119. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  120. package/lib/typescript/utils/conversion.d.ts +9 -6
  121. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  122. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  123. package/lib/typescript/x509certificate.d.ts.map +1 -1
  124. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  125. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  126. package/package.json +37 -5
  127. package/src/argon2.ts +80 -2
  128. package/src/cipher.ts +139 -15
  129. package/src/dsa.ts +11 -2
  130. package/src/hash.ts +17 -7
  131. package/src/hkdf.ts +44 -6
  132. package/src/hmac.ts +17 -7
  133. package/src/keys/publicCipher.ts +10 -4
  134. package/src/random.ts +11 -2
  135. package/src/rsa.ts +18 -4
  136. package/src/scrypt.ts +73 -6
  137. package/src/specs/utils.nitro.ts +0 -2
  138. package/src/subtle.ts +90 -8
  139. package/src/utils/cipher.ts +30 -8
  140. package/src/utils/conversion.ts +58 -20
  141. package/src/utils/timingSafeEqual.ts +8 -3
  142. package/src/x509certificate.ts +5 -6
  143. package/deps/blake3/.cargo/config.toml +0 -2
  144. package/deps/blake3/.git-blame-ignore-revs +0 -2
  145. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  146. package/deps/blake3/.github/workflows/ci.yml +0 -491
  147. package/deps/blake3/.github/workflows/tag.yml +0 -43
  148. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  149. package/deps/blake3/CONTRIBUTING.md +0 -31
  150. package/deps/blake3/Cargo.toml +0 -135
  151. package/deps/blake3/b3sum/Cargo.lock +0 -513
  152. package/deps/blake3/b3sum/Cargo.toml +0 -26
  153. package/deps/blake3/b3sum/README.md +0 -72
  154. package/deps/blake3/b3sum/src/main.rs +0 -564
  155. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  156. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  157. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  158. package/deps/blake3/benches/bench.rs +0 -623
  159. package/deps/blake3/build.rs +0 -389
  160. package/deps/blake3/c/CMakeLists.txt +0 -383
  161. package/deps/blake3/c/CMakePresets.json +0 -73
  162. package/deps/blake3/c/Makefile.testing +0 -82
  163. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  164. package/deps/blake3/c/blake3_avx2.c +0 -326
  165. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  166. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  167. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  168. package/deps/blake3/c/blake3_avx512.c +0 -1388
  169. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  170. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  171. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  172. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  173. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  174. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  175. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  176. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  177. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  178. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  179. package/deps/blake3/c/blake3_sse2.c +0 -566
  180. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  181. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  182. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  183. package/deps/blake3/c/blake3_sse41.c +0 -560
  184. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  185. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  186. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  187. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  188. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  189. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  190. package/deps/blake3/c/example.c +0 -36
  191. package/deps/blake3/c/example_tbb.c +0 -57
  192. package/deps/blake3/c/libblake3.pc.in +0 -12
  193. package/deps/blake3/c/main.c +0 -166
  194. package/deps/blake3/c/test.py +0 -97
  195. package/deps/blake3/media/B3.svg +0 -70
  196. package/deps/blake3/media/BLAKE3.svg +0 -85
  197. package/deps/blake3/media/speed.svg +0 -1474
  198. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  199. package/deps/blake3/reference_impl/README.md +0 -14
  200. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  201. package/deps/blake3/src/ffi_avx2.rs +0 -65
  202. package/deps/blake3/src/ffi_avx512.rs +0 -169
  203. package/deps/blake3/src/ffi_neon.rs +0 -82
  204. package/deps/blake3/src/ffi_sse2.rs +0 -126
  205. package/deps/blake3/src/ffi_sse41.rs +0 -126
  206. package/deps/blake3/src/guts.rs +0 -60
  207. package/deps/blake3/src/hazmat.rs +0 -704
  208. package/deps/blake3/src/io.rs +0 -64
  209. package/deps/blake3/src/join.rs +0 -92
  210. package/deps/blake3/src/lib.rs +0 -1835
  211. package/deps/blake3/src/platform.rs +0 -587
  212. package/deps/blake3/src/portable.rs +0 -198
  213. package/deps/blake3/src/rust_avx2.rs +0 -474
  214. package/deps/blake3/src/rust_sse2.rs +0 -775
  215. package/deps/blake3/src/rust_sse41.rs +0 -766
  216. package/deps/blake3/src/test.rs +0 -1049
  217. package/deps/blake3/src/traits.rs +0 -227
  218. package/deps/blake3/src/wasm32_simd.rs +0 -794
  219. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  220. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  221. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  222. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  223. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  224. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  225. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  226. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  227. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  228. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  229. package/deps/blake3/tools/release.md +0 -16
  230. package/deps/ncrypto/.bazelignore +0 -4
  231. package/deps/ncrypto/.bazelrc +0 -1
  232. package/deps/ncrypto/.bazelversion +0 -1
  233. package/deps/ncrypto/.clang-format +0 -111
  234. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  235. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  236. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  237. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  238. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  239. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  240. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  241. package/deps/ncrypto/.python-version +0 -1
  242. package/deps/ncrypto/.release-please-manifest.json +0 -3
  243. package/deps/ncrypto/BUILD.bazel +0 -44
  244. package/deps/ncrypto/CHANGELOG.md +0 -37
  245. package/deps/ncrypto/CMakeLists.txt +0 -79
  246. package/deps/ncrypto/MODULE.bazel +0 -16
  247. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  248. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  249. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  250. package/deps/ncrypto/ncrypto.pc.in +0 -10
  251. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  252. package/deps/ncrypto/pyproject.toml +0 -38
  253. package/deps/ncrypto/release-please-config.json +0 -11
  254. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  255. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  256. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  257. package/deps/ncrypto/tests/basic.cpp +0 -856
  258. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  259. package/deps/simdutf/.clang-format +0 -4
  260. package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +0 -62
  261. package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +0 -1
  262. package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +0 -35
  263. package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +0 -29
  264. package/deps/simdutf/.github/pull_request_template.md +0 -51
  265. package/deps/simdutf/.github/workflows/aarch64.yml +0 -39
  266. package/deps/simdutf/.github/workflows/alpine.yml +0 -27
  267. package/deps/simdutf/.github/workflows/amalgamation_demos.yml +0 -34
  268. package/deps/simdutf/.github/workflows/armv7.yml +0 -32
  269. package/deps/simdutf/.github/workflows/atomic_fuzz.yml +0 -25
  270. package/deps/simdutf/.github/workflows/cifuzz.yml +0 -37
  271. package/deps/simdutf/.github/workflows/clangformat.yml +0 -36
  272. package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +0 -40
  273. package/deps/simdutf/.github/workflows/debian.yml +0 -33
  274. package/deps/simdutf/.github/workflows/documentation.yml +0 -36
  275. package/deps/simdutf/.github/workflows/emscripten.yml +0 -19
  276. package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +0 -39
  277. package/deps/simdutf/.github/workflows/macos-latest.yml +0 -29
  278. package/deps/simdutf/.github/workflows/msys2-clang.yml +0 -48
  279. package/deps/simdutf/.github/workflows/msys2.yml +0 -50
  280. package/deps/simdutf/.github/workflows/ppc64le.yml +0 -29
  281. package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +0 -35
  282. package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +0 -35
  283. package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +0 -31
  284. package/deps/simdutf/.github/workflows/s390x.yml +0 -29
  285. package/deps/simdutf/.github/workflows/selective-amalgamation.yml +0 -29
  286. package/deps/simdutf/.github/workflows/typos.yml +0 -19
  287. package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +0 -30
  288. package/deps/simdutf/.github/workflows/ubuntu22.yml +0 -32
  289. package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +0 -27
  290. package/deps/simdutf/.github/workflows/ubuntu22sani.yml +0 -29
  291. package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +0 -34
  292. package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +0 -34
  293. package/deps/simdutf/.github/workflows/ubuntu24.yml +0 -32
  294. package/deps/simdutf/.github/workflows/ubuntu24sani.yml +0 -36
  295. package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +0 -29
  296. package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +0 -21
  297. package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +0 -41
  298. package/deps/simdutf/.github/workflows/vs17-ci.yml +0 -41
  299. package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +0 -41
  300. package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +0 -36
  301. package/deps/simdutf/AI_USAGE_POLICY.md +0 -56
  302. package/deps/simdutf/AUTHORS +0 -6
  303. package/deps/simdutf/CMakeLists.txt +0 -231
  304. package/deps/simdutf/CONTRIBUTING.md +0 -214
  305. package/deps/simdutf/CONTRIBUTORS +0 -1
  306. package/deps/simdutf/Doxyfile +0 -2584
  307. package/deps/simdutf/Makefile.crosscompile +0 -54
  308. package/deps/simdutf/README-RVV.md +0 -16
  309. package/deps/simdutf/SECURITY.md +0 -8
  310. package/deps/simdutf/benchmarks/CMakeLists.txt +0 -101
  311. package/deps/simdutf/benchmarks/alignment.cpp +0 -150
  312. package/deps/simdutf/benchmarks/base64/CMakeLists.txt +0 -30
  313. package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +0 -875
  314. package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +0 -49
  315. package/deps/simdutf/benchmarks/base64/node_base64.h +0 -227
  316. package/deps/simdutf/benchmarks/base64/openssl3_base64.h +0 -334
  317. package/deps/simdutf/benchmarks/benchmark.cpp +0 -65
  318. package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +0 -347
  319. package/deps/simdutf/benchmarks/competition/.clang-format-ignore +0 -5
  320. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +0 -1276
  321. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +0 -595
  322. package/deps/simdutf/benchmarks/competition/README.md +0 -7
  323. package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +0 -91
  324. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +0 -444
  325. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +0 -13183
  326. package/deps/simdutf/benchmarks/competition/inoue2008/script.py +0 -73
  327. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +0 -738
  328. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +0 -293
  329. package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +0 -8
  330. package/deps/simdutf/benchmarks/competition/u8u16/Makefile +0 -44
  331. package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +0 -169
  332. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +0 -148
  333. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +0 -45
  334. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +0 -34
  335. package/deps/simdutf/benchmarks/competition/u8u16/README +0 -56
  336. package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +0 -43
  337. package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +0 -27
  338. package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +0 -16
  339. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +0 -18
  340. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +0 -16
  341. package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +0 -28
  342. package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +0 -20
  343. package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +0 -2
  344. package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +0 -440
  345. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +0 -121
  346. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +0 -158
  347. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +0 -270
  348. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +0 -141
  349. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +0 -216
  350. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +0 -119
  351. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +0 -2430
  352. package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +0 -39
  353. package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +0 -421
  354. package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +0 -836
  355. package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +0 -222
  356. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +0 -4
  357. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +0 -5
  358. package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +0 -390
  359. package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +0 -18
  360. package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +0 -448
  361. package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +0 -284
  362. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +0 -1975
  363. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
  364. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +0 -2263
  365. package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +0 -239
  366. package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +0 -232
  367. package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +0 -194
  368. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +0 -193
  369. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +0 -167
  370. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +0 -288
  371. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +0 -117
  372. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +0 -2
  373. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +0 -2
  374. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +0 -3
  375. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +0 -2
  376. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +0 -2
  377. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +0 -3
  378. package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +0 -2
  379. package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +0 -23
  380. package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +0 -44
  381. package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +0 -106
  382. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +0 -11
  383. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +0 -13
  384. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +0 -13
  385. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +0 -13
  386. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +0 -14
  387. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +0 -11
  388. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +0 -8
  389. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +0 -12
  390. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +0 -13
  391. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +0 -14
  392. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +0 -11
  393. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +0 -8
  394. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +0 -11
  395. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +0 -12
  396. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +0 -13
  397. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +0 -10
  398. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +0 -9
  399. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +0 -25
  400. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +0 -94
  401. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +0 -20
  402. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +0 -2
  403. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +0 -1
  404. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +0 -75
  405. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +0 -47
  406. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +0 -17
  407. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +0 -76
  408. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +0 -35
  409. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +0 -117
  410. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +0 -210
  411. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +0 -158
  412. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +0 -104
  413. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +0 -334
  414. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +0 -186
  415. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +0 -140
  416. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +0 -42
  417. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +0 -100
  418. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +0 -57
  419. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +0 -85
  420. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +0 -27
  421. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +0 -126
  422. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +0 -108
  423. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +0 -139
  424. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +0 -74
  425. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +0 -65
  426. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +0 -91
  427. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +0 -772
  428. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +0 -12
  429. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +0 -486
  430. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +0 -162
  431. package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +0 -15
  432. package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +0 -292
  433. package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +0 -23
  434. package/deps/simdutf/benchmarks/competition/utfcpp/README.md +0 -1503
  435. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +0 -335
  436. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +0 -338
  437. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +0 -103
  438. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +0 -103
  439. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +0 -274
  440. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +0 -34
  441. package/deps/simdutf/benchmarks/dataset/README.md +0 -155
  442. package/deps/simdutf/benchmarks/dataset/emoji.txt +0 -204
  443. package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +0 -40
  444. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +0 -80
  445. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +0 -20
  446. package/deps/simdutf/benchmarks/find/CMakeLists.txt +0 -6
  447. package/deps/simdutf/benchmarks/find/findbenchmark.cpp +0 -63
  448. package/deps/simdutf/benchmarks/find/findbenchmarker.h +0 -46
  449. package/deps/simdutf/benchmarks/shortbench.cpp +0 -555
  450. package/deps/simdutf/benchmarks/src/CMakeLists.txt +0 -52
  451. package/deps/simdutf/benchmarks/src/apple_arm_events.h +0 -1104
  452. package/deps/simdutf/benchmarks/src/benchmark.cpp +0 -3899
  453. package/deps/simdutf/benchmarks/src/benchmark.h +0 -317
  454. package/deps/simdutf/benchmarks/src/benchmark_base.cpp +0 -144
  455. package/deps/simdutf/benchmarks/src/benchmark_base.h +0 -98
  456. package/deps/simdutf/benchmarks/src/cmdline.cpp +0 -176
  457. package/deps/simdutf/benchmarks/src/cmdline.h +0 -35
  458. package/deps/simdutf/benchmarks/src/event_counter.h +0 -162
  459. package/deps/simdutf/benchmarks/src/linux-perf-events.h +0 -104
  460. package/deps/simdutf/benchmarks/stream.cpp +0 -209
  461. package/deps/simdutf/benchmarks/threaded.cpp +0 -123
  462. package/deps/simdutf/cmake/CPM.cmake +0 -1363
  463. package/deps/simdutf/cmake/JoinPaths.cmake +0 -23
  464. package/deps/simdutf/cmake/add_cpp_test.cmake +0 -68
  465. package/deps/simdutf/cmake/simdutf-config.cmake.in +0 -2
  466. package/deps/simdutf/cmake/simdutf-flags.cmake +0 -26
  467. package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +0 -4
  468. package/deps/simdutf/cmake/toolchains-dev/README.md +0 -32
  469. package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +0 -14
  470. package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +0 -22
  471. package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +0 -16
  472. package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +0 -16
  473. package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +0 -16
  474. package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +0 -38
  475. package/deps/simdutf/doc/avx512.png +0 -0
  476. package/deps/simdutf/doc/logo.png +0 -0
  477. package/deps/simdutf/doc/logo.svg +0 -165
  478. package/deps/simdutf/doc/node2023.png +0 -0
  479. package/deps/simdutf/doc/shortinput.md +0 -78
  480. package/deps/simdutf/doc/utf16utf8.png +0 -0
  481. package/deps/simdutf/doc/utf8utf16.png +0 -0
  482. package/deps/simdutf/doc/widelogo.png +0 -0
  483. package/deps/simdutf/doxygen.py +0 -50
  484. package/deps/simdutf/fuzz/.clang-format +0 -9
  485. package/deps/simdutf/fuzz/CMakeLists.txt +0 -45
  486. package/deps/simdutf/fuzz/README.md +0 -168
  487. package/deps/simdutf/fuzz/atomic_base64.cpp +0 -448
  488. package/deps/simdutf/fuzz/base64.cpp +0 -278
  489. package/deps/simdutf/fuzz/build.sh +0 -83
  490. package/deps/simdutf/fuzz/conversion.cpp +0 -669
  491. package/deps/simdutf/fuzz/helpers/.clang-format-ignore +0 -1
  492. package/deps/simdutf/fuzz/helpers/common.h +0 -135
  493. package/deps/simdutf/fuzz/helpers/nameof.hpp +0 -1258
  494. package/deps/simdutf/fuzz/main.cpp +0 -72
  495. package/deps/simdutf/fuzz/minimize_and_cleanse.sh +0 -87
  496. package/deps/simdutf/fuzz/misc.cpp +0 -216
  497. package/deps/simdutf/fuzz/random_fuzz.sh +0 -154
  498. package/deps/simdutf/fuzz/roundtrip.cpp +0 -588
  499. package/deps/simdutf/fuzz/safe_conversion.cpp +0 -104
  500. package/deps/simdutf/riscv/Dockerfile +0 -16
  501. package/deps/simdutf/riscv/README.md +0 -24
  502. package/deps/simdutf/riscv/remove-docker-station +0 -8
  503. package/deps/simdutf/riscv/run-docker-station +0 -31
  504. package/deps/simdutf/scripts/.flake8 +0 -2
  505. package/deps/simdutf/scripts/Makefile +0 -2
  506. package/deps/simdutf/scripts/README_ADD_FUNCTION.md +0 -49
  507. package/deps/simdutf/scripts/add_function.py +0 -330
  508. package/deps/simdutf/scripts/amalgamation_tests.py +0 -156
  509. package/deps/simdutf/scripts/base64/Makefile +0 -2
  510. package/deps/simdutf/scripts/base64/README.md +0 -2
  511. package/deps/simdutf/scripts/base64/avx512.py +0 -76
  512. package/deps/simdutf/scripts/base64/neon_decode.py +0 -143
  513. package/deps/simdutf/scripts/base64/neon_generate_lut.py +0 -101
  514. package/deps/simdutf/scripts/base64/sse.py +0 -252
  515. package/deps/simdutf/scripts/base64/sseregular.py +0 -160
  516. package/deps/simdutf/scripts/base64/sseurl.py +0 -283
  517. package/deps/simdutf/scripts/base64/table.py +0 -59
  518. package/deps/simdutf/scripts/base64bench_print.py +0 -145
  519. package/deps/simdutf/scripts/benchmark-all.py +0 -119
  520. package/deps/simdutf/scripts/benchmark_print.py +0 -324
  521. package/deps/simdutf/scripts/check_feature_macros.py +0 -156
  522. package/deps/simdutf/scripts/check_typos.sh +0 -13
  523. package/deps/simdutf/scripts/clang_format.sh +0 -35
  524. package/deps/simdutf/scripts/clang_format_docker.sh +0 -38
  525. package/deps/simdutf/scripts/common.py +0 -24
  526. package/deps/simdutf/scripts/compilation_benchmark.py +0 -55
  527. package/deps/simdutf/scripts/compile_many_variations.sh +0 -64
  528. package/deps/simdutf/scripts/create_latex_table.py +0 -62
  529. package/deps/simdutf/scripts/docker/Dockerfile +0 -14
  530. package/deps/simdutf/scripts/docker/Makefile +0 -9
  531. package/deps/simdutf/scripts/docker/README.md +0 -30
  532. package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
  533. package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +0 -155
  534. package/deps/simdutf/scripts/prepare_doxygen.sh +0 -21
  535. package/deps/simdutf/scripts/release.py +0 -197
  536. package/deps/simdutf/scripts/shortinputplots.py +0 -97
  537. package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +0 -422
  538. package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +0 -105
  539. package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +0 -186
  540. package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +0 -137
  541. package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +0 -129
  542. package/deps/simdutf/scripts/table.py +0 -207
  543. package/deps/simdutf/scripts/tests/new.txt +0 -33
  544. package/deps/simdutf/scripts/tests/old.txt +0 -33
  545. package/deps/simdutf/scripts/tests/results.txt +0 -272
  546. package/deps/simdutf/simdutf.pc.in +0 -11
  547. package/deps/simdutf/singleheader/.flake8 +0 -2
  548. package/deps/simdutf/singleheader/CMakeLists.txt +0 -64
  549. package/deps/simdutf/singleheader/README-dev.md +0 -81
  550. package/deps/simdutf/singleheader/README.md +0 -19
  551. package/deps/simdutf/singleheader/amalgamate.py +0 -513
  552. package/deps/simdutf/singleheader/amalgamation_demo.c +0 -59
  553. package/deps/simdutf/singleheader/amalgamation_demo.cpp +0 -54
  554. package/deps/simdutf/singleheader/test-features.py +0 -262
  555. package/deps/simdutf/src/CMakeLists.txt +0 -78
  556. package/deps/simdutf/tests/CMakeLists.txt +0 -483
  557. package/deps/simdutf/tests/atomic_base64_tests.cpp +0 -2845
  558. package/deps/simdutf/tests/base64_tests.cpp +0 -3617
  559. package/deps/simdutf/tests/basic_fuzzer.cpp +0 -805
  560. package/deps/simdutf/tests/bele_tests.cpp +0 -182
  561. package/deps/simdutf/tests/constexpr_base64_tests.cpp +0 -387
  562. package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +0 -52
  563. package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +0 -80
  564. package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +0 -66
  565. package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +0 -120
  566. package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +0 -203
  567. package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +0 -276
  568. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +0 -109
  569. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +0 -136
  570. package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +0 -193
  571. package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +0 -381
  572. package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +0 -259
  573. package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +0 -266
  574. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +0 -148
  575. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +0 -176
  576. package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +0 -213
  577. package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +0 -318
  578. package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +0 -343
  579. package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +0 -271
  580. package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +0 -111
  581. package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +0 -96
  582. package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +0 -148
  583. package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +0 -192
  584. package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +0 -166
  585. package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +0 -215
  586. package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +0 -181
  587. package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +0 -261
  588. package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +0 -516
  589. package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +0 -579
  590. package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +0 -412
  591. package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +0 -480
  592. package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +0 -671
  593. package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +0 -455
  594. package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +0 -1204
  595. package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +0 -337
  596. package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +0 -37
  597. package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +0 -97
  598. package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +0 -126
  599. package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +0 -71
  600. package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +0 -122
  601. package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +0 -244
  602. package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +0 -49
  603. package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +0 -92
  604. package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +0 -114
  605. package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +0 -109
  606. package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +0 -84
  607. package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +0 -124
  608. package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +0 -221
  609. package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +0 -155
  610. package/deps/simdutf/tests/count_utf16be.cpp +0 -64
  611. package/deps/simdutf/tests/count_utf16le.cpp +0 -61
  612. package/deps/simdutf/tests/count_utf8.cpp +0 -87
  613. package/deps/simdutf/tests/detect_encodings_tests.cpp +0 -312
  614. package/deps/simdutf/tests/embed/valid_utf8.txt +0 -1
  615. package/deps/simdutf/tests/embed_tests.cpp +0 -22
  616. package/deps/simdutf/tests/find_tests.cpp +0 -77
  617. package/deps/simdutf/tests/fixed_string_tests.cpp +0 -153
  618. package/deps/simdutf/tests/helpers/CMakeLists.txt +0 -25
  619. package/deps/simdutf/tests/helpers/compiletime_conversions.h +0 -222
  620. package/deps/simdutf/tests/helpers/fixed_string.h +0 -267
  621. package/deps/simdutf/tests/helpers/random_int.cpp +0 -30
  622. package/deps/simdutf/tests/helpers/random_int.h +0 -39
  623. package/deps/simdutf/tests/helpers/random_utf16.cpp +0 -123
  624. package/deps/simdutf/tests/helpers/random_utf16.h +0 -52
  625. package/deps/simdutf/tests/helpers/random_utf32.cpp +0 -41
  626. package/deps/simdutf/tests/helpers/random_utf32.h +0 -40
  627. package/deps/simdutf/tests/helpers/random_utf8.cpp +0 -93
  628. package/deps/simdutf/tests/helpers/random_utf8.h +0 -36
  629. package/deps/simdutf/tests/helpers/test.cpp +0 -231
  630. package/deps/simdutf/tests/helpers/test.h +0 -193
  631. package/deps/simdutf/tests/helpers/transcode_test_base.cpp +0 -1257
  632. package/deps/simdutf/tests/helpers/transcode_test_base.h +0 -683
  633. package/deps/simdutf/tests/helpers/utf16.h +0 -27
  634. package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +0 -43
  635. package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +0 -47
  636. package/deps/simdutf/tests/internal_tests.cpp +0 -27
  637. package/deps/simdutf/tests/null_safety_tests.cpp +0 -94
  638. package/deps/simdutf/tests/random_fuzzer.cpp +0 -779
  639. package/deps/simdutf/tests/readme_tests.cpp +0 -274
  640. package/deps/simdutf/tests/reference/CMakeLists.txt +0 -23
  641. package/deps/simdutf/tests/reference/decode_utf16.h +0 -81
  642. package/deps/simdutf/tests/reference/decode_utf32.h +0 -47
  643. package/deps/simdutf/tests/reference/encode_latin1.cpp +0 -1
  644. package/deps/simdutf/tests/reference/encode_latin1.h +0 -32
  645. package/deps/simdutf/tests/reference/encode_utf16.cpp +0 -49
  646. package/deps/simdutf/tests/reference/encode_utf16.h +0 -20
  647. package/deps/simdutf/tests/reference/encode_utf32.cpp +0 -1
  648. package/deps/simdutf/tests/reference/encode_utf32.h +0 -36
  649. package/deps/simdutf/tests/reference/encode_utf8.cpp +0 -1
  650. package/deps/simdutf/tests/reference/encode_utf8.h +0 -40
  651. package/deps/simdutf/tests/reference/validate_utf16.cpp +0 -60
  652. package/deps/simdutf/tests/reference/validate_utf16.h +0 -14
  653. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +0 -35
  654. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +0 -13
  655. package/deps/simdutf/tests/reference/validate_utf32.cpp +0 -27
  656. package/deps/simdutf/tests/reference/validate_utf32.h +0 -12
  657. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +0 -27
  658. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +0 -12
  659. package/deps/simdutf/tests/reference/validate_utf8.cpp +0 -82
  660. package/deps/simdutf/tests/reference/validate_utf8.h +0 -11
  661. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +0 -43
  662. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +0 -12
  663. package/deps/simdutf/tests/select_implementation.cpp +0 -43
  664. package/deps/simdutf/tests/simdutf_c_tests.cpp +0 -244
  665. package/deps/simdutf/tests/span_tests.cpp +0 -401
  666. package/deps/simdutf/tests/special_tests.cpp +0 -559
  667. package/deps/simdutf/tests/straight_c_test.c +0 -187
  668. package/deps/simdutf/tests/text_encoding_tests.cpp +0 -77
  669. package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +0 -377
  670. package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +0 -202
  671. package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +0 -165
  672. package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +0 -77
  673. package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +0 -175
  674. package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +0 -188
  675. package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +0 -268
  676. package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +0 -274
  677. package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +0 -92
  678. package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +0 -114
  679. package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +0 -178
  680. package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +0 -88
  681. package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +0 -33
  682. package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +0 -228
  683. package/deps/simdutf/tools/CMakeLists.txt +0 -85
  684. package/deps/simdutf/tools/fastbase64.cpp +0 -250
  685. package/deps/simdutf/tools/sutf.cpp +0 -556
  686. package/deps/simdutf/tools/sutf.h +0 -40
  687. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -1,2263 +0,0 @@
1
- \documentclass{cweb}
2
- \usepackage[in]{fullpage}
3
- % The following eliminates excess page ejects between sections.
4
- \def\CwebRankNoEject{1}
5
- \begin{document}
6
- \title{{\tt u8u16} - A High-Speed UTF-8 to UTF-16 Transcoder Using
7
- Parallel Bit Streams\\}
8
- \author{Robert D. Cameron\\
9
- Technical Report 2007-18\\
10
- (Revised Aug. 2008)\\
11
- School of Computing Science\\
12
- Simon Fraser University\\}
13
- \maketitle
14
- \tableofcontents
15
-
16
- @* Introduction.
17
-
18
- The |u8u16| program is a high-performance UTF-8 to UTF-16
19
- transcoder using a SIMD programming technique
20
- based on parallel bit streams. In essence, character
21
- data is processed in blocks of size |BLOCKSIZE|, where
22
- |BLOCKSIZE| is the number of bits that are held in a
23
- SIMD register. A set of parallel registers each contain
24
- one bit per character code unit for |BLOCKSIZE| consecutive
25
- code unit positions. For example, in UTF-8 processing,
26
- eight parallel registers are used for the eight
27
- individual bits of the UTF-8 code units.
28
-
29
- The |u8u16| transcoder written in this way takes
30
- advantage of the pipelining and SIMD capabilities
31
- of modern processor architectures to achieve
32
- substantially better performance than byte-at-a-time
33
- conversion.
34
-
35
- The |u8u16| program is open source software written by
36
- Professor Rob Cameron of Simon Fraser University.
37
- International Characters, Inc., distributes and licenses
38
- |u8u16| to the general public under the terms of Open
39
- Software License 3.0. The program contains patent-pending
40
- technology of International Characters, Inc., licensed
41
- under the terms of OSL 3.0. Commercial licenses are also
42
- available.
43
-
44
- The |u8u16| program is written using the CWEB system
45
- for literate programming in C (Donald E. Knuth and
46
- Silvio Levy, The CWEB System of Structured Documentation,
47
- Addison Wesley, 1993).
48
-
49
- @* Idealized SIMD Library.
50
-
51
- The |u8u16| program is written using a library of idealized
52
- SIMD operations. The library simplifies implementation on
53
- multiple SIMD instruction set architectures by providing a
54
- common set of operations available on each architecture.
55
- It also simplifies programming of SIMD algorithms in
56
- general, by providing an orthogonal set of capabilities
57
- that apply at each field width 2, 4, 8, $\ldots$, |BLOCKSIZE|, as
58
- well as providing half-operand modifiers to support
59
- inductive doubling algorithms. These simplifications
60
- lead to substantial reductions in instruction count for
61
- key algorithms such as transposition, bit counting and bit
62
- deletion.
63
-
64
- Beyond simplification of the programming task, the use
65
- of the idealized operations anticipates the day that SIMD
66
- architectures will provide native support for the inductive
67
- doubling extensions, at which point reduced instruction
68
- counts ought to translate directly to further
69
- performance improvements.
70
-
71
- The idealized library defines a type |SIMD_type| as the
72
- type of SIMD register values and a set of operations on
73
- these registers.
74
- Detailed documentation of the idealized library is provided
75
- elsewhere, but the following brief description introduces
76
- the operations used by |u8u16|.
77
-
78
- In general, operations specify a field width as part of
79
- the operation name. For example, |r0 = simd_add_16(r1, r2)|
80
- adds corresponding 16-bit fields of two register values |r1|
81
- and |r2| to produce a result register |r0|. Working with
82
- 128-bit registers, for example, 8 simultaneous additions are performed.
83
- Similarly, |simd_add_2| allows simultaneous addition of
84
- 64 sets of 2-bit fields, |simd_add_4| allows simultaneous addition
85
- of 32 sets of 4-bit fields and so on. Subtraction follows
86
- the same pattern: |simd_sub_8| provides for 16 simultaneous
87
- subtractions within 8-bit fields, while |simd_sub_128|
88
- provides subtraction of entire register values considered
89
- as 128-bit fields.
90
-
91
- Shift and rotate operations allow shifting of the field values
92
- of one register by independent shift counts in the fields of
93
- a second register. For example, |b = simd_rotl_4(a, s)| assigns
94
- to |b| the result of rotating each 4-bit field of |a| left by
95
- the corresponding 4-bit shift value from register |s|.
96
- Similarly |simd_sll_8| and |simd_srl_2| represent shift left logical
97
- of 8-bit fields and shift right logical of 2-bit fields.
98
- However, shift values are interpreted as modulo field size;
99
- the maximum shift within an 8-bit field is thus 7.
100
-
101
- Each of the shift operations also has an immediate form,
102
- in which all fields are shifted by a particular constant value.
103
- Thus |simd_rotli_8(a, 2)| yields a result in which the 8-bit
104
- fields of a have each been rotated left 2 positions.
105
- These immediate forms are both convenient for programming and
106
- support efficient implementation.
107
-
108
- The prefix |sisd| (single-instruction single-data) is available
109
- for any of the arithmetic and shift operations when the
110
- entire register is to be considered as a single field.
111
- Thus, |sisd_slli(a, 1)| is equivalent to |simd_slli_64(a, 1)|
112
- when working with 64-bit SIMD registers (e.g., MMX) or
113
- |simd_slli_128(a, 1)| when working with 128-bit registers (Altivec, SSE).
114
-
115
- Half-operand modifiers permit convenient transitions between
116
- processing of $n/2$ bit fields and $n$ bit fields in
117
- inductive algorithms. Given an operation on $n$ bit fields,
118
- the |l| modifier specifies that only the low $n/2$ bits
119
- of each input field are to be used for the operation, while the
120
- |h| modifier specifies that the high $n/2$ bits are to be used
121
- (shifted into the low $n/2$ positions). For example,
122
- |cts2 = simd_add_2_lh(a, a)| specifies that the low bit of
123
- each 2-bit field of |a| is to be added to the high bit of
124
- each 2-bit field. Each 2-bit field of |cts2| is thus the
125
- count of the number of bits (0, 1 or 2) in the corresponding 2-bit
126
- field of |a|. Similarly, |cts4 = simd_add_4_lh(cts2, cts2)|
127
- determines each 4-bit field of |cts4| as the sum of the
128
- low 2-bit count and the high 2-bit count of each 4-bit field
129
- of |cts2|. The bit counts in 4-bit fields of
130
- |a| are thus computed after two inductive steps. Further
131
- operations |cts8 = simd_add_8_lh(cts4, cts4)|
132
- and |cts16 = simd_add_16_lh(cts8, cts8)| give the bit counts
133
- in the 16-bit fields of |a| after four total steps of
134
- inductive doubling.
135
-
136
- Merge and pack operations also support inductive doubling
137
- transitions. The |simd_mergeh_4(a, b)| operation returns the
138
- result of merging alternating 4-bit fields from the high halves
139
- of |a| and |b|, while |simd_mergel_4(a, b)| performs
140
- the complementary merge from the low halves. The |simd_pack_4(a, b)|
141
- packs each consecutive pair of 4-bit fields from the concatenation of
142
- |a| and |b| into a single 4-bit field by unsigned saturation.
143
-
144
- Bitwise logical operations are considered as simultaneous
145
- logical operations with an implicit field size of 1.
146
- The functions |simd_and|, |simd_or|, |simd_xor|, |simd_andc|
147
- and |simd_nor| each take two register values as arguments and
148
- return the register value representing the specified
149
- bitwise logical combination of the register values. The one-argument
150
- function |simd_not| provides bitwise negation, while the three-argument function
151
- |simd_if(a, b, c)| is equivalent to |simd_or(simd_and(b, a), simd_andc(c, a))|.
152
-
153
- The |simd_const| operations load a specified immediate constant
154
- into all fields of a register. Thus, |simd_const_8(12)| loads the
155
- value 12 into every byte, while |simd_const_1(1)| loads 1
156
- into every bit.
157
-
158
- Loading and storing of registers to and from memory is
159
- provided by the |sisd_load_unaligned|, |sisd_load_aligned|,
160
- |sisd_store_unaligned| and |sisd_store_aligned| operations.
161
- The |sisd_load_unaligned(addr)| operation returns the
162
- register value at memory address |addr|, which may have
163
- arbitrary alignment, while |sisd_load_aligned(addr)| requires
164
- that the address be aligned. The |sisd_store_unaligned(val, addr)|
165
- operation stores a value at an arbitrary memory address |addr|,
166
- while the aligned version again requires that the address
167
- be aligned on a natural boundary.
168
-
169
- The actual SIMD library used is selected based on
170
- a configuration option |U8U16_TARGET|. Definitions for
171
- particular targets are given in later sections.
172
-
173
- @s SIMD_type char
174
-
175
- @<Import idealized SIMD operations@>=
176
- #ifndef U8U16_TARGET
177
- #error "No U8U16_TARGET defined."
178
- #endif
179
-
180
- @* Calling Conventions - iconv-based.
181
-
182
- The |u8u16| routine uses an interface based on the |iconv|
183
- specification (|iconv| - codeset conversion function,
184
- The Single UNIX Specification, Version 2, 1997, The Open Group).
185
- However, the first argument to |iconv| (the conversion descriptor)
186
- is omitted as |u8u16| is specialized to the task of UTF-8 to UTF-16
187
- conversion.
188
-
189
- In normal operation, |u8u16| is given an input buffer |**inbuf|
190
- containing |*inbytesleft| bytes of UTF-8 data and an
191
- output buffer |**outbuf| having room for |*outbytesleft| bytes of
192
- UTF-16 output. UTF-8 data is converted and written to the
193
- output buffer so long as the data is valid in accord with
194
- UTF-8 requirements and neither the input nor output buffer is
195
- exhausted.
196
-
197
- @c
198
-
199
- size_t
200
- u8u16(char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
201
-
202
- @ On exit, the value of |*inbuf| will be adjusted to the first
203
- position after the last UTF-8 input sequence processed and the
204
- value |*inbytesleft| will be reduced
205
- by the number of bytes of UTF-8 data converted.
206
- Similarly, the value of |*outbuf| will be adjusted to the first
207
- position after the last converted UTF-16 unit written to output and the
208
- value |*outbytesleft| will be reduced to indicate the remaining
209
- space available in the output buffer. Contents of the output
210
- buffer after the end of converted output (that is, at locations
211
- past the final value of |*outbuf|) are undefined.
212
- Preexisting values in these locations may or may not be preserved.
213
-
214
- If it is necessary to ensure that preexisting values
215
- of the output buffer past the final value of |*outbuf|
216
- are preserved, a buffered version of |u8u16| may be
217
- used. This routine is also used internally by |u8u16|
218
- when the size of the output buffer is potentially too
219
- small to hold UTF-16 data corresponding to |*inbytesleft|
220
- UTF-8 data.
221
-
222
- @c
223
- size_t
224
- buffered_u8u16(char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
225
-
226
- @ Return codes for each of |u8u16| and |buffered_u8u16| are
227
- set as follows.
228
- If the complete input buffer is successfully converted,
229
- |*inbytesleft| will be set to 0 and 0 will be returned as the
230
- result of the call. Otherwise -1 is returned and |errno| is set
231
- according to the encountered error. If conversion terminates
232
- due to an invalid UTF-8 sequence, |errno| is set to |EILSEQ|.
233
- If conversion terminates because insufficient space remains
234
- in the output buffer |errno| is set to |E2BIG|. If the
235
- input buffer ends with an incomplete UTF-8 code unit sequence,
236
- |errno| is set to |EINVAL|.
237
-
238
- For compatibility with |iconv|, if either |inbuf| or
239
- |*inbuf| is null, the call to |u8u16| is treated as
240
- an initialization call with an empty buffer and 0 is
241
- returned. If either |outbuf| or |*outbuf| is null,
242
- the output buffer is treated as having no room.
243
-
244
- The top-level structure of |u8u16| implements the initial
245
- null checks on the buffer pointers and determines
246
- whether the output buffer is of sufficient size
247
- to avoid overflow with the aligned output methods
248
- of |u8u16|. The efficient block processing code
249
- of |u8u16| is used directly if the output buffer
250
- is guaranteed to be big enough; otherwise the
251
- buffered version of the converter is invoked.
252
-
253
- @c
254
- #include <stdlib.h>
255
- #include <errno.h>
256
- #include <stdint.h>
257
- #include <string.h>
258
- @<Import idealized SIMD operations@>@;
259
- @<Type declarations@>@;
260
- @h
261
- @<Endianness definitions@>@;
262
- size_t
263
- u8u16(char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) {
264
- @<Local variable declarations@>@;
265
- if (inbuf && *inbuf && outbuf && *outbuf) /* are all non-NULL */ @+ {
266
- if (@<Output buffer is guaranteed to be big enough@>)
267
- @<Main block processing algorithm of |u8u16|@>@;
268
- else return buffered_u8u16(inbuf, inbytesleft, outbuf, outbytesleft);
269
- }
270
- else if (inbuf == NULL || *inbuf == NULL || *inbytesleft == 0) @+
271
- return (size_t) 0;
272
- else {@+ errno = E2BIG; @+ return (size_t) -1; @+ }
273
- }
274
-
275
- @ In the case of aligned output, guaranteeing that the
276
- output buffer contains sufficient space
277
- to hold the UTF-16 data corresponding to |*inbytesleft| UTF-8 code
278
- units requires that the buffer
279
- contain 2 bytes for each input byte plus any
280
- additional space needed to reach the next |BytePack|
281
- alignment boundary.
282
-
283
- @d align_ceil(addr) ((addr + PACKSIZE - 1) & -PACKSIZE)
284
-
285
- @<Output buffer is guaranteed to be big enough@>=
286
- (intptr_t) *outbuf + *outbytesleft > align_ceil((intptr_t) *outbuf + 2*(*inbytesleft))
287
-
288
-
289
-
290
- @* Data Representations.
291
-
292
- @*1 Serial BytePacks and Parallel BitBlocks.
293
-
294
- The |BytePack| and the |BitBlock| are the two fundamental
295
- types used by the |u8u16| program for data held in
296
- SIMD registers, representing, respectively, the byte-oriented
297
- and bit-oriented views of character data.
298
-
299
- @d PACKSIZE sizeof(SIMD_type)
300
- @d BLOCKSIZE (sizeof(SIMD_type) * 8)
301
- @s BytePack char
302
- @s BitBlock char
303
- @<Type declarations@>=
304
- typedef SIMD_type BytePack;
305
- typedef SIMD_type BitBlock;
306
-
307
-
308
- @ A block of UTF-8 character data is initially loaded and represented
309
- as a series of eight consecutive bytepacks |U8s0|, |U8s1|, $\ldots$, |U8s7|.
310
- Upon transposition to bit-parallel form, the same data is represented
311
- as eight parallel bitblocks |u8bit0|, |u8bit1|, $\ldots$, |u8bit7|.
312
-
313
- @<Local variable declarations@>=
314
- BytePack U8s0, U8s1, U8s2, U8s3, U8s4, U8s5, U8s6, U8s7;
315
- BitBlock u8bit0, u8bit1, u8bit2, u8bit3, u8bit4, u8bit5, u8bit6, u8bit7;
316
-
317
- @ UTF-16 data may then be computed in the form of sixteen parallel
318
- bitblocks for each of the individual bits of UTF-16 code units.
319
- The registers |u16hi0|, |u16hi1|, $\ldots$, |u16hi7| are used to store
320
- this data for the high byte of each UTF-16
321
- code unit, while |u16lo0|, |u16lo1|, $\ldots$, |u16lo7| are used for
322
- the bits of the corresponding low byte.
323
- Upon conversion of the parallel bit stream data back to byte
324
- streams,
325
- registers |U16h0|, |U16h1|, $\ldots$, |U16h7| are used for the
326
- high byte of each UTF-16 code unit, while |U16l0|, |U16l1|, $\ldots$, |U16l7| are used for
327
- the corresponding low byte.
328
- Finally, the registers |U16s0|, |U16s1|, $\ldots$, |U16s15| are then used for
329
- UTF-16 data in serial code unit form (2 bytes per code unit) after
330
- merging the high and low byte streams.
331
-
332
- @<Local variable declarations@>=
333
- BitBlock u16hi0, u16hi1, u16hi2, u16hi3, u16hi4, u16hi5, u16hi6, u16hi7;
334
- BitBlock u16lo0, u16lo1, u16lo2, u16lo3, u16lo4, u16lo5, u16lo6, u16lo7;
335
- BytePack U16h0, U16h1, U16h2, U16h3, U16h4, U16h5, U16h6, U16h7;
336
- BytePack U16l0, U16l1, U16l2, U16l3, U16l4, U16l5, U16l6, U16l7;
337
- BytePack U16s0, U16s1, U16s2, U16s3, U16s4, U16s5, U16s6, U16s7,
338
- U16s8, U16s9, U16s10, U16s11, U16s12, U16s13, U16s14, U16s15;
339
-
340
- @*1 BitBlock Control Masks.
341
-
342
- @ Several block-based bitmasks are used to control processing.
343
- The |input_select_mask| is used to identify
344
- with a 1 bit those positions within the block to be included in
345
- processing. Normally consisting of a block of all ones during
346
- processing of full blocks, |input_select_mask| allows a final
347
- partial block to be processed using the logic for full blocks
348
- with a masking operation to zero out data positions that
349
- are out of range. In some algorithm variations, certain
350
- positions in the processing of full blocks may be zeroed out
351
- with |input_select_mask| in order to handle alignment or
352
- boundary issues.
353
-
354
- @<Local variable declarations@>=
355
- BitBlock input_select_mask;
356
-
357
- @ When UTF-8 validation identifies errors in the
358
- input stream, the positions of these errors are signalled
359
- by |error_mask|. Errors in the scope of |input_select_mask|
360
- represent and must be reported as actual errors in UTF-8
361
- sequence formation. In processing a final partial block,
362
- an error just past the final |input_select_mask| position
363
- indicates an incomplete UTF-8 sequence at the end of the input.
364
-
365
- @<Local variable declarations@>=
366
- BitBlock error_mask;
367
-
368
- @ The generation of UTF-16 data
369
- is controlled by |delmask|. One doublebyte code unit is
370
- to be generated for each nondeleted position; while no
371
- output is generated for deleted positions.
372
-
373
- @<Local variable declarations@>=
374
- BitBlock delmask;
375
-
376
- @* Endianness.
377
-
378
- @ Depending on the endianness of the machine, the
379
- ordering of bytes within SIMD registers may be from left to right
380
- (big endian) or right to left (little endian).
381
- Upon transformation
382
- to parallel bit streams, the ordering of bit values may
383
- similarly vary. To remove the dependencies of core bit-stream
384
- algorithms on endianness, logical ``shift forward'' and
385
- ``shift back'' operations are defined for bitblocks.
386
-
387
- @<Endianness definitions@>=
388
- #if BYTE_ORDER == BIG_ENDIAN
389
- #define sisd_sfl(blk, n) sisd_srl(blk, n)
390
- #define sisd_sbl(blk, n) sisd_sll(blk, n)
391
- #define sisd_sfli(blk, n) sisd_srli(blk, n)
392
- #define sisd_sbli(blk, n) sisd_slli(blk, n)
393
- #endif
394
- #if BYTE_ORDER == LITTLE_ENDIAN
395
- #define sisd_sfl(blk, n) sisd_sll(blk, n)
396
- #define sisd_sbl(blk, n) sisd_srl(blk, n)
397
- #define sisd_sfli(blk, n) sisd_slli(blk, n)
398
- #define sisd_sbli(blk, n) sisd_srli(blk, n)
399
- #endif
400
-
401
- #define bitblock_sfl(blk, n) sisd_sfl(blk, n)
402
- #define bitblock_sbl(blk, n) sisd_sbl(blk, n)
403
- #define bitblock_sfli(blk, n) sisd_sfli(blk, n)
404
- #define bitblock_sbli(blk, n) sisd_sbli(blk, n)
405
-
406
- @ The |u8u16| program may also be configured to assemble
407
- UTF-16 code units in accord with either the UTF-16BE
408
- conventions (the default) or those of UTF-16LE.
409
- To accommodate these variations, the
410
- |u16_merge0| and |u16_merge1| macros are defined to
411
- control assembly of UTF-16 doublebyte streams
412
- from the individual high and low byte streams.
413
-
414
- @<Endianness definitions@>=
415
- #if BYTE_ORDER == BIG_ENDIAN
416
- #ifdef UTF16_LE
417
- #define u16_merge0(a, b) simd_mergeh_8(b, a)
418
- #define u16_merge1(a, b) simd_mergel_8(b, a)
419
- #endif
420
- #ifndef UTF16_LE
421
- #define u16_merge0(a, b) simd_mergeh_8(a, b)
422
- #define u16_merge1(a, b) simd_mergel_8(a, b)
423
- #endif
424
- #endif
425
-
426
- #if BYTE_ORDER == LITTLE_ENDIAN
427
- #ifdef UTF16_LE
428
- #define u16_merge0(a, b) simd_mergel_8(a, b)
429
- #define u16_merge1(a, b) simd_mergeh_8(a, b)
430
- #endif
431
- #ifndef UTF16_LE
432
- #define u16_merge0(a, b) simd_mergel_8(b, a)
433
- #define u16_merge1(a, b) simd_mergeh_8(b, a)
434
- #endif
435
- #endif
436
-
437
- @* Transposition Between Serial Byte Streams and Parallel Bit Streams.
438
-
439
- Core to the |u8u16| transcoder are algorithms for converting
440
- serial byte streams of character data to bit parallel form (|s2p|),
441
- and the corresponding inverse transformation (|p2s|).
442
-
443
- Conversion of serial byte data to and from parallel bit streams
444
- is performed using either generic transposition algorithms
445
- for the idealized SIMD architecture or algorithms better tuned
446
- to specific processor architectures. The generic versions described
447
- here are the simplest and most efficient in terms of idealized
448
- operations, requiring a mere 24 operations for transposition
449
- of a data block in either direction. However, these versions
450
- use operations which must be simulated on existing architectures.
451
- The appendices provide implementations for common alternative
452
- architectures. The specific algorithm to be chosen is specified
453
- by the value of the preprocessor constants |S2P_ALGORITHM| and
454
- |P2S_ALGORITHM|.
455
-
456
- @*1 Serial To Parallel Transposition.
457
- The |s2p_ideal| transposition
458
- is achieved in three stages.
459
- In the first stage, the input stream of serial byte
460
- data is separated into two streams of serial nybble data.
461
- Eight consecutive registers of
462
- byte data |r0|, |r1|, |r2|, |r3|, |r4|, |r5|, |r6|, |r7| are
463
- transformed into two sets of four parallel registers: |bit0123_0|,
464
- |bit0123_1|, |bit0123_2|, |bit0123_3| for the high nybbles
465
- of each byte and |bit4567_0|,
466
- |bit4567_1|, |bit4567_2|, |bit4567_3| for the low nybbles of
467
- each byte. In the second stage, each stream of serial nybble
468
- data is transformed into two streams of serial bit pairs.
469
- For example, serial nybble data in the four registers |bit0123_0|,
470
- |bit0123_1|, |bit0123_2|, and |bit0123_3| is transformed
471
- into two sets of parallel registers for the high and low bit pairs,
472
- namely |bit01_0| and |bit01_1| for bits 0 and 1 of the original byte
473
- data and |bit23_0| and |bit23_1| for bits 2 and 3 of the original
474
- byte data. The third stage completes the transposition process
475
- by transforming streams of bit pairs into the individual bit streams.
476
- Using the idealized architecture, each of these stages
477
- is implemented using a set of eight |simd_pack| operations.
478
-
479
- @d s2p_ideal(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)
480
- {
481
- BitBlock bit0123_0, bit0123_1, bit0123_2, bit0123_3,
482
- bit4567_0, bit4567_1, bit4567_2, bit4567_3;
483
- BitBlock bit01_0, bit01_1, bit23_0, bit23_1, bit45_0, bit45_1, bit67_0, bit67_1;
484
- bit0123_0 = simd_pack_8_hh(s0, s1);
485
- bit0123_1 = simd_pack_8_hh(s2, s3);
486
- bit0123_2 = simd_pack_8_hh(s4, s5);
487
- bit0123_3 = simd_pack_8_hh(s6, s7);
488
- bit4567_0 = simd_pack_8_ll(s0, s1);
489
- bit4567_1 = simd_pack_8_ll(s2, s3);
490
- bit4567_2 = simd_pack_8_ll(s4, s5);
491
- bit4567_3 = simd_pack_8_ll(s6, s7);
492
- bit01_0 = simd_pack_4_hh(bit0123_0, bit0123_1);
493
- bit01_1 = simd_pack_4_hh(bit0123_2, bit0123_3);
494
- bit23_0 = simd_pack_4_ll(bit0123_0, bit0123_1);
495
- bit23_1 = simd_pack_4_ll(bit0123_2, bit0123_3);
496
- bit45_0 = simd_pack_4_hh(bit4567_0, bit4567_1);
497
- bit45_1 = simd_pack_4_hh(bit4567_2, bit4567_3);
498
- bit67_0 = simd_pack_4_ll(bit4567_0, bit4567_1);
499
- bit67_1 = simd_pack_4_ll(bit4567_2, bit4567_3);
500
- p0 = simd_pack_2_hh(bit01_0, bit01_1);
501
- p1 = simd_pack_2_ll(bit01_0, bit01_1);
502
- p2 = simd_pack_2_hh(bit23_0, bit23_1);
503
- p3 = simd_pack_2_ll(bit23_0, bit23_1);
504
- p4 = simd_pack_2_hh(bit45_0, bit45_1);
505
- p5 = simd_pack_2_ll(bit45_0, bit45_1);
506
- p6 = simd_pack_2_hh(bit67_0, bit67_1);
507
- p7 = simd_pack_2_ll(bit67_0, bit67_1);
508
- }
509
-
510
- @<Transpose to parallel bit streams |u8bit0| through |u8bit7|@>=
511
- #if (S2P_ALGORITHM == S2P_IDEAL)
512
- #if (BYTE_ORDER == BIG_ENDIAN)
513
- s2p_ideal(U8s0, U8s1, U8s2, U8s3, U8s4, U8s5, U8s6, U8s7, @/
514
- u8bit0, u8bit1, u8bit2, u8bit3, u8bit4, u8bit5, u8bit6, u8bit7)@;
515
- #endif
516
- #if (BYTE_ORDER == LITTLE_ENDIAN)
517
- s2p_ideal(U8s7, U8s6, U8s5, U8s4, U8s3, U8s2, U8s1, U8s0, @/
518
- u8bit0, u8bit1, u8bit2, u8bit3, u8bit4, u8bit5, u8bit6, u8bit7)@;
519
- #endif
520
- #endif
521
-
522
- @*1 Parallel to Serial Transposition.
523
- The inverse |p2s_ideal| transposition creates serial byte data by successively
524
- merging 8 parallel bit streams to become 4 streams of bit pairs,
525
- merging these 4 streams of bit pairs to become 2 streams of nybbles,
526
- and finally merging the 2 streams of nybbles into a serial byte stream.
527
-
528
- @d p2s_ideal(p0, p1, p2, p3, p4, p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7)
529
- {
530
- BitBlock bit01_r0, bit01_r1, bit23_r0, bit23_r1, bit45_r0, bit45_r1, bit67_r0, bit67_r1;
531
- BitBlock bit0123_r0, bit0123_r1, bit0123_r2, bit0123_r3,
532
- bit4567_r0, bit4567_r1, bit4567_r2, bit4567_r3;
533
- bit01_r0 = simd_mergeh_1(p0, p1);
534
- bit01_r1 = simd_mergel_1(p0, p1);
535
- bit23_r0 = simd_mergeh_1(p2, p3);
536
- bit23_r1 = simd_mergel_1(p2, p3);
537
- bit45_r0 = simd_mergeh_1(p4, p5);
538
- bit45_r1 = simd_mergel_1(p4, p5);
539
- bit67_r0 = simd_mergeh_1(p6, p7);
540
- bit67_r1 = simd_mergel_1(p6, p7);
541
- bit0123_r0 = simd_mergeh_2(bit01_r0, bit23_r0);
542
- bit0123_r1 = simd_mergel_2(bit01_r0, bit23_r0);
543
- bit0123_r2 = simd_mergeh_2(bit01_r1, bit23_r1);
544
- bit0123_r3 = simd_mergel_2(bit01_r1, bit23_r1);
545
- bit4567_r0 = simd_mergeh_2(bit45_r0, bit67_r0);
546
- bit4567_r1 = simd_mergel_2(bit45_r0, bit67_r0);
547
- bit4567_r2 = simd_mergeh_2(bit45_r1, bit67_r1);
548
- bit4567_r3 = simd_mergel_2(bit45_r1, bit67_r1);
549
- s0 = simd_mergeh_4(bit0123_r0, bit4567_r0);
550
- s1 = simd_mergel_4(bit0123_r0, bit4567_r0);
551
- s2 = simd_mergeh_4(bit0123_r1, bit4567_r1);
552
- s3 = simd_mergel_4(bit0123_r1, bit4567_r1);
553
- s4 = simd_mergeh_4(bit0123_r2, bit4567_r2);
554
- s5 = simd_mergel_4(bit0123_r2, bit4567_r2);
555
- s6 = simd_mergeh_4(bit0123_r3, bit4567_r3);
556
- s7 = simd_mergel_4(bit0123_r3, bit4567_r3);
557
- }
558
-
559
- @<Transpose high UTF-16 bit streams to high byte stream@>=
560
- #if (P2S_ALGORITHM == P2S_IDEAL)
561
- #if (BYTE_ORDER == BIG_ENDIAN)
562
- p2s_ideal(u16hi0, u16hi1, u16hi2, u16hi3, u16hi4, u16hi5, u16hi6, u16hi7, @/
563
- U16h0, U16h1, U16h2, U16h3, U16h4, U16h5, U16h6, U16h7)@;
564
- #endif
565
- #if (BYTE_ORDER == LITTLE_ENDIAN)
566
- p2s_ideal(u16hi0, u16hi1, u16hi2, u16hi3, u16hi4, u16hi5, u16hi6, u16hi7, @/
567
- U16h7, U16h6, U16h5, U16h4, U16h3, U16h2, U16h1, U16h0)@;
568
- #endif
569
- #endif
570
-
571
- @ @<Transpose low UTF-16 bit streams to low byte stream@>=
572
- #if (P2S_ALGORITHM == P2S_IDEAL)
573
- #if (BYTE_ORDER == BIG_ENDIAN)
574
- p2s_ideal(u16lo0, u16lo1, u16lo2, u16lo3, u16lo4, u16lo5, u16lo6, u16lo7, @/
575
- U16l0, U16l1, U16l2, U16l3, U16l4, U16l5, U16l6, U16l7)@;
576
- #endif
577
- #if (BYTE_ORDER == LITTLE_ENDIAN)
578
- p2s_ideal(u16lo0, u16lo1, u16lo2, u16lo3, u16lo4, u16lo5, u16lo6, u16lo7, @/
579
- U16l7, U16l6, U16l5, U16l4, U16l3, U16l2, U16l1, U16l0)@;
580
- #endif
581
- #endif
582
-
583
-
584
- @ When a block of input consists of single and two-byte sequences only,
585
- the high 5 bits of the UTF-16 representation are always zero.
586
- Transposition of the remaining three bit streams
587
- (|u16hi5| through |u16hi7|) to high UTF-16 bytes is simplified
588
- in this case.
589
-
590
- @d p2s_567_ideal(p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7)
591
- {
592
- BitBlock bit45_r0, bit45_r1, bit67_r0, bit67_r1;
593
- BitBlock bit4567_r0, bit4567_r1, bit4567_r2, bit4567_r3;
594
- bit45_r0 = simd_mergeh_1(simd_const_8(0), p5);
595
- bit45_r1 = simd_mergel_1(simd_const_8(0), p5);
596
- bit67_r0 = simd_mergeh_1(p6, p7);
597
- bit67_r1 = simd_mergel_1(p6, p7);
598
- bit4567_r0 = simd_mergeh_2(bit45_r0, bit67_r0);
599
- bit4567_r1 = simd_mergel_2(bit45_r0, bit67_r0);
600
- bit4567_r2 = simd_mergeh_2(bit45_r1, bit67_r1);
601
- bit4567_r3 = simd_mergel_2(bit45_r1, bit67_r1);
602
- s0 = simd_mergeh_4(simd_const_8(0), bit4567_r0);
603
- s1 = simd_mergel_4(simd_const_8(0), bit4567_r0);
604
- s2 = simd_mergeh_4(simd_const_8(0), bit4567_r1);
605
- s3 = simd_mergel_4(simd_const_8(0), bit4567_r1);
606
- s4 = simd_mergeh_4(simd_const_8(0), bit4567_r2);
607
- s5 = simd_mergel_4(simd_const_8(0), bit4567_r2);
608
- s6 = simd_mergeh_4(simd_const_8(0), bit4567_r3);
609
- s7 = simd_mergel_4(simd_const_8(0), bit4567_r3);
610
- }
611
-
612
- @<Transpose three high UTF-16 bit streams to high byte stream@>=
613
- #if (P2S_ALGORITHM == P2S_IDEAL)
614
- #if (BYTE_ORDER == BIG_ENDIAN)
615
- p2s_567_ideal(u16hi5, u16hi6, u16hi7, @/
616
- U16h0, U16h1, U16h2, U16h3, U16h4, U16h5, U16h6, U16h7)@;
617
- #endif
618
- #if (BYTE_ORDER == LITTLE_ENDIAN)
619
- p2s_567_ideal(u16hi5, u16hi6, u16hi7, @/
620
- U16h7, U16h6, U16h5, U16h4, U16h3, U16h2, U16h1, U16h0)@;
621
- #endif
622
- #endif
623
-
624
- @*1 Merging of High and Low Byte Streams.
625
- The high and low byte streams from parallel to serial conversion
626
- must be merged together to form doublebyte streams of UTF-16 data.
627
- The |u16_merge0| and |u16_merge1| operations perform the merging
628
- in endian-dependent fashion.
629
-
630
- @<Merge high and low byte streams to doublebyte streams@>=
631
- U16s0 = u16_merge0(U16h0, U16l0);
632
- U16s1 = u16_merge1(U16h0, U16l0);
633
- U16s2 = u16_merge0(U16h1, U16l1);
634
- U16s3 = u16_merge1(U16h1, U16l1);
635
- U16s4 = u16_merge0(U16h2, U16l2);
636
- U16s5 = u16_merge1(U16h2, U16l2);
637
- U16s6 = u16_merge0(U16h3, U16l3);
638
- U16s7 = u16_merge1(U16h3, U16l3);
639
- U16s8 = u16_merge0(U16h4, U16l4);
640
- U16s9 = u16_merge1(U16h4, U16l4);
641
- U16s10 = u16_merge0(U16h5, U16l5);
642
- U16s11 = u16_merge1(U16h5, U16l5);
643
- U16s12 = u16_merge0(U16h6, U16l6);
644
- U16s13 = u16_merge1(U16h6, U16l6);
645
- U16s14 = u16_merge0(U16h7, U16l7);
646
- U16s15 = u16_merge1(U16h7, U16l7);
647
-
648
-
649
- @* Block Processing Structure for UTF-8 to UTF-16 Conversion.
650
-
651
- The overall structure of the UTF-8 to UTF-16 conversion algorithm
652
- consists of a main loop for processing blocks of UTF-8
653
- byte data. An ASCII short-cut optimization is first applied
654
- to process any significant run of UTF-8 data confined to the
655
- ASCII subset. When a region of input containing non-ASCII data is
656
- identified, it is then subject to block processing using
657
- parallel bit streams. After loading and transposing
658
- the block to parallel bit streams, UTF-8 validation
659
- constraints are checked and decoding to UTF-16 bit streams
660
- takes place. These bit streams must then be compressed
661
- from {\em u8-indexed} form (one to four positions per
662
- character based on UTF-8 sequence length) to {\em u16-indexed}
663
- form (one position per character for the basic multilingual
664
- plane, two positions for supplementary plane characters
665
- requiring surrogate pairs). The UTF-16 bit
666
- streams are then transposed to doublebyte streams and
667
- placed in the output buffer.
668
-
669
- @<Main block processing algorithm of |u8u16|@>=
670
- {
671
- unsigned char * U8data = (unsigned char *) *inbuf;
672
- unsigned char * U16out = (unsigned char *) *outbuf;
673
- size_t inbytes = *inbytesleft;
674
- while (inbytes > 0) {
675
- @<Apply ASCII short-cut optimization and continue@>;
676
- @<Load a block into serial bytepacks |U8s0| through |U8s7|@>@;
677
- @<Transpose to parallel bit streams |u8bit0| through |u8bit7|@>@;
678
- @<Apply validation, decoding and control logic on bit streams@>@;
679
- @<Compress bit streams and transpose to UTF-16 doublebyte streams@>@;
680
- if (bitblock_has_bit(error_mask)) @<Adjust to error position and signal the error@>@;
681
- @<Advance pointers and counters@>@;
682
- }
683
- @<Determine return values and exit@>@;
684
- }
685
-
686
- @ Local variables |u8advance| and |u16advance| are
687
- calculated in each block processing step to represent the
688
- number of bytes by which the input and output buffers are
689
- expected to advance. When a full block is loaded,
690
- the value of |u8advance| is set to |BLOCKSIZE|,
691
- possibly reduced by one to three bytes for an incomplete
692
- UTF-8 sequence at the end of the block. Otherwise,
693
- |u8advance| is set to the remaining |inbytes| when a partial
694
- block is loaded.
695
- The value of |u16advance| depends on the distribution
696
- of different lengths of UTF-8 sequences within
697
- the input block.
698
-
699
- @<Local variable declarations@>=
700
- intptr_t u8advance, u16advance;
701
-
702
- @ When a block is successfully converted, the pointers
703
- and counters are updated.
704
- @<Advance pointers and counters@>=
705
- inbytes -= u8advance;
706
- U8data += u8advance;
707
- U16out += u16advance;
708
-
709
- @ Validation, decoding and control logic is divided
710
- into three cases corresponding to the three possible
711
- maximum byte lengths for UTF-8 blocks containing non-ASCII
712
- input. This allows simplified processing in the
713
- event that input is confined to two-byte or three-byte
714
- sequences. A maximum sequence length of two is frequently found in
715
- applications dealing with international texts from Europe,
716
- the Middle East, Africa and South America. A maximum
717
- sequence length of three accounts for texts confined
718
- to the basic multilingual plane of Unicode, including all the normally
719
- used characters of languages world-wide.
720
- The final case deals with those rare blocks that require
721
- the additional logic complexity to process four-byte
722
- UTF-8 sequences corresponding to the supplementary planes
723
- of Unicode.
724
-
725
-
726
- @ @<Apply validation, decoding and control logic on bit streams@>=
727
- @<Compute classifications of UTF-8 bytes@>@;
728
- @<Compute scope classifications for common decoding@>@;
729
- @<Initiate validation for two-byte sequences@>@;
730
- @<Perform initial decoding of low eleven UTF-16 bit streams@>@;
731
- @<Identify deleted positions for basic multilingual plane giving |delmask|@>@;
732
- #ifndef NO_OPTIMIZATION
733
- if (@<Test whether the block is above the two-byte subplane@>) {
734
- @<Extend scope classifications for three-byte sequences@>@;
735
- @<Extend validation for errors in three-byte sequences@>@;
736
- @<Perform initial decoding of high five UTF-16 bit streams@>@;
737
- if (@<Test whether the block is above the basic multilingual plane@>) {
738
- @<Extend scope classifications for four-byte sequences@>@;
739
- @<Extend validation for errors in four-byte sequences@>@;
740
- @<Extend decoding for four-byte sequences@>@;
741
- @<Identify deleted positions for general Unicode giving |delmask|@>@;
742
- }
743
- }
744
- #endif
745
- #ifdef NO_OPTIMIZATION
746
- @<Extend scope classifications for three-byte sequences@>@;
747
- @<Extend validation for errors in three-byte sequences@>@;
748
- @<Perform initial decoding of high five UTF-16 bit streams@>@;
749
- @<Extend scope classifications for four-byte sequences@>@;
750
- @<Extend validation for errors in four-byte sequences@>@;
751
- @<Extend decoding for four-byte sequences@>@;
752
- @<Identify deleted positions for general Unicode giving |delmask|@>@;
753
- #endif
754
-
755
- @<Complete validation by checking for prefix-suffix mismatches@>@;
756
-
757
-
758
- @ Upon completion of the main block processing loop,
759
- all input data up to the cutoff point has been converted and
760
- written to the output buffer. Update the external pointers
761
- and counters and return.
762
-
763
- @<Determine return values and exit@> =
764
- *outbytesleft -= (intptr_t) U16out - (intptr_t) *outbuf;
765
- *inbuf = (char *) U8data;
766
- *inbytesleft = inbytes;
767
- *outbuf = (char *) U16out;
768
- @<Clear SIMD state@>;
769
- if (inbytes == 0) return (size_t) 0;
770
- else return (size_t) -1;
771
-
772
-
773
- @* Loading Block Data into SIMD Registers.
774
-
775
- @
776
- @<Load a block into serial bytepacks |U8s0| through |U8s7|@>=
777
- if (inbytes < BLOCKSIZE) {
778
- input_select_mask = sisd_sbl(simd_const_8(-1), sisd_from_int(BLOCKSIZE-inbytes));
779
- @<Load a block fragment@>@;
780
- }
781
- else {
782
- input_select_mask = simd_const_8(-1);
783
- @<Load a full block of UTF-8 byte data@>@;
784
- }
785
-
786
-
787
- @ Generic loading of a full block of UTF-8 byte
788
- data assumes that nonaligned loads are available.
789
-
790
- @<Load a full block of UTF-8 byte data@>=
791
- #ifdef INBUF_READ_NONALIGNED
792
- {
793
- BytePack * U8pack = (BytePack *) U8data;
794
- U8s0 = sisd_load_unaligned(&U8pack[0]);
795
- U8s1 = sisd_load_unaligned(&U8pack[1]);
796
- U8s2 = sisd_load_unaligned(&U8pack[2]);
797
- U8s3 = sisd_load_unaligned(&U8pack[3]);
798
- U8s4 = sisd_load_unaligned(&U8pack[4]);
799
- U8s5 = sisd_load_unaligned(&U8pack[5]);
800
- U8s6 = sisd_load_unaligned(&U8pack[6]);
801
- U8s7 = sisd_load_unaligned(&U8pack[7]);
802
- u8advance = BLOCKSIZE;
803
- @<Apply block shortening@>@;
804
- }
805
- #endif
806
-
807
- @ A block of UTF-8 data may end in an incomplete
808
- UTF-8 sequence with any |u8prefix| at the least significant position,
809
- with a |u8prefix3or4| at the second last position, or with a
810
- |u8prefix4| at the third last position. If so, |u8advance|
811
- is reduced by one, two or three positions, as appropriate.
812
-
813
- The logic here is simplified for correct UTF-8 input (assuming
814
- only one of these three conditions may be true);
815
- the |u8advance| value calculated must not be used until
816
- validation is complete.
817
-
818
- @d is_prefix_byte(byte) (byte >= 0xC0)
819
- @d is_prefix3or4_byte(byte) (byte >= 0xE0)
820
- @d is_prefix4_byte(byte) (byte >= 0xF0)
821
-
822
- @<Apply block shortening@>=
823
- u8advance -= is_prefix_byte(U8data[u8advance-1]) @|
824
- + 2 * is_prefix3or4_byte(U8data[u8advance-2]) @|
825
- + 3 * is_prefix4_byte(U8data[u8advance-3]);
826
-
827
-
828
-
829
- @*1 Loading the Final Block Fragment.
830
-
831
- @ When loading a block fragment at the end of the input
832
- buffer, care must be taken to avoid any possibility of
833
- a page fault. For a short fragment, a page fault could
834
- occur either by reading across an alignment boundary
835
- prior to the first byte or after the last byte.
836
-
837
- @d pack_base_addr(addr) ((BytePack *) (((intptr_t) (addr)) & (-PACKSIZE)))
838
-
839
- @<Load a block fragment@>=
840
- #ifdef INBUF_READ_NONALIGNED
841
- {
842
- BytePack * U8pack = (BytePack *) U8data;
843
- size_t full_packs = inbytes / PACKSIZE;
844
- size_t excess_bytes = inbytes % PACKSIZE;
845
- intptr_t U8data_offset = ((intptr_t) U8data) % PACKSIZE;
846
- BytePack partial_pack;
847
- if (excess_bytes == 0) partial_pack = simd_const_8(0);
848
- else if (U8data_offset + excess_bytes > PACKSIZE)
849
- /* unaligned load safe and required. */
850
- partial_pack = sisd_load_unaligned(&U8pack[full_packs]);
851
- else {
852
- /* aligned load required for safety */
853
- partial_pack = sisd_load_aligned(pack_base_addr(&U8pack[full_packs]));
854
- partial_pack = sisd_sbl(partial_pack, sisd_from_int(8*U8data_offset));
855
- }
856
- switch (full_packs) {
857
- case 0: U8s0 = partial_pack; break;
858
- case 1: U8s0 = sisd_load_unaligned(&U8pack[0]);
859
- U8s1 = partial_pack;
860
- break;
861
- case 2: U8s0 = sisd_load_unaligned(&U8pack[0]);
862
- U8s1 = sisd_load_unaligned(&U8pack[1]);
863
- U8s2 = partial_pack;
864
- break;
865
- case 3: U8s0 = sisd_load_unaligned(&U8pack[0]);
866
- U8s1 = sisd_load_unaligned(&U8pack[1]);
867
- U8s2 = sisd_load_unaligned(&U8pack[2]);
868
- U8s3 = partial_pack;
869
- break;
870
- case 4: U8s0 = sisd_load_unaligned(&U8pack[0]);
871
- U8s1 = sisd_load_unaligned(&U8pack[1]);
872
- U8s2 = sisd_load_unaligned(&U8pack[2]);
873
- U8s3 = sisd_load_unaligned(&U8pack[3]);
874
- U8s4 = partial_pack;
875
- break;
876
- case 5: U8s0 = sisd_load_unaligned(&U8pack[0]);
877
- U8s1 = sisd_load_unaligned(&U8pack[1]);
878
- U8s2 = sisd_load_unaligned(&U8pack[2]);
879
- U8s3 = sisd_load_unaligned(&U8pack[3]);
880
- U8s4 = sisd_load_unaligned(&U8pack[4]);
881
- U8s5 = partial_pack;
882
- break;
883
- case 6: U8s0 = sisd_load_unaligned(&U8pack[0]);
884
- U8s1 = sisd_load_unaligned(&U8pack[1]);
885
- U8s2 = sisd_load_unaligned(&U8pack[2]);
886
- U8s3 = sisd_load_unaligned(&U8pack[3]);
887
- U8s4 = sisd_load_unaligned(&U8pack[4]);
888
- U8s5 = sisd_load_unaligned(&U8pack[5]);
889
- U8s6 = partial_pack;
890
- break;
891
- case 7: U8s0 = sisd_load_unaligned(&U8pack[0]);
892
- U8s1 = sisd_load_unaligned(&U8pack[1]);
893
- U8s2 = sisd_load_unaligned(&U8pack[2]);
894
- U8s3 = sisd_load_unaligned(&U8pack[3]);
895
- U8s4 = sisd_load_unaligned(&U8pack[4]);
896
- U8s5 = sisd_load_unaligned(&U8pack[5]);
897
- U8s6 = sisd_load_unaligned(&U8pack[6]);
898
- U8s7 = partial_pack;
899
- break;
900
- }
901
- input_select_mask = sisd_sbl(simd_const_8(-1), sisd_from_int(BLOCKSIZE-inbytes));
902
- u8advance = inbytes;
903
- }
904
- #endif
905
-
906
-
907
-
908
- @* ASCII Optimization.
909
- Runs of ASCII characters can be converted to UTF-16 using
910
- an optimized process that avoids conversion to and from
911
- parallel bit streams. Given a bytepack of ASCII
912
- characters, two consecutive bytepacks of corresponding UTF-16
913
- output may be produced by merging a bytepack of all zeroes with
914
- the ASCII data. Further optimizations are applied for
915
- runs consisting of multiple bytepacks:
916
- converting to aligned output and
917
- using an unrolled loop to handle 4 bytepacks per iteration.
918
-
919
- @d align_offset(addr) (((intptr_t) addr) & (PACKSIZE - 1))
920
- @<Apply ASCII short-cut optimization and continue@>=
921
- #ifndef NO_ASCII_OPTIMIZATION
922
- BitBlock vec_0 = simd_const_8(0);
923
- if (inbytes > PACKSIZE) {
924
- U8s0 = sisd_load_unaligned((BytePack *) U8data);
925
- if (!simd_any_sign_bit_8(U8s0)) {
926
- intptr_t fill_to_align = PACKSIZE - align_offset(U16out);
927
- U16s0 = u16_merge0(vec_0, U8s0);
928
- sisd_store_unaligned(U16s0, (BytePack *) U16out);
929
- u8advance = fill_to_align/2;
930
- u16advance = fill_to_align;
931
- @<Advance pointers and counters@>@;
932
- while (inbytes > 4 * PACKSIZE) {
933
- BytePack * U8pack = (BytePack *) U8data;
934
- BytePack * U16pack = (BytePack *) U16out;
935
- U8s0 = sisd_load_unaligned(U8pack);
936
- U8s1 = sisd_load_unaligned(&U8pack[1]);
937
- U8s2 = sisd_load_unaligned(&U8pack[2]);
938
- U8s3 = sisd_load_unaligned(&U8pack[3]);
939
- if (simd_any_sign_bit_8(simd_or(simd_or(U8s0, U8s1), simd_or(U8s2, U8s3)))) break;
940
- sisd_store_aligned(u16_merge0(vec_0, U8s0), U16pack);
941
- sisd_store_aligned(u16_merge1(vec_0, U8s0), &U16pack[1]);
942
- sisd_store_aligned(u16_merge0(vec_0, U8s1), &U16pack[2]);
943
- sisd_store_aligned(u16_merge1(vec_0, U8s1), &U16pack[3]);
944
- sisd_store_aligned(u16_merge0(vec_0, U8s2), &U16pack[4]);
945
- sisd_store_aligned(u16_merge1(vec_0, U8s2), &U16pack[5]);
946
- sisd_store_aligned(u16_merge0(vec_0, U8s3), &U16pack[6]);
947
- sisd_store_aligned(u16_merge1(vec_0, U8s3), &U16pack[7]);
948
- u8advance = 4*PACKSIZE;
949
- u16advance = 8*PACKSIZE;
950
- @<Advance pointers and counters@>@;
951
- }
952
- while (inbytes > PACKSIZE) {
953
- BytePack * U16pack = (BytePack *) U16out;
954
- U8s0 = sisd_load_unaligned((BytePack *) U8data);
955
- if (simd_any_sign_bit_8(U8s0)) break;
956
- sisd_store_aligned(u16_merge0(vec_0, U8s0), U16pack);
957
- sisd_store_aligned(u16_merge1(vec_0, U8s0), &U16pack[1]);
958
- u8advance = PACKSIZE;
959
- u16advance = 2*PACKSIZE;
960
- @<Advance pointers and counters@>@;
961
- }
962
- }
963
- }
964
- if (inbytes <= PACKSIZE) {
965
- intptr_t U8data_offset = ((intptr_t) U8data) & (PACKSIZE - 1);
966
- if (U8data_offset + inbytes <= PACKSIZE) {
967
- /* Avoid a nonaligned load that could create a page fault. */
968
- U8s0 = sisd_sbl(sisd_load_aligned((BytePack *) pack_base_addr((intptr_t) U8data)),
969
- sisd_from_int(8*U8data_offset));
970
- }
971
- else U8s0 = sisd_load_unaligned((BytePack *) U8data);
972
- U8s0 = simd_and(U8s0, sisd_sbl(simd_const_8(-1),
973
- sisd_from_int(8 * (PACKSIZE - inbytes))));
974
- if (!simd_any_sign_bit_8(U8s0)) {
975
- sisd_store_unaligned(u16_merge0(vec_0, U8s0), (BytePack *) U16out);
976
- if (inbytes > PACKSIZE/2)
977
- sisd_store_unaligned(u16_merge1(vec_0, U8s0), (BytePack *) &U16out[PACKSIZE]);
978
- u8advance = inbytes;
979
- u16advance = 2*inbytes;
980
- @<Advance pointers and counters@>@;
981
- @<Determine return values and exit@>@;
982
- }
983
- }
984
- #endif
985
-
986
-
987
-
988
- @* UTF-8 Byte Classification.
989
-
990
- A set of bit streams is used to classify UTF-8 bytes
991
- based on their role in forming single and multibyte
992
- sequences. The |u8prefix| and |u8suffix| streams
993
- identify bytes that represent, respectively, prefix
994
- or suffix bytes of multibyte sequences, while
995
- the |u8unibyte| stream identifies those
996
- bytes that may be considered single-byte sequences,
997
- each representing a character by itself.
998
-
999
- Prefix bytes are further classified by whether
1000
- they code for 2, 3 or 4 byte sequences.
1001
-
1002
- @<Local variable declarations@>=
1003
- BitBlock u8unibyte, u8prefix, u8suffix, u8prefix2, u8prefix3or4, u8prefix3, u8prefix4;
1004
-
1005
- @ These bit streams are computed by straightforward logical combinations
1006
- reflecting the definition of UTF-8. However,
1007
- the streams are defined only for valid input positions
1008
- in accord with |input_select_mask|.
1009
-
1010
- @<Compute classifications of UTF-8 bytes@>=
1011
- {
1012
- BitBlock bit0_selected = simd_and(input_select_mask, u8bit0);
1013
- u8unibyte = simd_andc(input_select_mask, u8bit0);
1014
- u8prefix = simd_and(bit0_selected, u8bit1);
1015
- u8suffix = simd_andc(bit0_selected, u8bit1);
1016
- u8prefix3or4 = simd_and(u8prefix, u8bit2);
1017
- u8prefix2 = simd_andc(u8prefix, u8bit2);
1018
- u8prefix3 = simd_andc(u8prefix3or4, u8bit3);
1019
- u8prefix4 = simd_and(u8prefix3or4, u8bit3);
1020
- }
1021
-
1022
- @ When a block of UTF-8 input is confined to single-byte or two-byte
1023
- sequences only, processing may be considerably simplified.
1024
- A convenient bit test determines whether the logic for three- or four-byte
1025
- UTF-8 sequences is required.
1026
- @<Test whether the block is above the two-byte subplane@>=
1027
- bitblock_has_bit(u8prefix3or4)
1028
-
1029
- @ A similar bit test determines whether the logic
1030
- for the basic multilingual plane of Unicode (including up to three-byte
1031
- sequences) is sufficient, or whether the extended logic for the
1032
- four-byte sequences is required.
1033
- @<Test whether the block is above the basic multilingual plane@>=
1034
- bitblock_has_bit(u8prefix4)
1035
-
1036
- @*1 Scope streams.
1037
-
1038
- @ Scope streams represent expectations established
1039
- by prefix bytes. For example, |u8scope22| represents
1040
- the positions at which the second byte of a two-byte
1041
- sequence is expected based on the occurrence of
1042
- two-byte prefixes in the immediately preceding positions.
1043
- Other scope streams represent combined classifications.
1044
-
1045
- @<Local variable declarations@>=
1046
- BitBlock u8scope22, u8scope32, u8scope33, u8scope42, u8scope43, u8scope44;
1047
- BitBlock u8lastsuffix, u8lastbyte, u8surrogate;
1048
-
1049
- @ For the decoding operations common to all cases, the
1050
- |u8lastsuffix| and |u8lastbyte| classifications are needed.
1051
-
1052
- @<Compute scope classifications for common decoding@>=
1053
- u8scope22 = bitblock_sfli(u8prefix2, 1);
1054
- u8scope33 = bitblock_sfli(u8prefix3, 2);
1055
- u8scope44 = bitblock_sfli(u8prefix4, 3);
1056
- u8lastsuffix = simd_or(simd_or(u8scope22, u8scope33), u8scope44);
1057
- u8lastbyte = simd_or(u8unibyte, u8lastsuffix);
1058
-
1059
- @ When a block is known to include three-byte sequences, the
1060
- |u8scope32| stream is relevant.
1061
-
1062
- @<Extend scope classifications for three-byte sequences@>=
1063
- u8scope32 = bitblock_sfli(u8prefix3, 1);
1064
-
1065
- @ Additional classifications become relevant when a block is known
1066
- to include four-byte sequences.
1067
- @<Extend scope classifications for four-byte sequences@>=
1068
- u8scope42 = bitblock_sfli(u8prefix4, 1);
1069
- u8scope43 = bitblock_sfli(u8prefix4, 2);
1070
- u8surrogate = simd_or(u8scope42, u8scope44);
1071
-
1072
-
1073
-
1074
- @* UTF-8 Validation.
1075
-
1076
- Any UTF-8 errors in a block of input data are identified
1077
- through the process of UTF-8 validation. The result of
1078
- the process is an |error_mask| identifying those positions
1079
- at which an error is positively identified. Blocks are
1080
- assumed to start with complete UTF-8 sequences; any
1081
- suffix found at the beginning of a block is marked
1082
- as an error. An incomplete sequence at the end of the block
1083
- is not marked as an error if it is possible to produce
1084
- a legal sequence by adding one or more bytes.
1085
-
1086
- UTF-8 validation involves checking that UTF-8 suffixes match with
1087
- scope expectations, that invalid prefix codes |0xC0|, |0xC1|,
1088
- and |0xF5| through |0xFF| do not occur, and that constraints on
1089
- the first suffix byte following certain special prefixes are
1090
- obeyed, namely
1091
- |0xE0|:|0xA0|\,--|0xBF|, |0xED|:|0x80|\,--|0x9F|, |0xF0|:|0x90|\,--|0xBF|,
1092
- and |0xF4|:|0x80|\,--|0x8F|.
1093
- The variable |suffix_required_scope| is used to identify positions
1094
- at which a suffix byte is expected.
1095
-
1096
- @<Local variable declarations@>=
1097
- BitBlock suffix_required_scope;
1098
-
1099
- @ The logic is staged to initialize |error_mask|
1100
- and |suffix_required_scope| for errors in
1101
- two-byte sequences followed by additional logic stages for
1102
- three-byte sequences and four-byte sequences.
1103
-
1104
- For two-byte sequences, |error_mask| is initialized
1105
- by a check for invalid prefixes |0xC0| or |0xC1|, and
1106
- |suffix_required_scope| is initialized for the suffix
1107
- position of two-byte sequences.
1108
-
1109
- @<Initiate validation for two-byte sequences@>=
1110
- error_mask = simd_andc(u8prefix2, simd_or(simd_or(u8bit3, u8bit4),
1111
- simd_or(u8bit5, u8bit6)));
1112
- suffix_required_scope = u8scope22;
1113
-
1114
- @ Error checking for three-byte sequences involves local variable
1115
- |prefix_E0ED| to identify occurrences of |0xE0| or |0xED| prefix
1116
- bytes, and |E0ED_constraint| to indicate positions at which the
1117
- required suffix constraint holds.
1118
-
1119
- @<Extend validation for errors in three-byte sequences@>=
1120
- {
1121
- BitBlock prefix_E0ED, E0ED_constraint;
1122
- prefix_E0ED = simd_andc(u8prefix3,
1123
- simd_or(simd_or(u8bit6, simd_xor(u8bit4, u8bit7)),
1124
- simd_xor(u8bit4, u8bit5)));
1125
- E0ED_constraint = simd_xor(bitblock_sfli(u8bit5, 1), u8bit2);
1126
- error_mask = simd_or(error_mask,
1127
- simd_andc(bitblock_sfli(prefix_E0ED, 1), E0ED_constraint));
1128
- suffix_required_scope = simd_or(u8lastsuffix, u8scope32);
1129
- }
1130
-
1131
- @ In the case of validation for general Unicode including four-byte
1132
- sequences, additional local variables
1133
- |prefix_F5FF| (for any prefix byte |0xF5| through |0xFF|), |prefix_F0F4| and
1134
- |F0F4_constraint| are defined.
1135
-
1136
- @<Extend validation for errors in four-byte sequences@>=
1137
- {
1138
- BitBlock prefix_F5FF, prefix_F0F4, F0F4_constraint;
1139
- prefix_F5FF = simd_and(u8prefix4, simd_or(u8bit4,
1140
- simd_and(u8bit5,
1141
- simd_or(u8bit6, u8bit7))));
1142
- error_mask = simd_or(error_mask, prefix_F5FF);
1143
- prefix_F0F4 = simd_andc(u8prefix4, simd_or(u8bit4, simd_or(u8bit6, u8bit7)));
1144
- F0F4_constraint = simd_xor(bitblock_sfli(u8bit5, 1), simd_or(u8bit2, u8bit3));
1145
- error_mask = simd_or(error_mask, simd_andc(bitblock_sfli(prefix_F0F4, 1), F0F4_constraint));
1146
- suffix_required_scope = simd_or(suffix_required_scope,
1147
- simd_or(u8surrogate, u8scope43));
1148
- }
1149
-
1150
- @ Completion of validation requires that any mismatch between a
1151
- scope expectation and the occurrence of a suffix byte be identified.
1152
-
1153
- @<Complete validation by checking for prefix-suffix mismatches@>=
1154
- error_mask = simd_or(error_mask, simd_xor(suffix_required_scope, u8suffix));
1155
-
1156
- @* UTF-16 Bit Streams.
1157
-
1158
- Given validated UTF-8 bit streams, conversion to UTF-16 proceeds
1159
- by first determining a parallel set of 16 bit streams that
1160
- comprise a {\it u8-indexed} representation of UTF-16. This
1161
- representation defines the correct UTF-16 bit representation
1162
- at the following UTF-8 positions: at the single byte of
1163
- a single-byte sequence (|u8unibyte|), at the second byte of a two-byte
1164
- sequence (|u8scope22|), at the third byte of a three byte
1165
- sequence (|u8scope33|), and
1166
- at the second and fourth bytes of a four-byte sequence
1167
- (|u8scope42| and |u8scope44|). In the case of four byte
1168
- sequences, two UTF-16 units are produced, comprising the
1169
- UTF-16 surrogate pair for a codepoint beyond the basic
1170
- multilingual plane.
1171
-
1172
- The UTF-16 bit stream values at other positions
1173
- (|u8prefix2|, |u8prefix3|, |u8prefix4|, |u8scope32|, |u8scope43|)
1174
- are not significant; no UTF-16 output is to be generated from
1175
- these positions. Prior to generation of output, data bits
1176
- at these positions are to be deleted using the deletion
1177
- operations of the subsequent section. These deletions
1178
- produce the UTF-16 bit streams in {\it u16-indexed} form.
1179
-
1180
- @ Decoding is initiated by applying the common logic
1181
- for the low eleven bit streams identified by the
1182
- |u8lastsuffix| and |u8lastbyte| conditions.
1183
-
1184
- @<Perform initial decoding of low eleven UTF-16 bit streams@>=
1185
- u16hi5 = simd_and(u8lastsuffix, bitblock_sfli(u8bit3, 1));
1186
- u16hi6 = simd_and(u8lastsuffix, bitblock_sfli(u8bit4, 1));
1187
- u16hi7 = simd_and(u8lastsuffix, bitblock_sfli(u8bit5, 1));
1188
- u16lo0 = simd_and(u8lastsuffix, bitblock_sfli(u8bit6, 1));
1189
- u16lo1 = simd_or(simd_and(u8unibyte, u8bit1), simd_and(u8lastsuffix, bitblock_sfli(u8bit7, 1)));
1190
- u16lo2 = simd_and(u8lastbyte, u8bit2);
1191
- u16lo3 = simd_and(u8lastbyte, u8bit3);
1192
- u16lo4 = simd_and(u8lastbyte, u8bit4);
1193
- u16lo5 = simd_and(u8lastbyte, u8bit5);
1194
- u16lo6 = simd_and(u8lastbyte, u8bit6);
1195
- u16lo7 = simd_and(u8lastbyte, u8bit7);
1196
-
1197
- @ For blocks containing three-byte sequences in the basic
1198
- multilingual plane, the high five UTF-16 bit streams become
1199
- significant at |u8scope33| positions.
1200
-
1201
- @<Perform initial decoding of high five UTF-16 bit streams@>=
1202
- u16hi0 = simd_and(u8scope33, bitblock_sfli(u8bit4, 2));
1203
- u16hi1 = simd_and(u8scope33, bitblock_sfli(u8bit5, 2));
1204
- u16hi2 = simd_and(u8scope33, bitblock_sfli(u8bit6, 2));
1205
- u16hi3 = simd_and(u8scope33, bitblock_sfli(u8bit7, 2));
1206
- u16hi4 = simd_and(u8scope33, bitblock_sfli(u8bit2, 1));
1207
-
1208
- @ Decoding for 4-byte UTF-8 sequences involves logic for
1209
- UTF-16 surrogate pairs at the |u8scope42| and |u8scope44|
1210
- positions. However, the values for the low ten bit streams
1211
- at |u8scope44| positions have already been set according
1212
- to the common pattern for |u8lastsuffix| and |u8lastbyte|,
1213
- so it is only necessary to extend the definitions of
1214
- these ten bit streams with the logic for the first
1215
- UTF-16 code unit of the surrogate pair at the
1216
- |u8scope42| position. The high six UTF-16 bits
1217
- are set to a fixed bit pattern of |110110| or |110111| for
1218
- the respective surrogate pair positions.
1219
-
1220
- @<Extend decoding for four-byte sequences@>=
1221
- {BitBlock borrow1, borrow2;
1222
- u16hi0 = simd_or(u16hi0, u8surrogate);
1223
- u16hi1 = simd_or(u16hi1, u8surrogate);
1224
- u16hi3 = simd_or(u16hi3, u8surrogate);
1225
- u16hi4 = simd_or(u16hi4, u8surrogate);
1226
- u16hi5 = simd_or(u16hi5, u8scope44);
1227
- u16lo1 = simd_or(u16lo1, simd_and(u8scope42, simd_not(u8bit3)));
1228
- /* under |u8scope42|: |u16lo0| = |u8bit2| - |borrow|, where |borrow| = |u16lo1| */
1229
- u16lo0 = simd_or(u16lo0, simd_and(u8scope42, simd_xor(u8bit2, u16lo1)));
1230
- borrow1 = simd_andc(u16lo1, u8bit2); /* borrow for |u16hi7|. */
1231
- u16hi7 = simd_or(u16hi7, simd_and(u8scope42, simd_xor(bitblock_sfli(u8bit7, 1), borrow1)));
1232
- borrow2 = simd_andc(borrow1, bitblock_sfli(u8bit7, 1)); /* borrow for |u16hi6|. */
1233
- u16hi6 = simd_or(u16hi6, simd_and(u8scope42, simd_xor(bitblock_sfli(u8bit6, 1), borrow2)));
1234
- u16lo2 = simd_or(u16lo2, simd_and(u8scope42, u8bit4));
1235
- u16lo3 = simd_or(u16lo3, simd_and(u8scope42, u8bit5));
1236
- u16lo4 = simd_or(u16lo4, simd_and(u8scope42, u8bit6));
1237
- u16lo5 = simd_or(u16lo5, simd_and(u8scope42, u8bit7));
1238
- u16lo6 = simd_or(u16lo6, simd_and(u8scope42, bitblock_sbli(u8bit2, 1)));
1239
- u16lo7 = simd_or(u16lo7, simd_and(u8scope42, bitblock_sbli(u8bit3, 1)));
1240
- }
1241
-
1242
-
1243
- @* Compression to U16-Indexed Form by Deletion.
1244
-
1245
- As identified in the previous section, the UTF-16 bit streams
1246
- are initially defined in u8-indexed form, that is, with sets
1247
- of bits in one-to-one correspondence with UTF-8 bytes. However,
1248
- only one set of UTF-16 bits is required for encoding two or three-byte
1249
- UTF-8 sequences and only two sets are required for surrogate pairs
1250
- corresponding to four-byte UTF-8 positions. The |u8lastbyte|
1251
- (|u8unibyte|, |u8scope22|, |u8scope33|, and |u8scope44|) and
1252
- |u8scope42| streams mark the positions at which the correct UTF-16 bits
1253
- are computed. The bit sets at other positions must be deleted
1254
- to compress the streams to u16-indexed form. In addition,
1255
- any positions outside the |input_select_mask| must also be
1256
- deleted.
1257
-
1258
- In the case of input confined to the basic multilingual plane,
1259
- there are no |u8scope42| positions to consider in forming the
1260
- deletion mask.
1261
- @<Identify deleted positions for basic multilingual plane giving |delmask|@>=
1262
- delmask = simd_not(simd_and(input_select_mask, u8lastbyte));
1263
-
1264
- @ For general Unicode, however, |u8scope42| positions must
1265
- not be deleted, provided that the full 4-byte sequence
1266
- (including the corresponding |u8scope44| position) is within
1267
- the selected input area.
1268
-
1269
- @<Identify deleted positions for general Unicode giving |delmask|@>=
1270
- {BitBlock scope42_selected = bitblock_sbli(simd_and(u8scope44, input_select_mask), 2);
1271
- delmask = simd_not(simd_and(input_select_mask,
1272
- simd_or(u8lastbyte, scope42_selected)));
1273
- }
1274
-
1275
- @ Several algorithms to delete bits at positions marked by |delmask|
1276
- are possible. Preprocessor configuration options allow selection from
1277
- available alternatives for particular architectures.
1278
- In each case, however, the |u8u16| program is
1279
- designed to perform the initial deletion operations within
1280
- fields of size |PACKSIZE/2|. Within each such field, then,
1281
- non-deleted bits become compressed together at the front end of
1282
- the field, followed by zeroes for the deleted bits at the back end.
1283
- Upon transposition to doublebyte streams of UTF-16 code units,
1284
- each |PACKSIZE/2|-bit field becomes a single bytepack of UTF-16 data.
1285
- After writing each such bytepack to output, the output pointer
1286
- is advanced only by the number of nondeleted bytes. In this way,
1287
- the final compression to continuous u16-indexed code unit
1288
- streams is achieved as part of the output process.
1289
-
1290
- In the context of this general deletion strategy, algorithm
1291
- variations achieve deletion within |PACKSIZE/2| fields by
1292
- different methods. In each case, the deletion process is
1293
- controlled by deletion information computed from |delmask|.
1294
- Based on this information, deletion operations may be
1295
- applied to bit streams and/or as byte stream transformations.
1296
-
1297
- The following code describes the general structure, also
1298
- incorporating an optimization for the two-byte subplane
1299
- (UTF-8 inputs confined to one or two-byte sequences). In this case,
1300
- the high five bits of the UTF-16 representation are always zero,
1301
- so bit deletion operations for these streams can be eliminated.
1302
-
1303
- @<Compress bit streams and transpose to UTF-16 doublebyte streams@>=
1304
- @<Determine deletion information from |delmask|@>@;
1305
- @<Apply bit deletions to low eleven UTF-16 bit streams@>@;
1306
- if (!@<Test whether the block is above the two-byte subplane@>) {
1307
- @<Transpose three high UTF-16 bit streams to high byte stream@>@;
1308
- }
1309
- else {
1310
- @<Apply bit deletions to high five UTF-16 bit streams@>@;
1311
- @<Transpose high UTF-16 bit streams to high byte stream@>@;
1312
- }
1313
- @<Transpose low UTF-16 bit streams to low byte stream@>@;
1314
- @<Apply byte stream transformations@>@;
1315
- @<Merge high and low byte streams to doublebyte streams@>@;
1316
- @<Write compressed UTF-16 data@>@;
1317
-
1318
-
1319
- @*1 Deletion by Central Result Induction.
1320
-
1321
- @ The default implementation of deletion within |PACKSIZE/2| fields
1322
- is designed for a |BLOCKSIZE| of 128 and hence a |PACKSIZE| of 16.
1323
- Deletion within 8-bit fields requires three
1324
- operations per bit stream: conversion of 2-bit central deletion results
1325
- to 8-bit central deletion results in two steps of deletion by rotation
1326
- (central result induction), followed by conversion to 8-bit front-justified
1327
- results by a back-shift operation.
1328
-
1329
- @<Local variable declarations@>=
1330
- #ifdef __GNUC__
1331
- unsigned char u16_bytes_per_reg[16] __attribute__((aligned(16)));
1332
- #endif
1333
- #ifdef _MSC_VER
1334
- __declspec(align(16)) unsigned char u16_bytes_per_reg[16];
1335
- #endif
1336
- #if ((DOUBLEBYTE_DELETION == FROM_LEFT8) || (BIT_DELETION == ROTATION_TO_LEFT8))
1337
- BitBlock delcounts_2, delcounts_4, delcounts_8;
1338
- #endif
1339
- #if (BIT_DELETION == ROTATION_TO_LEFT8)
1340
- BitBlock rotl_2, rotl_4, sll_8;
1341
- #endif
1342
-
1343
- @ @<Determine deletion information from |delmask|@>=
1344
- #if ((DOUBLEBYTE_DELETION == FROM_LEFT8) || (BIT_DELETION == ROTATION_TO_LEFT8))
1345
- delcounts_2 = simd_add_2_lh(delmask, delmask);
1346
- delcounts_4 = simd_add_4_lh(delcounts_2, delcounts_2);
1347
- delcounts_8 = simd_add_8_lh(delcounts_4, delcounts_4);
1348
- sisd_store_aligned(simd_slli_8(simd_sub_8(simd_const_8(8), delcounts_8), 1),
1349
- (BytePack *) &u16_bytes_per_reg[0]);
1350
- #endif
1351
- #if (BIT_DELETION == ROTATION_TO_LEFT8)
1352
- rotl_2 = simd_if(simd_himask_4, delmask, sisd_srli(delmask, 1));
1353
- rotl_4 = simd_if(simd_himask_8, simd_sub_2(vec_0, delcounts_2), sisd_srli(delcounts_2, 2));
1354
- sll_8 = sisd_srli(delcounts_4, 4);
1355
- #endif
1356
-
1357
- @*1 Apply Deletions to Bit Streams.
1358
-
1359
- @ Perform the two rotations and one shift operation to yield
1360
- left-justified data within 8-bit fields.
1361
-
1362
- @<Apply bit deletions to high five UTF-16 bit streams@>=
1363
- #if (BIT_DELETION == ROTATION_TO_LEFT8)
1364
- u16hi0 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi0, rotl_2), rotl_4), sll_8);
1365
- u16hi1 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi1, rotl_2), rotl_4), sll_8);
1366
- u16hi2 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi2, rotl_2), rotl_4), sll_8);
1367
- u16hi3 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi3, rotl_2), rotl_4), sll_8);
1368
- u16hi4 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi4, rotl_2), rotl_4), sll_8);
1369
- #endif
1370
-
1371
- @ @<Apply bit deletions to low eleven UTF-16 bit streams@>=
1372
- #if (BIT_DELETION == ROTATION_TO_LEFT8)
1373
- u16hi5 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi5, rotl_2), rotl_4), sll_8);
1374
- u16hi6 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi6, rotl_2), rotl_4), sll_8);
1375
- u16hi7 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16hi7, rotl_2), rotl_4), sll_8);
1376
- u16lo0 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo0, rotl_2), rotl_4), sll_8);
1377
- u16lo1 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo1, rotl_2), rotl_4), sll_8);
1378
- u16lo2 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo2, rotl_2), rotl_4), sll_8);
1379
- u16lo3 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo3, rotl_2), rotl_4), sll_8);
1380
- u16lo4 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo4, rotl_2), rotl_4), sll_8);
1381
- u16lo5 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo5, rotl_2), rotl_4), sll_8);
1382
- u16lo6 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo6, rotl_2), rotl_4), sll_8);
1383
- u16lo7 = simd_sll_8(simd_rotl_4(simd_rotl_2(u16lo7, rotl_2), rotl_4), sll_8);
1384
- #endif
1385
-
1386
-
1387
-
1388
- @ @<Apply byte stream transformations@>=
1389
- /* No byte stream transformations are required in the default algorithm. */
1390
-
1391
-
1392
- @
1393
- @d unaligned_output_step(reg, bytes)
1394
- sisd_store_unaligned(reg, (BytePack *) &U16out[u16advance]);
1395
- u16advance += bytes;
1396
-
1397
- @<Write compressed UTF-16 data@>=
1398
- #ifdef OUTBUF_WRITE_NONALIGNED
1399
- u16advance = 0;
1400
- unaligned_output_step(U16s0, u16_bytes_per_reg[0])@;
1401
- unaligned_output_step(U16s1, u16_bytes_per_reg[1])@;
1402
- unaligned_output_step(U16s2, u16_bytes_per_reg[2])@;
1403
- unaligned_output_step(U16s3, u16_bytes_per_reg[3])@;
1404
- unaligned_output_step(U16s4, u16_bytes_per_reg[4])@;
1405
- unaligned_output_step(U16s5, u16_bytes_per_reg[5])@;
1406
- unaligned_output_step(U16s6, u16_bytes_per_reg[6])@;
1407
- unaligned_output_step(U16s7, u16_bytes_per_reg[7])@;
1408
- unaligned_output_step(U16s8, u16_bytes_per_reg[8])@;
1409
- unaligned_output_step(U16s9, u16_bytes_per_reg[9])@;
1410
- unaligned_output_step(U16s10, u16_bytes_per_reg[10])@;
1411
- unaligned_output_step(U16s11, u16_bytes_per_reg[11])@;
1412
- unaligned_output_step(U16s12, u16_bytes_per_reg[12])@;
1413
- unaligned_output_step(U16s13, u16_bytes_per_reg[13])@;
1414
- unaligned_output_step(U16s14, u16_bytes_per_reg[14])@;
1415
- unaligned_output_step(U16s15, u16_bytes_per_reg[15])@;
1416
- #endif
1417
-
1418
-
1419
-
1420
- @* Error Identification and Reporting.
1421
-
1422
- @ When a validation error is identified, the end of the last
1423
- complete UTF-8 sequence prior to the error must be determined
1424
- as the basis for calculating |u8advance| and |u16advance|.
1425
- The pointers and counters may then be updated and the error
1426
- return made.
1427
- @<Adjust to error position and signal the error@>=
1428
- {
1429
- BitBlock cutoff_mask, errbit, u8scopex2;
1430
- int errpos, u8u16errno;
1431
- @<Extend scope classifications for three-byte sequences@>@;
1432
- @<Extend scope classifications for four-byte sequences@>@;
1433
- u8scopex2 = simd_or(u8scope22, simd_or(u8scope32, u8scope42));
1434
- if (!bitblock_has_bit(simd_and(error_mask, input_select_mask))) {
1435
- /* Error is not in block; must be at end of input. */
1436
- u8u16errno = EINVAL;
1437
- }
1438
- else {
1439
- u8u16errno = EILSEQ;
1440
- }
1441
- errpos = count_forward_zeroes(error_mask);
1442
- u8advance = errpos - count_forward_zeroes(input_select_mask);
1443
- cutoff_mask = sisd_sfl(simd_const_8(-1), sisd_from_int(errpos));
1444
- errbit = simd_andc(error_mask, sisd_sfli(cutoff_mask, 1));
1445
- input_select_mask = simd_andc(input_select_mask, cutoff_mask);
1446
- u16advance = 2 * (bitblock_bit_count(simd_and(u8lastbyte, input_select_mask)) + @|
1447
- bitblock_bit_count(simd_and(u8scope42, input_select_mask)));
1448
- if (bitblock_has_bit(simd_and(u8scope44, errbit))) {
1449
- u8advance -= 3;
1450
- u16advance -= 2;
1451
- }
1452
- else if (bitblock_has_bit(simd_and(u8scope43, errbit))) {
1453
- u8advance -= 2;
1454
- u16advance -= 2;
1455
- }
1456
- else if (bitblock_has_bit(simd_and(u8scope33, errbit))) {
1457
- u8advance -= 2;
1458
- }
1459
- else if (bitblock_has_bit(simd_and(u8scopex2, errbit))) {
1460
- u8advance -= 1;
1461
- }
1462
-
1463
- @<Advance pointers and counters@>@;
1464
-
1465
- *outbytesleft -= (intptr_t) U16out - (intptr_t) *outbuf;
1466
- *inbytesleft = inbytes;
1467
- *inbuf = (char *) U8data;
1468
- *outbuf = (char *) U16out;
1469
- @<Clear SIMD state@>;
1470
- errno = u8u16errno;
1471
- return (size_t) -1;
1472
- }
1473
-
1474
- @* Buffered Version.
1475
-
1476
- @ The |buffered_u8u16| routine uses an internal buffer for
1477
- assembling UTF-16 code units prior to copying them to the
1478
- specified output buffer.
1479
-
1480
- @d is_suffix_byte(byte) (byte >= 0x80 && byte <= 0xBF)
1481
-
1482
- @c
1483
- size_t
1484
- buffered_u8u16(char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { /* Convert through a temporary buffer, then copy to |*outbuf| only as much as fits there. */
1485
- if (inbuf && *inbuf && outbuf && *outbuf) /* are all non-NULL */ @+ {
1486
- unsigned char * inbuf_start = (unsigned char *) *inbuf;
1487
- size_t max_inbytes = min(3 * (*outbytesleft) / 2, *inbytesleft); /* offer at most 3/2 input bytes per available output byte */
1488
- size_t internal_space = 2 * (*inbytesleft) + PACKSIZE; /* two output bytes per input byte, plus |PACKSIZE| extra */
1489
- size_t internal_space_left = internal_space;
1490
- char * internal_buf_start = (char *) malloc(internal_space); /* NOTE(review): the |malloc| result is not checked for |NULL| before use */
1491
- char * internal_buf = internal_buf_start;
1492
- size_t return_code = u8u16(inbuf, &max_inbytes, &internal_buf, &internal_space_left); /* convert into the temporary buffer */
1493
- intptr_t u16advance = internal_space - internal_space_left; /* output bytes produced, assuming |u8u16| decrements |internal_space_left|; TODO confirm */
1494
- intptr_t u8advance = (intptr_t) (*inbuf) - (intptr_t) inbuf_start; /* input bytes consumed, measured by how far |*inbuf| moved */
1495
- if (size_t(u16advance) > *outbytesleft) { /* converted output exceeds the caller's space; NOTE(review): |size_t(...)| is a C++-style cast, confirm the target language */
1496
- errno = E2BIG;
1497
- return_code = (size_t) -1;
1498
- do { /* give back one whole UTF-8 sequence per iteration */
1499
- do {
1500
- u8advance--; /* step back one input byte */
1501
- }
1502
- while (is_suffix_byte(inbuf_start[u8advance])); /* stop at the first non-suffix byte */
1503
- if (is_prefix4_byte(inbuf_start[u8advance])) u16advance -= 4; /* presumably a surrogate pair (two code units); verify |is_prefix4_byte| */
1504
- else u16advance -= 2; /* otherwise a single two-byte code unit */
1505
- } while (size_t(u16advance) > *outbytesleft); /* repeat until the remaining output fits */
1506
- }
1507
- memcpy(*outbuf, internal_buf_start, u16advance); /* copy the portion that fits to the caller's buffer */
1508
- free(internal_buf_start);
1509
- *inbuf = (char *) inbuf_start + u8advance; /* report final positions and remaining counts to the caller */
1510
- *inbytesleft -= u8advance;
1511
- *outbuf += u16advance;
1512
- *outbytesleft -= u16advance;
1513
- return return_code;
1514
- }
1515
- else if (inbuf == NULL || *inbuf == NULL || *inbytesleft == 0) @+
1516
- return (size_t) 0; /* no input to convert; NOTE(review): |inbytesleft| itself is read without a |NULL| check */
1517
- else {@+ errno = E2BIG; @+ return (size_t) -1; @+ } /* remaining case: input is present but |outbuf| or |*outbuf| is |NULL| */
1518
- }
1519
-
1520
-
1521
-
1522
- @* Alternative Transposition Algorithms Using Byte Packing/Merging.
1523
-
1524
- In the event that byte-level pack and merge operations
1525
- represent the finest granularity level available on a particular
1526
- SIMD target architecture, transposition using the
1527
- generic algorithms uses simulated implementations of
1528
- pack and merge operations at the bit, bit pair and nybble levels.
1529
- Better performance can be achieved by restructured
1530
- algorithms that directly use byte-level pack and merge.
1531
-
1532
- In the case of serial to parallel to serial transposition,
1533
- the restructured algorithm uses three stages of packing
1534
- data from consecutive bytes. In the first stage, individual
1535
- bits from consecutive bytes are paired up to produce
1536
- two parallel streams comprising the even bits and the
1537
- odd bits of the original byte data. In the second stage,
1538
- pairs of bits from consecutive bytes are paired up to
1539
- give runs of 4. In the final stage, runs of 4 are
1540
- paired up to generate bit streams.
1541
-
1542
- @d s2p_step(s0, s1, hi_mask, shift, p0, p1)
1543
- {
1544
- BitBlock t0, t1;
1545
- t0 = simd_pack_16_hh(s0, s1);
1546
- t1 = simd_pack_16_ll(s0, s1);
1547
- p0 = simd_if(hi_mask, t0, simd_srli_16(t1, shift));
1548
- p1 = simd_if(hi_mask, simd_slli_16(t0, shift), t1);
1549
- }
1550
-
1551
- @d s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)
1552
- { BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3;
1553
- BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3;
1554
- BitBlock bit00004444_0, bit22226666_0, bit00004444_1, bit22226666_1;
1555
- BitBlock bit11115555_0, bit33337777_0, bit11115555_1, bit33337777_1;
1556
- s2p_step(s0, s1, mask_2, 1, bit00224466_0, bit11335577_0)@;
1557
- s2p_step(s2, s3, mask_2, 1, bit00224466_1, bit11335577_1)@;
1558
- s2p_step(s4, s5, mask_2, 1, bit00224466_2, bit11335577_2)@;
1559
- s2p_step(s6, s7, mask_2, 1, bit00224466_3, bit11335577_3)@;
1560
- s2p_step(bit00224466_0, bit00224466_1, mask_4, 2, bit00004444_0, bit22226666_0)@;
1561
- s2p_step(bit00224466_2, bit00224466_3, mask_4, 2, bit00004444_1, bit22226666_1)@;
1562
- s2p_step(bit11335577_0, bit11335577_1, mask_4, 2, bit11115555_0, bit33337777_0)@;
1563
- s2p_step(bit11335577_2, bit11335577_3, mask_4, 2, bit11115555_1, bit33337777_1)@;
1564
- s2p_step(bit00004444_0, bit00004444_1, mask_8, 4, p0, p4)@;
1565
- s2p_step(bit11115555_0, bit11115555_1, mask_8, 4, p1, p5)@;
1566
- s2p_step(bit22226666_0, bit22226666_1, mask_8, 4, p2, p6)@;
1567
- s2p_step(bit33337777_0, bit33337777_1, mask_8, 4, p3, p7)@;
1568
- }
1569
-
1570
- @<Transpose to parallel bit streams |u8bit0| through |u8bit7|@>=
1571
- #if (S2P_ALGORITHM == S2P_BYTEPACK)
1572
- {
1573
- BitBlock mask_2 = simd_himask_2;
1574
- BitBlock mask_4 = simd_himask_4;
1575
- BitBlock mask_8 = simd_himask_8;
1576
- #if (BYTE_ORDER == BIG_ENDIAN)
1577
- s2p_bytepack(U8s0, U8s1, U8s2, U8s3, U8s4, U8s5, U8s6, U8s7, @/
1578
- u8bit0, u8bit1, u8bit2, u8bit3, u8bit4, u8bit5, u8bit6, u8bit7)@;
1579
- #endif
1580
- #if (BYTE_ORDER == LITTLE_ENDIAN)
1581
- s2p_bytepack(U8s7, U8s6, U8s5, U8s4, U8s3, U8s2, U8s1, U8s0, @/
1582
- u8bit0, u8bit1, u8bit2, u8bit3, u8bit4, u8bit5, u8bit6, u8bit7)@;
1583
- #endif
1584
- }
1585
- #endif
1586
-
1587
-
1588
- @ Parallel to serial transposition reverses the process.
1589
-
1590
- @d p2s_step(p0, p1, hi_mask, shift, s0, s1)
1591
- {
1592
- BitBlock t0, t1;
1593
- t0 = simd_if(hi_mask, p0, simd_srli_16(p1, shift));
1594
- t1 = simd_if(hi_mask, simd_slli_16(p0, shift), p1);
1595
- s0 = simd_mergeh_8(t0, t1);
1596
- s1 = simd_mergel_8(t0, t1);
1597
- }
1598
-
1599
- @d p2s_bytemerge(p0, p1, p2, p3, p4, p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7)
1600
- {
1601
- BitBlock bit00004444_0, bit22226666_0, bit00004444_1, bit22226666_1;
1602
- BitBlock bit11115555_0, bit33337777_0, bit11115555_1, bit33337777_1;
1603
- BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3;
1604
- BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3;
1605
- p2s_step(p0, p4, simd_himask_8, 4, bit00004444_0, bit00004444_1)@;
1606
- p2s_step(p1, p5, simd_himask_8, 4, bit11115555_0, bit11115555_1)@;
1607
- p2s_step(p2, p6, simd_himask_8, 4, bit22226666_0, bit22226666_1)@;
1608
- p2s_step(p3, p7, simd_himask_8, 4, bit33337777_0, bit33337777_1)@;
1609
- p2s_step(bit00004444_0, bit22226666_0, simd_himask_4, 2, bit00224466_0, bit00224466_1)@;
1610
- p2s_step(bit11115555_0, bit33337777_0, simd_himask_4, 2, bit11335577_0, bit11335577_1)@;
1611
- p2s_step(bit00004444_1, bit22226666_1, simd_himask_4, 2, bit00224466_2, bit00224466_3)@;
1612
- p2s_step(bit11115555_1, bit33337777_1, simd_himask_4, 2, bit11335577_2, bit11335577_3)@;
1613
- p2s_step(bit00224466_0, bit11335577_0, simd_himask_2, 1, s0, s1)@;
1614
- p2s_step(bit00224466_1, bit11335577_1, simd_himask_2, 1, s2, s3)@;
1615
- p2s_step(bit00224466_2, bit11335577_2, simd_himask_2, 1, s4, s5)@;
1616
- p2s_step(bit00224466_3, bit11335577_3, simd_himask_2, 1, s6, s7)@;
1617
- }
1618
-
1619
- @<Transpose high UTF-16 bit streams to high byte stream@>=
1620
- #if (P2S_ALGORITHM == P2S_BYTEMERGE)
1621
- #if (BYTE_ORDER == BIG_ENDIAN)
1622
- p2s_bytemerge(u16hi0, u16hi1, u16hi2, u16hi3, u16hi4, u16hi5, u16hi6, u16hi7, @/
1623
- U16h0, U16h1, U16h2, U16h3, U16h4, U16h5, U16h6, U16h7)@;
1624
- #endif
1625
- #if (BYTE_ORDER == LITTLE_ENDIAN)
1626
- p2s_bytemerge(u16hi0, u16hi1, u16hi2, u16hi3, u16hi4, u16hi5, u16hi6, u16hi7, @/
1627
- U16h7, U16h6, U16h5, U16h4, U16h3, U16h2, U16h1, U16h0)@;
1628
- #endif
1629
- #endif
1630
-
1631
- @ @<Transpose low UTF-16 bit streams to low byte stream@>=
1632
- #if (P2S_ALGORITHM == P2S_BYTEMERGE)
1633
- #if (BYTE_ORDER == BIG_ENDIAN)
1634
- p2s_bytemerge(u16lo0, u16lo1, u16lo2, u16lo3, u16lo4, u16lo5, u16lo6, u16lo7, @/
1635
- U16l0, U16l1, U16l2, U16l3, U16l4, U16l5, U16l6, U16l7)@;
1636
- #endif
1637
- #if (BYTE_ORDER == LITTLE_ENDIAN)
1638
- p2s_bytemerge(u16lo0, u16lo1, u16lo2, u16lo3, u16lo4, u16lo5, u16lo6, u16lo7, @/
1639
- U16l7, U16l6, U16l5, U16l4, U16l3, U16l2, U16l1, U16l0)@;
1640
- #endif
1641
- #endif
1642
-
1643
-
1644
- @ When a block of input consists of single and two-byte sequences only,
1645
- the high 5 bits of the UTF-16 representation are always zero.
1646
- Transposition of the remaining three bit streams
1647
- (|u16hi5| through |u16hi7|) to high UTF-16 bytes is simplified
1648
- in this case.
1649
-
1650
- @d p2s_halfstep(p1, hi_mask, shift, s0, s1)
1651
- {
1652
- BitBlock t0, t1;
1653
- t0 = simd_andc(sisd_srli(p1, shift), hi_mask);
1654
- t1 = simd_andc(p1, hi_mask);
1655
- s0 = simd_mergeh_8(t0, t1);
1656
- s1 = simd_mergel_8(t0, t1);
1657
- }
1658
- @d p2s_567_bytemerge(p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7)
1659
- {
1660
- BitBlock bit22226666_0, bit22226666_1;
1661
- BitBlock bit11115555_0, bit33337777_0, bit11115555_1, bit33337777_1;
1662
- BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3;
1663
- BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3;
1664
- p2s_halfstep(p5, simd_himask_8, 4, bit11115555_0, bit11115555_1)@;
1665
- p2s_halfstep(p6, simd_himask_8, 4, bit22226666_0, bit22226666_1)@;
1666
- p2s_halfstep(p7, simd_himask_8, 4, bit33337777_0, bit33337777_1)@;
1667
- p2s_halfstep(bit22226666_0, simd_himask_4, 2, bit00224466_0, bit00224466_1)@;
1668
- p2s_step(bit11115555_0, bit33337777_0, simd_himask_4, 2, bit11335577_0, bit11335577_1)@;
1669
- p2s_halfstep(bit22226666_1, simd_himask_4, 2, bit00224466_2, bit00224466_3)@;
1670
- p2s_step(bit11115555_1, bit33337777_1, simd_himask_4, 2, bit11335577_2, bit11335577_3)@;
1671
- p2s_step(bit00224466_0, bit11335577_0, simd_himask_2, 1, s0, s1)@;
1672
- p2s_step(bit00224466_1, bit11335577_1, simd_himask_2, 1, s2, s3)@;
1673
- p2s_step(bit00224466_2, bit11335577_2, simd_himask_2, 1, s4, s5)@;
1674
- p2s_step(bit00224466_3, bit11335577_3, simd_himask_2, 1, s6, s7)@;
1675
- }
1676
-
1677
- @<Transpose three high UTF-16 bit streams to high byte stream@>=
1678
- #if (P2S_ALGORITHM == P2S_BYTEMERGE)
1679
- #if (BYTE_ORDER == BIG_ENDIAN)
1680
- p2s_567_bytemerge(u16hi5, u16hi6, u16hi7, @/
1681
- U16h0, U16h1, U16h2, U16h3, U16h4, U16h5, U16h6, U16h7)@;
1682
- #endif
1683
- #if (BYTE_ORDER == LITTLE_ENDIAN)
1684
- p2s_567_bytemerge(u16hi5, u16hi6, u16hi7, @/
1685
- U16h7, U16h6, U16h5, U16h4, U16h3, U16h2, U16h1, U16h0)@;
1686
- #endif
1687
- #endif
1688
-
1689
-
1690
- @* Altivec-Specific Implementation.
1691
-
1692
- @ @<Import idealized SIMD operations@>=
1693
- #if (U8U16_TARGET == ALTIVEC_TARGET)
1694
- #include "../lib/altivec_simd.h"
1695
- #endif
1696
-
1697
-
1698
- @ @<Load a full block of UTF-8 byte data@>=
1699
- #if ((U8U16_TARGET == ALTIVEC_TARGET) || (U8U16_TARGET == SPU_TARGET))
1700
- {
1701
- BitBlock r0, r1, r2, r3, r4, r5, r6, r7, r8;
1702
- BitBlock input_shiftl = vec_lvsl(0, U8data);
1703
- r0 = vec_ld(0, U8data);
1704
- r1 = vec_ld(16, U8data);
1705
- r2 = vec_ld(32, U8data);
1706
- U8s0 = simd_permute(r0, r1, input_shiftl);
1707
- r3 = vec_ld(48, U8data);
1708
- U8s1 = simd_permute(r1, r2, input_shiftl);
1709
- r4 = vec_ld(64, U8data);
1710
- U8s2 = simd_permute(r2, r3, input_shiftl);
1711
- r5 = vec_ld(80, U8data);
1712
- U8s3 = simd_permute(r3, r4, input_shiftl);
1713
- r6 = vec_ld(96, U8data);
1714
- U8s4 = simd_permute(r4, r5, input_shiftl);
1715
- r7 = vec_ld(112, U8data);
1716
- U8s5 = simd_permute(r5, r6, input_shiftl);
1717
- /* Do not load beyond known input area (bytes 0 to 127).*/
1718
- r8 = vec_ld(127, U8data);
1719
- U8s6 = simd_permute(r6, r7, input_shiftl);
1720
- U8s7 = simd_permute(r7, r8, input_shiftl);
1721
- u8advance = BLOCKSIZE;
1722
- @<Apply block shortening@>@;
1723
- }
1724
- #endif
1725
-
1726
-
1727
- @ Load a block fragment as a full block with possible
1728
- junk after the fragment end position. Make sure to
1729
- avoid any access past the end of buffer.
1730
-
1731
- @d min(x, y) ((x) < (y) ? (x) : (y))
1732
-
1733
- @<Load a block fragment@>=
1734
- #if ((U8U16_TARGET == ALTIVEC_TARGET) || (U8U16_TARGET == SPU_TARGET))
1735
- {
1736
- BitBlock r0, r1, r2, r3, r4, r5, r6, r7, r8;
1737
- BitBlock input_shiftl = vec_lvsl(0, U8data);
1738
- int last_byte = inbytes - 1;
1739
- r0 = vec_ld(0, U8data);
1740
- r1 = vec_ld(min(16, last_byte), U8data);
1741
- r2 = vec_ld(min(32, last_byte), U8data);
1742
- U8s0 = simd_permute(r0, r1, input_shiftl);
1743
- r3 = vec_ld(min(48, last_byte), U8data);
1744
- U8s1 = simd_permute(r1, r2, input_shiftl);
1745
- r4 = vec_ld(min(64, last_byte), U8data);
1746
- U8s2 = simd_permute(r2, r3, input_shiftl);
1747
- r5 = vec_ld(min(80, last_byte), U8data);
1748
- U8s3 = simd_permute(r3, r4, input_shiftl);
1749
- r6 = vec_ld(min(96, last_byte), U8data);
1750
- U8s4 = simd_permute(r4, r5, input_shiftl);
1751
- r7 = vec_ld(min(112, last_byte), U8data);
1752
- U8s5 = simd_permute(r5, r6, input_shiftl);
1753
- r8 = vec_ld(min(127, last_byte), U8data);
1754
- U8s6 = simd_permute(r6, r7, input_shiftl);
1755
- U8s7 = simd_permute(r7, r8, input_shiftl);
1756
- u8advance = inbytes;
1757
- }
1758
- #endif
1759
-
1760
- @
1761
- @<Apply ASCII short-cut optimization and continue@>=
1762
- #if ((U8U16_TARGET == ALTIVEC_TARGET) || (U8U16_TARGET == SPU_TARGET))
1763
- BitBlock vec_0 = simd_const_8(0);
1764
- if (inbytes > PACKSIZE) {
1765
- BitBlock r0, r1, r2, r3, r4;
1766
- BitBlock input_shiftl = vec_lvsl(0, U8data);
1767
- U8s0 = simd_permute(vec_ld(0, U8data), vec_ld(15, U8data), input_shiftl);
1768
- if (!simd_any_sign_bit_8(U8s0)) {
1769
- int fill_to_align = PACKSIZE - align_offset(U16out);
1770
- U16s0 = u16_merge0(vec_0, U8s0);
1771
- pending = simd_permute(pending, U16s0, vec_lvsr(0, U16out));
1772
- vec_st(pending, 0, U16out);
1773
- u8advance = fill_to_align/2;
1774
- u16advance = fill_to_align;
1775
- @<Advance pointers and counters@>@;
1776
- input_shiftl = vec_lvsl(0, U8data);
1777
- r0 = vec_ld(0, U8data);
1778
- while (inbytes > 4 * PACKSIZE) {
1779
- BytePack * U16pack = (BytePack *) U16out;
1780
- r1 = vec_ld(16, U8data);
1781
- r2 = vec_ld(32, U8data);
1782
- U8s0 = simd_permute(r0, r1, input_shiftl);
1783
- r3 = vec_ld(48, U8data);
1784
- U8s1 = simd_permute(r1, r2, input_shiftl);
1785
- r4 = vec_ld(64, U8data);
1786
- U8s2 = simd_permute(r2, r3, input_shiftl);
1787
- U8s3 = simd_permute(r3, r4, input_shiftl);
1788
- if (simd_any_sign_bit_8(simd_or(simd_or(U8s0, U8s1), simd_or(U8s2, U8s3)))) break;
1789
- sisd_store_aligned(u16_merge0(vec_0, U8s0), U16pack);
1790
- sisd_store_aligned(u16_merge1(vec_0, U8s0), &U16pack[1]);
1791
- sisd_store_aligned(u16_merge0(vec_0, U8s1), &U16pack[2]);
1792
- sisd_store_aligned(u16_merge1(vec_0, U8s1), &U16pack[3]);
1793
- sisd_store_aligned(u16_merge0(vec_0, U8s2), &U16pack[4]);
1794
- sisd_store_aligned(u16_merge1(vec_0, U8s2), &U16pack[5]);
1795
- sisd_store_aligned(u16_merge0(vec_0, U8s3), &U16pack[6]);
1796
- pending = u16_merge1(vec_0, U8s3);
1797
- sisd_store_aligned(pending, &U16pack[7]);
1798
- u8advance = 4*PACKSIZE;
1799
- u16advance = 8*PACKSIZE;
1800
- @<Advance pointers and counters@>@;
1801
- r0 = r4;
1802
- }
1803
- while (inbytes > PACKSIZE) {
1804
- BytePack * U16pack = (BytePack *) U16out;
1805
- r1 = vec_ld(16, U8data);
1806
- U8s0 = simd_permute(r0, r1, input_shiftl);
1807
- if (simd_any_sign_bit_8(U8s0)) break;
1808
- sisd_store_aligned(u16_merge0(vec_0, U8s0), U16pack);
1809
- pending = u16_merge1(vec_0, U8s0);
1810
- sisd_store_aligned(pending, &U16pack[1]);
1811
- u8advance = PACKSIZE;
1812
- u16advance = 2*PACKSIZE;
1813
- @<Advance pointers and counters@>@;
1814
- r0 = r1;
1815
- }
1816
- }
1817
- }
1818
- #endif
1819
-
1820
-
1821
- @*1 Deletion by Central Result Induction/Packed Permutation Vector.
1822
-
1823
- Permutation vectors allow selection of arbitrary sets of bytes
1824
- in a single |simd_permute| operation.
1825
- For example, a single permutation can move all the
1826
- nondeleted bytes into the leftmost positions.
1827
-
1828
- Packed permutation vectors consist of 2 consecutive 16-byte
1829
- vectors packed into a single vector of 32 nybbles. Permutation
1830
- values are confined to the range 0..15.
1831
-
1832
- Packed permutation vectors can be computed by deleting indices
1833
- of deleted elements. These deletions are applied in nybble space,
1834
- operating on 32 elements at a time. This provides a 4:1 advantage
1835
- over applying operations in doublebyte space, and a 2:1 advantage
1836
- over applying operations in byte space.
1837
-
1838
- Given a 128-bit delmask, the following logic computes 4 32-position
1839
- packed permutation vectors that can be used to compute 8-position
1840
- left deletion results.
1841
-
1842
- @<Determine deletion information from |delmask|@>=
1843
- #if (BYTE_DELETION == BYTE_DEL_BY_PERMUTE_TO_LEFT8)
1844
- { /* Build the packed permutation vectors |l8perm0| .. |l8perm3| from |delmask| in three steps. */
1845
- BitBlock d0, d1, q0, q1, p0, p1;
1846
- BitBlock delmask_hi4 = simd_srli_8(delmask, 4); /* each |delmask| byte shifted right by 4, used below as a table index */
1847
- /* Step 1. 2->4 central deletion */
1848
- d0 = simd_permute(del2_4_shift_tbl, del2_4_shift_tbl, delmask_hi4);
1849
- d1 = simd_permute(del2_4_shift_tbl, del2_4_shift_tbl, delmask);
1850
- q0 = simd_mergeh_8(d0, d1); /* 0A00 0B00 0C00 0D00 pattern 0..63 */
1851
- q1 = simd_mergel_8(d0, d1); /* 0A00 0B00 0C00 0D00 pattern 64 .. 127 */
1852
- p0 = simd_srli_8(q0, 4); /* 0000 0A00 0000 0C00 pattern 0..63 */
1853
- p1 = simd_srli_8(q1, 4); /* 0000 0A00 0000 0C00 pattern 64 .. 127 */
1854
- l8perm0 = simd_rotl_8(packed_identity, simd_mergeh_8(p0, q0));
1855
- l8perm1 = simd_rotl_8(packed_identity, simd_mergel_8(p0, q0));
1856
- l8perm2 = simd_rotl_8(packed_identity, simd_mergeh_8(p1, q1));
1857
- l8perm3 = simd_rotl_8(packed_identity, simd_mergel_8(p1, q1));
1858
- /* Step 2. 4->8 central deletion */
1859
- d0 = simd_permute(del4_8_rshift_tbl, del4_8_rshift_tbl, delmask_hi4);
1860
- d1 = simd_permute(del4_8_lshift_tbl, del4_8_lshift_tbl, delmask);
1861
- p0 = simd_mergeh_8(d0, d1); /* -4*(A+B), 4*(C+D), -4*(E+F) for 0..63 */
1862
- p1 = simd_mergel_8(d0, d1); /* the corresponding merge for 64 .. 127 */
1863
- l8perm0 = simd_rotl_16(l8perm0, simd_mergeh_8(simd_const_8(0), p0));
1864
- l8perm1 = simd_rotl_16(l8perm1, simd_mergel_8(simd_const_8(0), p0));
1865
- l8perm2 = simd_rotl_16(l8perm2, simd_mergeh_8(simd_const_8(0), p1));
1866
- l8perm3 = simd_rotl_16(l8perm3, simd_mergel_8(simd_const_8(0), p1));
1867
- /* Step 3. 8 central -> 8 left deletion */
1868
- d0 = simd_permute(del8_shift_tbl, del8_shift_tbl, delmask_hi4);
1869
- p0 = simd_mergeh_8(simd_const_8(0), d0);
1870
- p1 = simd_mergel_8(simd_const_8(0), d0);
1871
- l8perm0 = simd_rotl_32(l8perm0, simd_mergeh_8(simd_const_8(0), p0));
1872
- l8perm1 = simd_rotl_32(l8perm1, simd_mergel_8(simd_const_8(0), p0));
1873
- l8perm2 = simd_rotl_32(l8perm2, simd_mergeh_8(simd_const_8(0), p1));
1874
- l8perm3 = simd_rotl_32(l8perm3, simd_mergel_8(simd_const_8(0), p1));
1875
- }
1876
- #endif
1877
- #if (DOUBLEBYTE_DELETION == ALTIVEC_FROM_LEFT8)
1878
- {
1879
- BitBlock delmask_hi4 = simd_srli_8(delmask, 4);
1880
- delcounts_8 = simd_add_8(simd_permute(bits_per_nybble_tbl, bits_per_nybble_tbl, delmask_hi4),
1881
- simd_permute(bits_per_nybble_tbl, bits_per_nybble_tbl, delmask));
1882
- u16_bytes_8 = simd_slli_8(simd_sub_8(simd_const_8(8), delcounts_8), 1); /* $2 \times (8 - d)$ */
1883
- }
1884
- #endif
1885
-
1886
- @ Tables for computing deletion info.
1887
- @<Local variable declarations@>=
1888
- #if ((U8U16_TARGET == ALTIVEC_TARGET) || (U8U16_TARGET == SPU_TARGET))
1889
- BitBlock bits_per_nybble_tbl =
1890
- (BitBlock) {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4};
1891
- #endif
1892
- #if (BYTE_DELETION == BYTE_DEL_BY_PERMUTE_TO_LEFT8)
1893
- BitBlock packed_identity =
1894
- (BitBlock) {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
1895
- 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF};
1896
- BitBlock del2_4_shift_tbl =
1897
- (BitBlock) {0, 0, 4, 4, 0x40, 0x40, 0x44, 0x44, 0, 0, 4, 4, 0x40, 0x40, 0x44, 0x44};
1898
- BitBlock del4_8_rshift_tbl =
1899
- (BitBlock) {0, 0xFC, 0xFC, 0xF8, 0, 0xFC, 0xFC, 0xF8,
1900
- 0, 0xFC, 0xFC, 0xF8, 0, 0xFC, 0xFC, 0xF8};
1901
- BitBlock del4_8_lshift_tbl =
1902
- (BitBlock) {0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8};
1903
- BitBlock del8_shift_tbl = // 4 * bitcount
1904
- (BitBlock) {0, 4, 4, 8, 4, 8, 8, 12, 4, 8, 8, 12, 8, 12, 12, 16};
1905
- BitBlock l8perm0, l8perm1, l8perm2, l8perm3;
1906
- #endif
1907
- #if ((BIT_DELETION != ROTATION_TO_LEFT8) && (DOUBLEBYTE_DELETION == ALTIVEC_FROM_LEFT8))
1908
- BitBlock delcounts_8, u16_bytes_8;
1909
- #endif
1910
-
1911
- @ @<Apply bit deletions to high five UTF-16 bit streams@>=
1912
- #if (BYTE_DELETION == BYTE_DEL_BY_PERMUTE_TO_LEFT8)
1913
- { /* No operations on bit streams. */
1914
- }
1915
- #endif
1916
-
1917
- @ @<Apply bit deletions to low eleven UTF-16 bit streams@>=
1918
- #if (BYTE_DELETION == BYTE_DEL_BY_PERMUTE_TO_LEFT8)
1919
- { /* No operations on bit streams. */
1920
- }
1921
- #endif
1922
-
1923
- @
1924
- @d unpack_packed_permutation(packed, high_perm, low_perm)
1924
- { /* Split the nybble-packed vector |packed| into the two byte-index vectors |high_perm| and |low_perm|. */
1925
- BitBlock even_perms = simd_srli_8(packed, 4); /* each byte of |packed| shifted right by 4, leaving its high nybble */
1926
- BitBlock odd_perms = simd_andc(packed, simd_himask_8); /* |packed| with the |simd_himask_8| bits cleared; presumably the low nybble of each byte, TODO confirm the mask definition */
1927
- high_perm = simd_mergeh_8(even_perms, odd_perms); /* presumably interleaves the even and odd indices of the high half; confirm |simd_mergeh_8| */
1928
- low_perm = simd_mergel_8(even_perms, odd_perms); /* the matching merge for the low half */
1929
- }
1931
-
1932
- @<Apply byte stream transformations@>=
1933
- #if (BYTE_DELETION == BYTE_DEL_BY_PERMUTE_TO_LEFT8)
1934
- {
1935
- BitBlock high_perm, low_perm;
1936
- unpack_packed_permutation(l8perm0, high_perm, low_perm)@;
1937
- U16l0 = simd_permute(U16l0, U16l0, high_perm);
1938
- U16h0 = simd_permute(U16h0, U16h0, high_perm);
1939
- U16l1 = simd_permute(U16l1, U16l1, low_perm);
1940
- U16h1 = simd_permute(U16h1, U16h1, low_perm);
1941
- unpack_packed_permutation(l8perm1, high_perm, low_perm)@;
1942
- U16l2 = simd_permute(U16l2, U16l2, high_perm);
1943
- U16h2 = simd_permute(U16h2, U16h2, high_perm);
1944
- U16l3 = simd_permute(U16l3, U16l3, low_perm);
1945
- U16h3 = simd_permute(U16h3, U16h3, low_perm);
1946
- unpack_packed_permutation(l8perm2, high_perm, low_perm)@;
1947
- U16l4 = simd_permute(U16l4, U16l4, high_perm);
1948
- U16h4 = simd_permute(U16h4, U16h4, high_perm);
1949
- U16l5 = simd_permute(U16l5, U16l5, low_perm);
1950
- U16h5 = simd_permute(U16h5, U16h5, low_perm);
1951
- unpack_packed_permutation(l8perm3, high_perm, low_perm)@;
1952
- U16l6 = simd_permute(U16l6, U16l6, high_perm);
1953
- U16h6 = simd_permute(U16h6, U16h6, high_perm);
1954
- U16l7 = simd_permute(U16l7, U16l7, low_perm);
1955
- U16h7 = simd_permute(U16h7, U16h7, low_perm);
1956
-
1957
- }
1958
- #endif
1959
-
1960
-
1961
-
1962
- @
1963
- @d output_step(vec, vec_num)
1963
- { /* Emit |vec| at output offset |u16advance|, then fold what is left into |pending| and advance. */
1964
- BitBlock rshift, lshift;
1965
- rshift = vec_lvsr(u16advance, U16out); /* permute control taken from the address |U16out + u16advance| */
1966
- vec_stl(simd_permute(pending, vec, rshift), u16advance, U16out); /* store a |pending|/|vec| combination selected by |rshift| */
1967
- lshift = simd_add_8(vec_0__15, vec_splat(u16_bytes_8, vec_num)); /* |vec_0__15| plus entry |vec_num| of |u16_bytes_8| in every byte */
1968
- pending = simd_permute(pending, vec, lshift); /* new |pending| selected from |pending|/|vec| by |lshift|; presumably the bytes not yet completed in the output, TODO confirm */
1969
- u16advance += dbyte_count[vec_num]; /* advance by the count held for vector |vec_num| */
1970
- }
1972
-
1973
-
1974
- @
1975
- @<Write compressed UTF-16 data@>=
1976
- #if ((U8U16_TARGET == ALTIVEC_TARGET) || (U8U16_TARGET == SPU_TARGET))
1977
- {
1978
- u16advance = 0;
1979
- BitBlock vec_0__15 = vec_lvsl1(0);
1980
- unsigned char * dbyte_count = (unsigned char *) & u16_bytes_8;
1981
- output_step(U16s0, 0)@;
1982
- output_step(U16s1, 1)@;
1983
- output_step(U16s2, 2)@;
1984
- output_step(U16s3, 3)@;
1985
- output_step(U16s4, 4)@;
1986
- output_step(U16s5, 5)@;
1987
- output_step(U16s6, 6)@;
1988
- output_step(U16s7, 7)@;
1989
- output_step(U16s8, 8)@;
1990
- output_step(U16s9, 9)@;
1991
- output_step(U16s10, 10)@;
1992
- output_step(U16s11, 11)@;
1993
- output_step(U16s12, 12)@;
1994
- output_step(U16s13, 13)@;
1995
- output_step(U16s14, 14)@;
1996
- output_step(U16s15, 15)@;
1997
- vec_st(simd_permute(pending, simd_const_8(0),
1998
- vec_lvsl1(16 - (0x0F & ((int) &U16out[u16advance])))),
1999
- u16advance-1, U16out);
2000
- }
2001
- #endif
2002
-
2003
- @ If the initial value of |*outbuf| is not on an aligned boundary,
2004
- the existing data between the boundary and |*outbuf| must be
2005
- loaded into the |pending| output data register.
2006
-
2007
- @<Local variable declarations@>=
2008
- #if ((U8U16_TARGET == ALTIVEC_TARGET) || (U8U16_TARGET == SPU_TARGET))
2009
- BitBlock start_of_output_existing = vec_ld(0, (vector unsigned char *)*outbuf);
2010
- BitBlock pending = simd_permute(start_of_output_existing,
2011
- start_of_output_existing,
2012
- vec_lvsl(0, (unsigned char *)*outbuf));
2013
- #endif
2014
-
2015
-
2016
- @* SPU-Specific Implementation.
2017
-
2018
- @ @<Import idealized SIMD operations@>=
2019
- #if (U8U16_TARGET == SPU_TARGET)
2020
- #include "spu_simd.h"
2021
- #include "vmx2spu.h"
2022
- #endif
2023
-
2024
-
2025
-
2026
-
2027
- @* MMX-Specific Implementation.
2028
- @ To right-justify within 4-bit fields, bits move at most
2029
- three positions. For each bit position, determine the 2-bit
2030
- coding for the amount to move as
2031
- |del4_rshift2| and |del4_rshift1|. Initially,
2032
- |del4_rshift1| is the |delmask| parity of the two positions immediately
2033
- to the right (within the 4-bit field). One step of the
2034
- parallel prefix method completes the calculation.
2035
- @<Determine deletion information from |delmask|@>=
2036
- #if (BIT_DELETION == SHIFT_TO_RIGHT4)
2037
- del4_rshift1 = simd_xor(simd_slli_4(delmask, 1), simd_slli_4(delmask, 2));
2038
- del4_rshift1 = simd_xor(del4_rshift1, simd_slli_4(del4_rshift1, 2));
2039
- /* Transition to even delcount: odd delcount to right, this one deleted. */
2040
- del4_trans2 = simd_and(del4_rshift1, delmask);
2041
- /* Odd number of transition positions to right. */
2042
- del4_rshift2 = simd_xor(simd_slli_4(del4_trans2, 1), simd_slli_4(del4_trans2, 2));
2043
- del4_rshift2 = simd_xor(del4_rshift2, simd_slli_4(del4_rshift2, 2));
2044
- /* Only move bits that are not deleted. */
2045
- del4_rshift1 = simd_andc(del4_rshift1, delmask);
2046
- del4_rshift2 = simd_andc(del4_rshift2, delmask);
2047
- /* Update |del4_rshift2| to apply after |del4_rshift1|. */
2048
- del4_rshift2 = simd_add_4(simd_and(del4_rshift1, del4_rshift2), del4_rshift2);
2049
- #endif
2050
-
2051
- @ @<Local variable declarations@>=
2052
- #if (BIT_DELETION == SHIFT_TO_RIGHT4)
2053
- BitBlock del4_rshift1, del4_trans2, del4_rshift2;
2054
- #endif
2055
-
2056
- @ Right shift within 4-bit fields with the combination
2057
- of a single-bit shift for bits that must move an odd number
2058
- of positions and a 2-bit shift for bits that must move 2 or 3
2059
- positions.
2060
-
2061
- @d do_right4_shifts(vec, rshift1, rshift2)
2061
- { BitBlock s2; /* Move the bits of |vec| right by one and/or two positions as selected by |rshift1| and |rshift2|. */
2062
- vec = simd_sub_8(vec, sisd_srli(simd_and(rshift1, vec), 1)); /* a set bit selected by |rshift1|, minus half its value, lands one position lower */
2063
- s2 = simd_and(rshift2, vec); /* set bits that must still move two positions */
2064
- vec = simd_or(sisd_srli(s2, 2), simd_xor(vec, s2)); /* clear those bits in place and set them two positions to the right */
2065
- }
2067
- @<Apply bit deletions to high five UTF-16 bit streams@>=
2068
- #if (BIT_DELETION == SHIFT_TO_RIGHT4)
2069
- do_right4_shifts(u16hi0, del4_rshift1, del4_rshift2)@;
2070
- do_right4_shifts(u16hi1, del4_rshift1, del4_rshift2)@;
2071
- do_right4_shifts(u16hi2, del4_rshift1, del4_rshift2)@;
2072
- do_right4_shifts(u16hi3, del4_rshift1, del4_rshift2)@;
2073
- do_right4_shifts(u16hi4, del4_rshift1, del4_rshift2)@;
2074
- #endif
2075
-
2076
- @ @<Apply bit deletions to low eleven UTF-16 bit streams@>=
2077
- #if (BIT_DELETION == SHIFT_TO_RIGHT4)
2078
- do_right4_shifts(u16hi5, del4_rshift1, del4_rshift2)@;
2079
- do_right4_shifts(u16hi6, del4_rshift1, del4_rshift2)@;
2080
- do_right4_shifts(u16hi7, del4_rshift1, del4_rshift2)@;
2081
- do_right4_shifts(u16lo0, del4_rshift1, del4_rshift2)@;
2082
- do_right4_shifts(u16lo1, del4_rshift1, del4_rshift2)@;
2083
- do_right4_shifts(u16lo2, del4_rshift1, del4_rshift2)@;
2084
- do_right4_shifts(u16lo3, del4_rshift1, del4_rshift2)@;
2085
- do_right4_shifts(u16lo4, del4_rshift1, del4_rshift2)@;
2086
- do_right4_shifts(u16lo5, del4_rshift1, del4_rshift2)@;
2087
- do_right4_shifts(u16lo6, del4_rshift1, del4_rshift2)@;
2088
- do_right4_shifts(u16lo7, del4_rshift1, del4_rshift2)@;
2089
- #endif
2090
-
2091
- @ @<Local variable declarations@>=
2092
- #if (DOUBLEBYTE_DELETION == FROM_LEFT4)
2093
- BitBlock delcounts_2, delcounts_4, u16_bytes_4;
2094
- #endif
2095
-
2096
- @ @<Determine deletion information from |delmask|@>=
2097
- #if (DOUBLEBYTE_DELETION == FROM_LEFT4)
2098
- delcounts_2 = simd_add_2_lh(delmask, delmask);
2099
- delcounts_4 = simd_add_4_lh(delcounts_2, delcounts_2);
2100
- u16_bytes_4 = sisd_slli(simd_sub_8(simd_const_4(4), delcounts_4), 1);
2101
-
2102
- #if BYTE_ORDER == BIG_ENDIAN
2103
- sisd_store_aligned(simd_mergeh_4(simd_const_4(0), u16_bytes_4),
2104
- &u16_bytes_per_reg[0]);
2105
- sisd_store_aligned(simd_mergel_4(simd_const_4(0), u16_bytes_4),
2106
- &u16_bytes_per_reg[8]);
2107
- #endif
2108
- #if BYTE_ORDER == LITTLE_ENDIAN
2109
- sisd_store_aligned(simd_mergel_4(simd_const_4(0), u16_bytes_4),
2110
- &u16_bytes_per_reg[0]);
2111
- sisd_store_aligned(simd_mergeh_4(simd_const_4(0), u16_bytes_4),
2112
- &u16_bytes_per_reg[8]);
2113
- #endif
2114
- #endif
2115
-
2116
- @ @<Import idealized SIMD operations@>=
2117
- #if (U8U16_TARGET == MMX_TARGET)
2118
- #include "../lib/mmx_simd.h"
2119
- #endif
2120
-
2121
- @ @<Clear SIMD state@>=
2122
- #if (U8U16_TARGET == MMX_TARGET)
2123
- _mm_empty();
2124
- #endif
2125
-
2126
- @* SSE-Specific Implementation.
2127
-
2128
- @ To right-justify within 8-bit fields, bits move at most
2129
- seven positions. For each bit position, determine the 3-bit
2130
- coding for the amount to move as |del8_rshift4|,
2131
- |del8_rshift2|, and |del8_rshift1|. Initially,
2132
- |del8_rshift1| is the |delmask| parity of the two positions immediately
2133
- to the right (within the 8-bit field). Two steps of the
2134
- parallel prefix method complete the calculation.
2135
-
2136
- @<Determine deletion information from |delmask|@>=
2137
- #if ((BIT_DELETION == SHIFT_TO_RIGHT8) || (BIT_DELETION == PERMUTE_INDEX_TO_RIGHT8))
2137
- del8_rshift1 = simd_xor(simd_slli_8(delmask, 1), simd_slli_8(delmask, 2));
2138
- del8_rshift1 = simd_xor(del8_rshift1, simd_slli_8(del8_rshift1, 2));
2139
- del8_rshift1 = simd_xor(del8_rshift1, simd_slli_8(del8_rshift1, 4));
2140
- /* Transition to even delcount: odd delcount to right, this one deleted. */
2141
- del8_trans2 = simd_and(del8_rshift1, delmask);
2142
- /* Odd number of transition positions to right. */
2143
- del8_rshift2 = simd_xor(simd_slli_8(del8_trans2, 1), simd_slli_8(del8_trans2, 2));
2144
- del8_rshift2 = simd_xor(del8_rshift2, simd_slli_8(del8_rshift2, 2));
2145
- del8_rshift2 = simd_xor(del8_rshift2, simd_slli_8(del8_rshift2, 4));
2146
- /* Transition positions: odd |del2count| to right, this one a transition to even. */
2147
- del8_trans4 = simd_and(del8_rshift2, del8_trans2);
2148
- del8_rshift4 = simd_xor(simd_slli_8(del8_trans4, 1), simd_slli_8(del8_trans4, 2));
2149
- del8_rshift4 = simd_xor(del8_rshift4, simd_slli_8(del8_rshift4, 2));
2150
- del8_rshift4 = simd_xor(del8_rshift4, simd_slli_8(del8_rshift4, 4));
2151
- /* Only move bits that are not deleted. */
2152
- del8_rshift1 = simd_andc(del8_rshift1, delmask);
2153
- del8_rshift2 = simd_andc(del8_rshift2, delmask);
2154
- del8_rshift4 = simd_andc(del8_rshift4, delmask);
2155
- /* Update |del8_rshift2| to apply after |del8_rshift1|. */
2156
- del8_rshift2 = simd_sub_8(del8_rshift2, simd_srli_16(simd_and(del8_rshift1, del8_rshift2),1));
2157
- /* Update |del8_rshift4| to apply after |del8_rshift2| and |del8_rshift1|. */
2158
- del8_rshift4 = simd_sub_8(del8_rshift4, simd_srli_16(simd_and(del8_rshift1, del8_rshift4),1));
2159
- {BitBlock shift_bits = simd_and(del8_rshift2, del8_rshift4);
2160
- del8_rshift4 = simd_or(simd_srli_16(shift_bits, 2), simd_xor(del8_rshift4, shift_bits));}
2161
- #endif
2163
-
2164
- @ @<Local variable declarations@>=
2165
- #if ((BIT_DELETION == SHIFT_TO_RIGHT8) || (BIT_DELETION == PERMUTE_INDEX_TO_RIGHT8))
2166
- BitBlock del8_rshift1, del8_trans2, del8_rshift2, del8_trans4, del8_rshift4;
2167
- #endif
2168
-
2169
- @ Right shift within 8-bit fields with the combination
2170
- of a single-bit shift for bits that must move an odd number
2171
- of positions, a 2-bit shift for bits that must move 2, 3, 6 or 7
2172
- positions, and a 4-bit shift for bits that must move 4 or more positions.
2173
-
2174
- @d do_right8_shifts(vec, rshift1, rshift2, rshift4)
2174
- { BitBlock s2; /* Move the bits of |vec| right by one, two and/or four positions as selected by the three masks. */
2175
- vec = simd_sub_8(vec, simd_srli_16(simd_and(rshift1, vec), 1)); /* a set bit selected by |rshift1|, minus half its value, lands one position lower */
2176
- s2 = simd_and(rshift2, vec); /* set bits that must still move two positions */
2177
- vec = simd_or(simd_srli_16(s2, 2), simd_xor(vec, s2)); /* clear those bits in place and set them two positions to the right */
2178
- s2 = simd_and(rshift4, vec); /* set bits that must still move four positions */
2179
- vec = simd_or(simd_srli_16(s2, 4), simd_xor(vec, s2)); /* clear those bits in place and set them four positions to the right */
2180
- }
2182
- @<Apply bit deletions to high five UTF-16 bit streams@>=
2183
- #if (BIT_DELETION == SHIFT_TO_RIGHT8)
2184
- do_right8_shifts(u16hi0, del8_rshift1, del8_rshift2, del8_rshift4)@;
2185
- do_right8_shifts(u16hi1, del8_rshift1, del8_rshift2, del8_rshift4)@;
2186
- do_right8_shifts(u16hi2, del8_rshift1, del8_rshift2, del8_rshift4)@;
2187
- do_right8_shifts(u16hi3, del8_rshift1, del8_rshift2, del8_rshift4)@;
2188
- do_right8_shifts(u16hi4, del8_rshift1, del8_rshift2, del8_rshift4)@;
2189
- #endif
2190
- @ @<Apply bit deletions to low eleven UTF-16 bit streams@>=
2191
- #if (BIT_DELETION == SHIFT_TO_RIGHT8)
2192
- do_right8_shifts(u16hi5, del8_rshift1, del8_rshift2, del8_rshift4)@;
2193
- do_right8_shifts(u16hi6, del8_rshift1, del8_rshift2, del8_rshift4)@;
2194
- do_right8_shifts(u16hi7, del8_rshift1, del8_rshift2, del8_rshift4)@;
2195
- do_right8_shifts(u16lo0, del8_rshift1, del8_rshift2, del8_rshift4)@;
2196
- do_right8_shifts(u16lo1, del8_rshift1, del8_rshift2, del8_rshift4)@;
2197
- do_right8_shifts(u16lo2, del8_rshift1, del8_rshift2, del8_rshift4)@;
2198
- do_right8_shifts(u16lo3, del8_rshift1, del8_rshift2, del8_rshift4)@;
2199
- do_right8_shifts(u16lo4, del8_rshift1, del8_rshift2, del8_rshift4)@;
2200
- do_right8_shifts(u16lo5, del8_rshift1, del8_rshift2, del8_rshift4)@;
2201
- do_right8_shifts(u16lo6, del8_rshift1, del8_rshift2, del8_rshift4)@;
2202
- do_right8_shifts(u16lo7, del8_rshift1, del8_rshift2, del8_rshift4)@;
2203
- #endif
2204
-
2205
-
2206
-
2207
- @ @<Apply byte stream transformations@>=
2208
- #if (BYTE_DELETION == BYTE_DEL_BY_PERMUTE_TO_RIGHT8)
2209
- {
2210
- BitBlock permute_index_bit0 = simd_andc(simd_const_8(0xAA), delmask);
2211
- BitBlock permute_index_bit1 = simd_andc(simd_const_8(0xCC), delmask);
2212
- BitBlock permute_index_bit2 = simd_andc(simd_const_8(0xF0), delmask);
2213
- BitBlock permute_high_offset = sisd_sfli(simd_const_8(0x08), 64);
2214
- BitBlock perm[8];
2215
- // Delete indexes of bytes to delete from each group of 8.
2216
- do_right8_shifts(permute_index_bit0, del8_rshift1, del8_rshift2, del8_rshift4)@;
2217
- do_right8_shifts(permute_index_bit1, del8_rshift1, del8_rshift2, del8_rshift4)@;
2218
- do_right8_shifts(permute_index_bit2, del8_rshift1, del8_rshift2, del8_rshift4)@;
2219
- // Transform index bit streams to index byte stream.
2220
- p2s_567_bytemerge(permute_index_bit2, permute_index_bit1, permute_index_bit0, @/
2221
- perm[7], perm[6], perm[5], perm[4], perm[3], perm[2], perm[1], perm[0])@;
2222
-
2223
- perm[0] = simd_or(perm[0], permute_high_offset);
2224
- perm[1] = simd_or(perm[1], permute_high_offset);
2225
- perm[2] = simd_or(perm[2], permute_high_offset);
2226
- perm[3] = simd_or(perm[3], permute_high_offset);
2227
- perm[4] = simd_or(perm[4], permute_high_offset);
2228
- perm[5] = simd_or(perm[5], permute_high_offset);
2229
- perm[6] = simd_or(perm[6], permute_high_offset);
2230
- perm[7] = simd_or(perm[7], permute_high_offset);
2231
-
2232
- U16l0 = simd_permute(U16l0, perm[0]);
2233
- U16h0 = simd_permute(U16h0, perm[0]);
2234
- U16l1 = simd_permute(U16l1, perm[1]);
2235
- U16h1 = simd_permute(U16h1, perm[1]);
2236
- U16l2 = simd_permute(U16l2, perm[2]);
2237
- U16h2 = simd_permute(U16h2, perm[2]);
2238
- U16l3 = simd_permute(U16l3, perm[3]);
2239
- U16h3 = simd_permute(U16h3, perm[3]);
2240
- U16l4 = simd_permute(U16l4, perm[4]);
2241
- U16h4 = simd_permute(U16h4, perm[4]);
2242
- U16l5 = simd_permute(U16l5, perm[5]);
2243
- U16h5 = simd_permute(U16h5, perm[5]);
2244
- U16l6 = simd_permute(U16l6, perm[6]);
2245
- U16h6 = simd_permute(U16h6, perm[6]);
2246
- U16l7 = simd_permute(U16l7, perm[7]);
2247
- U16h7 = simd_permute(U16h7, perm[7]);
2248
-
2249
- }
2250
- #endif
2251
-
2252
-
2253
-
2254
-
2255
-
2256
- @ @<Import idealized SIMD operations@>=
2257
- #if (U8U16_TARGET == SSE_TARGET)
2258
- #include "../lib/sse_simd.h"
2259
- #endif
2260
-
2261
-
2262
- @
2263
- \end{document}