pq_crypto 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -0
  3. data/CHANGELOG.md +62 -0
  4. data/GET_STARTED.md +366 -40
  5. data/README.md +76 -233
  6. data/SECURITY.md +107 -82
  7. data/ext/pqcrypto/extconf.rb +169 -87
  8. data/ext/pqcrypto/mldsa_api.h +1 -48
  9. data/ext/pqcrypto/mlkem_api.h +1 -18
  10. data/ext/pqcrypto/pq_externalmu.c +89 -204
  11. data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
  12. data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
  13. data/ext/pqcrypto/pqcrypto_secure.c +203 -78
  14. data/ext/pqcrypto/pqcrypto_secure.h +53 -14
  15. data/ext/pqcrypto/pqcrypto_version.h +7 -0
  16. data/ext/pqcrypto/randombytes.h +9 -0
  17. data/ext/pqcrypto/vendor/.vendored +10 -5
  18. data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
  19. data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
  20. data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
  21. data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
  22. data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
  128. data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
  129. data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
  130. data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
  131. data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
  132. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
  133. data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
  134. data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
  135. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
  136. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
  137. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
  138. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
  139. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
  140. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
  141. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
  142. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
  143. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
  144. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
  145. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
  146. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
  147. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
  148. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
  149. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
  150. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
  151. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
  152. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
  153. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
  154. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
  155. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
  156. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
  157. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
  158. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
  159. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
  160. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
  161. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
  162. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
  163. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
  164. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
  165. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
  166. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
  167. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
  168. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
  169. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
  170. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
  171. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
  172. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
  173. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
  174. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
  175. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
  176. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
  177. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
  178. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
  179. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
  180. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
  181. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
  182. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
  183. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
  184. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
  185. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
  186. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
  187. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
  188. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
  189. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
  190. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
  191. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
  192. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
  193. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
  194. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
  195. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
  196. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
  197. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
  198. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
  199. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
  200. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
  201. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
  202. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
  203. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
  204. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
  205. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
  206. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
  207. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
  208. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
  209. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
  210. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
  211. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
  212. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
  213. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
  214. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
  215. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
  216. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
  217. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
  218. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
  219. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
  220. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
  221. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
  222. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
  223. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
  224. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
  225. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
  226. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
  227. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
  228. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
  229. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
  230. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
  231. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
  232. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
  233. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
  234. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
  235. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
  236. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
  237. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
  238. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
  239. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
  240. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
  241. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
  242. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
  243. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
  244. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
  245. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
  246. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
  247. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
  248. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
  249. data/lib/pq_crypto/algorithm_registry.rb +200 -0
  250. data/lib/pq_crypto/hybrid_kem.rb +1 -12
  251. data/lib/pq_crypto/kem.rb +104 -13
  252. data/lib/pq_crypto/pkcs8.rb +387 -0
  253. data/lib/pq_crypto/serialization.rb +1 -14
  254. data/lib/pq_crypto/signature.rb +123 -17
  255. data/lib/pq_crypto/spki.rb +131 -0
  256. data/lib/pq_crypto/version.rb +1 -1
  257. data/lib/pq_crypto.rb +79 -20
  258. data/script/vendor_libs.rb +88 -155
  259. metadata +241 -73
  260. data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
  261. data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
  262. data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
  263. data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
  264. data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
  265. data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
  266. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
  267. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
  268. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
  269. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
  270. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
  271. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
  272. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
  273. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
  274. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
  275. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
  276. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
  277. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
  278. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
  279. data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
  280. data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
  281. data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
  282. data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
  283. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
  284. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
  285. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
  286. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
  287. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
  288. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
  289. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
  290. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
  291. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
  292. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
  293. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
  294. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
  295. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
  296. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
  297. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
  298. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
  299. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
  300. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
  301. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
  302. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
  303. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
  304. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
  305. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
  306. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
  307. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
  308. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
  309. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
  310. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
  311. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
  312. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
  313. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
  314. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
  315. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
  316. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
  317. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
  318. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
  319. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
  320. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
  321. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
  322. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
  323. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
  324. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
  325. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
  326. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
  327. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
  328. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
@@ -1,1028 +0,0 @@
1
- /*
2
- Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
3
-
4
- For more information, feedback or questions, please refer to our website:
5
- https://keccak.team/
6
-
7
- To the extent possible under law, the implementer has waived all copyright
8
- and related or neighboring rights to the source code in this file.
9
- http://creativecommons.org/publicdomain/zero/1.0/
10
-
11
- ---
12
-
13
- This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
14
- Please refer to PlSnP-documentation.h for more details.
15
-
16
- This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
17
- Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
- */
19
-
20
- #include <immintrin.h>
21
- #include <stdint.h>
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include <string.h>
25
-
26
- #include "align.h"
27
- #include "KeccakP-1600-times4-SnP.h"
28
- #include "SIMD256-config.h"
29
-
30
- #include "brg_endian.h"
31
- #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
32
- #error Expecting a little-endian platform
33
- #endif
34
-
35
- typedef unsigned char UINT8;
36
- typedef unsigned long long int UINT64;
37
- typedef __m128i V128;
38
- typedef __m256i V256;
39
-
40
- //#define UseGatherScatter
41
-
42
- #define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + instanceIndex)
43
-
44
- #if defined(KeccakP1600times4_useAVX2)
45
- #define ANDnu256(a, b) _mm256_andnot_si256(a, b)
46
- #define CONST256(a) _mm256_load_si256((const V256 *)&(a))
47
- #define CONST256_64(a) _mm256_set1_epi64x(a)
48
- #define LOAD256(a) _mm256_load_si256((const V256 *)&(a))
49
- #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
50
- #define LOAD4_64(a, b, c, d) _mm256_set_epi64x((UINT64)(a), (UINT64)(b), (UINT64)(c), (UINT64)(d))
51
- #define ROL64in256(d, a, o) d = _mm256_or_si256(_mm256_slli_epi64(a, o), _mm256_srli_epi64(a, 64-(o)))
52
- #define ROL64in256_8(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho8))
53
- #define ROL64in256_56(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho56))
54
- static const UINT64 rho8[4] = {0x0605040302010007, 0x0E0D0C0B0A09080F, 0x1615141312111017, 0x1E1D1C1B1A19181F};
55
- static const UINT64 rho56[4] = {0x0007060504030201, 0x080F0E0D0C0B0A09, 0x1017161514131211, 0x181F1E1D1C1B1A19};
56
- #define STORE256(a, b) _mm256_store_si256((V256 *)&(a), b)
57
- #define STORE256u(a, b) _mm256_storeu_si256((V256 *)&(a), b)
58
- #define STORE2_128(ah, al, v) _mm256_storeu2_m128i(&(ah), &(al), v)
59
- #define XOR256(a, b) _mm256_xor_si256(a, b)
60
- #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
61
- #define UNPACKL( a, b ) _mm256_unpacklo_epi64((a), (b))
62
- #define UNPACKH( a, b ) _mm256_unpackhi_epi64((a), (b))
63
- #define PERM128( a, b, c ) _mm256_permute2f128_si256((a), (b), c)
64
- #define SHUFFLE64( a, b, c ) _mm256_castpd_si256(_mm256_shuffle_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b), c))
65
-
66
- #define UNINTLEAVE() lanesL01 = UNPACKL( lanes0, lanes1 ), \
67
- lanesH01 = UNPACKH( lanes0, lanes1 ), \
68
- lanesL23 = UNPACKL( lanes2, lanes3 ), \
69
- lanesH23 = UNPACKH( lanes2, lanes3 ), \
70
- lanes0 = PERM128( lanesL01, lanesL23, 0x20 ), \
71
- lanes2 = PERM128( lanesL01, lanesL23, 0x31 ), \
72
- lanes1 = PERM128( lanesH01, lanesH23, 0x20 ), \
73
- lanes3 = PERM128( lanesH01, lanesH23, 0x31 )
74
-
75
- #define INTLEAVE() lanesL01 = PERM128( lanes0, lanes2, 0x20 ), \
76
- lanesH01 = PERM128( lanes1, lanes3, 0x20 ), \
77
- lanesL23 = PERM128( lanes0, lanes2, 0x31 ), \
78
- lanesH23 = PERM128( lanes1, lanes3, 0x31 ), \
79
- lanes0 = SHUFFLE64( lanesL01, lanesH01, 0x00 ), \
80
- lanes1 = SHUFFLE64( lanesL01, lanesH01, 0x0F ), \
81
- lanes2 = SHUFFLE64( lanesL23, lanesH23, 0x00 ), \
82
- lanes3 = SHUFFLE64( lanesL23, lanesH23, 0x0F )
83
-
84
- #endif
85
-
86
- #define SnP_laneLengthInBytes 8
87
-
88
- void KeccakP1600times4_InitializeAll(void *states) {
89
- memset(states, 0, KeccakP1600times4_statesSizeInBytes);
90
- }
91
-
92
- void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) {
93
- unsigned int sizeLeft = length;
94
- unsigned int lanePosition = offset / SnP_laneLengthInBytes;
95
- unsigned int offsetInLane = offset % SnP_laneLengthInBytes;
96
- const unsigned char *curData = data;
97
- UINT64 *statesAsLanes = (UINT64 *)states;
98
-
99
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
100
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
101
- UINT64 lane = 0;
102
- if (bytesInLane > sizeLeft) {
103
- bytesInLane = sizeLeft;
104
- }
105
- memcpy((unsigned char *)&lane + offsetInLane, curData, bytesInLane);
106
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
107
- sizeLeft -= bytesInLane;
108
- lanePosition++;
109
- curData += bytesInLane;
110
- }
111
-
112
- while (sizeLeft >= SnP_laneLengthInBytes) {
113
- UINT64 lane = *((const UINT64 *)curData);
114
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
115
- sizeLeft -= SnP_laneLengthInBytes;
116
- lanePosition++;
117
- curData += SnP_laneLengthInBytes;
118
- }
119
-
120
- if (sizeLeft > 0) {
121
- UINT64 lane = 0;
122
- memcpy(&lane, curData, sizeLeft);
123
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
124
- }
125
- }
126
-
127
- void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) {
128
- V256 *stateAsLanes = (V256 *)states;
129
- unsigned int i;
130
- const UINT64 *curData0 = (const UINT64 *)data;
131
- const UINT64 *curData1 = (const UINT64 *)(data + laneOffset * SnP_laneLengthInBytes);
132
- const UINT64 *curData2 = (const UINT64 *)(data + laneOffset * 2 * SnP_laneLengthInBytes);
133
- const UINT64 *curData3 = (const UINT64 *)(data + laneOffset * 3 * SnP_laneLengthInBytes);
134
- V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
135
-
136
- #define Xor_In( argIndex ) XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
137
-
138
- #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
139
- lanes1 = LOAD256u( curData1[argIndex]),\
140
- lanes2 = LOAD256u( curData2[argIndex]),\
141
- lanes3 = LOAD256u( curData3[argIndex]),\
142
- INTLEAVE(),\
143
- XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
144
- XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
145
- XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
146
- XOReq256( stateAsLanes[argIndex+3], lanes3 )
147
-
148
- if ( laneCount >= 16 ) {
149
- Xor_In4( 0 );
150
- Xor_In4( 4 );
151
- Xor_In4( 8 );
152
- Xor_In4( 12 );
153
- if ( laneCount >= 20 ) {
154
- Xor_In4( 16 );
155
- for (i = 20; i < laneCount; i++) {
156
- Xor_In( i );
157
- }
158
- } else {
159
- for (i = 16; i < laneCount; i++) {
160
- Xor_In( i );
161
- }
162
- }
163
- } else {
164
- for (i = 0; i < laneCount; i++) {
165
- Xor_In( i );
166
- }
167
- }
168
- #undef Xor_In
169
- #undef Xor_In4
170
- }
171
-
172
- void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) {
173
- unsigned int sizeLeft = length;
174
- unsigned int lanePosition = offset / SnP_laneLengthInBytes;
175
- unsigned int offsetInLane = offset % SnP_laneLengthInBytes;
176
- const unsigned char *curData = data;
177
- UINT64 *statesAsLanes = (UINT64 *)states;
178
-
179
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
180
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
181
- if (bytesInLane > sizeLeft) {
182
- bytesInLane = sizeLeft;
183
- }
184
- memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
185
- sizeLeft -= bytesInLane;
186
- lanePosition++;
187
- curData += bytesInLane;
188
- }
189
-
190
- while (sizeLeft >= SnP_laneLengthInBytes) {
191
- UINT64 lane = *((const UINT64 *)curData);
192
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
193
- sizeLeft -= SnP_laneLengthInBytes;
194
- lanePosition++;
195
- curData += SnP_laneLengthInBytes;
196
- }
197
-
198
- if (sizeLeft > 0) {
199
- memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
200
- }
201
- }
202
-
203
- void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) {
204
- V256 *stateAsLanes = (V256 *)states;
205
- unsigned int i;
206
- const UINT64 *curData0 = (const UINT64 *)data;
207
- const UINT64 *curData1 = (const UINT64 *)(data + laneOffset * SnP_laneLengthInBytes);
208
- const UINT64 *curData2 = (const UINT64 *)(data + laneOffset * 2 * SnP_laneLengthInBytes);
209
- const UINT64 *curData3 = (const UINT64 *)(data + laneOffset * 3 * SnP_laneLengthInBytes);
210
- V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
211
-
212
- #define OverWr( argIndex ) STORE256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
213
-
214
- #define OverWr4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
215
- lanes1 = LOAD256u( curData1[argIndex]),\
216
- lanes2 = LOAD256u( curData2[argIndex]),\
217
- lanes3 = LOAD256u( curData3[argIndex]),\
218
- INTLEAVE(),\
219
- STORE256( stateAsLanes[argIndex+0], lanes0 ),\
220
- STORE256( stateAsLanes[argIndex+1], lanes1 ),\
221
- STORE256( stateAsLanes[argIndex+2], lanes2 ),\
222
- STORE256( stateAsLanes[argIndex+3], lanes3 )
223
-
224
- if ( laneCount >= 16 ) {
225
- OverWr4( 0 );
226
- OverWr4( 4 );
227
- OverWr4( 8 );
228
- OverWr4( 12 );
229
- if ( laneCount >= 20 ) {
230
- OverWr4( 16 );
231
- for (i = 20; i < laneCount; i++) {
232
- OverWr( i );
233
- }
234
- } else {
235
- for (i = 16; i < laneCount; i++) {
236
- OverWr( i );
237
- }
238
- }
239
- } else {
240
- for (i = 0; i < laneCount; i++) {
241
- OverWr( i );
242
- }
243
- }
244
- #undef OverWr
245
- #undef OverWr4
246
- }
247
-
248
- void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount) {
249
- unsigned int sizeLeft = byteCount;
250
- unsigned int lanePosition = 0;
251
- UINT64 *statesAsLanes = (UINT64 *)states;
252
-
253
- while (sizeLeft >= SnP_laneLengthInBytes) {
254
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
255
- sizeLeft -= SnP_laneLengthInBytes;
256
- lanePosition++;
257
- }
258
-
259
- if (sizeLeft > 0) {
260
- memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
261
- }
262
- }
263
-
264
- void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length) {
265
- unsigned int sizeLeft = length;
266
- unsigned int lanePosition = offset / SnP_laneLengthInBytes;
267
- unsigned int offsetInLane = offset % SnP_laneLengthInBytes;
268
- unsigned char *curData = data;
269
- const UINT64 *statesAsLanes = (const UINT64 *)states;
270
-
271
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
272
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
273
- if (bytesInLane > sizeLeft) {
274
- bytesInLane = sizeLeft;
275
- }
276
- memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
277
- sizeLeft -= bytesInLane;
278
- lanePosition++;
279
- curData += bytesInLane;
280
- }
281
-
282
- while (sizeLeft >= SnP_laneLengthInBytes) {
283
- *(UINT64 *)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
284
- sizeLeft -= SnP_laneLengthInBytes;
285
- lanePosition++;
286
- curData += SnP_laneLengthInBytes;
287
- }
288
-
289
- if (sizeLeft > 0) {
290
- memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
291
- }
292
- }
293
-
294
- void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset) {
295
- UINT64 *curData0 = (UINT64 *)data;
296
- UINT64 *curData1 = (UINT64 *)(data + laneOffset * 1 * SnP_laneLengthInBytes);
297
- UINT64 *curData2 = (UINT64 *)(data + laneOffset * 2 * SnP_laneLengthInBytes);
298
- UINT64 *curData3 = (UINT64 *)(data + laneOffset * 3 * SnP_laneLengthInBytes);
299
-
300
- const V256 *stateAsLanes = (const V256 *)states;
301
- const UINT64 *stateAsLanes64 = (const UINT64 *)states;
302
- V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
303
- unsigned int i;
304
-
305
- #define Extr( argIndex ) curData0[argIndex] = stateAsLanes64[4*(argIndex)], \
306
- curData1[argIndex] = stateAsLanes64[4*(argIndex)+1], \
307
- curData2[argIndex] = stateAsLanes64[4*(argIndex)+2], \
308
- curData3[argIndex] = stateAsLanes64[4*(argIndex)+3]
309
-
310
- #define Extr4( argIndex ) lanes0 = LOAD256( stateAsLanes[argIndex+0] ), \
311
- lanes1 = LOAD256( stateAsLanes[argIndex+1] ), \
312
- lanes2 = LOAD256( stateAsLanes[argIndex+2] ), \
313
- lanes3 = LOAD256( stateAsLanes[argIndex+3] ), \
314
- UNINTLEAVE(), \
315
- STORE256u( curData0[argIndex], lanes0 ), \
316
- STORE256u( curData1[argIndex], lanes1 ), \
317
- STORE256u( curData2[argIndex], lanes2 ), \
318
- STORE256u( curData3[argIndex], lanes3 )
319
-
320
- if ( laneCount >= 16 ) {
321
- Extr4( 0 );
322
- Extr4( 4 );
323
- Extr4( 8 );
324
- Extr4( 12 );
325
- if ( laneCount >= 20 ) {
326
- Extr4( 16 );
327
- for (i = 20; i < laneCount; i++) {
328
- Extr( i );
329
- }
330
- } else {
331
- for (i = 16; i < laneCount; i++) {
332
- Extr( i );
333
- }
334
- }
335
- } else {
336
- for (i = 0; i < laneCount; i++) {
337
- Extr( i );
338
- }
339
- }
340
- #undef Extr
341
- #undef Extr4
342
- }
343
-
344
- void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) {
345
- unsigned int sizeLeft = length;
346
- unsigned int lanePosition = offset / SnP_laneLengthInBytes;
347
- unsigned int offsetInLane = offset % SnP_laneLengthInBytes;
348
- const unsigned char *curInput = input;
349
- unsigned char *curOutput = output;
350
- const UINT64 *statesAsLanes = (const UINT64 *)states;
351
-
352
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
353
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
354
- UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
355
- if (bytesInLane > sizeLeft) {
356
- bytesInLane = sizeLeft;
357
- }
358
- sizeLeft -= bytesInLane;
359
- do {
360
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
361
- lane >>= 8;
362
- } while ( --bytesInLane != 0);
363
- lanePosition++;
364
- }
365
-
366
- while (sizeLeft >= SnP_laneLengthInBytes) {
367
- *((UINT64 *)curOutput) = *((UINT64 *)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
368
- sizeLeft -= SnP_laneLengthInBytes;
369
- lanePosition++;
370
- curInput += SnP_laneLengthInBytes;
371
- curOutput += SnP_laneLengthInBytes;
372
- }
373
-
374
- if (sizeLeft != 0) {
375
- UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
376
- do {
377
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
378
- lane >>= 8;
379
- } while ( --sizeLeft != 0);
380
- }
381
- }
382
-
383
- void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset) {
384
- const UINT64 *curInput0 = (UINT64 *)input;
385
- const UINT64 *curInput1 = (UINT64 *)(input + laneOffset * 1 * SnP_laneLengthInBytes);
386
- const UINT64 *curInput2 = (UINT64 *)(input + laneOffset * 2 * SnP_laneLengthInBytes);
387
- const UINT64 *curInput3 = (UINT64 *)(input + laneOffset * 3 * SnP_laneLengthInBytes);
388
- UINT64 *curOutput0 = (UINT64 *)output;
389
- UINT64 *curOutput1 = (UINT64 *)(output + laneOffset * 1 * SnP_laneLengthInBytes);
390
- UINT64 *curOutput2 = (UINT64 *)(output + laneOffset * 2 * SnP_laneLengthInBytes);
391
- UINT64 *curOutput3 = (UINT64 *)(output + laneOffset * 3 * SnP_laneLengthInBytes);
392
-
393
- const V256 *stateAsLanes = (const V256 *)states;
394
- const UINT64 *stateAsLanes64 = (const UINT64 *)states;
395
- V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
396
- unsigned int i;
397
-
398
- #define ExtrXor( argIndex ) \
399
- curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes64[4*(argIndex)],\
400
- curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes64[4*(argIndex)+1],\
401
- curOutput2[argIndex] = curInput2[argIndex] ^ stateAsLanes64[4*(argIndex)+2],\
402
- curOutput3[argIndex] = curInput3[argIndex] ^ stateAsLanes64[4*(argIndex)+3]
403
-
404
- #define ExtrXor4( argIndex ) \
405
- lanes0 = LOAD256( stateAsLanes[argIndex+0] ),\
406
- lanes1 = LOAD256( stateAsLanes[argIndex+1] ),\
407
- lanes2 = LOAD256( stateAsLanes[argIndex+2] ),\
408
- lanes3 = LOAD256( stateAsLanes[argIndex+3] ),\
409
- UNINTLEAVE(),\
410
- lanesL01 = LOAD256u( curInput0[argIndex]),\
411
- lanesH01 = LOAD256u( curInput1[argIndex]),\
412
- lanesL23 = LOAD256u( curInput2[argIndex]),\
413
- lanesH23 = LOAD256u( curInput3[argIndex]),\
414
- XOReq256( lanes0, lanesL01 ),\
415
- XOReq256( lanes1, lanesH01 ),\
416
- XOReq256( lanes2, lanesL23 ),\
417
- XOReq256( lanes3, lanesH23 ),\
418
- STORE256u( curOutput0[argIndex], lanes0 ),\
419
- STORE256u( curOutput1[argIndex], lanes1 ),\
420
- STORE256u( curOutput2[argIndex], lanes2 ),\
421
- STORE256u( curOutput3[argIndex], lanes3 )
422
-
423
- if ( laneCount >= 16 ) {
424
- ExtrXor4( 0 );
425
- ExtrXor4( 4 );
426
- ExtrXor4( 8 );
427
- ExtrXor4( 12 );
428
- if ( laneCount >= 20 ) {
429
- ExtrXor4( 16 );
430
- for (i = 20; i < laneCount; i++) {
431
- ExtrXor( i );
432
- }
433
- } else {
434
- for (i = 16; i < laneCount; i++) {
435
- ExtrXor( i );
436
- }
437
- }
438
- } else {
439
- for (i = 0; i < laneCount; i++) {
440
- ExtrXor( i );
441
- }
442
- }
443
- #undef ExtrXor
444
- #undef ExtrXor4
445
- }
446
-
447
- #define declareABCDE \
448
- V256 Aba, Abe, Abi, Abo, Abu; \
449
- V256 Aga, Age, Agi, Ago, Agu; \
450
- V256 Aka, Ake, Aki, Ako, Aku; \
451
- V256 Ama, Ame, Ami, Amo, Amu; \
452
- V256 Asa, Ase, Asi, Aso, Asu; \
453
- V256 Bba, Bbe, Bbi, Bbo, Bbu; \
454
- V256 Bga, Bge, Bgi, Bgo, Bgu; \
455
- V256 Bka, Bke, Bki, Bko, Bku; \
456
- V256 Bma, Bme, Bmi, Bmo, Bmu; \
457
- V256 Bsa, Bse, Bsi, Bso, Bsu; \
458
- V256 Ca, Ce, Ci, Co, Cu; \
459
- V256 Ca1, Ce1, Ci1, Co1, Cu1; \
460
- V256 Da, De, Di, Do, Du; \
461
- V256 Eba, Ebe, Ebi, Ebo, Ebu; \
462
- V256 Ega, Ege, Egi, Ego, Egu; \
463
- V256 Eka, Eke, Eki, Eko, Eku; \
464
- V256 Ema, Eme, Emi, Emo, Emu; \
465
- V256 Esa, Ese, Esi, Eso, Esu; \
466
-
467
- #define prepareTheta \
468
- Ca = XOR256(Aba, XOR256(Aga, XOR256(Aka, XOR256(Ama, Asa)))); \
469
- Ce = XOR256(Abe, XOR256(Age, XOR256(Ake, XOR256(Ame, Ase)))); \
470
- Ci = XOR256(Abi, XOR256(Agi, XOR256(Aki, XOR256(Ami, Asi)))); \
471
- Co = XOR256(Abo, XOR256(Ago, XOR256(Ako, XOR256(Amo, Aso)))); \
472
- Cu = XOR256(Abu, XOR256(Agu, XOR256(Aku, XOR256(Amu, Asu)))); \
473
-
474
- /* --- Theta Rho Pi Chi Iota Prepare-theta */
475
- /* --- 64-bit lanes mapped to 64-bit words */
476
- #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
477
- ROL64in256(Ce1, Ce, 1); \
478
- Da = XOR256(Cu, Ce1); \
479
- ROL64in256(Ci1, Ci, 1); \
480
- De = XOR256(Ca, Ci1); \
481
- ROL64in256(Co1, Co, 1); \
482
- Di = XOR256(Ce, Co1); \
483
- ROL64in256(Cu1, Cu, 1); \
484
- Do = XOR256(Ci, Cu1); \
485
- ROL64in256(Ca1, Ca, 1); \
486
- Du = XOR256(Co, Ca1); \
487
- \
488
- XOReq256(A##ba, Da); \
489
- Bba = A##ba; \
490
- XOReq256(A##ge, De); \
491
- ROL64in256(Bbe, A##ge, 44); \
492
- XOReq256(A##ki, Di); \
493
- ROL64in256(Bbi, A##ki, 43); \
494
- E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); \
495
- XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); \
496
- Ca = E##ba; \
497
- XOReq256(A##mo, Do); \
498
- ROL64in256(Bbo, A##mo, 21); \
499
- E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); \
500
- Ce = E##be; \
501
- XOReq256(A##su, Du); \
502
- ROL64in256(Bbu, A##su, 14); \
503
- E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); \
504
- Ci = E##bi; \
505
- E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); \
506
- Co = E##bo; \
507
- E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); \
508
- Cu = E##bu; \
509
- \
510
- XOReq256(A##bo, Do); \
511
- ROL64in256(Bga, A##bo, 28); \
512
- XOReq256(A##gu, Du); \
513
- ROL64in256(Bge, A##gu, 20); \
514
- XOReq256(A##ka, Da); \
515
- ROL64in256(Bgi, A##ka, 3); \
516
- E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)); \
517
- XOReq256(Ca, E##ga); \
518
- XOReq256(A##me, De); \
519
- ROL64in256(Bgo, A##me, 45); \
520
- E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)); \
521
- XOReq256(Ce, E##ge); \
522
- XOReq256(A##si, Di); \
523
- ROL64in256(Bgu, A##si, 61); \
524
- E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)); \
525
- XOReq256(Ci, E##gi); \
526
- E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); \
527
- XOReq256(Co, E##go); \
528
- E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); \
529
- XOReq256(Cu, E##gu); \
530
- \
531
- XOReq256(A##be, De); \
532
- ROL64in256(Bka, A##be, 1); \
533
- XOReq256(A##gi, Di); \
534
- ROL64in256(Bke, A##gi, 6); \
535
- XOReq256(A##ko, Do); \
536
- ROL64in256(Bki, A##ko, 25); \
537
- E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); \
538
- XOReq256(Ca, E##ka); \
539
- XOReq256(A##mu, Du); \
540
- ROL64in256_8(Bko, A##mu); \
541
- E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); \
542
- XOReq256(Ce, E##ke); \
543
- XOReq256(A##sa, Da); \
544
- ROL64in256(Bku, A##sa, 18); \
545
- E##ki = XOR256(Bki, ANDnu256(Bko, Bku)); \
546
- XOReq256(Ci, E##ki); \
547
- E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); \
548
- XOReq256(Co, E##ko); \
549
- E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); \
550
- XOReq256(Cu, E##ku); \
551
- \
552
- XOReq256(A##bu, Du); \
553
- ROL64in256(Bma, A##bu, 27); \
554
- XOReq256(A##ga, Da); \
555
- ROL64in256(Bme, A##ga, 36); \
556
- XOReq256(A##ke, De); \
557
- ROL64in256(Bmi, A##ke, 10); \
558
- E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); \
559
- XOReq256(Ca, E##ma); \
560
- XOReq256(A##mi, Di); \
561
- ROL64in256(Bmo, A##mi, 15); \
562
- E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); \
563
- XOReq256(Ce, E##me); \
564
- XOReq256(A##so, Do); \
565
- ROL64in256_56(Bmu, A##so); \
566
- E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); \
567
- XOReq256(Ci, E##mi); \
568
- E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); \
569
- XOReq256(Co, E##mo); \
570
- E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); \
571
- XOReq256(Cu, E##mu); \
572
- \
573
- XOReq256(A##bi, Di); \
574
- ROL64in256(Bsa, A##bi, 62); \
575
- XOReq256(A##go, Do); \
576
- ROL64in256(Bse, A##go, 55); \
577
- XOReq256(A##ku, Du); \
578
- ROL64in256(Bsi, A##ku, 39); \
579
- E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); \
580
- XOReq256(Ca, E##sa); \
581
- XOReq256(A##ma, Da); \
582
- ROL64in256(Bso, A##ma, 41); \
583
- E##se = XOR256(Bse, ANDnu256(Bsi, Bso)); \
584
- XOReq256(Ce, E##se); \
585
- XOReq256(A##se, De); \
586
- ROL64in256(Bsu, A##se, 2); \
587
- E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); \
588
- XOReq256(Ci, E##si); \
589
- E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); \
590
- XOReq256(Co, E##so); \
591
- E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); \
592
- XOReq256(Cu, E##su); \
593
- \
594
-
595
- /* --- Theta Rho Pi Chi Iota */
596
- /* --- 64-bit lanes mapped to 64-bit words */
597
- #define thetaRhoPiChiIota(i, A, E) \
598
- ROL64in256(Ce1, Ce, 1); \
599
- Da = XOR256(Cu, Ce1); \
600
- ROL64in256(Ci1, Ci, 1); \
601
- De = XOR256(Ca, Ci1); \
602
- ROL64in256(Co1, Co, 1); \
603
- Di = XOR256(Ce, Co1); \
604
- ROL64in256(Cu1, Cu, 1); \
605
- Do = XOR256(Ci, Cu1); \
606
- ROL64in256(Ca1, Ca, 1); \
607
- Du = XOR256(Co, Ca1); \
608
- \
609
- XOReq256(A##ba, Da); \
610
- Bba = A##ba; \
611
- XOReq256(A##ge, De); \
612
- ROL64in256(Bbe, A##ge, 44); \
613
- XOReq256(A##ki, Di); \
614
- ROL64in256(Bbi, A##ki, 43); \
615
- E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); \
616
- XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); \
617
- XOReq256(A##mo, Do); \
618
- ROL64in256(Bbo, A##mo, 21); \
619
- E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); \
620
- XOReq256(A##su, Du); \
621
- ROL64in256(Bbu, A##su, 14); \
622
- E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); \
623
- E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); \
624
- E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); \
625
- \
626
- XOReq256(A##bo, Do); \
627
- ROL64in256(Bga, A##bo, 28); \
628
- XOReq256(A##gu, Du); \
629
- ROL64in256(Bge, A##gu, 20); \
630
- XOReq256(A##ka, Da); \
631
- ROL64in256(Bgi, A##ka, 3); \
632
- E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)); \
633
- XOReq256(A##me, De); \
634
- ROL64in256(Bgo, A##me, 45); \
635
- E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)); \
636
- XOReq256(A##si, Di); \
637
- ROL64in256(Bgu, A##si, 61); \
638
- E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)); \
639
- E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); \
640
- E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); \
641
- \
642
- XOReq256(A##be, De); \
643
- ROL64in256(Bka, A##be, 1); \
644
- XOReq256(A##gi, Di); \
645
- ROL64in256(Bke, A##gi, 6); \
646
- XOReq256(A##ko, Do); \
647
- ROL64in256(Bki, A##ko, 25); \
648
- E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); \
649
- XOReq256(A##mu, Du); \
650
- ROL64in256_8(Bko, A##mu); \
651
- E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); \
652
- XOReq256(A##sa, Da); \
653
- ROL64in256(Bku, A##sa, 18); \
654
- E##ki = XOR256(Bki, ANDnu256(Bko, Bku)); \
655
- E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); \
656
- E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); \
657
- \
658
- XOReq256(A##bu, Du); \
659
- ROL64in256(Bma, A##bu, 27); \
660
- XOReq256(A##ga, Da); \
661
- ROL64in256(Bme, A##ga, 36); \
662
- XOReq256(A##ke, De); \
663
- ROL64in256(Bmi, A##ke, 10); \
664
- E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); \
665
- XOReq256(A##mi, Di); \
666
- ROL64in256(Bmo, A##mi, 15); \
667
- E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); \
668
- XOReq256(A##so, Do); \
669
- ROL64in256_56(Bmu, A##so); \
670
- E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); \
671
- E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); \
672
- E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); \
673
- \
674
- XOReq256(A##bi, Di); \
675
- ROL64in256(Bsa, A##bi, 62); \
676
- XOReq256(A##go, Do); \
677
- ROL64in256(Bse, A##go, 55); \
678
- XOReq256(A##ku, Du); \
679
- ROL64in256(Bsi, A##ku, 39); \
680
- E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); \
681
- XOReq256(A##ma, Da); \
682
- ROL64in256(Bso, A##ma, 41); \
683
- E##se = XOR256(Bse, ANDnu256(Bsi, Bso)); \
684
- XOReq256(A##se, De); \
685
- ROL64in256(Bsu, A##se, 2); \
686
- E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); \
687
- E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); \
688
- E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); \
689
- \
690
-
691
- static ALIGN(KeccakP1600times4_statesAlignment) const UINT64 KeccakF1600RoundConstants[24] = {
692
- 0x0000000000000001ULL,
693
- 0x0000000000008082ULL,
694
- 0x800000000000808aULL,
695
- 0x8000000080008000ULL,
696
- 0x000000000000808bULL,
697
- 0x0000000080000001ULL,
698
- 0x8000000080008081ULL,
699
- 0x8000000000008009ULL,
700
- 0x000000000000008aULL,
701
- 0x0000000000000088ULL,
702
- 0x0000000080008009ULL,
703
- 0x000000008000000aULL,
704
- 0x000000008000808bULL,
705
- 0x800000000000008bULL,
706
- 0x8000000000008089ULL,
707
- 0x8000000000008003ULL,
708
- 0x8000000000008002ULL,
709
- 0x8000000000000080ULL,
710
- 0x000000000000800aULL,
711
- 0x800000008000000aULL,
712
- 0x8000000080008081ULL,
713
- 0x8000000000008080ULL,
714
- 0x0000000080000001ULL,
715
- 0x8000000080008008ULL
716
- };
717
-
718
- #define copyFromState(X, state) \
719
- X##ba = LOAD256(state[ 0]); \
720
- X##be = LOAD256(state[ 1]); \
721
- X##bi = LOAD256(state[ 2]); \
722
- X##bo = LOAD256(state[ 3]); \
723
- X##bu = LOAD256(state[ 4]); \
724
- X##ga = LOAD256(state[ 5]); \
725
- X##ge = LOAD256(state[ 6]); \
726
- X##gi = LOAD256(state[ 7]); \
727
- X##go = LOAD256(state[ 8]); \
728
- X##gu = LOAD256(state[ 9]); \
729
- X##ka = LOAD256(state[10]); \
730
- X##ke = LOAD256(state[11]); \
731
- X##ki = LOAD256(state[12]); \
732
- X##ko = LOAD256(state[13]); \
733
- X##ku = LOAD256(state[14]); \
734
- X##ma = LOAD256(state[15]); \
735
- X##me = LOAD256(state[16]); \
736
- X##mi = LOAD256(state[17]); \
737
- X##mo = LOAD256(state[18]); \
738
- X##mu = LOAD256(state[19]); \
739
- X##sa = LOAD256(state[20]); \
740
- X##se = LOAD256(state[21]); \
741
- X##si = LOAD256(state[22]); \
742
- X##so = LOAD256(state[23]); \
743
- X##su = LOAD256(state[24]); \
744
-
745
- #define copyToState(state, X) \
746
- STORE256(state[ 0], X##ba); \
747
- STORE256(state[ 1], X##be); \
748
- STORE256(state[ 2], X##bi); \
749
- STORE256(state[ 3], X##bo); \
750
- STORE256(state[ 4], X##bu); \
751
- STORE256(state[ 5], X##ga); \
752
- STORE256(state[ 6], X##ge); \
753
- STORE256(state[ 7], X##gi); \
754
- STORE256(state[ 8], X##go); \
755
- STORE256(state[ 9], X##gu); \
756
- STORE256(state[10], X##ka); \
757
- STORE256(state[11], X##ke); \
758
- STORE256(state[12], X##ki); \
759
- STORE256(state[13], X##ko); \
760
- STORE256(state[14], X##ku); \
761
- STORE256(state[15], X##ma); \
762
- STORE256(state[16], X##me); \
763
- STORE256(state[17], X##mi); \
764
- STORE256(state[18], X##mo); \
765
- STORE256(state[19], X##mu); \
766
- STORE256(state[20], X##sa); \
767
- STORE256(state[21], X##se); \
768
- STORE256(state[22], X##si); \
769
- STORE256(state[23], X##so); \
770
- STORE256(state[24], X##su); \
771
-
772
- #define copyStateVariables(X, Y) \
773
- X##ba = Y##ba; \
774
- X##be = Y##be; \
775
- X##bi = Y##bi; \
776
- X##bo = Y##bo; \
777
- X##bu = Y##bu; \
778
- X##ga = Y##ga; \
779
- X##ge = Y##ge; \
780
- X##gi = Y##gi; \
781
- X##go = Y##go; \
782
- X##gu = Y##gu; \
783
- X##ka = Y##ka; \
784
- X##ke = Y##ke; \
785
- X##ki = Y##ki; \
786
- X##ko = Y##ko; \
787
- X##ku = Y##ku; \
788
- X##ma = Y##ma; \
789
- X##me = Y##me; \
790
- X##mi = Y##mi; \
791
- X##mo = Y##mo; \
792
- X##mu = Y##mu; \
793
- X##sa = Y##sa; \
794
- X##se = Y##se; \
795
- X##si = Y##si; \
796
- X##so = Y##so; \
797
- X##su = Y##su; \
798
-
799
- #ifdef KeccakP1600times4_fullUnrolling
800
- #define FullUnrolling
801
- #else
802
- #define Unrolling KeccakP1600times4_unrolling
803
- #endif
804
- #include "KeccakP-1600-unrolling.macros"
805
-
806
- void KeccakP1600times4_PermuteAll_24rounds(void *states) {
807
- V256 *statesAsLanes = (V256 *)states;
808
- declareABCDE
809
- #ifndef KeccakP1600times4_fullUnrolling
810
- unsigned int i;
811
- #endif
812
-
813
- copyFromState(A, statesAsLanes)
814
- rounds24
815
- copyToState(statesAsLanes, A)
816
- }
817
-
818
- void KeccakP1600times4_PermuteAll_12rounds(void *states) {
819
- V256 *statesAsLanes = (V256 *)states;
820
- declareABCDE
821
- #ifndef KeccakP1600times4_fullUnrolling
822
- unsigned int i;
823
- #endif
824
-
825
- copyFromState(A, statesAsLanes)
826
- rounds12
827
- copyToState(statesAsLanes, A)
828
- }
829
-
830
- size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen) {
831
- if (laneCount == 21) {
832
- #if 0
833
- const unsigned char *dataStart = data;
834
- const UINT64 *curData0 = (const UINT64 *)data;
835
- const UINT64 *curData1 = (const UINT64 *)(data + laneOffsetParallel * 1 * SnP_laneLengthInBytes);
836
- const UINT64 *curData2 = (const UINT64 *)(data + laneOffsetParallel * 2 * SnP_laneLengthInBytes);
837
- const UINT64 *curData3 = (const UINT64 *)(data + laneOffsetParallel * 3 * SnP_laneLengthInBytes);
838
-
839
- while (dataByteLen >= (laneOffsetParallel * 3 + laneCount) * 8) {
840
- V256 *stateAsLanes = (V256 *)states;
841
- V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
842
- #define Xor_In( argIndex ) \
843
- XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
844
- #define Xor_In4( argIndex ) \
845
- lanes0 = LOAD256u( curData0[argIndex]),\
846
- lanes1 = LOAD256u( curData1[argIndex]),\
847
- lanes2 = LOAD256u( curData2[argIndex]),\
848
- lanes3 = LOAD256u( curData3[argIndex]),\
849
- INTLEAVE(),\
850
- XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
851
- XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
852
- XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
853
- XOReq256( stateAsLanes[argIndex+3], lanes3 )
854
- Xor_In4( 0 );
855
- Xor_In4( 4 );
856
- Xor_In4( 8 );
857
- Xor_In4( 12 );
858
- Xor_In4( 16 );
859
- Xor_In( 20 );
860
- #undef Xor_In
861
- #undef Xor_In4
862
- KeccakP1600times4_PermuteAll_24rounds(states);
863
- curData0 += laneOffsetSerial;
864
- curData1 += laneOffsetSerial;
865
- curData2 += laneOffsetSerial;
866
- curData3 += laneOffsetSerial;
867
- dataByteLen -= laneOffsetSerial * 8;
868
- }
869
- return (const unsigned char *)curData0 - dataStart;
870
- #else
871
- // unsigned int i;
872
- const unsigned char *dataStart = data;
873
- const UINT64 *curData0 = (const UINT64 *)data;
874
- const UINT64 *curData1 = (const UINT64 *)(data + laneOffsetParallel * 1 * SnP_laneLengthInBytes);
875
- const UINT64 *curData2 = (const UINT64 *)(data + laneOffsetParallel * 2 * SnP_laneLengthInBytes);
876
- const UINT64 *curData3 = (const UINT64 *)(data + laneOffsetParallel * 3 * SnP_laneLengthInBytes);
877
- V256 *statesAsLanes = (V256 *)states;
878
- declareABCDE
879
-
880
- copyFromState(A, statesAsLanes)
881
- while (dataByteLen >= (laneOffsetParallel * 3 + laneCount) * 8) {
882
- #define XOR_In( Xxx, argIndex ) \
883
- XOReq256(Xxx, LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
884
- XOR_In( Aba, 0 );
885
- XOR_In( Abe, 1 );
886
- XOR_In( Abi, 2 );
887
- XOR_In( Abo, 3 );
888
- XOR_In( Abu, 4 );
889
- XOR_In( Aga, 5 );
890
- XOR_In( Age, 6 );
891
- XOR_In( Agi, 7 );
892
- XOR_In( Ago, 8 );
893
- XOR_In( Agu, 9 );
894
- XOR_In( Aka, 10 );
895
- XOR_In( Ake, 11 );
896
- XOR_In( Aki, 12 );
897
- XOR_In( Ako, 13 );
898
- XOR_In( Aku, 14 );
899
- XOR_In( Ama, 15 );
900
- XOR_In( Ame, 16 );
901
- XOR_In( Ami, 17 );
902
- XOR_In( Amo, 18 );
903
- XOR_In( Amu, 19 );
904
- XOR_In( Asa, 20 );
905
- #undef XOR_In
906
- rounds24
907
- curData0 += laneOffsetSerial;
908
- curData1 += laneOffsetSerial;
909
- curData2 += laneOffsetSerial;
910
- curData3 += laneOffsetSerial;
911
- dataByteLen -= laneOffsetSerial * 8;
912
- }
913
- copyToState(statesAsLanes, A)
914
- return (const unsigned char *)curData0 - dataStart;
915
- #endif
916
- } else {
917
- // unsigned int i;
918
- const unsigned char *dataStart = data;
919
-
920
- while (dataByteLen >= (laneOffsetParallel * 3 + laneCount) * 8) {
921
- KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
922
- KeccakP1600times4_PermuteAll_24rounds(states);
923
- data += laneOffsetSerial * 8;
924
- dataByteLen -= laneOffsetSerial * 8;
925
- }
926
- return data - dataStart;
927
- }
928
- }
929
-
930
- size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen) {
931
- if (laneCount == 21) {
932
- #if 0
933
- const unsigned char *dataStart = data;
934
- const UINT64 *curData0 = (const UINT64 *)data;
935
- const UINT64 *curData1 = (const UINT64 *)(data + laneOffsetParallel * 1 * SnP_laneLengthInBytes);
936
- const UINT64 *curData2 = (const UINT64 *)(data + laneOffsetParallel * 2 * SnP_laneLengthInBytes);
937
- const UINT64 *curData3 = (const UINT64 *)(data + laneOffsetParallel * 3 * SnP_laneLengthInBytes);
938
-
939
- while (dataByteLen >= (laneOffsetParallel * 3 + laneCount) * 8) {
940
- V256 *stateAsLanes = states;
941
- V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
942
- #define Xor_In( argIndex ) \
943
- XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
944
- #define Xor_In4( argIndex ) \
945
- lanes0 = LOAD256u( curData0[argIndex]),\
946
- lanes1 = LOAD256u( curData1[argIndex]),\
947
- lanes2 = LOAD256u( curData2[argIndex]),\
948
- lanes3 = LOAD256u( curData3[argIndex]),\
949
- INTLEAVE(),\
950
- XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
951
- XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
952
- XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
953
- XOReq256( stateAsLanes[argIndex+3], lanes3 )
954
- Xor_In4( 0 );
955
- Xor_In4( 4 );
956
- Xor_In4( 8 );
957
- Xor_In4( 12 );
958
- Xor_In4( 16 );
959
- Xor_In( 20 );
960
- #undef Xor_In
961
- #undef Xor_In4
962
- KeccakP1600times4_PermuteAll_12rounds(states);
963
- curData0 += laneOffsetSerial;
964
- curData1 += laneOffsetSerial;
965
- curData2 += laneOffsetSerial;
966
- curData3 += laneOffsetSerial;
967
- dataByteLen -= laneOffsetSerial * 8;
968
- }
969
- return (const unsigned char *)curData0 - dataStart;
970
- #else
971
- // unsigned int i;
972
- const unsigned char *dataStart = data;
973
- const UINT64 *curData0 = (const UINT64 *)data;
974
- const UINT64 *curData1 = (const UINT64 *)(data + laneOffsetParallel * 1 * SnP_laneLengthInBytes);
975
- const UINT64 *curData2 = (const UINT64 *)(data + laneOffsetParallel * 2 * SnP_laneLengthInBytes);
976
- const UINT64 *curData3 = (const UINT64 *)(data + laneOffsetParallel * 3 * SnP_laneLengthInBytes);
977
- V256 *statesAsLanes = states;
978
- declareABCDE
979
-
980
- copyFromState(A, statesAsLanes)
981
- while (dataByteLen >= (laneOffsetParallel * 3 + laneCount) * 8) {
982
- #define XOR_In( Xxx, argIndex ) \
983
- XOReq256(Xxx, LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
984
- XOR_In( Aba, 0 );
985
- XOR_In( Abe, 1 );
986
- XOR_In( Abi, 2 );
987
- XOR_In( Abo, 3 );
988
- XOR_In( Abu, 4 );
989
- XOR_In( Aga, 5 );
990
- XOR_In( Age, 6 );
991
- XOR_In( Agi, 7 );
992
- XOR_In( Ago, 8 );
993
- XOR_In( Agu, 9 );
994
- XOR_In( Aka, 10 );
995
- XOR_In( Ake, 11 );
996
- XOR_In( Aki, 12 );
997
- XOR_In( Ako, 13 );
998
- XOR_In( Aku, 14 );
999
- XOR_In( Ama, 15 );
1000
- XOR_In( Ame, 16 );
1001
- XOR_In( Ami, 17 );
1002
- XOR_In( Amo, 18 );
1003
- XOR_In( Amu, 19 );
1004
- XOR_In( Asa, 20 );
1005
- #undef XOR_In
1006
- rounds12
1007
- curData0 += laneOffsetSerial;
1008
- curData1 += laneOffsetSerial;
1009
- curData2 += laneOffsetSerial;
1010
- curData3 += laneOffsetSerial;
1011
- dataByteLen -= laneOffsetSerial * 8;
1012
- }
1013
- copyToState(statesAsLanes, A)
1014
- return (const unsigned char *)curData0 - dataStart;
1015
- #endif
1016
- } else {
1017
- // unsigned int i;
1018
- const unsigned char *dataStart = data;
1019
-
1020
- while (dataByteLen >= (laneOffsetParallel * 3 + laneCount) * 8) {
1021
- KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
1022
- KeccakP1600times4_PermuteAll_12rounds(states);
1023
- data += laneOffsetSerial * 8;
1024
- dataByteLen -= laneOffsetSerial * 8;
1025
- }
1026
- return data - dataStart;
1027
- }
1028
- }