digest-blake3 0.37.0.1 → 1.2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4dc981436633bde6ba4fb278252d8a4a1ba58d039d0b1c8c794e36c4e47fa4a0
4
- data.tar.gz: ae40be72a0252730792f3e82a00da765546c9606d91405ff69d3bad078ad307f
3
+ metadata.gz: ec7a77d6875b688e1cb1fbe8470cbf67278f9fe3f2f8e516bafe7abc0bf54bc4
4
+ data.tar.gz: 74e13b2480eccd5c2fe3fa913a0962217c1f07c95b5db80b8303086488ee5d9f
5
5
  SHA512:
6
- metadata.gz: c18ca69b1f4b47ac8308ee00cc6db861eb48bd3921a85c291cb09ea595534b1a476988453c1931cb9982e9f96e7d14e4fa4356cbecaa9c584252c9b7ad30ac62
7
- data.tar.gz: b9cf5f04daf5d83a797191caa2f2c30e068ddd6b771d887acb96963b93550171e77ea81128fffc316b49641391a4f245a8484b8b909d53502be3a2fb3170ad76
6
+ metadata.gz: de0fb7b5ccce755c313da8e547a430950d181170c64561746890ce8855ce5e09d3232b16316f36d22320ae5d23cf7904e8221a26358e96d9566ba247ef613214
7
+ data.tar.gz: 33e15e9469128ba227dbe6b57d9c44fe55078b9031975bf9db783a469c93342c7ccbf38b763ddfed7f09c941a42a6df89302cfda0e38b0ad4967a12acac4b18a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- digest-blake3 (0.37.0)
4
+ digest-blake3 (1.2.0.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -5,6 +5,8 @@
5
5
  #include "blake3.h"
6
6
  #include "blake3_impl.h"
7
7
 
8
+ const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
9
+
8
10
  INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
9
11
  uint8_t flags) {
10
12
  memcpy(self->cv, key, BLAKE3_KEY_LEN);
@@ -338,12 +340,18 @@ INLINE void compress_subtree_to_parent_node(
338
340
  uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
339
341
  size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
340
342
  chunk_counter, flags, cv_array);
343
+ assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
341
344
 
342
345
  // If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
343
346
  // compress_subtree_wide() returns more than 2 chaining values. Condense
344
347
  // them into 2 by forming parent nodes repeatedly.
345
348
  uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
346
- while (num_cvs > 2) {
349
+ // The second half of this loop condition is always true, and we just
350
+ // asserted it above. But GCC can't tell that it's always true, and if NDEBUG
351
+ // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
352
+ // warnings here. GCC 8.5 is particularly sensitive, so if you're changing
353
+ // this code, test it against that version.
354
+ while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
347
355
  num_cvs =
348
356
  compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
349
357
  memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
@@ -8,12 +8,12 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
+ #define BLAKE3_VERSION_STRING "1.2.0"
11
12
  #define BLAKE3_KEY_LEN 32
12
13
  #define BLAKE3_OUT_LEN 32
13
14
  #define BLAKE3_BLOCK_LEN 64
14
15
  #define BLAKE3_CHUNK_LEN 1024
15
16
  #define BLAKE3_MAX_DEPTH 54
16
- #define BLAKE3_MAX_SIMD_DEGREE 16
17
17
 
18
18
  // This struct is a private implementation detail. It has to be here because
19
19
  // it's part of blake3_hasher below.
@@ -38,11 +38,12 @@ typedef struct {
38
38
  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
39
39
  } blake3_hasher;
40
40
 
41
+ const char *blake3_version(void);
41
42
  void blake3_hasher_init(blake3_hasher *self);
42
43
  void blake3_hasher_init_keyed(blake3_hasher *self,
43
44
  const uint8_t key[BLAKE3_KEY_LEN]);
44
45
  void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
45
- void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
46
+ void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
46
47
  size_t context_len);
47
48
  void blake3_hasher_update(blake3_hasher *self, const void *input,
48
49
  size_t input_len);
@@ -14,6 +14,8 @@
14
14
  #endif
15
15
  #endif
16
16
 
17
+ #define MAYBE_UNUSED(x) (void)((x))
18
+
17
19
  #if defined(IS_X86)
18
20
  static uint64_t xgetbv() {
19
21
  #if defined(_MSC_VER)
@@ -137,6 +139,7 @@ void blake3_compress_in_place(uint32_t cv[8],
137
139
  uint8_t flags) {
138
140
  #if defined(IS_X86)
139
141
  const enum cpu_feature features = get_cpu_features();
142
+ MAYBE_UNUSED(features);
140
143
  #if !defined(BLAKE3_NO_AVX512)
141
144
  if (features & AVX512VL) {
142
145
  blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
@@ -165,6 +168,7 @@ void blake3_compress_xof(const uint32_t cv[8],
165
168
  uint8_t out[64]) {
166
169
  #if defined(IS_X86)
167
170
  const enum cpu_feature features = get_cpu_features();
171
+ MAYBE_UNUSED(features);
168
172
  #if !defined(BLAKE3_NO_AVX512)
169
173
  if (features & AVX512VL) {
170
174
  blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
@@ -193,6 +197,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
193
197
  uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
194
198
  #if defined(IS_X86)
195
199
  const enum cpu_feature features = get_cpu_features();
200
+ MAYBE_UNUSED(features);
196
201
  #if !defined(BLAKE3_NO_AVX512)
197
202
  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
198
203
  blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
@@ -227,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
227
232
  #endif
228
233
  #endif
229
234
 
230
- #if defined(BLAKE3_USE_NEON)
235
+ #if BLAKE3_USE_NEON == 1
231
236
  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
232
237
  increment_counter, flags, flags_start, flags_end, out);
233
238
  return;
@@ -242,6 +247,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
242
247
  size_t blake3_simd_degree(void) {
243
248
  #if defined(IS_X86)
244
249
  const enum cpu_feature features = get_cpu_features();
250
+ MAYBE_UNUSED(features);
245
251
  #if !defined(BLAKE3_NO_AVX512)
246
252
  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
247
253
  return 16;
@@ -263,7 +269,7 @@ size_t blake3_simd_degree(void) {
263
269
  }
264
270
  #endif
265
271
  #endif
266
- #if defined(BLAKE3_USE_NEON)
272
+ #if BLAKE3_USE_NEON == 1
267
273
  return 4;
268
274
  #endif
269
275
  return 1;
@@ -38,6 +38,10 @@ enum blake3_flags {
38
38
  #define IS_X86_32
39
39
  #endif
40
40
 
41
+ #if defined(__aarch64__) || defined(_M_ARM64)
42
+ #define IS_AARCH64
43
+ #endif
44
+
41
45
  #if defined(IS_X86)
42
46
  #if defined(_MSC_VER)
43
47
  #include <intrin.h>
@@ -45,9 +49,18 @@ enum blake3_flags {
45
49
  #include <immintrin.h>
46
50
  #endif
47
51
 
52
+ #if !defined(BLAKE3_USE_NEON)
53
+ // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
54
+ #if defined(IS_AARCH64)
55
+ #define BLAKE3_USE_NEON 1
56
+ #else
57
+ #define BLAKE3_USE_NEON 0
58
+ #endif
59
+ #endif
60
+
48
61
  #if defined(IS_X86)
49
62
  #define MAX_SIMD_DEGREE 16
50
- #elif defined(BLAKE3_USE_NEON)
63
+ #elif BLAKE3_USE_NEON == 1
51
64
  #define MAX_SIMD_DEGREE 4
52
65
  #else
53
66
  #define MAX_SIMD_DEGREE 1
@@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
257
270
  #endif
258
271
  #endif
259
272
 
260
- #if defined(BLAKE3_USE_NEON)
273
+ #if BLAKE3_USE_NEON == 1
261
274
  void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
262
275
  size_t blocks, const uint32_t key[8],
263
276
  uint64_t counter, bool increment_counter,
@@ -2,7 +2,12 @@
2
2
 
3
3
  #include <arm_neon.h>
4
4
 
5
- // TODO: This is probably incorrect for big-endian ARM. How should that work?
5
+ #ifdef __ARM_BIG_ENDIAN
6
+ #error "This implementation only supports little-endian ARM."
7
+ // It might be that all we need for big-endian support here is to get the loads
8
+ // and stores right, but step zero would be finding a way to test it in CI.
9
+ #endif
10
+
6
11
  INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
7
12
  // vld1q_u32 has alignment requirements. Don't use it.
8
13
  uint32x4_t x;
@@ -1704,7 +1704,7 @@ blake3_hash_many_sse2:
1704
1704
  pshufd xmm15, xmm11, 0x93
1705
1705
  shl rax, 0x20
1706
1706
  or rax, 0x40
1707
- movd xmm3, rax
1707
+ movq xmm3, rax
1708
1708
  movdqa xmmword ptr [rsp+0x20], xmm3
1709
1709
  movaps xmm3, xmmword ptr [rsp]
1710
1710
  movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1917,7 +1917,7 @@ blake3_hash_many_sse2:
1917
1917
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1918
1918
  shl rax, 32
1919
1919
  or rax, 64
1920
- movd xmm12, rax
1920
+ movq xmm12, rax
1921
1921
  movdqa xmm3, xmm13
1922
1922
  punpcklqdq xmm3, xmm12
1923
1923
  movups xmm4, xmmword ptr [r8+rdx-0x40]
@@ -1715,7 +1715,7 @@ blake3_hash_many_sse2:
1715
1715
  pshufd xmm15, xmm11, 0x93
1716
1716
  shl rax, 0x20
1717
1717
  or rax, 0x40
1718
- movd xmm3, rax
1718
+ movq xmm3, rax
1719
1719
  movdqa xmmword ptr [rsp+0x20], xmm3
1720
1720
  movaps xmm3, xmmword ptr [rsp]
1721
1721
  movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1928,7 +1928,7 @@ blake3_hash_many_sse2:
1928
1928
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1929
1929
  shl rax, 32
1930
1930
  or rax, 64
1931
- movd xmm12, rax
1931
+ movq xmm12, rax
1932
1932
  movdqa xmm3, xmm13
1933
1933
  punpcklqdq xmm3, xmm12
1934
1934
  movups xmm4, xmmword ptr [r8+rdx-0x40]
@@ -2137,10 +2137,10 @@ _blake3_compress_in_place_sse2:
2137
2137
  por xmm9, xmm8
2138
2138
  movdqa xmm8, xmm7
2139
2139
  punpcklqdq xmm8, xmm5
2140
- movdqa xmm10, xmm6
2140
+ movdqa xmm14, xmm6
2141
2141
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2142
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2143
- por xmm8, xmm10
2142
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2143
+ por xmm8, xmm14
2144
2144
  pshufd xmm8, xmm8, 0x78
2145
2145
  punpckhdq xmm5, xmm7
2146
2146
  punpckldq xmm6, xmm5
@@ -2268,10 +2268,10 @@ blake3_compress_xof_sse2:
2268
2268
  por xmm9, xmm8
2269
2269
  movdqa xmm8, xmm7
2270
2270
  punpcklqdq xmm8, xmm5
2271
- movdqa xmm10, xmm6
2271
+ movdqa xmm14, xmm6
2272
2272
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2273
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2274
- por xmm8, xmm10
2273
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2274
+ por xmm8, xmm14
2275
2275
  pshufd xmm8, xmm8, 0x78
2276
2276
  punpckhdq xmm5, xmm7
2277
2277
  punpckldq xmm6, xmm5
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
2054
2054
  movzx r8d, r8b
2055
2055
  shl rax, 32
2056
2056
  add r8, rax
2057
- movq xmm3, r9
2058
- movq xmm4, r8
2057
+ movd xmm3, r9
2058
+ movd xmm4, r8
2059
2059
  punpcklqdq xmm3, xmm4
2060
2060
  movups xmm4, xmmword ptr [rdx]
2061
2061
  movups xmm5, xmmword ptr [rdx+10H]
@@ -2139,10 +2139,10 @@ _blake3_compress_in_place_sse2 PROC
2139
2139
  por xmm9, xmm8
2140
2140
  movdqa xmm8, xmm7
2141
2141
  punpcklqdq xmm8, xmm5
2142
- movdqa xmm10, xmm6
2142
+ movdqa xmm14, xmm6
2143
2143
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
2144
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
2145
- por xmm8, xmm10
2144
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
2145
+ por xmm8, xmm14
2146
2146
  pshufd xmm8, xmm8, 78H
2147
2147
  punpckhdq xmm5, xmm7
2148
2148
  punpckldq xmm6, xmm5
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
2186
2186
  mov r10, qword ptr [rsp+0A8H]
2187
2187
  shl rax, 32
2188
2188
  add r8, rax
2189
- movq xmm3, r9
2190
- movq xmm4, r8
2189
+ movd xmm3, r9
2190
+ movd xmm4, r8
2191
2191
  punpcklqdq xmm3, xmm4
2192
2192
  movups xmm4, xmmword ptr [rdx]
2193
2193
  movups xmm5, xmmword ptr [rdx+10H]
@@ -2271,10 +2271,10 @@ _blake3_compress_xof_sse2 PROC
2271
2271
  por xmm9, xmm8
2272
2272
  movdqa xmm8, xmm7
2273
2273
  punpcklqdq xmm8, xmm5
2274
- movdqa xmm10, xmm6
2274
+ movdqa xmm14, xmm6
2275
2275
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
2276
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
2277
- por xmm8, xmm10
2276
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
2277
+ por xmm8, xmm14
2278
2278
  pshufd xmm8, xmm8, 78H
2279
2279
  punpckhdq xmm5, xmm7
2280
2280
  punpckldq xmm6, xmm5
@@ -1817,8 +1817,8 @@ _blake3_compress_in_place_sse41 PROC
1817
1817
  movzx r8d, r8b
1818
1818
  shl rax, 32
1819
1819
  add r8, rax
1820
- movq xmm3, r9
1821
- movq xmm4, r8
1820
+ movd xmm3, r9
1821
+ movd xmm4, r8
1822
1822
  punpcklqdq xmm3, xmm4
1823
1823
  movups xmm4, xmmword ptr [rdx]
1824
1824
  movups xmm5, xmmword ptr [rdx+10H]
@@ -1938,8 +1938,8 @@ _blake3_compress_xof_sse41 PROC
1938
1938
  mov r10, qword ptr [rsp+0A8H]
1939
1939
  shl rax, 32
1940
1940
  add r8, rax
1941
- movq xmm3, r9
1942
- movq xmm4, r8
1941
+ movd xmm3, r9
1942
+ movd xmm4, r8
1943
1943
  punpcklqdq xmm3, xmm4
1944
1944
  movups xmm4, xmmword ptr [rdx]
1945
1945
  movups xmm5, xmmword ptr [rdx+10H]
@@ -2,6 +2,6 @@ require 'digest'
2
2
 
3
3
  module Digest
4
4
  class BLAKE3 < Base
5
- VERSION = "0.37.0.1"
5
+ VERSION = "1.2.0.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-blake3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.37.0.1
4
+ version: 1.2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Bryant
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-18 00:00:00.000000000 Z
11
+ date: 2022-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler