digest-blake3 0.37.0.1 → 1.2.0.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4dc981436633bde6ba4fb278252d8a4a1ba58d039d0b1c8c794e36c4e47fa4a0
4
- data.tar.gz: ae40be72a0252730792f3e82a00da765546c9606d91405ff69d3bad078ad307f
3
+ metadata.gz: ec7a77d6875b688e1cb1fbe8470cbf67278f9fe3f2f8e516bafe7abc0bf54bc4
4
+ data.tar.gz: 74e13b2480eccd5c2fe3fa913a0962217c1f07c95b5db80b8303086488ee5d9f
5
5
  SHA512:
6
- metadata.gz: c18ca69b1f4b47ac8308ee00cc6db861eb48bd3921a85c291cb09ea595534b1a476988453c1931cb9982e9f96e7d14e4fa4356cbecaa9c584252c9b7ad30ac62
7
- data.tar.gz: b9cf5f04daf5d83a797191caa2f2c30e068ddd6b771d887acb96963b93550171e77ea81128fffc316b49641391a4f245a8484b8b909d53502be3a2fb3170ad76
6
+ metadata.gz: de0fb7b5ccce755c313da8e547a430950d181170c64561746890ce8855ce5e09d3232b16316f36d22320ae5d23cf7904e8221a26358e96d9566ba247ef613214
7
+ data.tar.gz: 33e15e9469128ba227dbe6b57d9c44fe55078b9031975bf9db783a469c93342c7ccbf38b763ddfed7f09c941a42a6df89302cfda0e38b0ad4967a12acac4b18a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- digest-blake3 (0.37.0)
4
+ digest-blake3 (1.2.0.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -5,6 +5,8 @@
5
5
  #include "blake3.h"
6
6
  #include "blake3_impl.h"
7
7
 
8
+ const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
9
+
8
10
  INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
9
11
  uint8_t flags) {
10
12
  memcpy(self->cv, key, BLAKE3_KEY_LEN);
@@ -338,12 +340,18 @@ INLINE void compress_subtree_to_parent_node(
338
340
  uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
339
341
  size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
340
342
  chunk_counter, flags, cv_array);
343
+ assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
341
344
 
342
345
  // If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
343
346
  // compress_subtree_wide() returns more than 2 chaining values. Condense
344
347
  // them into 2 by forming parent nodes repeatedly.
345
348
  uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
346
- while (num_cvs > 2) {
349
+ // The second half of this loop condition is always true, and we just
350
+ // asserted it above. But GCC can't tell that it's always true, and if NDEBUG
351
+ // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
352
+ // warnings here. GCC 8.5 is particularly sensitive, so if you're changing
353
+ // this code, test it against that version.
354
+ while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
347
355
  num_cvs =
348
356
  compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
349
357
  memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
@@ -8,12 +8,12 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
+ #define BLAKE3_VERSION_STRING "1.2.0"
11
12
  #define BLAKE3_KEY_LEN 32
12
13
  #define BLAKE3_OUT_LEN 32
13
14
  #define BLAKE3_BLOCK_LEN 64
14
15
  #define BLAKE3_CHUNK_LEN 1024
15
16
  #define BLAKE3_MAX_DEPTH 54
16
- #define BLAKE3_MAX_SIMD_DEGREE 16
17
17
 
18
18
  // This struct is a private implementation detail. It has to be here because
19
19
  // it's part of blake3_hasher below.
@@ -38,11 +38,12 @@ typedef struct {
38
38
  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
39
39
  } blake3_hasher;
40
40
 
41
+ const char *blake3_version(void);
41
42
  void blake3_hasher_init(blake3_hasher *self);
42
43
  void blake3_hasher_init_keyed(blake3_hasher *self,
43
44
  const uint8_t key[BLAKE3_KEY_LEN]);
44
45
  void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
45
- void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
46
+ void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
46
47
  size_t context_len);
47
48
  void blake3_hasher_update(blake3_hasher *self, const void *input,
48
49
  size_t input_len);
@@ -14,6 +14,8 @@
14
14
  #endif
15
15
  #endif
16
16
 
17
+ #define MAYBE_UNUSED(x) (void)((x))
18
+
17
19
  #if defined(IS_X86)
18
20
  static uint64_t xgetbv() {
19
21
  #if defined(_MSC_VER)
@@ -137,6 +139,7 @@ void blake3_compress_in_place(uint32_t cv[8],
137
139
  uint8_t flags) {
138
140
  #if defined(IS_X86)
139
141
  const enum cpu_feature features = get_cpu_features();
142
+ MAYBE_UNUSED(features);
140
143
  #if !defined(BLAKE3_NO_AVX512)
141
144
  if (features & AVX512VL) {
142
145
  blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
@@ -165,6 +168,7 @@ void blake3_compress_xof(const uint32_t cv[8],
165
168
  uint8_t out[64]) {
166
169
  #if defined(IS_X86)
167
170
  const enum cpu_feature features = get_cpu_features();
171
+ MAYBE_UNUSED(features);
168
172
  #if !defined(BLAKE3_NO_AVX512)
169
173
  if (features & AVX512VL) {
170
174
  blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
@@ -193,6 +197,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
193
197
  uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
194
198
  #if defined(IS_X86)
195
199
  const enum cpu_feature features = get_cpu_features();
200
+ MAYBE_UNUSED(features);
196
201
  #if !defined(BLAKE3_NO_AVX512)
197
202
  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
198
203
  blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
@@ -227,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
227
232
  #endif
228
233
  #endif
229
234
 
230
- #if defined(BLAKE3_USE_NEON)
235
+ #if BLAKE3_USE_NEON == 1
231
236
  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
232
237
  increment_counter, flags, flags_start, flags_end, out);
233
238
  return;
@@ -242,6 +247,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
242
247
  size_t blake3_simd_degree(void) {
243
248
  #if defined(IS_X86)
244
249
  const enum cpu_feature features = get_cpu_features();
250
+ MAYBE_UNUSED(features);
245
251
  #if !defined(BLAKE3_NO_AVX512)
246
252
  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
247
253
  return 16;
@@ -263,7 +269,7 @@ size_t blake3_simd_degree(void) {
263
269
  }
264
270
  #endif
265
271
  #endif
266
- #if defined(BLAKE3_USE_NEON)
272
+ #if BLAKE3_USE_NEON == 1
267
273
  return 4;
268
274
  #endif
269
275
  return 1;
@@ -38,6 +38,10 @@ enum blake3_flags {
38
38
  #define IS_X86_32
39
39
  #endif
40
40
 
41
+ #if defined(__aarch64__) || defined(_M_ARM64)
42
+ #define IS_AARCH64
43
+ #endif
44
+
41
45
  #if defined(IS_X86)
42
46
  #if defined(_MSC_VER)
43
47
  #include <intrin.h>
@@ -45,9 +49,18 @@ enum blake3_flags {
45
49
  #include <immintrin.h>
46
50
  #endif
47
51
 
52
+ #if !defined(BLAKE3_USE_NEON)
53
+ // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
54
+ #if defined(IS_AARCH64)
55
+ #define BLAKE3_USE_NEON 1
56
+ #else
57
+ #define BLAKE3_USE_NEON 0
58
+ #endif
59
+ #endif
60
+
48
61
  #if defined(IS_X86)
49
62
  #define MAX_SIMD_DEGREE 16
50
- #elif defined(BLAKE3_USE_NEON)
63
+ #elif BLAKE3_USE_NEON == 1
51
64
  #define MAX_SIMD_DEGREE 4
52
65
  #else
53
66
  #define MAX_SIMD_DEGREE 1
@@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
257
270
  #endif
258
271
  #endif
259
272
 
260
- #if defined(BLAKE3_USE_NEON)
273
+ #if BLAKE3_USE_NEON == 1
261
274
  void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
262
275
  size_t blocks, const uint32_t key[8],
263
276
  uint64_t counter, bool increment_counter,
@@ -2,7 +2,12 @@
2
2
 
3
3
  #include <arm_neon.h>
4
4
 
5
- // TODO: This is probably incorrect for big-endian ARM. How should that work?
5
+ #ifdef __ARM_BIG_ENDIAN
6
+ #error "This implementation only supports little-endian ARM."
7
+ // It might be that all we need for big-endian support here is to get the loads
8
+ // and stores right, but step zero would be finding a way to test it in CI.
9
+ #endif
10
+
6
11
  INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
7
12
  // vld1q_u32 has alignment requirements. Don't use it.
8
13
  uint32x4_t x;
@@ -1704,7 +1704,7 @@ blake3_hash_many_sse2:
1704
1704
  pshufd xmm15, xmm11, 0x93
1705
1705
  shl rax, 0x20
1706
1706
  or rax, 0x40
1707
- movd xmm3, rax
1707
+ movq xmm3, rax
1708
1708
  movdqa xmmword ptr [rsp+0x20], xmm3
1709
1709
  movaps xmm3, xmmword ptr [rsp]
1710
1710
  movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1917,7 +1917,7 @@ blake3_hash_many_sse2:
1917
1917
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1918
1918
  shl rax, 32
1919
1919
  or rax, 64
1920
- movd xmm12, rax
1920
+ movq xmm12, rax
1921
1921
  movdqa xmm3, xmm13
1922
1922
  punpcklqdq xmm3, xmm12
1923
1923
  movups xmm4, xmmword ptr [r8+rdx-0x40]
@@ -1715,7 +1715,7 @@ blake3_hash_many_sse2:
1715
1715
  pshufd xmm15, xmm11, 0x93
1716
1716
  shl rax, 0x20
1717
1717
  or rax, 0x40
1718
- movd xmm3, rax
1718
+ movq xmm3, rax
1719
1719
  movdqa xmmword ptr [rsp+0x20], xmm3
1720
1720
  movaps xmm3, xmmword ptr [rsp]
1721
1721
  movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1928,7 +1928,7 @@ blake3_hash_many_sse2:
1928
1928
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1929
1929
  shl rax, 32
1930
1930
  or rax, 64
1931
- movd xmm12, rax
1931
+ movq xmm12, rax
1932
1932
  movdqa xmm3, xmm13
1933
1933
  punpcklqdq xmm3, xmm12
1934
1934
  movups xmm4, xmmword ptr [r8+rdx-0x40]
@@ -2137,10 +2137,10 @@ _blake3_compress_in_place_sse2:
2137
2137
  por xmm9, xmm8
2138
2138
  movdqa xmm8, xmm7
2139
2139
  punpcklqdq xmm8, xmm5
2140
- movdqa xmm10, xmm6
2140
+ movdqa xmm14, xmm6
2141
2141
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2142
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2143
- por xmm8, xmm10
2142
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2143
+ por xmm8, xmm14
2144
2144
  pshufd xmm8, xmm8, 0x78
2145
2145
  punpckhdq xmm5, xmm7
2146
2146
  punpckldq xmm6, xmm5
@@ -2268,10 +2268,10 @@ blake3_compress_xof_sse2:
2268
2268
  por xmm9, xmm8
2269
2269
  movdqa xmm8, xmm7
2270
2270
  punpcklqdq xmm8, xmm5
2271
- movdqa xmm10, xmm6
2271
+ movdqa xmm14, xmm6
2272
2272
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2273
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2274
- por xmm8, xmm10
2273
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2274
+ por xmm8, xmm14
2275
2275
  pshufd xmm8, xmm8, 0x78
2276
2276
  punpckhdq xmm5, xmm7
2277
2277
  punpckldq xmm6, xmm5
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
2054
2054
  movzx r8d, r8b
2055
2055
  shl rax, 32
2056
2056
  add r8, rax
2057
- movq xmm3, r9
2058
- movq xmm4, r8
2057
+ movd xmm3, r9
2058
+ movd xmm4, r8
2059
2059
  punpcklqdq xmm3, xmm4
2060
2060
  movups xmm4, xmmword ptr [rdx]
2061
2061
  movups xmm5, xmmword ptr [rdx+10H]
@@ -2139,10 +2139,10 @@ _blake3_compress_in_place_sse2 PROC
2139
2139
  por xmm9, xmm8
2140
2140
  movdqa xmm8, xmm7
2141
2141
  punpcklqdq xmm8, xmm5
2142
- movdqa xmm10, xmm6
2142
+ movdqa xmm14, xmm6
2143
2143
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
2144
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
2145
- por xmm8, xmm10
2144
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
2145
+ por xmm8, xmm14
2146
2146
  pshufd xmm8, xmm8, 78H
2147
2147
  punpckhdq xmm5, xmm7
2148
2148
  punpckldq xmm6, xmm5
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
2186
2186
  mov r10, qword ptr [rsp+0A8H]
2187
2187
  shl rax, 32
2188
2188
  add r8, rax
2189
- movq xmm3, r9
2190
- movq xmm4, r8
2189
+ movd xmm3, r9
2190
+ movd xmm4, r8
2191
2191
  punpcklqdq xmm3, xmm4
2192
2192
  movups xmm4, xmmword ptr [rdx]
2193
2193
  movups xmm5, xmmword ptr [rdx+10H]
@@ -2271,10 +2271,10 @@ _blake3_compress_xof_sse2 PROC
2271
2271
  por xmm9, xmm8
2272
2272
  movdqa xmm8, xmm7
2273
2273
  punpcklqdq xmm8, xmm5
2274
- movdqa xmm10, xmm6
2274
+ movdqa xmm14, xmm6
2275
2275
  pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
2276
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
2277
- por xmm8, xmm10
2276
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
2277
+ por xmm8, xmm14
2278
2278
  pshufd xmm8, xmm8, 78H
2279
2279
  punpckhdq xmm5, xmm7
2280
2280
  punpckldq xmm6, xmm5
@@ -1817,8 +1817,8 @@ _blake3_compress_in_place_sse41 PROC
1817
1817
  movzx r8d, r8b
1818
1818
  shl rax, 32
1819
1819
  add r8, rax
1820
- movq xmm3, r9
1821
- movq xmm4, r8
1820
+ movd xmm3, r9
1821
+ movd xmm4, r8
1822
1822
  punpcklqdq xmm3, xmm4
1823
1823
  movups xmm4, xmmword ptr [rdx]
1824
1824
  movups xmm5, xmmword ptr [rdx+10H]
@@ -1938,8 +1938,8 @@ _blake3_compress_xof_sse41 PROC
1938
1938
  mov r10, qword ptr [rsp+0A8H]
1939
1939
  shl rax, 32
1940
1940
  add r8, rax
1941
- movq xmm3, r9
1942
- movq xmm4, r8
1941
+ movd xmm3, r9
1942
+ movd xmm4, r8
1943
1943
  punpcklqdq xmm3, xmm4
1944
1944
  movups xmm4, xmmword ptr [rdx]
1945
1945
  movups xmm5, xmmword ptr [rdx+10H]
@@ -2,6 +2,6 @@ require 'digest'
2
2
 
3
3
  module Digest
4
4
  class BLAKE3 < Base
5
- VERSION = "0.37.0.1"
5
+ VERSION = "1.2.0.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-blake3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.37.0.1
4
+ version: 1.2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Bryant
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-18 00:00:00.000000000 Z
11
+ date: 2022-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler