digest-blake3 1.2.0.0 → 1.4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec7a77d6875b688e1cb1fbe8470cbf67278f9fe3f2f8e516bafe7abc0bf54bc4
4
- data.tar.gz: 74e13b2480eccd5c2fe3fa913a0962217c1f07c95b5db80b8303086488ee5d9f
3
+ metadata.gz: 600afca6f08145f3e28b49fbe757b661368d58dc8ec20e1778a915407dcc660a
4
+ data.tar.gz: 1cd455e9caf97fd0f514623ba6b9d7f74249071af8cb8b5377ba10de91c5eb34
5
5
  SHA512:
6
- metadata.gz: de0fb7b5ccce755c313da8e547a430950d181170c64561746890ce8855ce5e09d3232b16316f36d22320ae5d23cf7904e8221a26358e96d9566ba247ef613214
7
- data.tar.gz: 33e15e9469128ba227dbe6b57d9c44fe55078b9031975bf9db783a469c93342c7ccbf38b763ddfed7f09c941a42a6df89302cfda0e38b0ad4967a12acac4b18a
6
+ metadata.gz: d515228fab5f92576d9b1f67d66ffff97623f154dcab9a1dcb140b9e69884325797991d1f87d7f5cb26cb6397a86e989a1c1f2163b478641efdc4b85b5772026
7
+ data.tar.gz: ed75418dda098a8700554b871189c9995e1e6e4969d86cc8d9afd46d8439ecf985fb111f1a6d6adb9173f68cb33c7c261b35b0820439c48a92e354da2f662e35
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- digest-blake3 (1.2.0.0)
4
+ digest-blake3 (1.3.3.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.14.0)
10
- rake (13.0.1)
9
+ minitest (5.16.3)
10
+ rake (13.0.6)
11
11
 
12
12
  PLATFORMS
13
13
  ruby
@@ -19,4 +19,4 @@ DEPENDENCIES
19
19
  rake
20
20
 
21
21
  BUNDLED WITH
22
- 1.17.3
22
+ 2.3.1
@@ -246,7 +246,7 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
246
246
 
247
247
  // The wide helper function returns (writes out) an array of chaining values
248
248
  // and returns the length of that array. The number of chaining values returned
249
- // is the dyanmically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
249
+ // is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
250
250
  // if the input is shorter than that many chunks. The reason for maintaining a
251
251
  // wide array of chaining values going back up the tree, is to allow the
252
252
  // implementation to hash as many parents in parallel as possible.
@@ -254,7 +254,7 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
254
254
  // As a special case when the SIMD degree is 1, this function will still return
255
255
  // at least 2 outputs. This guarantees that this function doesn't perform the
256
256
  // root compression. (If it did, it would use the wrong flags, and also we
257
- // wouldn't be able to implement exendable ouput.) Note that this function is
257
+ // wouldn't be able to implement extendable output.) Note that this function is
258
258
  // not used when the whole input is only 1 chunk long; that's a different
259
259
  // codepath.
260
260
  //
@@ -609,3 +609,8 @@ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
609
609
  }
610
610
  output_root_bytes(&output, seek, out, out_len);
611
611
  }
612
+
613
+ void blake3_hasher_reset(blake3_hasher *self) {
614
+ chunk_state_reset(&self->chunk, self->key, 0);
615
+ self->cv_stack_len = 0;
616
+ }
@@ -8,7 +8,7 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
- #define BLAKE3_VERSION_STRING "1.2.0"
11
+ #define BLAKE3_VERSION_STRING "1.3.3"
12
12
  #define BLAKE3_KEY_LEN 32
13
13
  #define BLAKE3_OUT_LEN 32
14
14
  #define BLAKE3_BLOCK_LEN 64
@@ -51,6 +51,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
51
51
  size_t out_len);
52
52
  void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
53
53
  uint8_t *out, size_t out_len);
54
+ void blake3_hasher_reset(blake3_hasher *self);
54
55
 
55
56
  #ifdef __cplusplus
56
57
  }
@@ -208,7 +208,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
208
208
  out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
209
209
  out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
210
210
  for (size_t i = 0; i < 8; ++i) {
211
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
211
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
212
212
  }
213
213
  transpose_vecs(&out[0]);
214
214
  transpose_vecs(&out[8]);
@@ -219,14 +219,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
219
219
  const __m256i mask = _mm256_set1_epi32(-(int32_t)increment_counter);
220
220
  const __m256i add0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
221
221
  const __m256i add1 = _mm256_and_si256(mask, add0);
222
- __m256i l = _mm256_add_epi32(_mm256_set1_epi32(counter), add1);
222
+ __m256i l = _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), add1);
223
223
  __m256i carry = _mm256_cmpgt_epi32(_mm256_xor_si256(add1, _mm256_set1_epi32(0x80000000)),
224
224
  _mm256_xor_si256( l, _mm256_set1_epi32(0x80000000)));
225
- __m256i h = _mm256_sub_epi32(_mm256_set1_epi32(counter >> 32), carry);
225
+ __m256i h = _mm256_sub_epi32(_mm256_set1_epi32((int32_t)(counter >> 32)), carry);
226
226
  *out_lo = l;
227
227
  *out_hi = h;
228
228
  }
229
229
 
230
+ static
230
231
  void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
231
232
  const uint32_t key[8], uint64_t counter,
232
233
  bool increment_counter, uint8_t flags,
@@ -468,7 +468,7 @@ INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
468
468
  out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
469
469
  out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
470
470
  for (size_t i = 0; i < 4; ++i) {
471
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
471
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
472
472
  }
473
473
  transpose_vecs_128(&out[0]);
474
474
  transpose_vecs_128(&out[4]);
@@ -488,6 +488,7 @@ INLINE void load_counters4(uint64_t counter, bool increment_counter,
488
488
  *out_hi = _mm256_cvtepi64_epi32(_mm256_srli_epi64(counters, 32));
489
489
  }
490
490
 
491
+ static
491
492
  void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks,
492
493
  const uint32_t key[8], uint64_t counter,
493
494
  bool increment_counter, uint8_t flags,
@@ -724,7 +725,7 @@ INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
724
725
  out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
725
726
  out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
726
727
  for (size_t i = 0; i < 8; ++i) {
727
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
728
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
728
729
  }
729
730
  transpose_vecs_256(&out[0]);
730
731
  transpose_vecs_256(&out[8]);
@@ -742,6 +743,7 @@ INLINE void load_counters8(uint64_t counter, bool increment_counter,
742
743
  *out_hi = _mm512_cvtepi64_epi32(_mm512_srli_epi64(counters, 32));
743
744
  }
744
745
 
746
+ static
745
747
  void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks,
746
748
  const uint32_t key[8], uint64_t counter,
747
749
  bool increment_counter, uint8_t flags,
@@ -1037,7 +1039,7 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
1037
1039
  out[14] = loadu_512(&inputs[14][block_offset]);
1038
1040
  out[15] = loadu_512(&inputs[15][block_offset]);
1039
1041
  for (size_t i = 0; i < 16; ++i) {
1040
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
1042
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
1041
1043
  }
1042
1044
  transpose_vecs_512(out);
1043
1045
  }
@@ -1045,15 +1047,29 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
1045
1047
  INLINE void load_counters16(uint64_t counter, bool increment_counter,
1046
1048
  __m512i *out_lo, __m512i *out_hi) {
1047
1049
  const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter);
1048
- const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1049
- const __m512i add1 = _mm512_and_si512(mask, add0);
1050
- __m512i l = _mm512_add_epi32(_mm512_set1_epi32(counter), add1);
1051
- __mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT);
1052
- __m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32(counter >> 32), carry, _mm512_set1_epi32(counter >> 32), _mm512_set1_epi32(1));
1053
- *out_lo = l;
1054
- *out_hi = h;
1050
+ const __m512i deltas = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1051
+ const __m512i masked_deltas = _mm512_and_si512(deltas, mask);
1052
+ const __m512i low_words = _mm512_add_epi32(
1053
+ _mm512_set1_epi32((int32_t)counter),
1054
+ masked_deltas);
1055
+ // The carry bit is 1 if the high bit of the word was 1 before addition and is
1056
+ // 0 after.
1057
+ // NOTE: It would be a bit more natural to use _mm512_cmp_epu32_mask to
1058
+ // compute the carry bits here, and originally we did, but that intrinsic is
1059
+ // broken under GCC 5.4. See https://github.com/BLAKE3-team/BLAKE3/issues/271.
1060
+ const __m512i carries = _mm512_srli_epi32(
1061
+ _mm512_andnot_si512(
1062
+ low_words, // 0 after (gets inverted by andnot)
1063
+ _mm512_set1_epi32((int32_t)counter)), // and 1 before
1064
+ 31);
1065
+ const __m512i high_words = _mm512_add_epi32(
1066
+ _mm512_set1_epi32((int32_t)(counter >> 32)),
1067
+ carries);
1068
+ *out_lo = low_words;
1069
+ *out_hi = high_words;
1055
1070
  }
1056
1071
 
1072
+ static
1057
1073
  void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks,
1058
1074
  const uint32_t key[8], uint64_t counter,
1059
1075
  bool increment_counter, uint8_t flags,
@@ -10,14 +10,14 @@
10
10
  #elif defined(__GNUC__)
11
11
  #include <immintrin.h>
12
12
  #else
13
- #error "Unimplemented!"
13
+ #undef IS_X86 /* Unimplemented! */
14
14
  #endif
15
15
  #endif
16
16
 
17
17
  #define MAYBE_UNUSED(x) (void)((x))
18
18
 
19
19
  #if defined(IS_X86)
20
- static uint64_t xgetbv() {
20
+ static uint64_t xgetbv(void) {
21
21
  #if defined(_MSC_VER)
22
22
  return _xgetbv(0);
23
23
  #else
@@ -82,7 +82,7 @@ static /* Allow the variable to be controlled manually for testing */
82
82
  static
83
83
  #endif
84
84
  enum cpu_feature
85
- get_cpu_features() {
85
+ get_cpu_features(void) {
86
86
 
87
87
  if (g_cpu_features != UNDEFINED) {
88
88
  return g_cpu_features;
@@ -46,7 +46,6 @@ enum blake3_flags {
46
46
  #if defined(_MSC_VER)
47
47
  #include <intrin.h>
48
48
  #endif
49
- #include <immintrin.h>
50
49
  #endif
51
50
 
52
51
  #if !defined(BLAKE3_USE_NEON)
@@ -96,11 +95,11 @@ static unsigned int highest_one(uint64_t x) {
96
95
  #elif defined(_MSC_VER) && defined(IS_X86_32)
97
96
  if(x >> 32) {
98
97
  unsigned long index;
99
- _BitScanReverse(&index, x >> 32);
98
+ _BitScanReverse(&index, (unsigned long)(x >> 32));
100
99
  return 32 + index;
101
100
  } else {
102
101
  unsigned long index;
103
- _BitScanReverse(&index, x);
102
+ _BitScanReverse(&index, (unsigned long)x);
104
103
  return index;
105
104
  }
106
105
  #else
@@ -78,7 +78,7 @@ INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
78
78
  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
79
79
  }
80
80
 
81
- INLINE __m128i blend_epi16(__m128i a, __m128i b, const int imm8) {
81
+ INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) {
82
82
  const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
83
83
  __m128i mask = _mm_set1_epi16(imm8);
84
84
  mask = _mm_and_si128(mask, bits);
@@ -435,7 +435,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
435
435
  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
436
436
  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
437
437
  for (size_t i = 0; i < 4; ++i) {
438
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
438
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
439
439
  }
440
440
  transpose_vecs(&out[0]);
441
441
  transpose_vecs(&out[4]);
@@ -448,14 +448,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
448
448
  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
449
449
  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
450
450
  const __m128i add1 = _mm_and_si128(mask, add0);
451
- __m128i l = _mm_add_epi32(_mm_set1_epi32(counter), add1);
451
+ __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
452
452
  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
453
453
  _mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
454
- __m128i h = _mm_sub_epi32(_mm_set1_epi32(counter >> 32), carry);
454
+ __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
455
455
  *out_lo = l;
456
456
  *out_hi = h;
457
457
  }
458
458
 
459
+ static
459
460
  void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks,
460
461
  const uint32_t key[8], uint64_t counter,
461
462
  bool increment_counter, uint8_t flags,
@@ -429,7 +429,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
429
429
  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
430
430
  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
431
431
  for (size_t i = 0; i < 4; ++i) {
432
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
432
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
433
433
  }
434
434
  transpose_vecs(&out[0]);
435
435
  transpose_vecs(&out[4]);
@@ -442,14 +442,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
442
442
  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
443
443
  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
444
444
  const __m128i add1 = _mm_and_si128(mask, add0);
445
- __m128i l = _mm_add_epi32(_mm_set1_epi32(counter), add1);
445
+ __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
446
446
  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
447
447
  _mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
448
- __m128i h = _mm_sub_epi32(_mm_set1_epi32(counter >> 32), carry);
448
+ __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
449
449
  *out_lo = l;
450
450
  *out_hi = h;
451
451
  }
452
452
 
453
+ static
453
454
  void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
454
455
  const uint32_t key[8], uint64_t counter,
455
456
  bool increment_counter, uint8_t flags,
@@ -34,17 +34,19 @@ def check_supported_flags(flags, obj_if_enabled, def_if_disabled)
34
34
  end
35
35
  end
36
36
 
37
- check_supported_flags("-msse2", "blake3_sse2.o", "-DBLAKE3_NO_SSE2")
38
- check_supported_flags("-msse4.1", "blake3_sse41.o", "-DBLAKE3_NO_SSE41")
39
- check_supported_flags("-mavx2", "blake3_avx2.o", "-DBLAKE3_NO_AVX2")
40
- check_supported_flags("-mavx512f -mavx512vl -mavx512bw", "blake3_avx512.o", "-DBLAKE3_NO_AVX512")
37
+ unless RUBY_PLATFORM.include? 'arm64' or RUBY_PLATFORM.include? 'aarch64'
38
+ check_supported_flags("-msse2", "blake3_sse2.o", "-DBLAKE3_NO_SSE2")
39
+ check_supported_flags("-msse4.1", "blake3_sse41.o", "-DBLAKE3_NO_SSE41")
40
+ check_supported_flags("-mavx2", "blake3_avx2.o", "-DBLAKE3_NO_AVX2")
41
+ check_supported_flags("-mavx512f -mavx512vl -mavx512bw", "blake3_avx512.o", "-DBLAKE3_NO_AVX512")
42
+ end
41
43
 
42
44
  if have_header("arm_neon.h")
43
45
  $objs << "blake3_neon.o"
44
46
  $defs << "-DBLAKE3_USE_NEON"
45
47
  end
46
48
 
47
- create_makefile("digest/blake3") do |conf|
49
+ create_makefile("digest/blake3/blake3") do |conf|
48
50
  # annoyingly, we have to repeat this line from the default output, so that it appears above the
49
51
  # defines we add below and therefore becomes the default target. otherwise running 'make' with
50
52
  # no arguments builds the first of our .o files instead of the library.
@@ -2,6 +2,6 @@ require 'digest'
2
2
 
3
3
  module Digest
4
4
  class BLAKE3 < Base
5
- VERSION = "1.2.0.0"
5
+ VERSION = "1.4.0.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-blake3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0.0
4
+ version: 1.4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Bryant
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-07 00:00:00.000000000 Z
11
+ date: 2023-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -116,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  requirements: []
119
- rubygems_version: 3.0.3
119
+ rubygems_version: 3.4.10
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: BLAKE3 for Ruby