digest-blake3 1.2.0.0 → 1.3.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec7a77d6875b688e1cb1fbe8470cbf67278f9fe3f2f8e516bafe7abc0bf54bc4
4
- data.tar.gz: 74e13b2480eccd5c2fe3fa913a0962217c1f07c95b5db80b8303086488ee5d9f
3
+ metadata.gz: 6e9f7d8e4619bac26fee8eceb10b221935755588c05f333e53275080c61b83a5
4
+ data.tar.gz: fecaddd526e8bf374d675e80d49aefbcbed12dc491a6be50ea9beea335b943fd
5
5
  SHA512:
6
- metadata.gz: de0fb7b5ccce755c313da8e547a430950d181170c64561746890ce8855ce5e09d3232b16316f36d22320ae5d23cf7904e8221a26358e96d9566ba247ef613214
7
- data.tar.gz: 33e15e9469128ba227dbe6b57d9c44fe55078b9031975bf9db783a469c93342c7ccbf38b763ddfed7f09c941a42a6df89302cfda0e38b0ad4967a12acac4b18a
6
+ metadata.gz: 6ecd8cdc8f5b320f152e4d89c263b68803f63ec609235965eebc11a8093d146bb7223b86302c24c0dc65abb69cbf6040e5635704fb07caeb115c8f76effe38c2
7
+ data.tar.gz: ec642c10fb95e51620ad78fa2915d467dc665b7c1683f8f30c56ab9ab5560a98983e6afdd4fca1c18d2088f0bd0e58248ea30a05230bbed48ff95d74851cd056
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- digest-blake3 (1.2.0.0)
4
+ digest-blake3 (1.3.3.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.14.0)
10
- rake (13.0.1)
9
+ minitest (5.16.3)
10
+ rake (13.0.6)
11
11
 
12
12
  PLATFORMS
13
13
  ruby
@@ -19,4 +19,4 @@ DEPENDENCIES
19
19
  rake
20
20
 
21
21
  BUNDLED WITH
22
- 1.17.3
22
+ 2.3.1
@@ -246,7 +246,7 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
246
246
 
247
247
  // The wide helper function returns (writes out) an array of chaining values
248
248
  // and returns the length of that array. The number of chaining values returned
249
- // is the dyanmically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
249
+ // is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
250
250
  // if the input is shorter than that many chunks. The reason for maintaining a
251
251
  // wide array of chaining values going back up the tree, is to allow the
252
252
  // implementation to hash as many parents in parallel as possible.
@@ -254,7 +254,7 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
254
254
  // As a special case when the SIMD degree is 1, this function will still return
255
255
  // at least 2 outputs. This guarantees that this function doesn't perform the
256
256
  // root compression. (If it did, it would use the wrong flags, and also we
257
- // wouldn't be able to implement exendable ouput.) Note that this function is
257
+ // wouldn't be able to implement extendable output.) Note that this function is
258
258
  // not used when the whole input is only 1 chunk long; that's a different
259
259
  // codepath.
260
260
  //
@@ -609,3 +609,8 @@ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
609
609
  }
610
610
  output_root_bytes(&output, seek, out, out_len);
611
611
  }
612
+
613
+ void blake3_hasher_reset(blake3_hasher *self) {
614
+ chunk_state_reset(&self->chunk, self->key, 0);
615
+ self->cv_stack_len = 0;
616
+ }
@@ -8,7 +8,7 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
- #define BLAKE3_VERSION_STRING "1.2.0"
11
+ #define BLAKE3_VERSION_STRING "1.3.3"
12
12
  #define BLAKE3_KEY_LEN 32
13
13
  #define BLAKE3_OUT_LEN 32
14
14
  #define BLAKE3_BLOCK_LEN 64
@@ -51,6 +51,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
51
51
  size_t out_len);
52
52
  void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
53
53
  uint8_t *out, size_t out_len);
54
+ void blake3_hasher_reset(blake3_hasher *self);
54
55
 
55
56
  #ifdef __cplusplus
56
57
  }
@@ -208,7 +208,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
208
208
  out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
209
209
  out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
210
210
  for (size_t i = 0; i < 8; ++i) {
211
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
211
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
212
212
  }
213
213
  transpose_vecs(&out[0]);
214
214
  transpose_vecs(&out[8]);
@@ -219,14 +219,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
219
219
  const __m256i mask = _mm256_set1_epi32(-(int32_t)increment_counter);
220
220
  const __m256i add0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
221
221
  const __m256i add1 = _mm256_and_si256(mask, add0);
222
- __m256i l = _mm256_add_epi32(_mm256_set1_epi32(counter), add1);
222
+ __m256i l = _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), add1);
223
223
  __m256i carry = _mm256_cmpgt_epi32(_mm256_xor_si256(add1, _mm256_set1_epi32(0x80000000)),
224
224
  _mm256_xor_si256( l, _mm256_set1_epi32(0x80000000)));
225
- __m256i h = _mm256_sub_epi32(_mm256_set1_epi32(counter >> 32), carry);
225
+ __m256i h = _mm256_sub_epi32(_mm256_set1_epi32((int32_t)(counter >> 32)), carry);
226
226
  *out_lo = l;
227
227
  *out_hi = h;
228
228
  }
229
229
 
230
+ static
230
231
  void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
231
232
  const uint32_t key[8], uint64_t counter,
232
233
  bool increment_counter, uint8_t flags,
@@ -468,7 +468,7 @@ INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
468
468
  out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
469
469
  out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
470
470
  for (size_t i = 0; i < 4; ++i) {
471
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
471
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
472
472
  }
473
473
  transpose_vecs_128(&out[0]);
474
474
  transpose_vecs_128(&out[4]);
@@ -488,6 +488,7 @@ INLINE void load_counters4(uint64_t counter, bool increment_counter,
488
488
  *out_hi = _mm256_cvtepi64_epi32(_mm256_srli_epi64(counters, 32));
489
489
  }
490
490
 
491
+ static
491
492
  void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks,
492
493
  const uint32_t key[8], uint64_t counter,
493
494
  bool increment_counter, uint8_t flags,
@@ -724,7 +725,7 @@ INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
724
725
  out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
725
726
  out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
726
727
  for (size_t i = 0; i < 8; ++i) {
727
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
728
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
728
729
  }
729
730
  transpose_vecs_256(&out[0]);
730
731
  transpose_vecs_256(&out[8]);
@@ -742,6 +743,7 @@ INLINE void load_counters8(uint64_t counter, bool increment_counter,
742
743
  *out_hi = _mm512_cvtepi64_epi32(_mm512_srli_epi64(counters, 32));
743
744
  }
744
745
 
746
+ static
745
747
  void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks,
746
748
  const uint32_t key[8], uint64_t counter,
747
749
  bool increment_counter, uint8_t flags,
@@ -1037,7 +1039,7 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
1037
1039
  out[14] = loadu_512(&inputs[14][block_offset]);
1038
1040
  out[15] = loadu_512(&inputs[15][block_offset]);
1039
1041
  for (size_t i = 0; i < 16; ++i) {
1040
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
1042
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
1041
1043
  }
1042
1044
  transpose_vecs_512(out);
1043
1045
  }
@@ -1045,15 +1047,29 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
1045
1047
  INLINE void load_counters16(uint64_t counter, bool increment_counter,
1046
1048
  __m512i *out_lo, __m512i *out_hi) {
1047
1049
  const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter);
1048
- const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1049
- const __m512i add1 = _mm512_and_si512(mask, add0);
1050
- __m512i l = _mm512_add_epi32(_mm512_set1_epi32(counter), add1);
1051
- __mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT);
1052
- __m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32(counter >> 32), carry, _mm512_set1_epi32(counter >> 32), _mm512_set1_epi32(1));
1053
- *out_lo = l;
1054
- *out_hi = h;
1050
+ const __m512i deltas = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1051
+ const __m512i masked_deltas = _mm512_and_si512(deltas, mask);
1052
+ const __m512i low_words = _mm512_add_epi32(
1053
+ _mm512_set1_epi32((int32_t)counter),
1054
+ masked_deltas);
1055
+ // The carry bit is 1 if the high bit of the word was 1 before addition and is
1056
+ // 0 after.
1057
+ // NOTE: It would be a bit more natural to use _mm512_cmp_epu32_mask to
1058
+ // compute the carry bits here, and originally we did, but that intrinsic is
1059
+ // broken under GCC 5.4. See https://github.com/BLAKE3-team/BLAKE3/issues/271.
1060
+ const __m512i carries = _mm512_srli_epi32(
1061
+ _mm512_andnot_si512(
1062
+ low_words, // 0 after (gets inverted by andnot)
1063
+ _mm512_set1_epi32((int32_t)counter)), // and 1 before
1064
+ 31);
1065
+ const __m512i high_words = _mm512_add_epi32(
1066
+ _mm512_set1_epi32((int32_t)(counter >> 32)),
1067
+ carries);
1068
+ *out_lo = low_words;
1069
+ *out_hi = high_words;
1055
1070
  }
1056
1071
 
1072
+ static
1057
1073
  void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks,
1058
1074
  const uint32_t key[8], uint64_t counter,
1059
1075
  bool increment_counter, uint8_t flags,
@@ -10,14 +10,14 @@
10
10
  #elif defined(__GNUC__)
11
11
  #include <immintrin.h>
12
12
  #else
13
- #error "Unimplemented!"
13
+ #undef IS_X86 /* Unimplemented! */
14
14
  #endif
15
15
  #endif
16
16
 
17
17
  #define MAYBE_UNUSED(x) (void)((x))
18
18
 
19
19
  #if defined(IS_X86)
20
- static uint64_t xgetbv() {
20
+ static uint64_t xgetbv(void) {
21
21
  #if defined(_MSC_VER)
22
22
  return _xgetbv(0);
23
23
  #else
@@ -82,7 +82,7 @@ static /* Allow the variable to be controlled manually for testing */
82
82
  static
83
83
  #endif
84
84
  enum cpu_feature
85
- get_cpu_features() {
85
+ get_cpu_features(void) {
86
86
 
87
87
  if (g_cpu_features != UNDEFINED) {
88
88
  return g_cpu_features;
@@ -46,7 +46,6 @@ enum blake3_flags {
46
46
  #if defined(_MSC_VER)
47
47
  #include <intrin.h>
48
48
  #endif
49
- #include <immintrin.h>
50
49
  #endif
51
50
 
52
51
  #if !defined(BLAKE3_USE_NEON)
@@ -96,11 +95,11 @@ static unsigned int highest_one(uint64_t x) {
96
95
  #elif defined(_MSC_VER) && defined(IS_X86_32)
97
96
  if(x >> 32) {
98
97
  unsigned long index;
99
- _BitScanReverse(&index, x >> 32);
98
+ _BitScanReverse(&index, (unsigned long)(x >> 32));
100
99
  return 32 + index;
101
100
  } else {
102
101
  unsigned long index;
103
- _BitScanReverse(&index, x);
102
+ _BitScanReverse(&index, (unsigned long)x);
104
103
  return index;
105
104
  }
106
105
  #else
@@ -78,7 +78,7 @@ INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
78
78
  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
79
79
  }
80
80
 
81
- INLINE __m128i blend_epi16(__m128i a, __m128i b, const int imm8) {
81
+ INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) {
82
82
  const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
83
83
  __m128i mask = _mm_set1_epi16(imm8);
84
84
  mask = _mm_and_si128(mask, bits);
@@ -435,7 +435,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
435
435
  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
436
436
  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
437
437
  for (size_t i = 0; i < 4; ++i) {
438
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
438
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
439
439
  }
440
440
  transpose_vecs(&out[0]);
441
441
  transpose_vecs(&out[4]);
@@ -448,14 +448,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
448
448
  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
449
449
  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
450
450
  const __m128i add1 = _mm_and_si128(mask, add0);
451
- __m128i l = _mm_add_epi32(_mm_set1_epi32(counter), add1);
451
+ __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
452
452
  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
453
453
  _mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
454
- __m128i h = _mm_sub_epi32(_mm_set1_epi32(counter >> 32), carry);
454
+ __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
455
455
  *out_lo = l;
456
456
  *out_hi = h;
457
457
  }
458
458
 
459
+ static
459
460
  void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks,
460
461
  const uint32_t key[8], uint64_t counter,
461
462
  bool increment_counter, uint8_t flags,
@@ -429,7 +429,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
429
429
  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
430
430
  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
431
431
  for (size_t i = 0; i < 4; ++i) {
432
- _mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
432
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
433
433
  }
434
434
  transpose_vecs(&out[0]);
435
435
  transpose_vecs(&out[4]);
@@ -442,14 +442,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
442
442
  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
443
443
  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
444
444
  const __m128i add1 = _mm_and_si128(mask, add0);
445
- __m128i l = _mm_add_epi32(_mm_set1_epi32(counter), add1);
445
+ __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
446
446
  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
447
447
  _mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
448
- __m128i h = _mm_sub_epi32(_mm_set1_epi32(counter >> 32), carry);
448
+ __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
449
449
  *out_lo = l;
450
450
  *out_hi = h;
451
451
  }
452
452
 
453
+ static
453
454
  void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
454
455
  const uint32_t key[8], uint64_t counter,
455
456
  bool increment_counter, uint8_t flags,
@@ -34,10 +34,12 @@ def check_supported_flags(flags, obj_if_enabled, def_if_disabled)
34
34
  end
35
35
  end
36
36
 
37
- check_supported_flags("-msse2", "blake3_sse2.o", "-DBLAKE3_NO_SSE2")
38
- check_supported_flags("-msse4.1", "blake3_sse41.o", "-DBLAKE3_NO_SSE41")
39
- check_supported_flags("-mavx2", "blake3_avx2.o", "-DBLAKE3_NO_AVX2")
40
- check_supported_flags("-mavx512f -mavx512vl -mavx512bw", "blake3_avx512.o", "-DBLAKE3_NO_AVX512")
37
+ unless RUBY_PLATFORM.include? 'arm64' or RUBY_PLATFORM.include? 'aarch64'
38
+ check_supported_flags("-msse2", "blake3_sse2.o", "-DBLAKE3_NO_SSE2")
39
+ check_supported_flags("-msse4.1", "blake3_sse41.o", "-DBLAKE3_NO_SSE41")
40
+ check_supported_flags("-mavx2", "blake3_avx2.o", "-DBLAKE3_NO_AVX2")
41
+ check_supported_flags("-mavx512f -mavx512vl -mavx512bw", "blake3_avx512.o", "-DBLAKE3_NO_AVX512")
42
+ end
41
43
 
42
44
  if have_header("arm_neon.h")
43
45
  $objs << "blake3_neon.o"
@@ -2,6 +2,6 @@ require 'digest'
2
2
 
3
3
  module Digest
4
4
  class BLAKE3 < Base
5
- VERSION = "1.2.0.0"
5
+ VERSION = "1.3.3.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-blake3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0.0
4
+ version: 1.3.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Bryant
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-07 00:00:00.000000000 Z
11
+ date: 2022-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,7 +100,7 @@ homepage: https://github.com/willbryant/digest-blake3
100
100
  licenses:
101
101
  - MIT
102
102
  metadata: {}
103
- post_install_message:
103
+ post_install_message:
104
104
  rdoc_options: []
105
105
  require_paths:
106
106
  - lib
@@ -116,8 +116,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  requirements: []
119
- rubygems_version: 3.0.3
120
- signing_key:
119
+ rubygems_version: 3.1.6
120
+ signing_key:
121
121
  specification_version: 4
122
122
  summary: BLAKE3 for Ruby
123
123
  test_files: []