yencode 1.2.1 → 1.2.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/binding.gyp CHANGED
@@ -50,7 +50,7 @@
50
50
  }],
51
51
  ['OS!="win"', {
52
52
  "variables": {
53
- "missing_memalign%": "<!(<!(echo ${CC_target:-${CC:-cc}}) -c src/test_alignalloc.c -o /dev/null -Werror 2>/dev/null || echo failed)",
53
+ "missing_memalign%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -c src/test_alignalloc.cc -o /dev/null -Werror 2>/dev/null || echo failed)",
54
54
  },
55
55
  "conditions": [
56
56
  ['missing_memalign!=""', {
@@ -60,7 +60,7 @@
60
60
  }]
61
61
  ],
62
62
  "cflags": ["-Wno-unused-function"],
63
- "cxxflags": ["-Wno-unused-function", "-std=c++03", "-D_POSIX_C_SOURCE=200112L"],
63
+ "cxxflags": ["-Wno-unused-function", "-std=c++03"],
64
64
  "xcode_settings": {
65
65
  "OTHER_CFLAGS": ["-Wno-unused-function"],
66
66
  "OTHER_CXXFLAGS": ["-Wno-unused-function"]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yencode",
3
- "version": "1.2.1",
3
+ "version": "1.2.3",
4
4
  "description": "SIMD accelerated yEnc encoder/decoder and CRC32 calculator",
5
5
  "keywords": [
6
6
  "yenc",
package/src/crc.cc CHANGED
@@ -206,7 +206,7 @@ namespace RapidYenc {
206
206
  # elif defined(__has_include)
207
207
  # if __has_include(<sys/auxv.h>)
208
208
  # include <sys/auxv.h>
209
- # ifdef __FreeBSD__
209
+ # if defined(__FreeBSD__) || defined(__OpenBSD__)
210
210
  static unsigned long getauxval(unsigned long cap) {
211
211
  unsigned long ret;
212
212
  elf_aux_info(cap, &ret, sizeof(ret));
package/src/crc.h CHANGED
@@ -23,7 +23,7 @@ static inline int crc32_isa_level() {
23
23
 
24
24
  // computes `n % 0xffffffff` (well, almost), using some bit-hacks
25
25
  static inline uint32_t crc32_powmod(uint64_t n) {
26
- #ifdef __GNUC__
26
+ #if defined(__GNUC__) && (__GNUC__ >= 5 || (defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ > 3))))
27
27
  unsigned res;
28
28
  unsigned carry = __builtin_uadd_overflow(n >> 32, n, &res);
29
29
  res += carry;
@@ -207,7 +207,7 @@ static uint32_t crc32_shift_pmull(uint32_t crc1, uint32_t n) {
207
207
  void RapidYenc::crc_pmull_set_funcs() {
208
208
  _crc32_multiply = &crc32_multiply_pmull;
209
209
  _crc32_shift = &crc32_shift_pmull;
210
- _crc32_isa &= ISA_FEATURE_PMULL;
210
+ _crc32_isa |= ISA_FEATURE_PMULL;
211
211
  }
212
212
 
213
213
  #else
@@ -209,7 +209,8 @@ HEDLEY_ALWAYS_INLINE void do_decode_rvv(const uint8_t* src, long& len, unsigned
209
209
  if(LIKELIHOOD(0.0001, RV(vcpop_m_b4)(RV(vmandn_mm_b4)(cmpEqShift1, cmp, vl2), vl2) != 0)) {
210
210
  // replicate fix_eqMask, but in vector form
211
211
  vbool4_t groupStart = RV(vmandn_mm_b4)(cmpEq, cmpEqShift1, vl2);
212
- vbool4_t evenBits = RV_MASK_CAST(4, 8, RV(vmv_v_x_u8m1)(0x55, vl2));
212
+ vuint8m1_t evenBitsV = RV(vmv_v_x_u8m1)(0x55, vl2);
213
+ vbool4_t evenBits = RV_MASK_CAST(4, 8, evenBitsV);
213
214
  vbool4_t evenStart = RV(vmand_mm_b4)(groupStart, evenBits, vl2);
214
215
 
215
216
  // compute `cmpEq + evenStart` to obtain oddGroups
@@ -217,12 +217,12 @@ HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const ui
217
217
  // duplicate halves
218
218
  data1A = _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1);
219
219
  data1B = _mm256_inserti128_si256(dataB, _mm256_castsi256_si128(dataB), 1);
220
- #if defined(__tune_znver2__) || defined(__tune_znver3__) || defined(__tune_znver4__)
221
- data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11);
222
- data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11);
223
- #else
220
+ #if defined(__tune_znver1__) || defined(__tune_bdver4__)
224
221
  data2A = _mm256_permute4x64_epi64(dataA, 0xee);
225
222
  data2B = _mm256_permute4x64_epi64(dataB, 0xee);
223
+ #else
224
+ data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11);
225
+ data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11);
226
226
  #endif
227
227
 
228
228
  shuf1A = _mm256_load_si256(lookupsAVX2->shufExpand + m1);
@@ -351,7 +351,7 @@ HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uin
351
351
  #if defined(__POPCNT__) && !defined(__tune_btver1__)
352
352
  if(use_isa & ISA_FEATURE_POPCNT) {
353
353
  shuf2Len = popcnt32(maskA) + 16;
354
- # if defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__)
354
+ # if defined(__tune_znver6__) || defined(__tune_znver5__) || defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__)
355
355
  shuf1Len = popcnt32(m1) + 8;
356
356
  shuf3Len = popcnt32(m3) + shuf2Len + 8;
357
357
  # else
package/src/platform.cc CHANGED
@@ -19,7 +19,7 @@
19
19
  # endif
20
20
  bool RapidYenc::cpu_supports_neon() {
21
21
  # if defined(AT_HWCAP)
22
- # ifdef __FreeBSD__
22
+ # if defined(__FreeBSD__) || defined(__OpenBSD__)
23
23
  unsigned long supported;
24
24
  elf_aux_info(AT_HWCAP, &supported, sizeof(supported));
25
25
  # ifdef __aarch64__
@@ -139,11 +139,7 @@ int RapidYenc::cpu_supports_isa() {
139
139
  if(cpuInfo2[3] & 0x80000) {
140
140
  _cpuidX(cpuInfo2, 0x24, 0);
141
141
  if((cpuInfo2[1] & 0xff) >= 1 && ( // minimum AVX10.1
142
- #ifdef YENC_DISABLE_AVX256
143
- cpuInfo2[1] & 0x10000 // AVX10/128
144
- #else
145
- cpuInfo2[1] & 0x20000 // AVX10/256
146
- #endif
142
+ cpuInfo2[1] & 0x20000 // AVX10/256 (AVX10/128 is now invalid)
147
143
  )) {
148
144
  if(cpuInfo2[1] & 0x40000) ret |= ISA_FEATURE_EVEX512;
149
145
  return ret | ISA_LEVEL_VBMI2;
@@ -204,7 +200,7 @@ int RapidYenc::cpu_supports_crc_isa() {
204
200
  bool RapidYenc::cpu_supports_rvv() {
205
201
  # if defined(AT_HWCAP)
206
202
  unsigned long ret;
207
- # ifdef __FreeBSD__
203
+ # if defined(__FreeBSD__) || defined(__OpenBSD__)
208
204
  elf_aux_info(AT_HWCAP, &ret, sizeof(ret));
209
205
  # else
210
206
  ret = getauxval(AT_HWCAP);
File without changes