yencode 1.2.1 → 1.2.3
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/binding.gyp +2 -2
- package/package.json +1 -1
- package/src/crc.cc +1 -1
- package/src/crc.h +1 -1
- package/src/crc_arm_pmull.cc +1 -1
- package/src/decoder_rvv.cc +2 -1
- package/src/encoder_avx_base.h +4 -4
- package/src/encoder_sse_base.h +1 -1
- package/src/platform.cc +3 -7
- /package/src/{test_alignalloc.c → test_alignalloc.cc} +0 -0
package/binding.gyp
CHANGED
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
}],
|
|
51
51
|
['OS!="win"', {
|
|
52
52
|
"variables": {
|
|
53
|
-
"missing_memalign%": "<!(<!(echo ${
|
|
53
|
+
"missing_memalign%": "<!(<!(echo ${CXX_target:-${CXX:-c++}}) -c src/test_alignalloc.cc -o /dev/null -Werror 2>/dev/null || echo failed)",
|
|
54
54
|
},
|
|
55
55
|
"conditions": [
|
|
56
56
|
['missing_memalign!=""', {
|
|
@@ -60,7 +60,7 @@
|
|
|
60
60
|
}]
|
|
61
61
|
],
|
|
62
62
|
"cflags": ["-Wno-unused-function"],
|
|
63
|
-
"cxxflags": ["-Wno-unused-function", "-std=c++03"
|
|
63
|
+
"cxxflags": ["-Wno-unused-function", "-std=c++03"],
|
|
64
64
|
"xcode_settings": {
|
|
65
65
|
"OTHER_CFLAGS": ["-Wno-unused-function"],
|
|
66
66
|
"OTHER_CXXFLAGS": ["-Wno-unused-function"]
|
package/package.json
CHANGED
package/src/crc.cc
CHANGED
|
@@ -206,7 +206,7 @@ namespace RapidYenc {
|
|
|
206
206
|
# elif defined(__has_include)
|
|
207
207
|
# if __has_include(<sys/auxv.h>)
|
|
208
208
|
# include <sys/auxv.h>
|
|
209
|
-
#
|
|
209
|
+
# if defined(__FreeBSD__) || defined(__OpenBSD__)
|
|
210
210
|
static unsigned long getauxval(unsigned long cap) {
|
|
211
211
|
unsigned long ret;
|
|
212
212
|
elf_aux_info(cap, &ret, sizeof(ret));
|
package/src/crc.h
CHANGED
|
@@ -23,7 +23,7 @@ static inline int crc32_isa_level() {
|
|
|
23
23
|
|
|
24
24
|
// computes `n % 0xffffffff` (well, almost), using some bit-hacks
|
|
25
25
|
static inline uint32_t crc32_powmod(uint64_t n) {
|
|
26
|
-
#
|
|
26
|
+
#if defined(__GNUC__) && (__GNUC__ >= 5 || (defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ > 3))))
|
|
27
27
|
unsigned res;
|
|
28
28
|
unsigned carry = __builtin_uadd_overflow(n >> 32, n, &res);
|
|
29
29
|
res += carry;
|
package/src/crc_arm_pmull.cc
CHANGED
|
@@ -207,7 +207,7 @@ static uint32_t crc32_shift_pmull(uint32_t crc1, uint32_t n) {
|
|
|
207
207
|
void RapidYenc::crc_pmull_set_funcs() {
|
|
208
208
|
_crc32_multiply = &crc32_multiply_pmull;
|
|
209
209
|
_crc32_shift = &crc32_shift_pmull;
|
|
210
|
-
_crc32_isa
|
|
210
|
+
_crc32_isa |= ISA_FEATURE_PMULL;
|
|
211
211
|
}
|
|
212
212
|
|
|
213
213
|
#else
|
package/src/decoder_rvv.cc
CHANGED
|
@@ -209,7 +209,8 @@ HEDLEY_ALWAYS_INLINE void do_decode_rvv(const uint8_t* src, long& len, unsigned
|
|
|
209
209
|
if(LIKELIHOOD(0.0001, RV(vcpop_m_b4)(RV(vmandn_mm_b4)(cmpEqShift1, cmp, vl2), vl2) != 0)) {
|
|
210
210
|
// replicate fix_eqMask, but in vector form
|
|
211
211
|
vbool4_t groupStart = RV(vmandn_mm_b4)(cmpEq, cmpEqShift1, vl2);
|
|
212
|
-
|
|
212
|
+
vuint8m1_t evenBitsV = RV(vmv_v_x_u8m1)(0x55, vl2);
|
|
213
|
+
vbool4_t evenBits = RV_MASK_CAST(4, 8, evenBitsV);
|
|
213
214
|
vbool4_t evenStart = RV(vmand_mm_b4)(groupStart, evenBits, vl2);
|
|
214
215
|
|
|
215
216
|
// compute `cmpEq + evenStart` to obtain oddGroups
|
package/src/encoder_avx_base.h
CHANGED
|
@@ -217,12 +217,12 @@ HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const ui
|
|
|
217
217
|
// duplicate halves
|
|
218
218
|
data1A = _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1);
|
|
219
219
|
data1B = _mm256_inserti128_si256(dataB, _mm256_castsi256_si128(dataB), 1);
|
|
220
|
-
#if defined(
|
|
221
|
-
data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11);
|
|
222
|
-
data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11);
|
|
223
|
-
#else
|
|
220
|
+
#if defined(__tune_znver1__) || defined(__tune_bdver4__)
|
|
224
221
|
data2A = _mm256_permute4x64_epi64(dataA, 0xee);
|
|
225
222
|
data2B = _mm256_permute4x64_epi64(dataB, 0xee);
|
|
223
|
+
#else
|
|
224
|
+
data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11);
|
|
225
|
+
data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11);
|
|
226
226
|
#endif
|
|
227
227
|
|
|
228
228
|
shuf1A = _mm256_load_si256(lookupsAVX2->shufExpand + m1);
|
package/src/encoder_sse_base.h
CHANGED
|
@@ -351,7 +351,7 @@ HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uin
|
|
|
351
351
|
#if defined(__POPCNT__) && !defined(__tune_btver1__)
|
|
352
352
|
if(use_isa & ISA_FEATURE_POPCNT) {
|
|
353
353
|
shuf2Len = popcnt32(maskA) + 16;
|
|
354
|
-
# if defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__)
|
|
354
|
+
# if defined(__tune_znver6__) || defined(__tune_znver5__) || defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__)
|
|
355
355
|
shuf1Len = popcnt32(m1) + 8;
|
|
356
356
|
shuf3Len = popcnt32(m3) + shuf2Len + 8;
|
|
357
357
|
# else
|
package/src/platform.cc
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
# endif
|
|
20
20
|
bool RapidYenc::cpu_supports_neon() {
|
|
21
21
|
# if defined(AT_HWCAP)
|
|
22
|
-
#
|
|
22
|
+
# if defined(__FreeBSD__) || defined(__OpenBSD__)
|
|
23
23
|
unsigned long supported;
|
|
24
24
|
elf_aux_info(AT_HWCAP, &supported, sizeof(supported));
|
|
25
25
|
# ifdef __aarch64__
|
|
@@ -139,11 +139,7 @@ int RapidYenc::cpu_supports_isa() {
|
|
|
139
139
|
if(cpuInfo2[3] & 0x80000) {
|
|
140
140
|
_cpuidX(cpuInfo2, 0x24, 0);
|
|
141
141
|
if((cpuInfo2[1] & 0xff) >= 1 && ( // minimum AVX10.1
|
|
142
|
-
|
|
143
|
-
cpuInfo2[1] & 0x10000 // AVX10/128
|
|
144
|
-
#else
|
|
145
|
-
cpuInfo2[1] & 0x20000 // AVX10/256
|
|
146
|
-
#endif
|
|
142
|
+
cpuInfo2[1] & 0x20000 // AVX10/256 (AVX10/128 is now invalid)
|
|
147
143
|
)) {
|
|
148
144
|
if(cpuInfo2[1] & 0x40000) ret |= ISA_FEATURE_EVEX512;
|
|
149
145
|
return ret | ISA_LEVEL_VBMI2;
|
|
@@ -204,7 +200,7 @@ int RapidYenc::cpu_supports_crc_isa() {
|
|
|
204
200
|
bool RapidYenc::cpu_supports_rvv() {
|
|
205
201
|
# if defined(AT_HWCAP)
|
|
206
202
|
unsigned long ret;
|
|
207
|
-
#
|
|
203
|
+
# if defined(__FreeBSD__) || defined(__OpenBSD__)
|
|
208
204
|
elf_aux_info(AT_HWCAP, &ret, sizeof(ret));
|
|
209
205
|
# else
|
|
210
206
|
ret = getauxval(AT_HWCAP);
|
|
File without changes
|