oj 3.16.15 → 3.16.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/oj/dump.c +1 -1
- data/ext/oj/parse.c +28 -1
- data/ext/oj/parser.c +7 -0
- data/ext/oj/simd.h +22 -0
- data/lib/oj/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 07c7a6ebc6baf4f02b93760ed3d7440cdc9b2af85d0976e525f34978ab823c45
|
|
4
|
+
data.tar.gz: ed5a5a2e16f8df836a098de0da60657688049cfcf820d3142b4122302ae4e946
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d8f5c73fbc30a2985c2352e466f8ef9762f831d15b9c0a8b9394d66a5613910fd5aca4c4bdc49c40230128d791244ad67f6f9852c4778e89828a4358bb997c11
|
|
7
|
+
data.tar.gz: dbc5fe50d1ad89cb23918c950ac849052a95defb7e5509d7653236eb3569c811685e2c4c6546a8ab38c5a346aad0ccdd9770c855bc53f96c0c5312c7d61fab3e
|
data/CHANGELOG.md
CHANGED
data/ext/oj/dump.c
CHANGED
|
@@ -206,7 +206,7 @@ void initialize_neon(void) {
|
|
|
206
206
|
static __m128i hibit_friendly_chars_sse42[8];
|
|
207
207
|
|
|
208
208
|
// From: https://stackoverflow.com/questions/36998538/fastest-way-to-horizontally-sum-sse-unsigned-byte-vector
|
|
209
|
-
inline uint32_t _mm_sum_epu8(const __m128i v) {
|
|
209
|
+
inline static OJ_TARGET_SSE42 uint32_t _mm_sum_epu8(const __m128i v) {
|
|
210
210
|
__m128i vsum = _mm_sad_epu8(v, _mm_setzero_si128());
|
|
211
211
|
return _mm_cvtsi128_si32(vsum) + _mm_extract_epi16(vsum, 4);
|
|
212
212
|
}
|
data/ext/oj/parse.c
CHANGED
|
@@ -199,6 +199,31 @@ static inline const char *scan_string_noSIMD(const char *str, const char *end) {
|
|
|
199
199
|
return str;
|
|
200
200
|
}
|
|
201
201
|
|
|
202
|
+
#ifdef HAVE_SIMD_NEON
|
|
203
|
+
|
|
204
|
+
static inline const char *string_scan_neon(const char *str, const char *end) {
|
|
205
|
+
const uint8x16_t null_char = vdupq_n_u8(0);
|
|
206
|
+
const uint8x16_t backslash = vdupq_n_u8('\\');
|
|
207
|
+
const uint8x16_t quote = vdupq_n_u8('"');
|
|
208
|
+
|
|
209
|
+
while (str + sizeof(uint8x16_t) <= end) {
|
|
210
|
+
uint8x16_t chunk = vld1q_u8((const uint8_t *)str);
|
|
211
|
+
uint8x16_t tmp = vorrq_u8(vorrq_u8(vceqq_u8(chunk, null_char), vceqq_u8(chunk, backslash)),
|
|
212
|
+
vceqq_u8(chunk, quote));
|
|
213
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(tmp), 4);
|
|
214
|
+
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
|
215
|
+
if (mask != 0) {
|
|
216
|
+
mask &= 0x8888888888888888ull;
|
|
217
|
+
return str + (OJ_CTZ64(mask) >> 2);
|
|
218
|
+
}
|
|
219
|
+
str += sizeof(uint8x16_t);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
return scan_string_noSIMD(str, end);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
#endif
|
|
226
|
+
|
|
202
227
|
#ifdef HAVE_SIMD_SSE4_2
|
|
203
228
|
// Optimized SIMD string scanner using SSE4.2 instructions
|
|
204
229
|
// Uses prefetching and processes multiple chunks in parallel to reduce latency
|
|
@@ -357,10 +382,12 @@ void oj_scanner_init(void) {
|
|
|
357
382
|
#endif
|
|
358
383
|
#ifdef HAVE_SIMD_SSE2
|
|
359
384
|
case SIMD_SSE2: scan_func = scan_string_SSE2; break;
|
|
385
|
+
#endif
|
|
386
|
+
#ifdef HAVE_SIMD_NEON
|
|
387
|
+
case SIMD_NEON: scan_func = string_scan_neon; break;
|
|
360
388
|
#endif
|
|
361
389
|
default: scan_func = scan_string_noSIMD; break;
|
|
362
390
|
}
|
|
363
|
-
// Note: ARM NEON string scanning would be added here if needed
|
|
364
391
|
}
|
|
365
392
|
|
|
366
393
|
// entered at /
|
data/ext/oj/parser.c
CHANGED
|
@@ -1371,6 +1371,13 @@ static VALUE parser_parse(VALUE self, VALUE json) {
|
|
|
1371
1371
|
p->start(p);
|
|
1372
1372
|
parse(p, ptr);
|
|
1373
1373
|
|
|
1374
|
+
if (0 < p->depth) {
|
|
1375
|
+
if (OBJECT_FUN == p->stack[p->depth]) {
|
|
1376
|
+
parse_error(p, "Object is not closed");
|
|
1377
|
+
} else {
|
|
1378
|
+
parse_error(p, "Array is not closed");
|
|
1379
|
+
}
|
|
1380
|
+
}
|
|
1374
1381
|
return p->result(p);
|
|
1375
1382
|
}
|
|
1376
1383
|
|
data/ext/oj/simd.h
CHANGED
|
@@ -40,14 +40,26 @@ SIMD_Implementation oj_get_simd_implementation(void);
|
|
|
40
40
|
// Count trailing zeros (for SSE2 mask scanning)
|
|
41
41
|
#if defined(__GNUC__) || defined(__clang__)
|
|
42
42
|
#define OJ_CTZ(x) __builtin_ctz(x)
|
|
43
|
+
#define OJ_CTZ64(x) __builtin_ctzll(x)
|
|
43
44
|
#elif defined(_MSC_VER)
|
|
44
45
|
#include <intrin.h>
|
|
45
46
|
static __inline int oj_ctz_msvc(unsigned int x) {
|
|
46
47
|
unsigned long index;
|
|
48
|
+
if (0 == x) {
|
|
49
|
+
return 32;
|
|
50
|
+
}
|
|
47
51
|
_BitScanForward(&index, x);
|
|
48
52
|
return (int)index;
|
|
49
53
|
}
|
|
54
|
+
static __inline int oj_ctz64_msvc(uint64_t x) {
|
|
55
|
+
unsigned long index;
|
|
56
|
+
if (_BitScanForward64(&index, x)) {
|
|
57
|
+
return (int)index;
|
|
58
|
+
}
|
|
59
|
+
return 64;
|
|
60
|
+
}
|
|
50
61
|
#define OJ_CTZ(x) oj_ctz_msvc(x)
|
|
62
|
+
#define OJ_CTZ64(x) oj_ctz64_msvc(x)
|
|
51
63
|
#else
|
|
52
64
|
// Fallback: naive implementation
|
|
53
65
|
static inline int oj_ctz_fallback(unsigned int x) {
|
|
@@ -58,7 +70,17 @@ static inline int oj_ctz_fallback(unsigned int x) {
|
|
|
58
70
|
}
|
|
59
71
|
return count;
|
|
60
72
|
}
|
|
73
|
+
|
|
74
|
+
static inline int oj_ctz64_fallback(uint64_t x) {
|
|
75
|
+
int count = 0;
|
|
76
|
+
while ((x & 1) == 0 && count < 64) {
|
|
77
|
+
x >>= 1;
|
|
78
|
+
count++;
|
|
79
|
+
}
|
|
80
|
+
return count;
|
|
81
|
+
}
|
|
61
82
|
#define OJ_CTZ(x) oj_ctz_fallback(x)
|
|
83
|
+
#define OJ_CTZ64(x) oj_ctz64_fallback(x)
|
|
62
84
|
#endif
|
|
63
85
|
|
|
64
86
|
// =============================================================================
|
data/lib/oj/version.rb
CHANGED