json-as 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/assembly/deserialize/simd/string.ts +174 -78
- package/assembly/deserialize/simple/arbitrary.ts +1 -2
- package/assembly/deserialize/simple/object.ts +1 -1
- package/assembly/deserialize/simple/string.ts +31 -40
- package/assembly/deserialize/swar/string.ts +146 -109
- package/assembly/globals/tables.ts +1 -1
- package/assembly/index.ts +112 -163
- package/assembly/serialize/simd/string.ts +136 -39
- package/assembly/serialize/simple/arbitrary.ts +18 -10
- package/assembly/serialize/simple/string.ts +54 -4
- package/assembly/serialize/swar/string.ts +112 -69
- package/assembly/test.mask.ts +87 -0
- package/assembly/test.ts +38 -34
- package/assembly/util/masks.ts +47 -0
- package/assembly/util/swar.ts +14 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -413,6 +413,8 @@ The following charts compare JSON-AS (both SWAR and SIMD variants) against JavaS
|
|
|
413
413
|
|
|
414
414
|
<img src="https://raw.githubusercontent.com/JairusSW/json-as/refs/heads/docs/charts/chart03.png" alt="Performance Chart 3">
|
|
415
415
|
|
|
416
|
+
<img src="https://raw.githubusercontent.com/JairusSW/json-as/refs/heads/docs/charts/chart04.png" alt="Performance Chart 4">
|
|
417
|
+
|
|
416
418
|
> Note: I have focused on extensively optimizing serialization. Deserialization used to be highly unsafe and extremely fast, but I have since doubled down on safety there, which has negatively affected its performance. I will be optimizing it soon.
|
|
417
419
|
|
|
418
420
|
### Performance Tuning
|
|
@@ -1,6 +1,57 @@
|
|
|
1
1
|
import { bs } from "../../../lib/as-bs";
|
|
2
2
|
import { BACK_SLASH } from "../../custom/chars";
|
|
3
3
|
import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables";
|
|
4
|
+
import { hex4_to_u16_swar } from "../../util/swar";
|
|
5
|
+
|
|
6
|
+
// @ts-ignore: decorator allowed
|
|
7
|
+
@lazy const SPLAT_5C = i16x8.splat(0x5C); // \
|
|
8
|
+
|
|
9
|
+
// Overflow Pattern for Unicode Escapes (READ)
|
|
10
|
+
// \u0001 0 \u0001__| + 0
|
|
11
|
+
// -\u0001 2 -\u0001_| + 0
|
|
12
|
+
// --\u0001 4 --\u0001| + 0
|
|
13
|
+
// ---\u0001 6 ---\u000|1 + 2
|
|
14
|
+
// ----\u0001 8 ----\u00|01 + 4
|
|
15
|
+
// -----\u0001 10 -----\u0|001 + 6
|
|
16
|
+
// ------\u0001 12 ------\u|0001 + 8
|
|
17
|
+
// -------\u0001 14 -------\|u0001 + 10
|
|
18
|
+
// Formula: overflow = max(0, lane - 4)
|
|
19
|
+
|
|
20
|
+
// Overflow Pattern for Unicode Escapes (WRITE)
|
|
21
|
+
// * = escape, _ = empty
|
|
22
|
+
// \u0001 0 *_______| - 14
|
|
23
|
+
// -\u0001 2 -*______| - 12
|
|
24
|
+
// --\u0001 4 --*_____| - 10
|
|
25
|
+
// ---\u0001 6 ---*____| - 8
|
|
26
|
+
// ----\u0001 8 ----*___| - 6
|
|
27
|
+
// -----\u0001 10 -----*__| - 4
|
|
28
|
+
// ------\u0001 12 ------*_| - 2
|
|
29
|
+
// -------\u0001 14 -------*| + 0
|
|
30
|
+
// Formula: overflow = lane - 14
|
|
31
|
+
|
|
32
|
+
// Overflow pattern for Short Escapes (READ)
|
|
33
|
+
// \n------ 0 \n------| - 12
|
|
34
|
+
// -\n----- 2 -\n-----| - 10
|
|
35
|
+
// --\n---- 4 --\n----| - 8
|
|
36
|
+
// ---\n--- 6 ---\n---| - 6
|
|
37
|
+
// ----\n-- 8 ----\n--| - 4
|
|
38
|
+
// -----\n- 10 -----\n-| - 2
|
|
39
|
+
// ------\n 12 ------\n| + 0
|
|
40
|
+
// -------\n 14 -------\|n + 2
|
|
41
|
+
// Formula: overflow = lane - 12
|
|
42
|
+
|
|
43
|
+
// Overflow pattern for Short Escapes (WRITE)
|
|
44
|
+
// * = escape, _ = empty
|
|
45
|
+
// \n------ 0 *_______| - 14
|
|
46
|
+
// -\n----- 2 -*______| - 12
|
|
47
|
+
// --\n---- 4 --*_____| - 10
|
|
48
|
+
// ---\n--- 6 ---*____| - 8
|
|
49
|
+
// ----\n-- 8 ----*___| - 6
|
|
50
|
+
// -----\n- 10 -----*__| - 4
|
|
51
|
+
// ------\n 12 ------*_| - 2
|
|
52
|
+
// -------\n 14 -------*| + 0
|
|
53
|
+
// Formula: overflow = lane - 14
|
|
54
|
+
|
|
4
55
|
|
|
5
56
|
/**
|
|
6
57
|
* Deserializes strings back into their original form using SIMD operations
|
|
@@ -9,92 +60,137 @@ import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables
|
|
|
9
60
|
* @returns number of bytes written
|
|
10
61
|
*/
|
|
11
62
|
// TODO: optimize. It works, but it's not pretty; ideally this would be (nearly) branchless.
|
|
12
|
-
export function deserializeString_SIMD(srcStart: usize, srcEnd: usize):
|
|
13
|
-
|
|
63
|
+
export function deserializeString_SIMD(srcStart: usize, srcEnd: usize): string {
|
|
64
|
+
// Strip quotes
|
|
14
65
|
srcStart += 2;
|
|
15
66
|
srcEnd -= 2;
|
|
67
|
+
const srcEnd16 = srcEnd - 16;
|
|
16
68
|
bs.ensureSize(u32(srcEnd - srcStart));
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
// console.log(" l: " + (lane_index).toString())
|
|
47
|
-
// store<u16>(dst_offset, escaped);
|
|
48
|
-
// v128.store(dst_offset, v128.load(src_offset, 4), 2);
|
|
49
|
-
// if (lane_index >= 6) {
|
|
50
|
-
// const bytes_left = lane_index - 4;
|
|
51
|
-
// srcStart += bytes_left;
|
|
52
|
-
// bs.offset += bytes_left;
|
|
53
|
-
// // console.log(" e: " + (bytes_left).toString())
|
|
54
|
-
// }
|
|
55
|
-
// bs.offset -= 10;
|
|
56
|
-
// } else {
|
|
57
|
-
// const escaped = load<u8>(DESERIALIZE_ESCAPE_TABLE + code);
|
|
58
|
-
// store<u16>(dst_offset, escaped);
|
|
59
|
-
// v128.store(dst_offset, v128.load(src_offset, 4), 2);
|
|
60
|
-
// // console.log("Escaped:");
|
|
61
|
-
// if (lane_index == 14) {
|
|
62
|
-
// srcStart += 2;
|
|
63
|
-
// } else {
|
|
64
|
-
// bs.offset -= 2;
|
|
65
|
-
// }
|
|
66
|
-
// }
|
|
67
|
-
// }
|
|
68
|
-
|
|
69
|
-
// srcStart += 16;
|
|
70
|
-
// bs.offset += 16;
|
|
71
|
-
|
|
72
|
-
// // console.log("src: " + (srcStart - changetype<usize>(src)).toString());
|
|
73
|
-
// // console.log("dst: " + (dst_ptr - dst).toString());
|
|
74
|
-
// }
|
|
75
|
-
while (srcStart < srcEnd) {
|
|
76
|
-
let code = load<u16>(srcStart);
|
|
77
|
-
if (code === BACK_SLASH) {
|
|
78
|
-
code = load<u16>(DESERIALIZE_ESCAPE_TABLE + load<u8>(srcStart, 2));
|
|
79
|
-
if (code === 117 && load<u32>(srcStart, 4) === 3145776) {
|
|
80
|
-
const block = load<u32>(srcStart, 8);
|
|
81
|
-
const codeA = block & 0xffff;
|
|
82
|
-
const codeB = (block >> 16) & 0xffff;
|
|
83
|
-
const escapedA = load<u8>(ESCAPE_HEX_TABLE + codeA);
|
|
84
|
-
const escapedB = load<u8>(ESCAPE_HEX_TABLE + codeB);
|
|
85
|
-
const escaped = (escapedA << 4) + escapedB;
|
|
69
|
+
|
|
70
|
+
while (srcStart < srcEnd16) {
|
|
71
|
+
const block = load<v128>(srcStart);
|
|
72
|
+
store<v128>(bs.offset, block);
|
|
73
|
+
|
|
74
|
+
const eq5C = i16x8.eq(load<v128>(srcStart), SPLAT_5C);
|
|
75
|
+
let mask = i16x8.bitmask(eq5C);
|
|
76
|
+
// Early exit
|
|
77
|
+
if (mask === 0) {
|
|
78
|
+
srcStart += 16;
|
|
79
|
+
bs.offset += 16;
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
let srcChg: usize = 0;
|
|
84
|
+
let lastLane: usize = 0;
|
|
85
|
+
do {
|
|
86
|
+
const laneIdx = usize(ctz(mask) << 1); // 0 2 4 6 8 10 12 14
|
|
87
|
+
mask &= mask - 1;
|
|
88
|
+
const srcIdx = srcStart + laneIdx;
|
|
89
|
+
const code = load<u16>(srcIdx, 2);
|
|
90
|
+
|
|
91
|
+
bs.offset += laneIdx - lastLane;
|
|
92
|
+
|
|
93
|
+
// Hot path (negative bias)
|
|
94
|
+
if (code !== 0x75) {
|
|
95
|
+
// Short escapes (\n \t \" \\)
|
|
96
|
+
const escaped = load<u16>(DESERIALIZE_ESCAPE_TABLE + code);
|
|
97
|
+
mask &= mask - i32(escaped === 0x5C);
|
|
86
98
|
store<u16>(bs.offset, escaped);
|
|
99
|
+
store<v128>(bs.offset, load<v128>(srcIdx, 4), 2);
|
|
100
|
+
|
|
101
|
+
const l6 = usize(laneIdx === 14);
|
|
102
|
+
// bs.offset -= (1 - l6) << 1;
|
|
87
103
|
bs.offset += 2;
|
|
88
|
-
srcStart +=
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
bs.offset += 2;
|
|
92
|
-
srcStart += 4;
|
|
104
|
+
srcStart += l6 << 1;
|
|
105
|
+
lastLane = laneIdx + 4;
|
|
106
|
+
continue;
|
|
93
107
|
}
|
|
94
|
-
|
|
95
|
-
|
|
108
|
+
|
|
109
|
+
// Unicode escape (\uXXXX)
|
|
110
|
+
const block = load<u64>(srcIdx, 4); // XXXX
|
|
111
|
+
const escaped = hex4_to_u16_swar(block);
|
|
112
|
+
|
|
113
|
+
store<u16>(bs.offset, escaped);
|
|
114
|
+
store<u64>(bs.offset, load<u64>(srcIdx, 12), 2);
|
|
115
|
+
|
|
116
|
+
bs.offset += 2;
|
|
117
|
+
if (laneIdx >= 6) {
|
|
118
|
+
srcStart += laneIdx - 4;
|
|
119
|
+
}
|
|
120
|
+
lastLane = laneIdx + 12;
|
|
121
|
+
} while (mask !== 0);
|
|
122
|
+
|
|
123
|
+
if (lastLane < 16) {
|
|
124
|
+
bs.offset += 16 - lastLane;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
srcStart += 16 + srcChg;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
while (srcStart < srcEnd) {
|
|
131
|
+
const block = load<u16>(srcStart);
|
|
132
|
+
store<u16>(bs.offset, block);
|
|
133
|
+
srcStart += 2;
|
|
134
|
+
|
|
135
|
+
// Early exit
|
|
136
|
+
if (block !== 0x5C) {
|
|
96
137
|
bs.offset += 2;
|
|
138
|
+
continue;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
const code = load<u16>(srcStart);
|
|
142
|
+
if (code !== 0x75) {
|
|
143
|
+
// Short escapes (\n \t \" \\)
|
|
144
|
+
const block = load<u16>(srcStart);
|
|
145
|
+
const escape = load<u16>(DESERIALIZE_ESCAPE_TABLE + block);
|
|
146
|
+
store<u16>(bs.offset, escape);
|
|
97
147
|
srcStart += 2;
|
|
148
|
+
} else {
|
|
149
|
+
// Unicode escape (\uXXXX)
|
|
150
|
+
const block = load<u64>(srcStart, 2); // XXXX
|
|
151
|
+
const escaped = hex4_to_u16_swar(block);
|
|
152
|
+
store<u16>(bs.offset, escaped);
|
|
153
|
+
srcStart += 10;
|
|
98
154
|
}
|
|
155
|
+
bs.offset += 2;
|
|
99
156
|
}
|
|
157
|
+
|
|
158
|
+
return bs.out<string>();
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Computes a per-lane mask identifying UTF-16 code units whose **low byte**
|
|
163
|
+
* is the ASCII backslash (`'\\'`, 0x5C).
|
|
164
|
+
*
|
|
165
|
+
* The mask is produced in two stages:
|
|
166
|
+
* 1. Detects bytes equal to 0x5C using a SWAR equality test.
|
|
167
|
+
* 2. Clears matches in lanes whose **high byte** is non-zero (e.g. U+015C, whose low byte is 0x5C),
|
|
168
|
+
* ensuring only valid low-byte backslashes are reported.
|
|
169
|
+
*
|
|
170
|
+
* Each matching lane sets itself to 0x80.
|
|
171
|
+
*/
|
|
172
|
+
// @ts-ignore: decorator
|
|
173
|
+
@inline function backslash_mask(block: u64): u64 {
|
|
174
|
+
const b = block ^ 0x005C_005C_005C_005C;
|
|
175
|
+
const backslash_mask = (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
|
|
176
|
+
const high_byte_mask =
|
|
177
|
+
~(((block - 0x0100_0100_0100_0100) & ~block & 0x8000_8000_8000_8000)
|
|
178
|
+
^ 0x8000_8000_8000_8000) >> 8;
|
|
179
|
+
return backslash_mask & high_byte_mask;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Computes a per-lane mask identifying UTF-16 code units whose **low byte**
|
|
184
|
+
* is the ASCII backslash (`'\\'`, 0x5C).
|
|
185
|
+
*
|
|
186
|
+
* Each matching lane sets itself to 0x80.
|
|
187
|
+
*
|
|
188
|
+
* WARNING: A code unit whose low byte is 0x5C but whose high byte is non-zero (e.g. U+015C) also matches, so false positives are possible!
|
|
189
|
+
* Use this on hot paths where the caller can cheaply reject false positives with a scalar check.
|
|
190
|
+
*/
|
|
191
|
+
// @ts-ignore: decorator
|
|
192
|
+
@inline function backslash_mask_unsafe(block: u64): u64 {
|
|
193
|
+
const b = block ^ 0x005C_005C_005C_005C;
|
|
194
|
+
const backslash_mask = (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
|
|
195
|
+
return backslash_mask;
|
|
100
196
|
}
|
|
@@ -9,7 +9,7 @@ import { BRACE_LEFT, BRACKET_LEFT, CHAR_N, QUOTE } from "../../custom/chars";
|
|
|
9
9
|
export function deserializeArbitrary(srcStart: usize, srcEnd: usize, dst: usize): JSON.Value {
|
|
10
10
|
const firstChar = load<u16>(srcStart);
|
|
11
11
|
if (firstChar == QUOTE) {
|
|
12
|
-
return JSON.Value.from(deserializeString(srcStart, srcEnd
|
|
12
|
+
return JSON.Value.from(deserializeString(srcStart, srcEnd));
|
|
13
13
|
} else if (firstChar == BRACE_LEFT) return JSON.Value.from(deserializeObject(srcStart, srcEnd, 0));
|
|
14
14
|
else if (firstChar - 48 <= 9 || firstChar == 45) return JSON.Value.from(deserializeFloat<f64>(srcStart, srcEnd));
|
|
15
15
|
else if (firstChar == BRACKET_LEFT) {
|
|
@@ -17,7 +17,6 @@ export function deserializeArbitrary(srcStart: usize, srcEnd: usize, dst: usize)
|
|
|
17
17
|
} else if (firstChar == 116 || firstChar == 102) return JSON.Value.from(deserializeBoolean(srcStart, srcEnd));
|
|
18
18
|
else if (firstChar == CHAR_N) {
|
|
19
19
|
const value = JSON.Value.from<usize>(0);
|
|
20
|
-
value.isNull = true;
|
|
21
20
|
return value;
|
|
22
21
|
}
|
|
23
22
|
return unreachable();
|
|
@@ -55,7 +55,7 @@ export function deserializeObject(srcStart: usize, srcEnd: usize, dst: usize): J
|
|
|
55
55
|
const code = load<u16>(srcStart);
|
|
56
56
|
if (code == QUOTE && load<u16>(srcStart - 2) !== BACK_SLASH) {
|
|
57
57
|
// console.log("Value (string):-" + deserializeString_SWAR(lastIndex, srcStart + 2, 0) + "-");
|
|
58
|
-
out.set(ptrToStr(keyStart, keyEnd), deserializeString(lastIndex, srcStart + 2
|
|
58
|
+
out.set(ptrToStr(keyStart, keyEnd), deserializeString(lastIndex, srcStart + 2));
|
|
59
59
|
// while (isSpace(load<u16>(srcStart))) srcStart += 2;
|
|
60
60
|
srcStart += 4;
|
|
61
61
|
// console.log("Next: " + String.fromCharCode(load<u16>(srcStart)));
|
|
@@ -1,50 +1,41 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { bs } from "../../../lib/as-bs";
|
|
2
2
|
import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables";
|
|
3
|
+
import { hex4_to_u16_swar } from "../../util/swar";
|
|
3
4
|
|
|
4
5
|
// @ts-ignore: inline
|
|
5
|
-
@inline export function deserializeString(srcStart: usize, srcEnd: usize
|
|
6
|
+
@inline export function deserializeString(srcStart: usize, srcEnd: usize): string {
|
|
7
|
+
// Strip quotes
|
|
6
8
|
srcStart += 2;
|
|
7
9
|
srcEnd -= 2;
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
let dstPtr = dst;
|
|
11
|
-
let lastPtr = srcStart;
|
|
10
|
+
bs.proposeSize(u32(srcEnd - srcStart));
|
|
11
|
+
|
|
12
12
|
while (srcStart < srcEnd) {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const escapedB = load<u8>(ESCAPE_HEX_TABLE + codeB);
|
|
22
|
-
const escaped = (escapedA << 4) + escapedB;
|
|
23
|
-
const remBytes = srcStart - lastPtr;
|
|
24
|
-
memory.copy(dstPtr, lastPtr, remBytes);
|
|
25
|
-
dstPtr += remBytes;
|
|
26
|
-
store<u16>(dstPtr, escaped);
|
|
27
|
-
dstPtr += 2;
|
|
28
|
-
srcStart += 12;
|
|
29
|
-
lastPtr = srcStart;
|
|
30
|
-
} else {
|
|
31
|
-
const remBytes = srcStart - lastPtr;
|
|
32
|
-
memory.copy(dstPtr, lastPtr, remBytes);
|
|
33
|
-
dstPtr += remBytes;
|
|
34
|
-
store<u16>(dstPtr, code);
|
|
35
|
-
dstPtr += 2;
|
|
36
|
-
srcStart += 4;
|
|
37
|
-
lastPtr = srcStart;
|
|
38
|
-
}
|
|
39
|
-
} else {
|
|
40
|
-
srcStart += 2;
|
|
13
|
+
const block = load<u16>(srcStart);
|
|
14
|
+
store<u16>(bs.offset, block);
|
|
15
|
+
srcStart += 2;
|
|
16
|
+
|
|
17
|
+
// Early exit
|
|
18
|
+
if (block !== 0x5C) {
|
|
19
|
+
bs.offset += 2;
|
|
20
|
+
continue;
|
|
41
21
|
}
|
|
42
|
-
}
|
|
43
22
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
23
|
+
const code = load<u16>(srcStart);
|
|
24
|
+
if (code !== 0x75) {
|
|
25
|
+
// Short escapes (\n \t \" \\)
|
|
26
|
+
const block = load<u16>(srcStart);
|
|
27
|
+
const escape = load<u16>(DESERIALIZE_ESCAPE_TABLE + block);
|
|
28
|
+
store<u16>(bs.offset, escape);
|
|
29
|
+
srcStart += 2;
|
|
30
|
+
} else {
|
|
31
|
+
// Unicode escape (\uXXXX)
|
|
32
|
+
const block = load<u64>(srcStart, 2); // XXXX
|
|
33
|
+
const escaped = hex4_to_u16_swar(block);
|
|
34
|
+
store<u16>(bs.offset, escaped);
|
|
35
|
+
srcStart += 10;
|
|
36
|
+
}
|
|
47
37
|
|
|
48
|
-
|
|
49
|
-
|
|
38
|
+
bs.offset += 2;
|
|
39
|
+
}
|
|
40
|
+
return bs.out<string>();
|
|
50
41
|
}
|