json-as 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -413,6 +413,8 @@ The following charts compare JSON-AS (both SWAR and SIMD variants) against JavaS
413
413
 
414
414
  <img src="https://raw.githubusercontent.com/JairusSW/json-as/refs/heads/docs/charts/chart03.png" alt="Performance Chart 3">
415
415
 
416
+ <img src="https://raw.githubusercontent.com/JairusSW/json-as/refs/heads/docs/charts/chart04.png" alt="Performance Chart 4">
417
+
416
418
  > Note: I have focused on extensively optimizing serialization. I used to have deserialization be highly unsafe and extremely fast, but I've since doubled down on safety for deserialization which has negatively affected performance. I will be optimizing soon.
417
419
 
418
420
  ### Performance Tuning
@@ -1,6 +1,57 @@
1
1
  import { bs } from "../../../lib/as-bs";
2
2
  import { BACK_SLASH } from "../../custom/chars";
3
3
  import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables";
4
+ import { hex4_to_u16_swar } from "../../util/swar";
5
+
6
+ // @ts-ignore: decorator allowed
7
+ @lazy const SPLAT_5C = i16x8.splat(0x5C); // \
8
+
9
+ // Overflow Pattern for Unicode Escapes (READ)
10
+ // \u0001 0 \u0001__| + 0
11
+ // -\u0001 2 -\u0001_| + 0
12
+ // --\u0001 4 --\u0001| + 0
13
+ // ---\u0001 6 ---\u000|1 + 2
14
+ // ----\u0001 8 ----\u00|01 + 4
15
+ // -----\u0001 10 -----\u0|001 + 6
16
+ // ------\u0001 12 ------\u|0001 + 8
17
+ // -------\u0001 14 -------\|u0001 + 10
18
+ // Formula: overflow = max(0, lane - 4)
19
+
20
+ // Overflow Pattern for Unicode Escapes (WRITE)
21
+ // * = escape, _ = empty
22
+ // \u0001 0 *_______| - 14
23
+ // -\u0001 2 -*______| - 12
24
+ // --\u0001 4 --*_____| - 10
25
+ // ---\u0001 6 ---*____| - 8
26
+ // ----\u0001 8 ----*___| - 6
27
+ // -----\u0001 10 -----*__| - 4
28
+ // ------\u0001 12 ------*_| - 2
29
+ // -------\u0001 14 -------*| + 0
30
+ // Formula: overflow = lane - 14
31
+
32
+ // Overflow pattern for Short Escapes (READ)
33
+ // \n------ 0 \n------| - 12
34
+ // -\n----- 2 -\n-----| - 10
35
+ // --\n---- 4 --\n----| - 8
36
+ // ---\n--- 6 ---\n---| - 6
37
+ // ----\n-- 8 ----\n--| - 4
38
+ // -----\n- 10 -----\n-| - 2
39
+ // ------\n 12 ------\n| + 0
40
+ // -------\n 14 -------\|n + 2
41
+ // Formula: overflow = lane - 12
42
+
43
+ // Overflow pattern for Short Escapes (WRITE)
44
+ // * = escape, _ = empty
45
+ // \n------ 0 *_______| - 14
46
+ // -\n----- 2 -*______| - 12
47
+ // --\n---- 4 --*_____| - 10
48
+ // ---\n--- 6 ---*____| - 8
49
+ // ----\n-- 8 ----*___| - 6
50
+ // -----\n- 10 -----*__| - 4
51
+ // ------\n 12 ------*_| - 2
52
+ // -------\n 14 -------*| + 0
53
+ // Formula: overflow = lane - 14
54
+
4
55
 
5
56
  /**
6
57
  * Deserializes strings back into their original form using SIMD operations
@@ -9,92 +60,137 @@ import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables
9
60
  * @returns the deserialized string
10
61
  */
11
62
  // TODO: optimize. It works, but it's not pretty; ideally, I'd like this to be (nearly) branchless.
12
- export function deserializeString_SIMD(srcStart: usize, srcEnd: usize): void {
13
- const SPLAT_92 = i16x8.splat(92); /* \ */
63
+ export function deserializeString_SIMD(srcStart: usize, srcEnd: usize): string {
64
+ // Strip quotes
14
65
  srcStart += 2;
15
66
  srcEnd -= 2;
67
+ const srcEnd16 = srcEnd - 16;
16
68
  bs.ensureSize(u32(srcEnd - srcStart));
17
- const src_end_15 = srcEnd - 15;
18
-
19
- // while (srcStart < src_end_15) {
20
- // const block = v128.load(srcStart);
21
- // v128.store(bs.offset, block);
22
-
23
- // const backslash_indices = i16x8.eq(block, SPLAT_92);
24
- // let mask = i16x8.bitmask(backslash_indices);
25
-
26
- // while (mask != 0) {
27
- // const lane_index = ctz(mask) << 1;
28
- // const dst_offset = bs.offset + lane_index;
29
- // const src_offset = srcStart + lane_index;
30
- // const code = load<u16>(src_offset, 2);
31
-
32
- // mask &= mask - 1;
33
- // if (code == 117 && load<u32>(src_offset, 4) == 3145776) {
34
- // const block = load<u32>(src_offset, 8);
35
- // const codeA = block & 0xffff;
36
- // const codeB = (block >> 16) & 0xffff;
37
- // const escapedA = load<u8>(ESCAPE_HEX_TABLE + codeA);
38
- // const escapedB = load<u8>(ESCAPE_HEX_TABLE + codeB);
39
- // const escaped = (escapedA << 4) + escapedB;
40
- // // console.log("Escaped:");
41
- // console.log(" a: " + escapedA.toString())
42
- // console.log(" b: " + escapedB.toString());
43
- // console.log(" c: " + escaped.toString());
44
- // console.log(" o: " + (bs.offset - dst).toString());
45
- // console.log(" d: " + (dst_offset - dst).toString())
46
- // console.log(" l: " + (lane_index).toString())
47
- // store<u16>(dst_offset, escaped);
48
- // v128.store(dst_offset, v128.load(src_offset, 4), 2);
49
- // if (lane_index >= 6) {
50
- // const bytes_left = lane_index - 4;
51
- // srcStart += bytes_left;
52
- // bs.offset += bytes_left;
53
- // // console.log(" e: " + (bytes_left).toString())
54
- // }
55
- // bs.offset -= 10;
56
- // } else {
57
- // const escaped = load<u8>(DESERIALIZE_ESCAPE_TABLE + code);
58
- // store<u16>(dst_offset, escaped);
59
- // v128.store(dst_offset, v128.load(src_offset, 4), 2);
60
- // // console.log("Escaped:");
61
- // if (lane_index == 14) {
62
- // srcStart += 2;
63
- // } else {
64
- // bs.offset -= 2;
65
- // }
66
- // }
67
- // }
68
-
69
- // srcStart += 16;
70
- // bs.offset += 16;
71
-
72
- // // console.log("src: " + (srcStart - changetype<usize>(src)).toString());
73
- // // console.log("dst: " + (dst_ptr - dst).toString());
74
- // }
75
- while (srcStart < srcEnd) {
76
- let code = load<u16>(srcStart);
77
- if (code === BACK_SLASH) {
78
- code = load<u16>(DESERIALIZE_ESCAPE_TABLE + load<u8>(srcStart, 2));
79
- if (code === 117 && load<u32>(srcStart, 4) === 3145776) {
80
- const block = load<u32>(srcStart, 8);
81
- const codeA = block & 0xffff;
82
- const codeB = (block >> 16) & 0xffff;
83
- const escapedA = load<u8>(ESCAPE_HEX_TABLE + codeA);
84
- const escapedB = load<u8>(ESCAPE_HEX_TABLE + codeB);
85
- const escaped = (escapedA << 4) + escapedB;
69
+
70
+ while (srcStart < srcEnd16) {
71
+ const block = load<v128>(srcStart);
72
+ store<v128>(bs.offset, block);
73
+
74
+ const eq5C = i16x8.eq(load<v128>(srcStart), SPLAT_5C);
75
+ let mask = i16x8.bitmask(eq5C);
76
+ // Early exit
77
+ if (mask === 0) {
78
+ srcStart += 16;
79
+ bs.offset += 16;
80
+ continue;
81
+ }
82
+
83
+ let srcChg: usize = 0;
84
+ let lastLane: usize = 0;
85
+ do {
86
+ const laneIdx = usize(ctz(mask) << 1); // 0 2 4 6 8 10 12 14
87
+ mask &= mask - 1;
88
+ const srcIdx = srcStart + laneIdx;
89
+ const code = load<u16>(srcIdx, 2);
90
+
91
+ bs.offset += laneIdx - lastLane;
92
+
93
+ // Hot path (negative bias)
94
+ if (code !== 0x75) {
95
+ // Short escapes (\n \t \" \\)
96
+ const escaped = load<u16>(DESERIALIZE_ESCAPE_TABLE + code);
97
+ mask &= mask - i32(escaped === 0x5C);
86
98
  store<u16>(bs.offset, escaped);
99
+ store<v128>(bs.offset, load<v128>(srcIdx, 4), 2);
100
+
101
+ const l6 = usize(laneIdx === 14);
102
+ // bs.offset -= (1 - l6) << 1;
87
103
  bs.offset += 2;
88
- srcStart += 12;
89
- } else {
90
- store<u16>(bs.offset, code);
91
- bs.offset += 2;
92
- srcStart += 4;
104
+ srcStart += l6 << 1;
105
+ lastLane = laneIdx + 4;
106
+ continue;
93
107
  }
94
- } else {
95
- store<u16>(bs.offset, code);
108
+
109
+ // Unicode escape (\uXXXX)
110
+ const block = load<u64>(srcIdx, 4); // XXXX
111
+ const escaped = hex4_to_u16_swar(block);
112
+
113
+ store<u16>(bs.offset, escaped);
114
+ store<u64>(bs.offset, load<u64>(srcIdx, 12), 2);
115
+
116
+ bs.offset += 2;
117
+ if (laneIdx >= 6) {
118
+ srcStart += laneIdx - 4;
119
+ }
120
+ lastLane = laneIdx + 12;
121
+ } while (mask !== 0);
122
+
123
+ if (lastLane < 16) {
124
+ bs.offset += 16 - lastLane;
125
+ }
126
+
127
+ srcStart += 16 + srcChg;
128
+ }
129
+
130
+ while (srcStart < srcEnd) {
131
+ const block = load<u16>(srcStart);
132
+ store<u16>(bs.offset, block);
133
+ srcStart += 2;
134
+
135
+ // Early exit
136
+ if (block !== 0x5C) {
96
137
  bs.offset += 2;
138
+ continue;
139
+ }
140
+
141
+ const code = load<u16>(srcStart);
142
+ if (code !== 0x75) {
143
+ // Short escapes (\n \t \" \\)
144
+ const block = load<u16>(srcStart);
145
+ const escape = load<u16>(DESERIALIZE_ESCAPE_TABLE + block);
146
+ store<u16>(bs.offset, escape);
97
147
  srcStart += 2;
148
+ } else {
149
+ // Unicode escape (\uXXXX)
150
+ const block = load<u64>(srcStart, 2); // XXXX
151
+ const escaped = hex4_to_u16_swar(block);
152
+ store<u16>(bs.offset, escaped);
153
+ srcStart += 10;
98
154
  }
155
+ bs.offset += 2;
99
156
  }
157
+
158
+ return bs.out<string>();
159
+ }
160
+
161
+ /**
162
+ * Computes a per-lane mask identifying UTF-16 code units whose **low byte**
163
+ * is the ASCII backslash (`'\\'`, 0x5C).
164
+ *
165
+ * The mask is produced in two stages:
166
+ * 1. Detects bytes equal to 0x5C using a SWAR equality test.
167
+ * 2. Clears matches in code units whose **high byte** is non-zero (e.g. U+015C),
167
+ * ensuring only genuine backslashes (U+005C) are reported.
169
+ *
170
+ * Each matching lane sets itself to 0x80.
171
+ */
172
+ // @ts-ignore: decorator
173
+ @inline function backslash_mask(block: u64): u64 {
174
+ const b = block ^ 0x005C_005C_005C_005C;
175
+ const backslash_mask = (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
176
+ const high_byte_mask =
177
+ ~(((block - 0x0100_0100_0100_0100) & ~block & 0x8000_8000_8000_8000)
178
+ ^ 0x8000_8000_8000_8000) >> 8;
179
+ return backslash_mask & high_byte_mask;
180
+ }
181
+
182
+ /**
183
+ * Computes a per-lane mask identifying UTF-16 code units whose **low byte**
184
+ * is the ASCII backslash (`'\\'`, 0x5C).
185
+ *
186
+ * Each matching lane sets itself to 0x80.
187
+ *
188
+ * WARNING: A code unit whose low byte is 0x5C but whose high byte is non-zero (e.g. U+015C) also matches, producing false positives!
189
+ * This is useful on a hot path where the caller can detect the false positive with a scalar check.
190
+ */
191
+ // @ts-ignore: decorator
192
+ @inline function backslash_mask_unsafe(block: u64): u64 {
193
+ const b = block ^ 0x005C_005C_005C_005C;
194
+ const backslash_mask = (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
195
+ return backslash_mask;
100
196
  }
@@ -9,7 +9,7 @@ import { BRACE_LEFT, BRACKET_LEFT, CHAR_N, QUOTE } from "../../custom/chars";
9
9
  export function deserializeArbitrary(srcStart: usize, srcEnd: usize, dst: usize): JSON.Value {
10
10
  const firstChar = load<u16>(srcStart);
11
11
  if (firstChar == QUOTE) {
12
- return JSON.Value.from(deserializeString(srcStart, srcEnd, 0));
12
+ return JSON.Value.from(deserializeString(srcStart, srcEnd));
13
13
  } else if (firstChar == BRACE_LEFT) return JSON.Value.from(deserializeObject(srcStart, srcEnd, 0));
14
14
  else if (firstChar - 48 <= 9 || firstChar == 45) return JSON.Value.from(deserializeFloat<f64>(srcStart, srcEnd));
15
15
  else if (firstChar == BRACKET_LEFT) {
@@ -17,7 +17,6 @@ export function deserializeArbitrary(srcStart: usize, srcEnd: usize, dst: usize)
17
17
  } else if (firstChar == 116 || firstChar == 102) return JSON.Value.from(deserializeBoolean(srcStart, srcEnd));
18
18
  else if (firstChar == CHAR_N) {
19
19
  const value = JSON.Value.from<usize>(0);
20
- value.isNull = true;
21
20
  return value;
22
21
  }
23
22
  return unreachable();
@@ -55,7 +55,7 @@ export function deserializeObject(srcStart: usize, srcEnd: usize, dst: usize): J
55
55
  const code = load<u16>(srcStart);
56
56
  if (code == QUOTE && load<u16>(srcStart - 2) !== BACK_SLASH) {
57
57
  // console.log("Value (string):-" + deserializeString_SWAR(lastIndex, srcStart + 2, 0) + "-");
58
- out.set(ptrToStr(keyStart, keyEnd), deserializeString(lastIndex, srcStart + 2, 0));
58
+ out.set(ptrToStr(keyStart, keyEnd), deserializeString(lastIndex, srcStart + 2));
59
59
  // while (isSpace(load<u16>(srcStart))) srcStart += 2;
60
60
  srcStart += 4;
61
61
  // console.log("Next: " + String.fromCharCode(load<u16>(srcStart)));
@@ -1,50 +1,41 @@
1
- import { BACK_SLASH } from "../../custom/chars";
1
+ import { bs } from "../../../lib/as-bs";
2
2
  import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables";
3
+ import { hex4_to_u16_swar } from "../../util/swar";
3
4
 
4
5
  // @ts-ignore: inline
5
- @inline export function deserializeString(srcStart: usize, srcEnd: usize, dst: usize): string {
6
+ @inline export function deserializeString(srcStart: usize, srcEnd: usize): string {
7
+ // Strip quotes
6
8
  srcStart += 2;
7
9
  srcEnd -= 2;
8
- const startPtr = srcStart;
9
- if (dst == 0) dst = __new(srcEnd - srcStart, idof<string>());
10
- let dstPtr = dst;
11
- let lastPtr = srcStart;
10
+ bs.proposeSize(u32(srcEnd - srcStart));
11
+
12
12
  while (srcStart < srcEnd) {
13
- let code = load<u16>(srcStart);
14
- if (code == BACK_SLASH) {
15
- code = <u16>load<u8>(DESERIALIZE_ESCAPE_TABLE + load<u8>(srcStart, 2));
16
- if (code == 117 && load<u32>(srcStart, 4) == 3145776) {
17
- const block = load<u32>(srcStart, 8);
18
- const codeA = block & 0xffff;
19
- const codeB = (block >> 16) & 0xffff;
20
- const escapedA = load<u8>(ESCAPE_HEX_TABLE + codeA);
21
- const escapedB = load<u8>(ESCAPE_HEX_TABLE + codeB);
22
- const escaped = (escapedA << 4) + escapedB;
23
- const remBytes = srcStart - lastPtr;
24
- memory.copy(dstPtr, lastPtr, remBytes);
25
- dstPtr += remBytes;
26
- store<u16>(dstPtr, escaped);
27
- dstPtr += 2;
28
- srcStart += 12;
29
- lastPtr = srcStart;
30
- } else {
31
- const remBytes = srcStart - lastPtr;
32
- memory.copy(dstPtr, lastPtr, remBytes);
33
- dstPtr += remBytes;
34
- store<u16>(dstPtr, code);
35
- dstPtr += 2;
36
- srcStart += 4;
37
- lastPtr = srcStart;
38
- }
39
- } else {
40
- srcStart += 2;
13
+ const block = load<u16>(srcStart);
14
+ store<u16>(bs.offset, block);
15
+ srcStart += 2;
16
+
17
+ // Early exit
18
+ if (block !== 0x5C) {
19
+ bs.offset += 2;
20
+ continue;
41
21
  }
42
- }
43
22
 
44
- const remBytes = srcEnd - lastPtr;
45
- memory.copy(dstPtr, lastPtr, remBytes);
46
- dstPtr += remBytes;
23
+ const code = load<u16>(srcStart);
24
+ if (code !== 0x75) {
25
+ // Short escapes (\n \t \" \\)
26
+ const block = load<u16>(srcStart);
27
+ const escape = load<u16>(DESERIALIZE_ESCAPE_TABLE + block);
28
+ store<u16>(bs.offset, escape);
29
+ srcStart += 2;
30
+ } else {
31
+ // Unicode escape (\uXXXX)
32
+ const block = load<u64>(srcStart, 2); // XXXX
33
+ const escaped = hex4_to_u16_swar(block);
34
+ store<u16>(bs.offset, escaped);
35
+ srcStart += 10;
36
+ }
47
37
 
48
- if (lastPtr != startPtr) dst = __renew(dst, dstPtr - dst);
49
- return changetype<string>(dst);
38
+ bs.offset += 2;
39
+ }
40
+ return bs.out<string>();
50
41
  }