json-as 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/.claude/settings.local.json +9 -0
  2. package/ARCHITECTURE.md +320 -0
  3. package/CONTRIBUTING.md +238 -0
  4. package/LICENSE +2 -2
  5. package/README.md +35 -1
  6. package/TODO +1 -0
  7. package/assembly/custom/chars.ts +9 -0
  8. package/assembly/deserialize/simd/string.ts +139 -78
  9. package/assembly/deserialize/simple/arbitrary.ts +1 -2
  10. package/assembly/deserialize/simple/object.ts +1 -1
  11. package/assembly/deserialize/simple/set.ts +169 -0
  12. package/assembly/deserialize/simple/staticarray/array.ts +37 -0
  13. package/assembly/deserialize/simple/staticarray/bool.ts +39 -0
  14. package/assembly/deserialize/simple/staticarray/float.ts +44 -0
  15. package/assembly/deserialize/simple/staticarray/integer.ts +44 -0
  16. package/assembly/deserialize/simple/staticarray/string.ts +45 -0
  17. package/assembly/deserialize/simple/staticarray/struct.ts +47 -0
  18. package/assembly/deserialize/simple/staticarray.ts +28 -0
  19. package/assembly/deserialize/simple/string.ts +31 -40
  20. package/assembly/deserialize/swar/string.ts +147 -109
  21. package/assembly/globals/tables.ts +1 -1
  22. package/assembly/index.ts +419 -276
  23. package/assembly/serialize/simd/string.ts +134 -40
  24. package/assembly/serialize/simple/arbitrary.ts +18 -10
  25. package/assembly/serialize/simple/set.ts +34 -0
  26. package/assembly/serialize/simple/staticarray.ts +30 -0
  27. package/assembly/serialize/simple/string.ts +54 -4
  28. package/assembly/serialize/swar/string.ts +128 -72
  29. package/assembly/test.ts +18 -32
  30. package/assembly/util/masks.ts +47 -0
  31. package/assembly/util/swar.ts +14 -0
  32. package/eslint.config.js +77 -0
  33. package/lib/as-bs.ts +142 -49
  34. package/package.json +18 -5
  35. package/transform/lib/builder.d.ts +87 -0
  36. package/transform/lib/builder.d.ts.map +1 -0
  37. package/transform/lib/builder.js +169 -169
  38. package/transform/lib/builder.js.map +1 -1
  39. package/transform/lib/index.d.ts +32 -0
  40. package/transform/lib/index.d.ts.map +1 -0
  41. package/transform/lib/index.js +36 -17
  42. package/transform/lib/index.js.map +1 -1
  43. package/transform/lib/linkers/alias.d.ts +12 -0
  44. package/transform/lib/linkers/alias.d.ts.map +1 -0
  45. package/transform/lib/linkers/alias.js +1 -1
  46. package/transform/lib/linkers/alias.js.map +1 -1
  47. package/transform/lib/linkers/custom.d.ts +10 -0
  48. package/transform/lib/linkers/custom.d.ts.map +1 -0
  49. package/transform/lib/linkers/imports.d.ts +3 -0
  50. package/transform/lib/linkers/imports.d.ts.map +1 -0
  51. package/transform/lib/linkers/imports.js +1 -1
  52. package/transform/lib/linkers/imports.js.map +1 -1
  53. package/transform/lib/types.d.ts +66 -0
  54. package/transform/lib/types.d.ts.map +1 -0
  55. package/transform/lib/types.js +6 -5
  56. package/transform/lib/types.js.map +1 -1
  57. package/transform/lib/util.d.ts +18 -0
  58. package/transform/lib/util.d.ts.map +1 -0
  59. package/transform/lib/util.js +4 -4
  60. package/transform/lib/util.js.map +1 -1
  61. package/transform/lib/visitor.d.ts +84 -0
  62. package/transform/lib/visitor.d.ts.map +1 -0
  63. package/transform/lib/visitor.js +76 -76
  64. package/transform/lib/visitor.js.map +1 -1
  65. package/transform/tsconfig.json +29 -2
  66. package/assembly/serialize/swar/number.ts +0 -0
  67. package/transform/lib/linkers/classes.js +0 -36
  68. package/transform/lib/linkers/classes.js.map +0 -1
@@ -1,14 +1,26 @@
1
+ import { OBJECT, TOTAL_OVERHEAD } from "rt/common";
1
2
  import { bs, sc } from "../../../lib/as-bs";
2
3
  import { BACK_SLASH } from "../../custom/chars";
3
4
  import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
4
- import { bytes } from "../../util";
5
+ // @ts-expect-error: @lazy is a valid decorator
6
+ @lazy const U00_MARKER = 13511005048209500;
7
+ // @ts-expect-error: @lazy is a valid decorator
8
+ @lazy const U_MARKER = 7667804;
9
+ // @ts-expect-error: @lazy is a valid decorator
10
+ @lazy const SPLAT_0022 = i16x8.splat(0x0022); // "
11
+ // @ts-expect-error: @lazy is a valid decorator
12
+ @lazy const SPLAT_005C = i16x8.splat(0x005C); // \
13
+ // @ts-expect-error: @lazy is a valid decorator
14
+ @lazy const SPLAT_0020 = i16x8.splat(0x0020); // space and control check
15
+ // @ts-expect-error: @lazy is a valid decorator
16
+ @lazy const SPLAT_FFD8 = i16x8.splat(i16(0xD7FE));
5
17
 
6
18
  /**
7
19
  * Serializes strings into their JSON counterparts using SIMD operations
8
20
  */
9
21
  export function serializeString_SIMD(src: string): void {
10
22
  let srcStart = changetype<usize>(src);
11
- if (isDefined(JSON_CACHE)) {
23
+ if (isDefined(JSON_CACHE)) {
12
24
  // check cache
13
25
  const e = unchecked(sc.entries[(srcStart >> 4) & sc.CACHE_MASK]);
14
26
  if (e.key == srcStart) {
@@ -19,55 +31,98 @@ if (isDefined(JSON_CACHE)) {
19
31
  return;
20
32
  }
21
33
  }
22
- const U00_MARKER = 13511005048209500;
23
- const SPLAT_34 = i16x8.splat(34); /* " */
24
- const SPLAT_92 = i16x8.splat(92); /* \ */
25
34
 
26
- const SPLAT_32 = i16x8.splat(32); /* [ESC] */
27
-
28
- const srcSize = bytes(src);
35
+ const srcSize = changetype<OBJECT>(srcStart - TOTAL_OVERHEAD).rtSize
29
36
  const srcEnd = srcStart + srcSize;
30
37
  const srcEnd16 = srcEnd - 16;
31
38
 
32
39
  bs.proposeSize(srcSize + 4);
33
-
34
- store<u8>(bs.offset, 34); // "
40
+ store<u16>(bs.offset, 34); // "
35
41
  bs.offset += 2;
36
42
 
37
- while (srcStart <= srcEnd16) {
38
- const block = v128.load(srcStart);
43
+ while (srcStart < srcEnd16) {
44
+ const block = load<v128>(srcStart);
45
+ store<v128>(bs.offset, block);
39
46
 
40
- v128.store(bs.offset, block);
47
+ const eq22 = i16x8.eq(block, SPLAT_0022);
48
+ const eq5C = i16x8.eq(block, SPLAT_005C);
49
+ const lt20 = i16x8.lt_u(block, SPLAT_0020);
50
+ const gteD8 = i8x16.gt_u(block, SPLAT_FFD8);
51
+ // console.log("\nblock : " + mask_to_string_v128(block));
52
+ // console.log("eq22 : " + mask_to_string_v128(eq22) + " -> " + mask_to_string_v128(SPLAT_0022));
53
+ // console.log("eq5C : " + mask_to_string_v128(eq5C) + " -> " + mask_to_string_v128(SPLAT_005C));
54
+ // console.log("lt20 : " + mask_to_string_v128(lt20) + " -> " + mask_to_string_v128(SPLAT_0020));
55
+ // console.log("gteD8 : " + mask_to_string_v128(gteD8) + " -> " + mask_to_string_v128(SPLAT_FFD8));
41
56
 
42
- const backslash_indices = i16x8.eq(block, SPLAT_92);
43
- const quote_indices = i16x8.eq(block, SPLAT_34);
44
- const escape_indices = i16x8.lt_u(block, SPLAT_32);
45
- const sieve = v128.or(v128.or(backslash_indices, quote_indices), escape_indices);
57
+ const sieve = v128.or(eq22, v128.or(eq5C, v128.or(lt20, gteD8)));
58
+ // console.log("sieve : " + mask_to_string_v128(sieve));
46
59
 
47
- let mask = i16x8.bitmask(sieve);
60
+ if (!v128.any_true(sieve)) {
61
+ bs.offset += 16;
62
+ srcStart += 16;
63
+ continue;
64
+ }
65
+
66
+ let mask = i8x16.bitmask(sieve);
67
+
68
+ do {
69
+ const laneIdx = ctz(mask);
70
+ const srcIdx = srcStart + laneIdx;
71
+
72
+ mask &= mask - 1;
73
+ // Even (0 2 4 6 8 10 12 14) -> Confirmed ASCII Escape
74
+ // Odd (1 3 5 7 9 11 13 15) -> Possibly a Unicode code unit or surrogate
75
+
76
+ if ((laneIdx & 1) === 0) {
77
+ const code = load<u16>(srcIdx);
78
+ const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + (code << 2));
79
+
80
+ if ((escaped & 0xffff) != BACK_SLASH) {
81
+ bs.growSize(10);
82
+ const dstIdx = bs.offset + laneIdx;
83
+ store<u64>(dstIdx, U00_MARKER);
84
+ store<u32>(dstIdx, escaped, 8);
85
+ // memory.copy(dstIdx + 12, srcIdx + 2, 14 - laneIdx);
86
+ store<v128>(dstIdx, load<v128>(srcIdx, 2), 12); // unsafe. can overflow here
87
+ bs.offset += 10;
88
+ } else {
89
+ bs.growSize(2);
90
+ const dstIdx = bs.offset + laneIdx;
91
+ store<u32>(dstIdx, escaped);
92
+ store<v128>(dstIdx, load<v128>(srcIdx, 2), 4);
93
+ // memory.copy(dstIdx + 4, srcIdx + 2, 14 - laneIdx);
94
+ bs.offset += 2;
95
+ }
96
+ continue;
97
+ }
98
+
99
+ const code = load<u16>(srcIdx - 1);
100
+ // console.log("\nb->" + mask_to_string_v128(block));
101
+ // console.log("h->" + mask_to_string_v128(sieve));
102
+ // console.log("z->" + mask_to_string_v128(i8x16.ge_u(block,SPLAT_FFD8)));
103
+ // console.log("m->" + mask.toString(2));
104
+ // console.log("l->" + laneIdx.toString());
105
+ // console.log("c->" + code.toString(16));
106
+ if (code < 0xD800 || code > 0xDFFF) continue;
107
+
108
+ if (code <= 0xDBFF && srcIdx + 1 <= srcEnd - 2) {
109
+ const next = load<u16>(srcIdx, 1);
110
+ if (next >= 0xDC00 && next <= 0xDFFF) {
111
+ // paired surrogate
112
+ mask &= mask - 1;
113
+ continue;
114
+ }
115
+ }
48
116
 
49
- while (mask != 0) {
50
- const lane_index = ctz(mask) << 1; // 0 2 4 6 8 10 12 14
51
- // console.log("lane: " + (lane_index >= 8 ? (lane_index - 8).toString():lane_index.toString()));
52
- const src_offset = srcStart + lane_index;
53
- const code = load<u16>(src_offset) << 2;
54
- const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + code);
55
- mask &= mask - 1;
56
- if ((escaped & 0xffff) != BACK_SLASH) {
57
117
  bs.growSize(10);
58
- const dst_offset = bs.offset + lane_index;
59
- store<u64>(dst_offset, U00_MARKER);
60
- store<u32>(dst_offset, escaped, 8);
61
- v128.store(dst_offset, v128.load(src_offset, 2), 12);
118
+ // unpaired high/low surrogate
119
+ const dstIdx = bs.offset + laneIdx - 1;
120
+ store<u32>(dstIdx, U_MARKER); // \u
121
+ store<u64>(dstIdx, load<u64>(changetype<usize>(code.toString(16))), 4);
122
+ // memory.copy(dstIdx + 12, srcIdx + 1, 15 - laneIdx);
123
+ store<v128>(dstIdx, load<v128>(srcIdx, 1), 12);
62
124
  bs.offset += 10;
63
- } else {
64
- bs.growSize(2);
65
- const dst_offset = bs.offset + lane_index;
66
- store<u32>(dst_offset, escaped);
67
- v128.store(dst_offset, v128.load(src_offset, 2), 4);
68
- bs.offset += 2;
69
- }
70
- }
125
+ } while (mask !== 0);
71
126
 
72
127
  srcStart += 16;
73
128
  bs.offset += 16;
@@ -87,15 +142,54 @@ if (isDefined(JSON_CACHE)) {
87
142
  store<u32>(bs.offset, escaped);
88
143
  bs.offset += 4;
89
144
  }
90
- } else {
145
+ srcStart += 2;
146
+ continue;
147
+ }
148
+
149
+ if (code < 0xD800 || code > 0xDFFF) {
91
150
  store<u16>(bs.offset, code);
92
151
  bs.offset += 2;
152
+ srcStart += 2;
153
+ continue;
93
154
  }
155
+
156
+ if (code <= 0xDBFF && srcStart + 2 <= srcEnd - 2) {
157
+ const next = load<u16>(srcStart, 2);
158
+ if (next >= 0xDC00 && next <= 0xDFFF) {
159
+ // valid surrogate pair
160
+ store<u16>(bs.offset, code);
161
+ store<u16>(bs.offset + 2, next);
162
+ bs.offset += 4;
163
+ srcStart += 4;
164
+ continue;
165
+ }
166
+ }
167
+
168
+ // unpaired high/low surrogate
169
+ write_u_escape(code);
94
170
  srcStart += 2;
171
+ continue;
95
172
  }
96
173
 
97
- store<u8>(bs.offset, 34); /* " */
174
+ store<u16>(bs.offset, 34); // "
98
175
  bs.offset += 2;
99
176
 
100
177
  if (isDefined(JSON_CACHE)) sc.insertCached(changetype<usize>(src), srcStart, srcSize);
101
178
  }
179
+
180
+ // @ts-expect-error: @inline is a valid decorator
181
+ @inline function write_u_escape(code: u16): void {
182
+ bs.growSize(10);
183
+ store<u32>(bs.offset, U_MARKER); // "\u"
184
+ // write hex digits (lowercase, matches tests)
185
+ store<u16>(bs.offset + 4, hexNibble((code >> 12) & 0xF));
186
+ store<u16>(bs.offset + 6, hexNibble((code >> 8) & 0xF));
187
+ store<u16>(bs.offset + 8, hexNibble((code >> 4) & 0xF));
188
+ store<u16>(bs.offset + 10, hexNibble(code & 0xF));
189
+ bs.offset += 12;
190
+ }
191
+
192
+ // @ts-expect-error: @inline is a valid decorator
193
+ @inline function hexNibble(n: u16): u16 {
194
+ return n < 10 ? (48 + n) : (87 + n);
195
+ }
@@ -4,21 +4,13 @@ import { serializeArray } from "./array";
4
4
  import { serializeBool } from "./bool";
5
5
  import { serializeFloat } from "./float";
6
6
  import { serializeInteger } from "./integer";
7
+ import { serializeMap } from "./map";
7
8
  import { serializeObject } from "./object";
8
9
  import { serializeString } from "./string";
9
10
 
10
11
  export function serializeArbitrary(src: JSON.Value): void {
11
- if (src.type < JSON.Types.Null) {
12
- if (src.isNull) {
13
- bs.proposeSize(8);
14
- store<u64>(bs.offset, 30399761348886638);
15
- bs.offset += 8;
16
- return;
17
- } else src.type = ~src.type + 1;
18
- }
19
-
20
12
  switch (src.type) {
21
- case JSON.Types.Null:
13
+ case JSON.Types.Null:
22
14
  bs.proposeSize(8);
23
15
  store<u64>(bs.offset, 30399761348886638);
24
16
  bs.offset += 8;
@@ -35,6 +27,18 @@ export function serializeArbitrary(src: JSON.Value): void {
35
27
  case JSON.Types.U64:
36
28
  serializeInteger<u64>(src.get<u64>());
37
29
  break;
30
+ case JSON.Types.I8:
31
+ serializeInteger<i8>(src.get<i8>());
32
+ break;
33
+ case JSON.Types.I16:
34
+ serializeInteger<i16>(src.get<i16>());
35
+ break;
36
+ case JSON.Types.I32:
37
+ serializeInteger<i32>(src.get<i32>());
38
+ break;
39
+ case JSON.Types.I64:
40
+ serializeInteger<i64>(src.get<i64>());
41
+ break;
38
42
  case JSON.Types.F32:
39
43
  serializeFloat<f32>(src.get<f32>());
40
44
  break;
@@ -55,6 +59,10 @@ export function serializeArbitrary(src: JSON.Value): void {
55
59
  serializeObject(src.get<JSON.Obj>());
56
60
  break;
57
61
  }
62
+ case JSON.Types.Map: {
63
+ serializeMap(src.get<Map<string, JSON.Value>>());
64
+ break;
65
+ }
58
66
  default: {
59
67
  const fn = JSON.Value.METHODS.get(src.type - JSON.Types.Struct);
60
68
  const ptr = src.get<usize>();
@@ -0,0 +1,34 @@
1
+ import { bs } from "../../../lib/as-bs";
2
+ import { COMMA, BRACKET_RIGHT, BRACKET_LEFT } from "../../custom/chars";
3
+ import { JSON } from "../..";
4
+
5
+ export function serializeSet<T extends Set<any>>(src: T): void {
6
+ bs.proposeSize(4);
7
+ const srcSize = src.size;
8
+ if (srcSize == 0) {
9
+ store<u32>(bs.offset, 6094939); // []
10
+ bs.offset += 4;
11
+ return;
12
+ }
13
+
14
+ const values = src.values();
15
+ const end = srcSize - 1;
16
+
17
+ store<u16>(bs.offset, BRACKET_LEFT);
18
+ bs.offset += 2;
19
+
20
+ for (let i = 0; i < end; i++) {
21
+ const block = unchecked(values[i]);
22
+ // @ts-ignore: type
23
+ JSON.__serialize<indexof<T>>(block);
24
+ bs.growSize(2);
25
+ store<u16>(bs.offset, COMMA);
26
+ bs.offset += 2;
27
+ }
28
+
29
+ const lastBlock = unchecked(values[end]);
30
+ // @ts-ignore: type
31
+ JSON.__serialize<indexof<T>>(lastBlock);
32
+ store<u16>(bs.offset, BRACKET_RIGHT);
33
+ bs.offset += 2;
34
+ }
@@ -0,0 +1,30 @@
1
+ import { bs } from "../../../lib/as-bs";
2
+ import { COMMA, BRACKET_RIGHT, BRACKET_LEFT } from "../../custom/chars";
3
+ import { JSON } from "../..";
4
+
5
+ export function serializeStaticArray<T extends StaticArray<any>>(src: T): void {
6
+ bs.proposeSize(4);
7
+ const end = src.length - 1;
8
+ let i = 0;
9
+ if (end == -1) {
10
+ store<u32>(bs.offset, 6094939); // []
11
+ bs.offset += 4;
12
+ return;
13
+ }
14
+
15
+ store<u16>(bs.offset, BRACKET_LEFT);
16
+ bs.offset += 2;
17
+
18
+ while (i < end) {
19
+ const block = unchecked(src[i++]);
20
+ JSON.__serialize<valueof<T>>(block);
21
+ bs.growSize(2);
22
+ store<u16>(bs.offset, COMMA);
23
+ bs.offset += 2;
24
+ }
25
+
26
+ const lastBlock = unchecked(src[end]);
27
+ JSON.__serialize<valueof<T>>(lastBlock);
28
+ store<u16>(bs.offset, BRACKET_RIGHT);
29
+ bs.offset += 2;
30
+ }
@@ -3,6 +3,12 @@ import { _intTo16 } from "../../custom/util";
3
3
  import { bytes } from "../../util/bytes";
4
4
  import { BACK_SLASH, QUOTE } from "../../custom/chars";
5
5
  import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
6
+ import { serializeStruct } from "./struct";
7
+
8
+ // @ts-ignore: decorator allowed
9
+ @lazy const U00_MARKER = 13511005048209500;
10
+ // @ts-ignore: decorator allowed
11
+ @lazy const U_MARKER = 7667804;
6
12
 
7
13
  /**
8
14
  * Serializes valid strings into their JSON counterpart
@@ -22,14 +28,16 @@ import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
22
28
  let lastPtr: usize = srcPtr;
23
29
  while (srcPtr < srcEnd) {
24
30
  const code = load<u16>(srcPtr);
31
+ srcPtr += 2;
32
+
25
33
  if (code == 34 || code == 92 || code < 32) {
26
- const remBytes = srcPtr - lastPtr;
34
+ const remBytes = srcPtr - lastPtr - 2;
27
35
  memory.copy(bs.offset, lastPtr, remBytes);
28
36
  bs.offset += remBytes;
29
37
  const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + (code << 2));
30
38
  if ((escaped & 0xffff) != BACK_SLASH) {
31
39
  bs.growSize(10);
32
- store<u64>(bs.offset, 13511005048209500, 0);
40
+ store<u64>(bs.offset, U00_MARKER, 0);
33
41
  store<u32>(bs.offset, escaped, 8);
34
42
  bs.offset += 12;
35
43
  } else {
@@ -37,9 +45,34 @@ import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
37
45
  store<u32>(bs.offset, escaped, 0);
38
46
  bs.offset += 4;
39
47
  }
40
- lastPtr = srcPtr + 2;
48
+ lastPtr = srcPtr;
49
+ continue;
41
50
  }
42
- srcPtr += 2;
51
+ // srcPtr += 2;
52
+ if (code < 0xD800 || code > 0xDFFF) continue;
53
+
54
+ if (code <= 0xDBFF) {
55
+ if (srcPtr <= srcEnd - 2) {
56
+ const next = load<u16>(srcPtr);
57
+ if (next >= 0xDC00 && next <= 0xDFFF) {
58
+ srcPtr += 2;
59
+ continue;
60
+ }
61
+ }
62
+ }
63
+
64
+ const remBytes = srcPtr - lastPtr - 2;
65
+ memory.copy(bs.offset, lastPtr, remBytes);
66
+ bs.offset += remBytes;
67
+
68
+ // unpaired high/low surrogate
69
+ bs.growSize(10);
70
+ store<u32>(bs.offset, U_MARKER); // \u
71
+ store<u64>(bs.offset, load<u64>(changetype<usize>(code.toString(16))), 4);
72
+ bs.offset += 12;
73
+ lastPtr = srcPtr;
74
+ continue;
75
+
43
76
  }
44
77
  const remBytes = srcEnd - lastPtr;
45
78
  memory.copy(bs.offset, lastPtr, remBytes);
@@ -47,3 +80,20 @@ import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
47
80
  store<u16>(bs.offset, QUOTE);
48
81
  bs.offset += 2;
49
82
  }
83
+
84
+ // @ts-ignore: inline
85
+ @inline function write_u_escape(code: u16): void {
86
+ bs.growSize(10);
87
+ store<u32>(bs.offset, U_MARKER); // "\u"
88
+ // write hex digits (lowercase, matches tests)
89
+ store<u16>(bs.offset + 4, hexNibble((code >> 12) & 0xF));
90
+ store<u16>(bs.offset + 6, hexNibble((code >> 8) & 0xF));
91
+ store<u16>(bs.offset + 8, hexNibble((code >> 4) & 0xF));
92
+ store<u16>(bs.offset + 10, hexNibble(code & 0xF));
93
+ bs.offset += 12;
94
+ }
95
+
96
+ // @ts-ignore: inline
97
+ @inline function hexNibble(n: u16): u16 {
98
+ return n < 10 ? (48 + n) : (87 + n);
99
+ }
@@ -3,33 +3,19 @@ import { BACK_SLASH } from "../../custom/chars";
3
3
  import { SERIALIZE_ESCAPE_TABLE } from "../../globals/tables";
4
4
  import { OBJECT, TOTAL_OVERHEAD } from "rt/common";
5
5
 
6
-
7
- // @ts-ignore: decorator allowed
8
- @lazy const LANE_MASK_HIGH = 0xFF00_FF00_FF00_FF00;
9
- // @ts-ignore: decorator allowed
10
- @lazy const ONES: u64 = 0x0101010101010101;
11
- // @ts-ignore: decorator allowed
12
- @lazy const LANE_MASK_LOW = 0x00FF_00FF_00FF_00FF;
13
- // @ts-ignore: decorator allowed
14
- @lazy const HIGHS = 0x8080808080808080;
15
- // @ts-ignore: decorator allowed
16
- @lazy const QUOTE_MASK = 0x0022_0022_0022_0022;
17
- // @ts-ignore: decorator allowed
18
- @lazy const BACKSLASH_MASK = 0x005C_005C_005C_005C;
19
- // @ts-ignore: decorator allowed
20
- @lazy const CONTROL_MASK = 0x0020_0020_0020_0020;
21
- // @ts-ignore: decorator allowed
6
+ // @ts-expect-error: @lazy is a valid decorator
22
7
  @lazy const U00_MARKER = 13511005048209500;
8
+ // @ts-expect-error: @lazy is a valid decorator
9
+ @lazy const U_MARKER = 7667804;
23
10
 
24
11
  export function serializeString_SWAR(src: string): void {
25
12
  let srcStart = changetype<usize>(src);
26
13
 
27
14
  if (isDefined(JSON_CACHE)) {
28
- // check cache
29
15
  const e = unchecked(sc.entries[(srcStart >> 4) & sc.CACHE_MASK]);
30
16
  if (e.key == srcStart) {
31
- // bs.offset += e.len;
32
- // bs.stackSize += e.len;
17
+ bs.offset += e.len;
18
+ bs.stackSize += e.len;
33
19
  bs.cacheOutput = e.ptr;
34
20
  bs.cacheOutputLen = e.len;
35
21
  return;
@@ -44,38 +30,68 @@ export function serializeString_SWAR(src: string): void {
44
30
  store<u16>(bs.offset, 34); // "
45
31
  bs.offset += 2;
46
32
 
47
- while (srcStart <= srcEnd8) {
48
- const block = load<u64>(srcStart);
33
+ while (srcStart < srcEnd8) {
34
+ let block = load<u64>(srcStart);
49
35
  store<u64>(bs.offset, block);
50
36
 
51
- let mask = v64x4_should_escape(block);
37
+ let mask = detect_escapable_u64_swar_unsafe(block);
52
38
 
53
- while (mask != 0) {
54
- const lane_index = usize(ctz(mask) >> 3); // 0 2 4 6
55
- const src_offset = srcStart + lane_index;
56
- // const dst_offset = bs.offset + lane_index;
57
- const code = load<u16>(src_offset) << 2;
58
- // console.log("lane: " + lane_index.toString())
59
- const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + code);
39
+ if (mask === 0) {
40
+ srcStart += 8;
41
+ bs.offset += 8;
42
+ continue;
43
+ }
60
44
 
61
- mask = mask & ~(0xFF << (lane_index << 3));
62
- if ((escaped & 0xffff) != BACK_SLASH) {
63
- bs.growSize(10);
64
- const dst_offset = bs.offset + lane_index;
65
- store<u64>(dst_offset, U00_MARKER);
66
- store<u32>(dst_offset, escaped, 8);
67
- store<u64>(dst_offset, load<u64>(src_offset, 2), 12); // unsafe. can overflow here
68
- // memory.copy(dst_offset + 12, src_offset + 2, (4 - lane_index) << 1);
69
- bs.offset += 10;
70
- } else {
71
- bs.growSize(2);
72
- const dst_offset = bs.offset + lane_index;
73
- store<u32>(dst_offset, escaped);
74
- store<u64>(dst_offset, load<u64>(src_offset, 2), 4);
75
- // memory.copy(dst_offset + 4, src_offset + 2, (4 - lane_index) << 1);
76
- bs.offset += 2;
45
+ do {
46
+ const laneIdx = usize(ctz(mask) >> 3);
47
+ const srcIdx = srcStart + laneIdx;
48
+ // Even (0 2 4 6) -> Confirmed ASCII Escape
49
+ // Odd (1 3 5 7) -> Possibly a Unicode code unit or surrogate
50
+ if ((laneIdx & 1) === 0 && (mask & (0xFF << (laneIdx + 1 << 3))) === 0) {
51
+ mask &= ~(0xFFFF << (laneIdx << 3));
52
+ const code = load<u16>(srcIdx);
53
+ const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + (code << 2));
54
+
55
+ if ((escaped & 0xffff) != BACK_SLASH) {
56
+ bs.growSize(10);
57
+ const dstIdx = bs.offset + laneIdx;
58
+ store<u64>(dstIdx, U00_MARKER);
59
+ store<u32>(dstIdx, escaped, 8);
60
+ store<u64>(dstIdx, load<u64>(srcIdx, 2), 12);
61
+ bs.offset += 10;
62
+ } else {
63
+ bs.growSize(2);
64
+ const dstIdx = bs.offset + laneIdx;
65
+ store<u32>(dstIdx, escaped);
66
+ store<u64>(dstIdx, load<u64>(srcIdx, 2), 4);
67
+ bs.offset += 2;
68
+ }
69
+ continue;
70
+ }
71
+ mask &= ~(0xFFFF << (laneIdx << 3));
72
+
73
+ const code = load<u16>(srcIdx - 1);
74
+ if (code < 0xD800 || code > 0xDFFF) continue;
75
+
76
+ if (code <= 0xDBFF && srcIdx + 2 < srcEnd) {
77
+ const next = load<u16>(srcIdx, 1);
78
+ if (next >= 0xDC00 && next <= 0xDFFF) {
79
+ // paired surrogate
80
+ // mask &= ~(0xFF << ((laneIdx+2) << 3));
81
+ mask &= mask - 1;
82
+ continue;
83
+ }
77
84
  }
78
- }
85
+
86
+ bs.growSize(10);
87
+
88
+ // unpaired high/low surrogate
89
+ const dstIdx = bs.offset + laneIdx - 1;
90
+ store<u32>(dstIdx, U_MARKER); // \u
91
+ store<u64>(dstIdx, load<u64>(changetype<usize>(code.toString(16))), 4);
92
+ store<u64>(dstIdx, load<u64>(srcIdx, 1), 12);
93
+ bs.offset += 10;
94
+ } while (mask !== 0);
79
95
 
80
96
  srcStart += 8;
81
97
  bs.offset += 8;
@@ -83,6 +99,7 @@ export function serializeString_SWAR(src: string): void {
83
99
 
84
100
  while (srcStart <= srcEnd - 2) {
85
101
  const code = load<u16>(srcStart);
102
+
86
103
  if (code == 92 || code == 34 || code < 32) {
87
104
  const escaped = load<u32>(SERIALIZE_ESCAPE_TABLE + (code << 2));
88
105
  if ((escaped & 0xffff) != BACK_SLASH) {
@@ -95,11 +112,33 @@ export function serializeString_SWAR(src: string): void {
95
112
  store<u32>(bs.offset, escaped);
96
113
  bs.offset += 4;
97
114
  }
98
- } else {
115
+ srcStart += 2;
116
+ continue;
117
+ }
118
+
119
+ if (code < 0xD800 || code > 0xDFFF) {
99
120
  store<u16>(bs.offset, code);
100
121
  bs.offset += 2;
122
+ srcStart += 2;
123
+ continue;
101
124
  }
125
+
126
+ if (code <= 0xDBFF && srcStart + 2 <= srcEnd - 2) {
127
+ const next = load<u16>(srcStart, 2);
128
+ if (next >= 0xDC00 && next <= 0xDFFF) {
129
+ // valid surrogate pair
130
+ store<u16>(bs.offset, code);
131
+ store<u16>(bs.offset + 2, next);
132
+ bs.offset += 4;
133
+ srcStart += 4;
134
+ continue;
135
+ }
136
+ }
137
+
138
+ // unpaired high/low surrogate
139
+ write_u_escape(code);
102
140
  srcStart += 2;
141
+ continue;
103
142
  }
104
143
 
105
144
  store<u16>(bs.offset, 34); // "
@@ -108,28 +147,45 @@ export function serializeString_SWAR(src: string): void {
108
147
  if (isDefined(JSON_CACHE)) sc.insertCached(changetype<usize>(src), srcStart, srcSize);
109
148
  }
110
149
 
111
- // @ts-ignore: decorators allowed
112
- @inline function v64x4_should_escape(x: u64): u64 {
113
- // console.log("input: " + mask_to_string(x));
114
- const hi = x & 0xff00_ff00_ff00_ff00;
115
- const lo = x & 0x00ff_00ff_00ff_00ff;
116
- x &= 0x00ff_00ff_00ff_00ff;
117
- // const is_cp = hi & 0x8080_8080_8080_8080;
118
- const is_ascii = 0x0080_0080_0080_0080 & ~x; // lane remains 0x80 if ascii
119
- const lt32 = (x - 0x0020_0020_0020_0020);
120
- const sub34 = x ^ 0x0022_0022_0022_0022;
121
- const eq34 = (sub34 - 0x0001_0001_0001_0001);
122
- const sub92 = x ^ 0x005C_005C_005C_005C;
123
- const eq92 = (sub92 - 0x0001_0001_0001_0001);
124
- // console.log("low: " + mask_to_string(lo));
125
- // console.log("high: " + mask_to_string(hi));
126
- // console.log("is_cp: " + mask_to_string(is_cp));
127
- // console.log("is_ascii: " + mask_to_string(is_ascii));
128
- // console.log("lt32: " + mask_to_string(lt32));
129
- // console.log("sub34: " + mask_to_string(sub34));
130
- // console.log("eq34: " + mask_to_string(eq34));
131
- // console.log("eq92: " + mask_to_string(eq92));
132
- // console.log("pre: " + mask_to_string((lt32 | eq34 | eq92)));
133
- // console.log("out: " + mask_to_string((lt32 | eq34 | eq92) & is_ascii));
134
- return ((lt32 | eq34 | eq92)& is_ascii);
135
- }
150
+ // @ts-expect-error: @inline is a valid decorator
151
+ @inline function write_u_escape(code: u16): void {
152
+ bs.growSize(10);
153
+ store<u32>(bs.offset, U_MARKER); // "\u"
154
+ // write hex digits (lowercase, matches tests)
155
+ store<u16>(bs.offset + 4, hexNibble((code >> 12) & 0xF));
156
+ store<u16>(bs.offset + 6, hexNibble((code >> 8) & 0xF));
157
+ store<u16>(bs.offset + 8, hexNibble((code >> 4) & 0xF));
158
+ store<u16>(bs.offset + 10, hexNibble(code & 0xF));
159
+ bs.offset += 12;
160
+ }
161
+
162
+ // @ts-expect-error: @inline is a valid decorator
163
+ @inline function hexNibble(n: u16): u16 {
164
+ return n < 10 ? (48 + n) : (87 + n);
165
+ }
166
+
167
+ // @ts-expect-error: @inline is a valid decorator
168
+ @inline export function detect_escapable_u64_swar(block: u64): u64 {
169
+ const lo = block & 0x00FF_00FF_00FF_00FF;
170
+ const ascii_mask = (
171
+ ((lo - 0x0020_0020_0020_0020) |
172
+ ((lo ^ 0x0022_0022_0022_0022) - 0x0001_0001_0001_0001) |
173
+ ((lo ^ 0x005C_005C_005C_005C) - 0x0001_0001_0001_0001))
174
+ & (0x0080_0080_0080_0080 & ~lo)
175
+ );
176
+ const hi_mask = ((block - 0x0100_0100_0100_0100) & ~block & 0x8000_8000_8000_8000) ^ 0x8000_8000_8000_8000;
177
+ return (ascii_mask & (~hi_mask >> 8)) | hi_mask;
178
+ }
179
+
180
+ // @ts-expect-error: @inline is a valid decorator
181
+ @inline export function detect_escapable_u64_swar_unsafe(block: u64): u64 {
182
+ const lo = block & 0x00FF_00FF_00FF_00FF;
183
+ const ascii_mask = (
184
+ ((lo - 0x0020_0020_0020_0020) |
185
+ ((lo ^ 0x0022_0022_0022_0022) - 0x0001_0001_0001_0001) |
186
+ ((lo ^ 0x005C_005C_005C_005C) - 0x0001_0001_0001_0001))
187
+ & (0x0080_0080_0080_0080 & ~lo)
188
+ );
189
+ const hi = block & 0xFF00_FF00_FF00_FF00;
190
+ return ascii_mask | hi;
191
+ }