json-as 1.3.6 → 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/CHANGELOG.md +45 -0
  2. package/README.md +1 -1
  3. package/assembly/deserialize/helpers/uint.ts +4 -1
  4. package/assembly/deserialize/index/arbitrary.ts +7 -3
  5. package/assembly/deserialize/index/array.ts +42 -17
  6. package/assembly/deserialize/index/bool.ts +1 -1
  7. package/assembly/deserialize/index/date.ts +1 -1
  8. package/assembly/deserialize/index/float.ts +40 -1
  9. package/assembly/deserialize/index/integer.ts +68 -1
  10. package/assembly/deserialize/index/map.ts +1 -1
  11. package/assembly/deserialize/index/object.ts +1 -1
  12. package/assembly/deserialize/index/raw.ts +1 -1
  13. package/assembly/deserialize/index/set.ts +1 -1
  14. package/assembly/deserialize/index/staticarray.ts +4 -1
  15. package/assembly/deserialize/index/string.ts +32 -4
  16. package/assembly/deserialize/index/struct.ts +1 -1
  17. package/assembly/deserialize/index/typedarray.ts +30 -10
  18. package/assembly/deserialize/index/unsigned.ts +78 -1
  19. package/assembly/deserialize/index.ts +1 -0
  20. package/assembly/deserialize/{simple → naive}/array/arbitrary.ts +24 -5
  21. package/assembly/deserialize/{simple → naive}/array/array.ts +8 -2
  22. package/assembly/deserialize/naive/array/bool.ts +68 -0
  23. package/assembly/deserialize/{simple → naive}/array/box.ts +8 -2
  24. package/assembly/deserialize/naive/array/float.ts +63 -0
  25. package/assembly/deserialize/{simple → naive}/array/generic.ts +14 -7
  26. package/assembly/deserialize/naive/array/integer.ts +86 -0
  27. package/assembly/deserialize/naive/array/map.ts +47 -0
  28. package/assembly/deserialize/naive/array/object.ts +47 -0
  29. package/assembly/deserialize/{simple → naive}/array/raw.ts +34 -7
  30. package/assembly/deserialize/naive/array/string.ts +69 -0
  31. package/assembly/deserialize/naive/array/struct.ts +47 -0
  32. package/assembly/deserialize/{simple → naive}/array.ts +15 -10
  33. package/assembly/deserialize/{simple → naive}/bool.ts +6 -2
  34. package/assembly/deserialize/naive/float.ts +135 -0
  35. package/assembly/deserialize/{simple → naive}/integer.ts +10 -2
  36. package/assembly/deserialize/{simple → naive}/map.ts +106 -27
  37. package/assembly/deserialize/{simple → naive}/object.ts +65 -19
  38. package/assembly/deserialize/{simple → naive}/raw.ts +4 -1
  39. package/assembly/deserialize/{simple → naive}/set.ts +49 -19
  40. package/assembly/deserialize/{simple → naive}/staticarray/array.ts +1 -1
  41. package/assembly/deserialize/{simple → naive}/staticarray/bool.ts +1 -1
  42. package/assembly/deserialize/{simple → naive}/staticarray/float.ts +1 -1
  43. package/assembly/deserialize/{simple → naive}/staticarray/integer.ts +1 -1
  44. package/assembly/deserialize/{simple → naive}/staticarray/string.ts +11 -3
  45. package/assembly/deserialize/{simple → naive}/staticarray/struct.ts +1 -2
  46. package/assembly/deserialize/{simple → naive}/staticarray.ts +68 -18
  47. package/assembly/deserialize/naive/string.ts +199 -0
  48. package/assembly/deserialize/{simple → naive}/struct.ts +5 -1
  49. package/assembly/deserialize/{simple → naive}/typedarray.ts +17 -4
  50. package/assembly/deserialize/{simple → naive}/unsigned.ts +10 -15
  51. package/assembly/deserialize/simd/array/integer.ts +339 -62
  52. package/assembly/deserialize/simd/float.ts +303 -0
  53. package/assembly/deserialize/simd/integer.ts +233 -0
  54. package/assembly/deserialize/simd/string.ts +266 -107
  55. package/assembly/deserialize/swar/array/arbitrary.ts +11 -3
  56. package/assembly/deserialize/swar/array/array.ts +40 -9
  57. package/assembly/deserialize/swar/array/bool.ts +28 -5
  58. package/assembly/deserialize/swar/array/box.ts +11 -3
  59. package/assembly/deserialize/swar/array/float.ts +295 -7
  60. package/assembly/deserialize/swar/array/generic.ts +28 -7
  61. package/assembly/deserialize/swar/array/integer.ts +363 -112
  62. package/assembly/deserialize/swar/array/map.ts +11 -3
  63. package/assembly/deserialize/swar/array/object.ts +37 -25
  64. package/assembly/deserialize/swar/array/raw.ts +11 -3
  65. package/assembly/deserialize/swar/array/shared.ts +63 -14
  66. package/assembly/deserialize/swar/array/string.ts +140 -7
  67. package/assembly/deserialize/swar/array/struct.ts +66 -12
  68. package/assembly/deserialize/swar/array.ts +12 -51
  69. package/assembly/deserialize/swar/float.ts +304 -0
  70. package/assembly/deserialize/swar/integer.ts +246 -0
  71. package/assembly/deserialize/swar/string.ts +213 -294
  72. package/assembly/deserialize/swar/typedarray.ts +224 -0
  73. package/assembly/index.d.ts +3 -1
  74. package/assembly/index.ts +402 -261
  75. package/assembly/serialize/index/array.ts +1 -1
  76. package/assembly/serialize/index/bool.ts +1 -1
  77. package/assembly/serialize/index/date.ts +1 -1
  78. package/assembly/serialize/index/float.ts +5 -1
  79. package/assembly/serialize/index/integer.ts +1 -1
  80. package/assembly/serialize/index/map.ts +1 -1
  81. package/assembly/serialize/index/raw.ts +1 -1
  82. package/assembly/serialize/index/set.ts +1 -1
  83. package/assembly/serialize/index/staticarray.ts +1 -1
  84. package/assembly/serialize/index/string.ts +1 -1
  85. package/assembly/serialize/index/struct.ts +1 -1
  86. package/assembly/serialize/index/typedarray.ts +21 -12
  87. package/assembly/serialize/index.ts +1 -0
  88. package/assembly/serialize/naive/array.ts +351 -0
  89. package/assembly/serialize/{simple → naive}/float.ts +4 -1
  90. package/assembly/serialize/naive/integer.ts +19 -0
  91. package/assembly/serialize/{simple → naive}/map.ts +6 -2
  92. package/assembly/serialize/{simple → naive}/raw.ts +5 -1
  93. package/assembly/serialize/{simple → naive}/set.ts +6 -1
  94. package/assembly/serialize/{simple → naive}/staticarray.ts +6 -1
  95. package/assembly/serialize/{simple → naive}/string.ts +1 -2
  96. package/assembly/serialize/{simple → naive}/typedarray.ts +10 -3
  97. package/assembly/serialize/simd/string.ts +6 -2
  98. package/assembly/serialize/swar/string.ts +15 -141
  99. package/assembly/util/atoi-fast.ts +81 -0
  100. package/assembly/util/concat.ts +5 -1
  101. package/assembly/util/dragonbox-cache.ts +443 -2
  102. package/assembly/util/dragonbox.ts +53 -17
  103. package/assembly/util/itoa-fast.ts +241 -0
  104. package/assembly/util/masks.ts +18 -1
  105. package/assembly/util/parsefloat-fast.ts +167 -0
  106. package/assembly/util/scanValueEnd.ts +78 -0
  107. package/assembly/util/scientific.ts +132 -0
  108. package/assembly/util/simd-int.ts +191 -0
  109. package/assembly/util/snp.ts +4 -1
  110. package/assembly/util/swar-int.ts +248 -0
  111. package/assembly/util/swar.ts +13 -3
  112. package/lib/as-bs.ts +27 -6
  113. package/package.json +15 -11
  114. package/transform/lib/builder.d.ts.map +1 -1
  115. package/transform/lib/builder.js +13 -5
  116. package/transform/lib/builder.js.map +1 -1
  117. package/transform/lib/index.d.ts +5 -0
  118. package/transform/lib/index.d.ts.map +1 -1
  119. package/transform/lib/index.js +1046 -340
  120. package/transform/lib/index.js.map +1 -1
  121. package/transform/lib/linkers/alias.d.ts.map +1 -1
  122. package/transform/lib/linkers/alias.js.map +1 -1
  123. package/transform/lib/linkers/custom.d.ts.map +1 -1
  124. package/transform/lib/linkers/custom.js +3 -2
  125. package/transform/lib/linkers/custom.js.map +1 -1
  126. package/transform/lib/linkers/imports.d.ts.map +1 -1
  127. package/transform/lib/linkers/imports.js.map +1 -1
  128. package/transform/lib/types.d.ts.map +1 -1
  129. package/transform/lib/types.js +54 -16
  130. package/transform/lib/types.js.map +1 -1
  131. package/transform/lib/util.d.ts.map +1 -1
  132. package/transform/lib/util.js +1 -1
  133. package/transform/lib/util.js.map +1 -1
  134. package/transform/lib/visitor.d.ts.map +1 -1
  135. package/transform/lib/visitor.js +2 -1
  136. package/transform/lib/visitor.js.map +1 -1
  137. package/assembly/custom/util.ts +0 -310
  138. package/assembly/deserialize/simple/arbitrary.ts +0 -23
  139. package/assembly/deserialize/simple/array/bool.ts +0 -17
  140. package/assembly/deserialize/simple/array/float.ts +0 -28
  141. package/assembly/deserialize/simple/array/integer.ts +0 -27
  142. package/assembly/deserialize/simple/array/map.ts +0 -28
  143. package/assembly/deserialize/simple/array/object.ts +0 -28
  144. package/assembly/deserialize/simple/array/string.ts +0 -23
  145. package/assembly/deserialize/simple/array/struct.ts +0 -28
  146. package/assembly/deserialize/simple/float.ts +0 -201
  147. package/assembly/deserialize/simple/string.ts +0 -132
  148. package/assembly/serialize/simple/arbitrary.ts +0 -79
  149. package/assembly/serialize/simple/array.ts +0 -86
  150. package/assembly/serialize/simple/integer.ts +0 -20
  151. package/assembly/serialize/simple/object.ts +0 -42
  152. /package/assembly/deserialize/{simple → naive}/date.ts +0 -0
  153. /package/assembly/serialize/{simple → naive}/bool.ts +0 -0
  154. /package/assembly/serialize/{simple → naive}/date.ts +0 -0
  155. /package/assembly/serialize/{simple → naive}/struct.ts +0 -0
@@ -0,0 +1,132 @@
1
+ // Direct decimal-to-f64 conversion from a `(u64 mantissa, i32 decimal exp)`
2
+ // pair. Bit-identical to `f64.parse` / `f32.parse` for any input the SWAR
3
+ // float deserializer can produce.
4
+ //
5
+ // Ported from AssemblyScript std's `util/string.ts` (which itself is adapted
6
+ // from the "metallic" library). The reason we duplicate it: AS std exposes
7
+ // `strtod(str)` but the underlying `scientific(mantissa, exp)` helper is
8
+ // module-private. Going through `strtod` requires a string allocation and a
9
+ // re-parse of digits we've already accumulated in the SWAR loop. Calling
10
+ // `scientific` directly skips both costs.
11
+ //
12
+ // scientific() is correctly rounded for all u64 mantissas and decimal
13
+ // exponents that fit in IEEE-754 f64's range — including the [2^53, 2^64)
14
+ // mantissa range that breaks Lemire's single-fmul fast path.
15
+
16
+ const POWERS10: usize = memory.data<f64>([
17
+ 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14,
18
+ 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
19
+ ]);
20
+
21
+ // 5^i for i in [0, 13]: a table lookup standing in for `ipow32(5, e)`, sized
22
+ // to cover every exponent that scaledown and scaleup actually pass in.
23
+ const POWERS5: usize = memory.data<i32>([
24
+ 1, 5, 25, 125, 625, 3125, 15625, 78125, 390625, 1953125, 9765625, 48828125,
25
+ 244140625, 1220703125,
26
+ ]);
27
+
28
+ // @ts-ignore: inline
29
+ @inline function pow10(n: i32): f64 {
30
+ return load<f64>(POWERS10 + ((<usize>n) << alignof<f64>()));
31
+ }
32
+
33
+ // @ts-ignore: inline
34
+ @inline function pow5_32(n: i32): i32 {
35
+ return load<i32>(POWERS5 + ((<usize>n) << alignof<i32>()));
36
+ }
37
+
38
+ // __fixmulShift is mutated by `fixmul` and read by `scaleup`. AS std uses a
39
+ // module-level @lazy variable for the same reason; matching that.
40
+ // @ts-ignore: lazy decorator
41
+ @lazy let __fixmulShift: u64 = 0;
42
+
43
+ // @ts-ignore: inline
44
+ @inline function fixmul(a: u64, b: u32): u64 { // a * b reduced to its top 64 significant bits; bits shifted out are tallied in __fixmulShift
45
+ const low = (a & 0xffffffff) * b; // low 32 bits of a, times b
46
+ const high = (a >> 32) * b + (low >> 32); // high 32 bits of a, times b, plus the carry from low: product bits 32..95
47
+ const overflow = <u32>(high >> 32); // product bits 64..95, the part that does not fit in a u64
48
+ const space = clz(overflow); // unused leading bits in that overflow word
49
+ const revspace: u64 = 32 - space; // significant bits above bit 63, i.e. the right shift needed
50
+ __fixmulShift += revspace; // remember the shift so the caller can rescale
51
+ return (
52
+ ((high << space) | ((low & 0xffffffff) >> revspace)) + // (96-bit product) >> revspace
53
+ (((low << space) >> 31) & 1) // round up when the highest discarded bit is set
54
+ );
55
+ }
56
+
57
+ // @ts-ignore: inline
58
+ @inline function scaledown(significand: u64, exp: i32): f64 {
59
+ const denom: u64 = 6103515625; // 1e14 * 0x1p-14
60
+ const scale = reinterpret<f64>(0x3f06849b86a12b9b); // 1e-14 * 0x1p32
61
+
62
+ let shift = clz(significand);
63
+ significand <<= shift;
64
+ shift = exp - shift;
65
+
66
+ for (; exp <= -14; exp += 14) {
67
+ const q = significand / denom;
68
+ const r = significand % denom;
69
+ const s = clz(q);
70
+ significand = (q << s) + <u64>nearest(scale * <f64>(r << (s - 18)));
71
+ shift -= s;
72
+ }
73
+ const b = <u64>pow5_32(-exp);
74
+ const q = significand / b;
75
+ const r = significand % b;
76
+ const s = clz(q);
77
+ significand =
78
+ (q << s) +
79
+ <u64>(reinterpret<f64>(reinterpret<u64>(<f64>r) + (s << 52)) / <f64>b);
80
+ shift -= s;
81
+
82
+ return NativeMath.scalbn(<f64>significand, <i32>shift);
83
+ }
84
+
85
+ // @ts-ignore: inline
86
+ @inline function scaleup(significand: u64, exp: i32): f64 {
87
+ const coeff: u32 = 1220703125; // 1e13 * 0x1p-13;
88
+ let shift = ctz(significand);
89
+ significand >>= shift;
90
+ shift += exp;
91
+
92
+ __fixmulShift = shift;
93
+ for (; exp >= 13; exp -= 13) {
94
+ significand = fixmul(significand, coeff);
95
+ }
96
+ significand = fixmul(significand, <u32>pow5_32(exp));
97
+ shift = __fixmulShift;
98
+ return NativeMath.scalbn(<f64>significand, <i32>shift);
99
+ }
100
+
101
+ /**
102
+ * Construct an f64 from a u64 mantissa and decimal exponent. Result is
103
+ * correctly rounded — bit-identical to `f64.parse` for any input the SWAR
104
+ * float deserializer can pre-parse into this form.
105
+ *
106
+ * Caller guarantees the digit run that produced `significand` was already
107
+ * scanned and validated; this function only handles the value computation,
108
+ * not the lexing.
109
+ *
110
+ * @param significand u64 mantissa (any value from 0 to U64.MAX_VALUE)
111
+ * @param exp Decimal exponent (e.g. for "12.34" pass 1234 and -2)
112
+ * @returns The correctly rounded f64, or 0 / Infinity at the extremes.
113
+ */
114
+ // @ts-ignore: inline
115
+ @inline export function scientific(significand: u64, exp: i32): f64 {
116
+ if (!significand || exp < -342) return 0;
117
+ if (exp > 308) return Infinity;
118
+ let significandf = <f64>significand;
119
+ if (!exp) return significandf;
120
+ if (exp > 22 && exp <= 22 + 15) {
121
+ significandf *= pow10(exp - 22);
122
+ exp = 22;
123
+ }
124
+ if (significand <= 9007199254740991 && abs(exp) <= 22) {
125
+ if (exp > 0) return significandf * pow10(exp);
126
+ return significandf / pow10(-exp);
127
+ } else if (exp < 0) {
128
+ return scaledown(significand, exp);
129
+ } else {
130
+ return scaleup(significand, exp);
131
+ }
132
+ }
@@ -0,0 +1,191 @@
1
+ // SIMD (v128) integer-digit parsing kernels over UTF-16 sources.
2
+ //
3
+ // Requires `--enable simd` at compile time. Imported only by the SIMD-mode
4
+ // dispatch paths and dead-code-eliminated when JSON_MODE != SIMD.
5
+ //
6
+ // Algorithm is the Lemire-style narrow-extmul-dot pipeline used by simdjson:
7
+ //
8
+ // 1. `i16x8.sub` subtracts `'0'` from each UTF-16 lane.
9
+ // 2. `i8x16.narrow_i16x8_u` packs two 8-lane u16 vectors into one 16-lane u8
10
+ // vector. This pack is free in SIMD and is the move that makes the SWAR
11
+ // packing problem disappear.
12
+ // 3. `i16x8.extmul_low/high_i8x16_u(packed, (10, 1, ...))` multiplies
13
+ // adjacent bytes by 10 and 1, encoding the first pair-fold step in a
14
+ // vector op.
15
+ // 4. `i32x4.extadd_pairwise_i16x8_u` pairwise-sums adjacent u16 lanes into
16
+ // u32 lanes, completing the first pair-fold.
17
+ // 5. `i16x8.narrow_i32x4_u + i32x4.dot_i16x8_s(_, (100, 1, 100, 1, ...))`
18
+ // narrows the u32 pair-values to u16 lanes, then folds each adjacent pair
19
+ // into one four-digit u32 group (`pair_lo * 100 + pair_hi`) via dot product.
20
+
21
+ // @ts-expect-error: @lazy is a valid decorator
22
+ @lazy const SPLAT_30 = i16x8.splat(0x30);
23
+ // @ts-expect-error: @lazy is a valid decorator
24
+ @lazy const SPLAT_09 = i16x8.splat(9);
25
+ // @ts-expect-error: @lazy is a valid decorator
26
+ @lazy const ZERO_I16X8 = i16x8.splat(0);
27
+ // @ts-expect-error: @lazy is a valid decorator
28
+ @lazy const ZERO_I32X4 = i32x4.splat(0);
29
+
30
+ // Weights for the first pair-fold step (`digit_lo * 10 + digit_hi`).
31
+ // @ts-expect-error: @lazy is a valid decorator
32
+ @lazy const PACK_WEIGHTS_10_1 = i8x16(
33
+ 10,
34
+ 1,
35
+ 10,
36
+ 1,
37
+ 10,
38
+ 1,
39
+ 10,
40
+ 1,
41
+ 0,
42
+ 0,
43
+ 0,
44
+ 0,
45
+ 0,
46
+ 0,
47
+ 0,
48
+ 0,
49
+ );
50
+ // @ts-expect-error: @lazy is a valid decorator
51
+ @lazy const PACK_WEIGHTS_10_1_FULL = i8x16(
52
+ 10,
53
+ 1,
54
+ 10,
55
+ 1,
56
+ 10,
57
+ 1,
58
+ 10,
59
+ 1,
60
+ 10,
61
+ 1,
62
+ 10,
63
+ 1,
64
+ 10,
65
+ 1,
66
+ 10,
67
+ 1,
68
+ );
69
+
70
+ // Weights for the second fold step (`pair_lo * 100 + pair_hi`).
71
+ // @ts-expect-error: @lazy is a valid decorator
72
+ @lazy const PAIR_WEIGHTS_100_1 = i16x8(100, 1, 100, 1, 0, 0, 0, 0);
73
+ // @ts-expect-error: @lazy is a valid decorator
74
+ @lazy const PAIR_WEIGHTS_100_1_FULL = i16x8(100, 1, 100, 1, 100, 1, 100, 1);
75
+
76
+ /**
77
+ * Parse eight UTF-16 ASCII digits (16 source bytes) into the 8-digit `u32`
78
+ * value using SIMD. The first code unit is the most significant digit.
79
+ *
80
+ * Returns `U32.MAX_VALUE` on any non-digit lane (no valid result can equal it: the largest 8-digit value is 99,999,999).
81
+ *
82
+ * @param srcStart Pointer to 16 source bytes (8 UTF-16 chars).
83
+ * @returns The parsed 8-digit value, or `U32.MAX_VALUE` on invalid input.
84
+ */
85
+ // @ts-expect-error: @inline is a valid decorator
86
+ @inline export function parse8Digits_SIMD(srcStart: usize): u32 {
87
+ const block = load<v128>(srcStart);
88
+ const digits = i16x8.sub(block, SPLAT_30); // per lane: c - '0'; anything below '0' wraps to a large unsigned value
89
+ if (v128.any_true(i16x8.gt_u(digits, SPLAT_09))) return U32.MAX_VALUE; // one unsigned compare rejects both c < '0' and c > '9'
90
+ const packed = i8x16.narrow_i16x8_u(digits, ZERO_I16X8); // bytes 0..7 = the eight digits, bytes 8..15 = 0
91
+ const products = i16x8.extmul_low_i8x16_u(packed, PACK_WEIGHTS_10_1); // d0*10, d1, d2*10, d3, d4*10, d5, d6*10, d7
92
+ const pairs = i32x4.extadd_pairwise_i16x8_u(products); // four two-digit values: d0d1, d2d3, d4d5, d6d7
93
+ const pairs16 = i16x8.narrow_i32x4_u(pairs, ZERO_I32X4); // same four values in u16 lanes 0..3, lanes 4..7 = 0
94
+ const groups = i32x4.dot_i16x8_s(pairs16, PAIR_WEIGHTS_100_1); // lane 0 = d0d1*100 + d2d3, lane 1 = d4d5*100 + d6d7
95
+ const lo = i32x4.extract_lane(groups, 0); // leading four digits
96
+ const hi = i32x4.extract_lane(groups, 1); // trailing four digits
97
+ return <u32>lo * 10_000 + <u32>hi;
98
+ }
99
+
100
+ /**
101
+ * Same as {@link parse8Digits_SIMD} but with the validation step removed.
102
+ * Used in consume-to-end paths.
103
+ *
104
+ * @param srcStart Pointer to 16 source bytes (8 UTF-16 chars).
105
+ * @returns The parsed 8-digit value.
106
+ */
107
+ // @ts-expect-error: @inline is a valid decorator
108
+ @inline export function parse8Digits_SIMD_Unsafe(srcStart: usize): u32 {
109
+ const block = load<v128>(srcStart);
110
+ const digits = i16x8.sub(block, SPLAT_30);
111
+ const packed = i8x16.narrow_i16x8_u(digits, ZERO_I16X8);
112
+ const products = i16x8.extmul_low_i8x16_u(packed, PACK_WEIGHTS_10_1);
113
+ const pairs = i32x4.extadd_pairwise_i16x8_u(products);
114
+ const pairs16 = i16x8.narrow_i32x4_u(pairs, ZERO_I32X4);
115
+ const groups = i32x4.dot_i16x8_s(pairs16, PAIR_WEIGHTS_100_1);
116
+ const lo = i32x4.extract_lane(groups, 0);
117
+ const hi = i32x4.extract_lane(groups, 1);
118
+ return <u32>lo * 10_000 + <u32>hi;
119
+ }
120
+
121
+ /**
122
+ * Parse sixteen UTF-16 ASCII digits (32 source bytes) into one 16-digit
123
+ * `u64` value using SIMD.
124
+ *
125
+ * Two `v128` loads. Combined OR'd validation across both halves means one
126
+ * branch covers all 16 digits. Both halves' `extmul`s feed a single dot
127
+ * product, producing 4 four-digit groups that the final parallel-pair
128
+ * scalar combine merges.
129
+ *
130
+ * Returns `U64.MAX_VALUE` on any non-digit lane.
131
+ *
132
+ * @param srcStart Pointer to 32 source bytes (16 UTF-16 chars).
133
+ * @returns The parsed 16-digit value, or `U64.MAX_VALUE` on invalid input.
134
+ */
135
+ // @ts-expect-error: @inline is a valid decorator
136
+ @inline export function parse16Digits_SIMD(srcStart: usize): u64 {
137
+ const block0 = load<v128>(srcStart);
138
+ const block1 = load<v128>(srcStart, 16);
139
+
140
+ const digits0 = i16x8.sub(block0, SPLAT_30);
141
+ const digits1 = i16x8.sub(block1, SPLAT_30);
142
+
143
+ const bad0 = i16x8.gt_u(digits0, SPLAT_09);
144
+ const bad1 = i16x8.gt_u(digits1, SPLAT_09);
145
+ if (v128.any_true(v128.or(bad0, bad1))) return U64.MAX_VALUE;
146
+
147
+ const packed = i8x16.narrow_i16x8_u(digits0, digits1);
148
+ const products_lo = i16x8.extmul_low_i8x16_u(packed, PACK_WEIGHTS_10_1_FULL);
149
+ const products_hi = i16x8.extmul_high_i8x16_u(packed, PACK_WEIGHTS_10_1_FULL);
150
+ const pairs_lo = i32x4.extadd_pairwise_i16x8_u(products_lo);
151
+ const pairs_hi = i32x4.extadd_pairwise_i16x8_u(products_hi);
152
+ const pairs16 = i16x8.narrow_i32x4_u(pairs_lo, pairs_hi);
153
+ const groups = i32x4.dot_i16x8_s(pairs16, PAIR_WEIGHTS_100_1_FULL);
154
+
155
+ const g0 = i32x4.extract_lane(groups, 0);
156
+ const g1 = i32x4.extract_lane(groups, 1);
157
+ const g2 = i32x4.extract_lane(groups, 2);
158
+ const g3 = i32x4.extract_lane(groups, 3);
159
+ const pair01 = <u64>g0 * 10_000 + <u64>g1;
160
+ const pair23 = <u64>g2 * 10_000 + <u64>g3;
161
+ return pair01 * 100_000_000 + pair23;
162
+ }
163
+
164
+ /**
165
+ * Same as {@link parse16Digits_SIMD} but with the validation step removed.
166
+ * Used in consume-to-end paths.
167
+ *
168
+ * @param srcStart Pointer to 32 source bytes (16 UTF-16 chars).
169
+ * @returns The parsed 16-digit value.
170
+ */
171
+ // @ts-expect-error: @inline is a valid decorator
172
+ @inline export function parse16Digits_SIMD_Unsafe(srcStart: usize): u64 {
173
+ const block0 = load<v128>(srcStart);
174
+ const block1 = load<v128>(srcStart, 16);
175
+ const digits0 = i16x8.sub(block0, SPLAT_30);
176
+ const digits1 = i16x8.sub(block1, SPLAT_30);
177
+ const packed = i8x16.narrow_i16x8_u(digits0, digits1);
178
+ const products_lo = i16x8.extmul_low_i8x16_u(packed, PACK_WEIGHTS_10_1_FULL);
179
+ const products_hi = i16x8.extmul_high_i8x16_u(packed, PACK_WEIGHTS_10_1_FULL);
180
+ const pairs_lo = i32x4.extadd_pairwise_i16x8_u(products_lo);
181
+ const pairs_hi = i32x4.extadd_pairwise_i16x8_u(products_hi);
182
+ const pairs16 = i16x8.narrow_i32x4_u(pairs_lo, pairs_hi);
183
+ const groups = i32x4.dot_i16x8_s(pairs16, PAIR_WEIGHTS_100_1_FULL);
184
+ const g0 = i32x4.extract_lane(groups, 0);
185
+ const g1 = i32x4.extract_lane(groups, 1);
186
+ const g2 = i32x4.extract_lane(groups, 2);
187
+ const g3 = i32x4.extract_lane(groups, 3);
188
+ const pair01 = <u64>g0 * 10_000 + <u64>g1;
189
+ const pair23 = <u64>g2 * 10_000 + <u64>g3;
190
+ return pair01 * 100_000_000 + pair23;
191
+ }
@@ -7,7 +7,10 @@ import { POW_TEN_TABLE_32, POW_TEN_TABLE_64 } from "../globals/tables";
7
7
  import { atoi } from "./atoi";
8
8
 
9
9
  // @ts-ignore: Decorator valid here
10
- @inline export function snp<T extends number>(srcStart: usize, srcEnd: usize): T {
10
+ @inline export function snp<T extends number>(
11
+ srcStart: usize,
12
+ srcEnd: usize,
13
+ ): T {
11
14
  // @ts-ignore: type
12
15
  let val: T = 0;
13
16
  let char = load<u16>(srcStart) - 48;
@@ -0,0 +1,248 @@
1
+ // 4-digit lane masks
2
+ const LANE_LO_4: u64 = 0x00ff_00ff_00ff_00ff;
3
+ const ZERO_4: u64 = 0x0030_0030_0030_0030;
4
+ const RANGE_ADD_4: u64 = 0x0006_0006_0006_0006;
5
+ const RANGE_MASK_4: u64 = 0xfff0_fff0_fff0_fff0;
6
+
7
+ // 32-bit-pair masks used by the pair-multiply fold
8
+ const U32_LO_PAIR: u64 = 0x0000_ffff_0000_ffff;
9
+
10
+ /**
11
+ * Magic multiplier for the 4-digit final combine.
12
+ *
13
+ * With `pairs = (cd << 32) | ab` where `ab` and `cd` are two-digit fold
14
+ * results each in `[0, 99]`, multiplying by this constant places
15
+ * `ab*100 + cd` in the high 32 bits via the u64 multiplication's cross-term.
16
+ * Taking the high 32 yields the 4-digit value `1000a + 100b + 10c + d`.
17
+ */
18
+ const FINAL_4_MAGIC: u64 = 0x0000_0064_0000_0001;
19
+
20
+ /**
21
+ * Parse four UTF-16 ASCII digits in a `u64` into the 4-digit value, using
22
+ * the baseline scalar combine. Kept for reference and head-to-head benches.
23
+ * Lane 0 (the lowest 16 bits) is the most significant digit.
24
+ * Returns `U32.MAX_VALUE` on any non-digit lane, including non-ASCII code units whose low byte looks like a digit (e.g. U+0131).
25
+ *
26
+ * @param block Four UTF-16 code units packed into a `u64`.
27
+ * @returns The parsed 4-digit value, or `U32.MAX_VALUE` on invalid input.
28
+ */
29
+ // @ts-expect-error: @inline is a valid decorator
30
+ @inline export function parse4Digits_Baseline(block: u64): u32 {
31
+ const digits = (block & LANE_LO_4) - ZERO_4;
32
+ // `digits` has lost each lane's high byte, so fold those bytes into the reject mask; otherwise U+0131 would parse as '1'.
33
+ const bad = ((digits | (digits + RANGE_ADD_4)) & RANGE_MASK_4) | (block & ~LANE_LO_4);
34
+ if (bad != 0) return U32.MAX_VALUE;
35
+ return <u32>(
36
+ (<u32>(digits & 0xffff) * 1000 +
37
+ <u32>((digits >> 16) & 0xffff) * 100 +
38
+ <u32>((digits >> 32) & 0xffff) * 10 +
39
+ <u32>(digits >> 48))
40
+ );
41
+ }
42
+
43
+ /**
44
+ * Parse four UTF-16 ASCII digits into the 4-digit value.
45
+ *
46
+ * Three Lemire-inspired op reductions vs the baseline:
47
+ *
48
+ * 1. Skip the initial `& LANE_LO_4` mask. For valid UTF-16 ASCII the high
49
+ * byte of each lane is already 0, so the AND is redundant. Validation
50
+ * runs before any multiply and rejects every input where dropping the
51
+ * AND would produce inter-lane carry corruption.
52
+ * 2. Mul-then-mask pair fold: apply `digits * 10 + (digits >> 16)` to the
53
+ * whole `u64` and mask after. Saves one AND vs the lane-isolated form.
54
+ * 3. Magic-multiplier final combine: see {@link FINAL_4_MAGIC}.
55
+ *
56
+ * Returns `U32.MAX_VALUE` on any non-digit lane.
57
+ *
58
+ * @param block Four UTF-16 code units packed into a `u64`.
59
+ * @returns The parsed 4-digit value, or `U32.MAX_VALUE` on invalid input.
60
+ */
61
+ // @ts-expect-error: @inline is a valid decorator
62
+ @inline export function parse4Digits_PairMul(block: u64): u32 {
63
+ const digits = block - ZERO_4;
64
+ if (((digits | (digits + RANGE_ADD_4)) & RANGE_MASK_4) != 0) {
65
+ return U32.MAX_VALUE;
66
+ }
67
+ const pairs = (digits * 10 + (digits >> 16)) & U32_LO_PAIR;
68
+ return <u32>((pairs * FINAL_4_MAGIC) >> 32);
69
+ }
70
+
71
+ /**
72
+ * Same as {@link parse4Digits_PairMul} but with the validation step removed.
73
+ * Used in consume-to-end paths where the caller has already bounded the
74
+ * digit range, so per-stride validation isn't needed.
75
+ *
76
+ * @param block Four UTF-16 code units packed into a `u64`.
77
+ * @returns The parsed 4-digit value.
78
+ */
79
+ // @ts-expect-error: @inline is a valid decorator
80
+ @inline export function parse4Digits_PairMul_Unsafe(block: u64): u32 {
81
+ const digits = block - ZERO_4;
82
+ const pairs = (digits * 10 + (digits >> 16)) & U32_LO_PAIR;
83
+ return <u32>((pairs * FINAL_4_MAGIC) >> 32);
84
+ }
85
+
86
+ /**
87
+ * Parse eight UTF-16 ASCII digits across two `u64` blocks into one 8-digit
88
+ * `u32` value.
89
+ *
90
+ * Caller passes two consecutive `u64` loads (16 source bytes). Validates
91
+ * both halves with one combined check, then folds each half via
92
+ * {@link parse4Digits_PairMul} and combines as `lo * 10_000 + hi`.
93
+ *
94
+ * Returns `U32.MAX_VALUE` on any non-digit lane.
95
+ *
96
+ * @param lo The first `u64`, four UTF-16 code units.
97
+ * @param hi The second `u64`, four UTF-16 code units.
98
+ * @returns The parsed 8-digit value, or `U32.MAX_VALUE` on invalid input.
99
+ */
100
+ // @ts-expect-error: @inline is a valid decorator
101
+ @inline export function parse8Digits_PairMul(lo: u64, hi: u64): u32 {
102
+ const loDigits = lo - ZERO_4;
103
+ const hiDigits = hi - ZERO_4;
104
+ const bad =
105
+ (loDigits |
106
+ (loDigits + RANGE_ADD_4) |
107
+ hiDigits |
108
+ (hiDigits + RANGE_ADD_4)) &
109
+ RANGE_MASK_4;
110
+ if (bad != 0) return U32.MAX_VALUE;
111
+
112
+ const loPairs = (loDigits * 10 + (loDigits >> 16)) & U32_LO_PAIR;
113
+ const hiPairs = (hiDigits * 10 + (hiDigits >> 16)) & U32_LO_PAIR;
114
+ const loVal = <u32>((loPairs * FINAL_4_MAGIC) >> 32);
115
+ const hiVal = <u32>((hiPairs * FINAL_4_MAGIC) >> 32);
116
+ return loVal * 10_000 + hiVal;
117
+ }
118
+
119
+ /**
120
+ * Same as {@link parse8Digits_PairMul} but with the validation step removed.
121
+ * Used in consume-to-end paths.
122
+ *
123
+ * @param lo The first `u64`, four UTF-16 code units.
124
+ * @param hi The second `u64`, four UTF-16 code units.
125
+ * @returns The parsed 8-digit value.
126
+ */
127
+ // @ts-expect-error: @inline is a valid decorator
128
+ @inline export function parse8Digits_PairMul_Unsafe(lo: u64, hi: u64): u32 {
129
+ const loDigits = lo - ZERO_4;
130
+ const hiDigits = hi - ZERO_4;
131
+ const loPairs = (loDigits * 10 + (loDigits >> 16)) & U32_LO_PAIR;
132
+ const hiPairs = (hiDigits * 10 + (hiDigits >> 16)) & U32_LO_PAIR;
133
+ const loVal = <u32>((loPairs * FINAL_4_MAGIC) >> 32);
134
+ const hiVal = <u32>((hiPairs * FINAL_4_MAGIC) >> 32);
135
+ return loVal * 10_000 + hiVal;
136
+ }
137
+
138
+ /**
139
+ * Non-digit lane mask for a `u64` holding four UTF-16 code units. Returns 0
140
+ * when the low byte of every lane is `'0'..'9'`. Otherwise the result is non-zero and its lowest set bit falls in bits 4-7 of the first (lowest) offending lane.
141
+ * Only that lowest flagged lane is reliable: the 64-bit subtract borrows across lanes, so lanes above a bad one may be flagged even if they hold digits.
142
+ * NOTE(review): `& LANE_LO_4` drops each lane's high byte before the check, so a non-ASCII unit such as U+0131 (low byte 0x31) passes as a digit; confirm callers only feed ASCII. Typical use:
143
+ *
144
+ * ```ts
145
+ * const mask = nonDigitMask4(block);
146
+ * if (mask == 0) { /* all valid *\/ }
147
+ * else { const byteOff = ctz(mask) >> 3; /* byte offset of the first bad lane; `ctz(mask) >> 4` is its lane index *\/ }
148
+ * ```
149
+ *
150
+ * @param block Four UTF-16 code units packed into a `u64`.
151
+ * @returns 0 if all four low bytes are digits; otherwise a non-zero mask whose lowest set bit marks the first non-digit lane.
152
+ */
153
+ // @ts-expect-error: @inline is a valid decorator
154
+ @inline export function nonDigitMask4(block: u64): u64 {
155
+ const digits = (block & LANE_LO_4) - ZERO_4; // per lane: low byte - '0'; a low byte below '0' borrows from the lane above
156
+ return (digits | (digits + RANGE_ADD_4)) & RANGE_MASK_4; // values 0..9 stay below 16 even after +6; anything else sets a bit at position 4 or higher
157
+ }
158
+
159
+ /**
160
+ * Parse sixteen UTF-16 ASCII digits (32 source bytes) into one 16-digit
161
+ * `u64` value.
162
+ *
163
+ * Mirrors the SIMD 16-digit parser's shape using pure SWAR. Four `u64`
164
+ * loads, one combined validation mask, four independent 4-digit folds (each
165
+ * a chance for the engine to issue them in parallel), then a parallel-pair
166
+ * tree combine.
167
+ *
168
+ * Best for long-integer atoi: one branch covers 16 digits, the four folds
169
+ * have no cross-dependencies, and the final combine forms two independent
170
+ * 8-digit values that merge in one mul-add.
171
+ *
172
+ * Returns `U64.MAX_VALUE` on any non-digit lane.
173
+ *
174
+ * @param srcStart Pointer to the start of 32 source bytes (16 UTF-16 chars).
175
+ * @returns The parsed 16-digit value, or `U64.MAX_VALUE` on invalid input.
176
+ */
177
+ // @ts-expect-error: @inline is a valid decorator
178
+ @inline export function parse16Digits_SWAR(srcStart: usize): u64 {
179
+ const b0 = load<u64>(srcStart);
180
+ const b1 = load<u64>(srcStart, 8);
181
+ const b2 = load<u64>(srcStart, 16);
182
+ const b3 = load<u64>(srcStart, 24);
183
+
184
+ const d0 = b0 - ZERO_4;
185
+ const d1 = b1 - ZERO_4;
186
+ const d2 = b2 - ZERO_4;
187
+ const d3 = b3 - ZERO_4;
188
+
189
+ const bad =
190
+ (d0 |
191
+ (d0 + RANGE_ADD_4) |
192
+ d1 |
193
+ (d1 + RANGE_ADD_4) |
194
+ d2 |
195
+ (d2 + RANGE_ADD_4) |
196
+ d3 |
197
+ (d3 + RANGE_ADD_4)) &
198
+ RANGE_MASK_4;
199
+ if (bad != 0) return U64.MAX_VALUE;
200
+
201
+ const p0 = (d0 * 10 + (d0 >> 16)) & U32_LO_PAIR;
202
+ const p1 = (d1 * 10 + (d1 >> 16)) & U32_LO_PAIR;
203
+ const p2 = (d2 * 10 + (d2 >> 16)) & U32_LO_PAIR;
204
+ const p3 = (d3 * 10 + (d3 >> 16)) & U32_LO_PAIR;
205
+
206
+ const v0 = <u32>((p0 * FINAL_4_MAGIC) >> 32);
207
+ const v1 = <u32>((p1 * FINAL_4_MAGIC) >> 32);
208
+ const v2 = <u32>((p2 * FINAL_4_MAGIC) >> 32);
209
+ const v3 = <u32>((p3 * FINAL_4_MAGIC) >> 32);
210
+
211
+ const pair01 = <u64>v0 * 10_000 + <u64>v1;
212
+ const pair23 = <u64>v2 * 10_000 + <u64>v3;
213
+ return pair01 * 100_000_000 + pair23;
214
+ }
215
+
216
+ /**
217
+ * Same as {@link parse16Digits_SWAR} but with the validation step removed.
218
+ * Used in consume-to-end paths.
219
+ *
220
+ * @param srcStart Pointer to the start of 32 source bytes (16 UTF-16 chars).
221
+ * @returns The parsed 16-digit value.
222
+ */
223
+ // @ts-expect-error: @inline is a valid decorator
224
+ @inline export function parse16Digits_SWAR_Unsafe(srcStart: usize): u64 {
225
+ const b0 = load<u64>(srcStart);
226
+ const b1 = load<u64>(srcStart, 8);
227
+ const b2 = load<u64>(srcStart, 16);
228
+ const b3 = load<u64>(srcStart, 24);
229
+
230
+ const d0 = b0 - ZERO_4;
231
+ const d1 = b1 - ZERO_4;
232
+ const d2 = b2 - ZERO_4;
233
+ const d3 = b3 - ZERO_4;
234
+
235
+ const p0 = (d0 * 10 + (d0 >> 16)) & U32_LO_PAIR;
236
+ const p1 = (d1 * 10 + (d1 >> 16)) & U32_LO_PAIR;
237
+ const p2 = (d2 * 10 + (d2 >> 16)) & U32_LO_PAIR;
238
+ const p3 = (d3 * 10 + (d3 >> 16)) & U32_LO_PAIR;
239
+
240
+ const v0 = <u32>((p0 * FINAL_4_MAGIC) >> 32);
241
+ const v1 = <u32>((p1 * FINAL_4_MAGIC) >> 32);
242
+ const v2 = <u32>((p2 * FINAL_4_MAGIC) >> 32);
243
+ const v3 = <u32>((p3 * FINAL_4_MAGIC) >> 32);
244
+
245
+ const pair01 = <u64>v0 * 10_000 + <u64>v1;
246
+ const pair23 = <u64>v2 * 10_000 + <u64>v3;
247
+ return pair01 * 100_000_000 + pair23;
248
+ }
@@ -23,7 +23,12 @@
23
23
  // (c & 0xF) + 9 * (c >> 6)
24
24
  block = (block & 0x0f000f000f000f) + ((block >> 6) & 0x03000300030003) * 9;
25
25
 
26
- return <u16>(((block >> 0) << 12) | ((block >> 16) << 8) | ((block >> 32) << 4) | (block >> 48));
26
+ return <u16>(
27
+ (((block >> 0) << 12) |
28
+ ((block >> 16) << 8) |
29
+ ((block >> 32) << 4) |
30
+ (block >> 48))
31
+ );
27
32
  }
28
33
 
29
34
  /**
@@ -48,9 +53,14 @@
48
53
  */
49
54
  // @ts-expect-error: @inline is a valid decorator
50
55
  @inline export function u16_to_hex4_swar(code: u16): u64 {
51
- let block = (<u64>((code >> 12) & 0xf)) | ((<u64>((code >> 8) & 0xf)) << 16) | ((<u64>((code >> 4) & 0xf)) << 32) | ((<u64>(code & 0xf)) << 48);
56
+ let block =
57
+ (<u64>((code >> 12) & 0xf)) | // most significant nibble -> bits 0-15
58
+ ((<u64>((code >> 8) & 0xf)) << 16) | // second nibble -> bits 16-31
59
+ ((<u64>((code >> 4) & 0xf)) << 32) | // third nibble -> bits 32-47
60
+ ((<u64>(code & 0xf)) << 48); // least significant nibble -> bits 48-63
52
61
 
53
- const alphaMask = ((block + 0x0006_0006_0006_0006) >> 4) & 0x0001_0001_0001_0001;
62
+ const alphaMask =
63
+ ((block + 0x0006_0006_0006_0006) >> 4) & 0x0001_0001_0001_0001; // 1 in every lane whose nibble is >= 10 (nibble + 6 carries into bit 4)
54
64
  block += 0x0030_0030_0030_0030 + alphaMask * 39; // 0x30 ('0') + nibble for 0-9; the extra 39 maps 10-15 onto 0x61-0x66 ('a'-'f')
55
65
  return block;
56
66
  }
package/lib/as-bs.ts CHANGED
@@ -21,7 +21,7 @@ export namespace bs {
21
21
  export let offset: usize = buffer;
22
22
 
23
23
  /** Byte length of the buffer. */
24
- let bufferSize: usize = MIN_BUFFER_SIZE;
24
+ export let bufferSize: usize = MIN_BUFFER_SIZE;
25
25
 
26
26
  /** Proposed size of output */
27
27
  export let stackSize: usize = 0;
@@ -104,6 +104,19 @@ export namespace bs {
104
104
  pauseStackSizes.length = index;
105
105
  }
106
106
 
107
+ /**
108
+ * Resets the buffer to a clean, empty state. Call this after a throw aborts a
109
+ * serialize/deserialize op mid-flight: a partial run can leave `offset`
110
+ * advanced and the pause stacks non-empty, which would corrupt the next op. Only the cursor, `stackSize` and the pause stacks are reset; `buffer` and `bufferSize` are not modified.
111
+ */
112
+ // @ts-expect-error: @inline is a valid decorator
113
+ @inline export function reset(): void {
114
+ offset = buffer; // rewind the cursor to the start of the buffer (its initial value)
115
+ stackSize = 0; // clear the proposed output size
116
+ pauseOffsets.length = 0; // discard every saved pause offset
117
+ pauseStackSizes.length = 0; // discard every saved pause stack size
118
+ }
119
+
107
120
  /**
108
121
  * Proposes that the buffer size should be greater than or equal to the proposed size.
109
122
  * If necessary, reallocates the buffer to the exact new size.
@@ -180,7 +193,8 @@ export namespace bs {
180
193
  memory.copy(_out, buffer, len);
181
194
  return changetype<T>(_out);
182
195
  } else {
183
- const pauseOffset = buffer + unchecked(pauseOffsets[pauseOffsets.length - 1]);
196
+ const pauseOffset =
197
+ buffer + unchecked(pauseOffsets[pauseOffsets.length - 1]);
184
198
  const len = offset - pauseOffset;
185
199
  // @ts-expect-error: __new is a runtime builtin
186
200
  const _out = __new(len, idof<T>());
@@ -330,15 +344,22 @@ export namespace sc {
330
344
  // @ts-expect-error: JSON_CACHE may not be defined. If so, it will default to false.
331
345
  export const CACHE_ENABLED: bool = isDefined(JSON_CACHE) ? JSON_CACHE : false;
332
346
  // @ts-expect-error: JSON_CACHE_SIZE may not be defined. If so, it will default to 1MB.
333
- export const CACHE_BYTES: usize = isDefined(JSON_CACHE_SIZE) ? <usize>JSON_CACHE_SIZE : 1 << 20;
347
+ export const CACHE_BYTES: usize = isDefined(JSON_CACHE_SIZE)
348
+ ? <usize>JSON_CACHE_SIZE
349
+ : 1 << 20; // 1 << 20 = 1,048,576 bytes (1 MiB)
334
350
  /** Minimum serialized length to cache - smaller outputs aren't worth caching */
335
351
  export const MIN_CACHE_LEN: usize = 128;
336
352
  /** Size of the circular arena buffer for cached strings */
337
- export const ARENA_SIZE: usize = CACHE_BYTES >= MIN_CACHE_LEN ? CACHE_BYTES : MIN_CACHE_LEN;
353
+ export const ARENA_SIZE: usize =
354
+ CACHE_BYTES >= MIN_CACHE_LEN ? CACHE_BYTES : MIN_CACHE_LEN; // clamp upward so the arena is never smaller than MIN_CACHE_LEN
338
355
 
339
356
  /** Number of cache slots (power of 2 for efficient masking). Set to 0 when caching disabled. */
340
- const CACHE_SIZE_BASE: i32 = CACHE_ENABLED ? i32(ARENA_SIZE >> 10 >= 1 ? ARENA_SIZE >> 10 : 1) : 0;
341
- export const CACHE_SIZE: usize = CACHE_ENABLED ? <usize>(1 << (32 - clz<i32>(CACHE_SIZE_BASE - 1))) : 0;
357
+ const CACHE_SIZE_BASE: i32 = CACHE_ENABLED
358
+ ? i32(ARENA_SIZE >> 10 >= 1 ? ARENA_SIZE >> 10 : 1) // ARENA_SIZE / 1024 rounded down, with a floor of 1
359
+ : 0;
360
+ export const CACHE_SIZE: usize = CACHE_ENABLED
361
+ ? <usize>(1 << (32 - clz<i32>(CACHE_SIZE_BASE - 1))) // rounds CACHE_SIZE_BASE up to a power of two (a base of 1 gives clz(0) = 32, i.e. 1 << 0)
362
+ : 0;
342
363
  /** Bitmask for fast modulo operation on cache index */
343
364
  export const CACHE_MASK: usize = CACHE_SIZE > 0 ? CACHE_SIZE - 1 : 0; // all low bits set, since a non-zero CACHE_SIZE is a power of two
344
365