json-as 1.3.6 → 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +1 -1
- package/assembly/deserialize/helpers/uint.ts +4 -1
- package/assembly/deserialize/index/arbitrary.ts +7 -3
- package/assembly/deserialize/index/array.ts +42 -17
- package/assembly/deserialize/index/bool.ts +1 -1
- package/assembly/deserialize/index/date.ts +1 -1
- package/assembly/deserialize/index/float.ts +40 -1
- package/assembly/deserialize/index/integer.ts +68 -1
- package/assembly/deserialize/index/map.ts +1 -1
- package/assembly/deserialize/index/object.ts +1 -1
- package/assembly/deserialize/index/raw.ts +1 -1
- package/assembly/deserialize/index/set.ts +1 -1
- package/assembly/deserialize/index/staticarray.ts +4 -1
- package/assembly/deserialize/index/string.ts +32 -4
- package/assembly/deserialize/index/struct.ts +1 -1
- package/assembly/deserialize/index/typedarray.ts +30 -10
- package/assembly/deserialize/index/unsigned.ts +78 -1
- package/assembly/deserialize/index.ts +1 -0
- package/assembly/deserialize/{simple → naive}/array/arbitrary.ts +24 -5
- package/assembly/deserialize/{simple → naive}/array/array.ts +8 -2
- package/assembly/deserialize/naive/array/bool.ts +68 -0
- package/assembly/deserialize/{simple → naive}/array/box.ts +8 -2
- package/assembly/deserialize/naive/array/float.ts +63 -0
- package/assembly/deserialize/{simple → naive}/array/generic.ts +14 -7
- package/assembly/deserialize/naive/array/integer.ts +86 -0
- package/assembly/deserialize/naive/array/map.ts +47 -0
- package/assembly/deserialize/naive/array/object.ts +47 -0
- package/assembly/deserialize/{simple → naive}/array/raw.ts +34 -7
- package/assembly/deserialize/naive/array/string.ts +69 -0
- package/assembly/deserialize/naive/array/struct.ts +47 -0
- package/assembly/deserialize/{simple → naive}/array.ts +15 -10
- package/assembly/deserialize/{simple → naive}/bool.ts +6 -2
- package/assembly/deserialize/naive/float.ts +135 -0
- package/assembly/deserialize/{simple → naive}/integer.ts +10 -2
- package/assembly/deserialize/{simple → naive}/map.ts +106 -27
- package/assembly/deserialize/{simple → naive}/object.ts +65 -19
- package/assembly/deserialize/{simple → naive}/raw.ts +4 -1
- package/assembly/deserialize/{simple → naive}/set.ts +49 -19
- package/assembly/deserialize/{simple → naive}/staticarray/array.ts +1 -1
- package/assembly/deserialize/{simple → naive}/staticarray/bool.ts +1 -1
- package/assembly/deserialize/{simple → naive}/staticarray/float.ts +1 -1
- package/assembly/deserialize/{simple → naive}/staticarray/integer.ts +1 -1
- package/assembly/deserialize/{simple → naive}/staticarray/string.ts +11 -3
- package/assembly/deserialize/{simple → naive}/staticarray/struct.ts +1 -2
- package/assembly/deserialize/{simple → naive}/staticarray.ts +68 -18
- package/assembly/deserialize/naive/string.ts +199 -0
- package/assembly/deserialize/{simple → naive}/struct.ts +5 -1
- package/assembly/deserialize/{simple → naive}/typedarray.ts +17 -4
- package/assembly/deserialize/{simple → naive}/unsigned.ts +10 -15
- package/assembly/deserialize/simd/array/integer.ts +339 -62
- package/assembly/deserialize/simd/float.ts +303 -0
- package/assembly/deserialize/simd/integer.ts +233 -0
- package/assembly/deserialize/simd/string.ts +266 -107
- package/assembly/deserialize/swar/array/arbitrary.ts +11 -3
- package/assembly/deserialize/swar/array/array.ts +40 -9
- package/assembly/deserialize/swar/array/bool.ts +28 -5
- package/assembly/deserialize/swar/array/box.ts +11 -3
- package/assembly/deserialize/swar/array/float.ts +295 -7
- package/assembly/deserialize/swar/array/generic.ts +28 -7
- package/assembly/deserialize/swar/array/integer.ts +363 -112
- package/assembly/deserialize/swar/array/map.ts +11 -3
- package/assembly/deserialize/swar/array/object.ts +37 -25
- package/assembly/deserialize/swar/array/raw.ts +11 -3
- package/assembly/deserialize/swar/array/shared.ts +63 -14
- package/assembly/deserialize/swar/array/string.ts +140 -7
- package/assembly/deserialize/swar/array/struct.ts +66 -12
- package/assembly/deserialize/swar/array.ts +12 -51
- package/assembly/deserialize/swar/float.ts +304 -0
- package/assembly/deserialize/swar/integer.ts +246 -0
- package/assembly/deserialize/swar/string.ts +213 -294
- package/assembly/deserialize/swar/typedarray.ts +224 -0
- package/assembly/index.d.ts +3 -1
- package/assembly/index.ts +402 -261
- package/assembly/serialize/index/array.ts +1 -1
- package/assembly/serialize/index/bool.ts +1 -1
- package/assembly/serialize/index/date.ts +1 -1
- package/assembly/serialize/index/float.ts +5 -1
- package/assembly/serialize/index/integer.ts +1 -1
- package/assembly/serialize/index/map.ts +1 -1
- package/assembly/serialize/index/raw.ts +1 -1
- package/assembly/serialize/index/set.ts +1 -1
- package/assembly/serialize/index/staticarray.ts +1 -1
- package/assembly/serialize/index/string.ts +1 -1
- package/assembly/serialize/index/struct.ts +1 -1
- package/assembly/serialize/index/typedarray.ts +21 -12
- package/assembly/serialize/index.ts +1 -0
- package/assembly/serialize/naive/array.ts +351 -0
- package/assembly/serialize/{simple → naive}/float.ts +4 -1
- package/assembly/serialize/naive/integer.ts +19 -0
- package/assembly/serialize/{simple → naive}/map.ts +6 -2
- package/assembly/serialize/{simple → naive}/raw.ts +5 -1
- package/assembly/serialize/{simple → naive}/set.ts +6 -1
- package/assembly/serialize/{simple → naive}/staticarray.ts +6 -1
- package/assembly/serialize/{simple → naive}/string.ts +1 -2
- package/assembly/serialize/{simple → naive}/typedarray.ts +10 -3
- package/assembly/serialize/simd/string.ts +6 -2
- package/assembly/serialize/swar/string.ts +15 -141
- package/assembly/util/atoi-fast.ts +81 -0
- package/assembly/util/concat.ts +5 -1
- package/assembly/util/dragonbox-cache.ts +443 -2
- package/assembly/util/dragonbox.ts +53 -17
- package/assembly/util/itoa-fast.ts +241 -0
- package/assembly/util/masks.ts +18 -1
- package/assembly/util/parsefloat-fast.ts +167 -0
- package/assembly/util/scanValueEnd.ts +78 -0
- package/assembly/util/scientific.ts +132 -0
- package/assembly/util/simd-int.ts +191 -0
- package/assembly/util/snp.ts +4 -1
- package/assembly/util/swar-int.ts +248 -0
- package/assembly/util/swar.ts +13 -3
- package/lib/as-bs.ts +27 -6
- package/package.json +15 -11
- package/transform/lib/builder.d.ts.map +1 -1
- package/transform/lib/builder.js +13 -5
- package/transform/lib/builder.js.map +1 -1
- package/transform/lib/index.d.ts +5 -0
- package/transform/lib/index.d.ts.map +1 -1
- package/transform/lib/index.js +1046 -340
- package/transform/lib/index.js.map +1 -1
- package/transform/lib/linkers/alias.d.ts.map +1 -1
- package/transform/lib/linkers/alias.js.map +1 -1
- package/transform/lib/linkers/custom.d.ts.map +1 -1
- package/transform/lib/linkers/custom.js +3 -2
- package/transform/lib/linkers/custom.js.map +1 -1
- package/transform/lib/linkers/imports.d.ts.map +1 -1
- package/transform/lib/linkers/imports.js.map +1 -1
- package/transform/lib/types.d.ts.map +1 -1
- package/transform/lib/types.js +54 -16
- package/transform/lib/types.js.map +1 -1
- package/transform/lib/util.d.ts.map +1 -1
- package/transform/lib/util.js +1 -1
- package/transform/lib/util.js.map +1 -1
- package/transform/lib/visitor.d.ts.map +1 -1
- package/transform/lib/visitor.js +2 -1
- package/transform/lib/visitor.js.map +1 -1
- package/assembly/custom/util.ts +0 -310
- package/assembly/deserialize/simple/arbitrary.ts +0 -23
- package/assembly/deserialize/simple/array/bool.ts +0 -17
- package/assembly/deserialize/simple/array/float.ts +0 -28
- package/assembly/deserialize/simple/array/integer.ts +0 -27
- package/assembly/deserialize/simple/array/map.ts +0 -28
- package/assembly/deserialize/simple/array/object.ts +0 -28
- package/assembly/deserialize/simple/array/string.ts +0 -23
- package/assembly/deserialize/simple/array/struct.ts +0 -28
- package/assembly/deserialize/simple/float.ts +0 -201
- package/assembly/deserialize/simple/string.ts +0 -132
- package/assembly/serialize/simple/arbitrary.ts +0 -79
- package/assembly/serialize/simple/array.ts +0 -86
- package/assembly/serialize/simple/integer.ts +0 -20
- package/assembly/serialize/simple/object.ts +0 -42
- /package/assembly/deserialize/{simple → naive}/date.ts +0 -0
- /package/assembly/serialize/{simple → naive}/bool.ts +0 -0
- /package/assembly/serialize/{simple → naive}/date.ts +0 -0
- /package/assembly/serialize/{simple → naive}/struct.ts +0 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// Direct decimal-to-f64 conversion from a `(u64 mantissa, i32 decimal exp)`
|
|
2
|
+
// pair. Bit-identical to `f64.parse` / `f32.parse` for any input the SWAR
|
|
3
|
+
// float deserializer can produce.
|
|
4
|
+
//
|
|
5
|
+
// Ported from AssemblyScript std's `util/string.ts` (which itself is adapted
|
|
6
|
+
// from the "metallic" library). The reason we duplicate it: AS std exposes
|
|
7
|
+
// `strtod(str)` but the underlying `scientific(mantissa, exp)` helper is
|
|
8
|
+
// module-private. Going through `strtod` requires a string allocation and a
|
|
9
|
+
// re-parse of digits we've already accumulated in the SWAR loop. Calling
|
|
10
|
+
// `scientific` directly skips both costs.
|
|
11
|
+
//
|
|
12
|
+
// scientific() is correctly rounded for all u64 mantissas and decimal
|
|
13
|
+
// exponents that fit in IEEE-754 f64's range — including the [2^53, 2^64)
|
|
14
|
+
// mantissa range that breaks Lemire's single-fmul fast path.
|
|
15
|
+
|
|
16
|
+
// Powers of ten 10^0 .. 10^22 stored as f64 in static memory. `pow10` reads
// this table; the fast paths in `scientific` only index it with 0..22.
const POWERS10: usize = memory.data<f64>([
  1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
  1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
]);

// Powers of five 5^0 .. 5^13 stored as i32 in static memory. This replaces an
// `ipow32(5, e)` call: `scaledown` and `scaleup` only ever ask for exponents
// in this range once their reduction loops have finished.
const POWERS5: usize = memory.data<i32>([
  1, 5, 25, 125, 625, 3125, 15625,
  78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
]);
|
|
27
|
+
|
|
28
|
+
// Table lookup for 10^n as an f64. No bounds check: the caller must keep `n`
// inside the POWERS10 table.
// @ts-ignore: inline
@inline function pow10(n: i32): f64 {
  const byteOffset = (<usize>n) << alignof<f64>();
  return load<f64>(POWERS10 + byteOffset);
}
|
|
32
|
+
|
|
33
|
+
// Table lookup for 5^n as an i32. No bounds check: the caller must keep `n`
// inside the POWERS5 table.
// @ts-ignore: inline
@inline function pow5_32(n: i32): i32 {
  const byteOffset = (<usize>n) << alignof<i32>();
  return load<i32>(POWERS5 + byteOffset);
}
|
|
37
|
+
|
|
38
|
+
// Running binary exponent shared between `fixmul` (which adds to it) and
// `scaleup` (which seeds it and reads it back). Kept as a module-level @lazy
// variable, following the AS std implementation this file is ported from.
// @ts-ignore: lazy decorator
@lazy let __fixmulShift: u64 = 0;

// Multiplies a 64-bit value by a 32-bit value and returns the top 64
// significant bits of the (up to 96-bit) product. The number of low bits that
// were shifted out is added to `__fixmulShift`, and the highest discarded bit
// is added back in as a rounding increment.
// @ts-ignore: inline
@inline function fixmul(a: u64, b: u32): u64 {
  // Schoolbook split: low word times b, then high word times b plus carry.
  const lo = (a & 0xffffffff) * b;
  const hi = (a >> 32) * b + (lo >> 32);
  // Bits of the product that spilled past 64 bits.
  const spill = <u32>(hi >> 32);
  const headroom = clz(spill);
  const dropped: u64 = 32 - headroom;
  __fixmulShift += dropped;
  const kept = (hi << headroom) | ((lo & 0xffffffff) >> dropped);
  const roundBit = ((lo << headroom) >> 31) & 1;
  return kept + roundBit;
}
|
|
56
|
+
|
|
57
|
+
// Slow path for negative decimal exponents: divides the normalised
// significand by powers of five (5^14 per loop step, then one final table
// power) while tracking the binary exponent in `shift`, and finishes with
// `scalbn`. Expects `exp < 0`; the final table lookup uses `-exp`, which the
// loop has reduced to the range 0..13.
// @ts-ignore: inline
@inline function scaledown(significand: u64, exp: i32): f64 {
  const denom: u64 = 6103515625; // 1e14 * 0x1p-14
  const scale = reinterpret<f64>(0x3f06849b86a12b9b); // 1e-14 * 0x1p32

  // Normalise so the top bit of the significand is set.
  let shift = clz(significand);
  significand <<= shift;
  shift = exp - shift;

  while (exp <= -14) {
    const quot = significand / denom;
    const rem = significand % denom;
    const lead = clz(quot);
    significand =
      (quot << lead) + <u64>nearest(scale * <f64>(rem << (lead - 18)));
    shift -= lead;
    exp += 14;
  }

  // Remaining factor 5^(-exp).
  const pow5 = <u64>pow5_32(-exp);
  const quot = significand / pow5;
  const rem = significand % pow5;
  const lead = clz(quot);
  // Adding `lead << 52` to the bit pattern of <f64>rem bumps its exponent
  // field, i.e. scales the remainder by 2^lead before the division.
  const remScaled = reinterpret<f64>(reinterpret<u64>(<f64>rem) + (lead << 52));
  significand = (quot << lead) + <u64>(remScaled / <f64>pow5);
  shift -= lead;

  return NativeMath.scalbn(<f64>significand, <i32>shift);
}
|
|
84
|
+
|
|
85
|
+
// Slow path for non-negative decimal exponents: multiplies the significand by
// powers of five through `fixmul` (5^13 per loop step, then one final table
// power) while `__fixmulShift` accumulates the binary exponent, and finishes
// with `scalbn`.
// @ts-ignore: inline
@inline function scaleup(significand: u64, exp: i32): f64 {
  const coeff: u32 = 1220703125; // 1e13 * 0x1p-13;

  // Strip trailing zero bits; they are carried in the binary exponent.
  let shift = ctz(significand);
  significand >>= shift;
  shift += exp;

  // Seed the shared exponent that fixmul() keeps adding to.
  __fixmulShift = shift;
  while (exp >= 13) {
    significand = fixmul(significand, coeff);
    exp -= 13;
  }
  significand = fixmul(significand, <u32>pow5_32(exp));

  shift = __fixmulShift;
  return NativeMath.scalbn(<f64>significand, <i32>shift);
}
|
|
100
|
+
|
|
101
|
+
/**
 * Construct an f64 from a u64 decimal significand and a base-10 exponent,
 * i.e. the value `significand * 10^exp`.
 *
 * Strategy, in order:
 *  - a zero significand or `exp < -342` yields 0; `exp > 308` yields Infinity;
 *  - `exp == 0` is a plain integer-to-float conversion;
 *  - when the significand is at most 2^53 - 1 and `|exp| <= 22`, one multiply
 *    or divide by a table power of ten is used (both operands are exact, so
 *    the result is rounded once);
 *  - for `22 < exp <= 37` the multiply is done in two steps, but only when
 *    the intermediate `significand * 10^(exp - 22)` is still at most
 *    2^53 - 1, which guarantees the first step is exact;
 *  - everything else goes through `scaledown` (negative exponent) or
 *    `scaleup` (positive exponent) with the original exponent.
 *
 * The two-step path is guarded on purpose: applying it unconditionally would
 * replace `exp` by 22 before the slow path runs, silently dropping the factor
 * `10^(exp - 22)` for significands above 2^53 - 1, and would round twice
 * whenever the intermediate product is inexact.
 *
 * Caller guarantees the digit run that produced `significand` was already
 * scanned and validated; this function only handles the value computation,
 * not the lexing.
 *
 * @param significand u64 mantissa (any value from 0 to U64.MAX_VALUE)
 * @param exp         Decimal exponent (e.g. for "12.34" pass 1234 and -2)
 * @returns The f64 value, or 0 / Infinity at the extremes.
 */
// @ts-ignore: inline
@inline export function scientific(significand: u64, exp: i32): f64 {
  if (!significand || exp < -342) return 0;
  if (exp > 308) return Infinity;

  const significandf = <f64>significand;
  if (!exp) return significandf;

  // Fast paths need the significand itself to be exactly representable.
  if (significand <= 9007199254740991) {
    if (abs(exp) <= 22) {
      if (exp > 0) return significandf * pow10(exp);
      return significandf / pow10(-exp);
    }
    if (exp > 22 && exp <= 22 + 15) {
      // 10^(exp - 22) <= 10^15 is exact; the product is exact iff it still
      // fits below 2^53, in which case only the final multiply rounds.
      const scaled = significandf * pow10(exp - 22);
      if (scaled <= 9007199254740991.0) return scaled * pow10(22);
    }
  }

  if (exp < 0) return scaledown(significand, exp);
  return scaleup(significand, exp);
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// SIMD (v128) integer-digit parsing kernels over UTF-16 sources.
|
|
2
|
+
//
|
|
3
|
+
// Requires `--enable simd` at compile time. Imported only by the SIMD-mode
|
|
4
|
+
// dispatch paths and dead-code-eliminated when JSON_MODE != SIMD.
|
|
5
|
+
//
|
|
6
|
+
// Algorithm is the Lemire-style narrow-extmul-dot pipeline used by simdjson:
|
|
7
|
+
//
|
|
8
|
+
// 1. `i16x8.sub` subtracts `'0'` from each UTF-16 lane.
|
|
9
|
+
// 2. `i8x16.narrow_i16x8_u` packs two 8-lane u16 vectors into one 16-lane u8
|
|
10
|
+
// vector. This pack is free in SIMD and is the move that makes the SWAR
|
|
11
|
+
// packing problem disappear.
|
|
12
|
+
// 3. `i16x8.extmul_low/high_i8x16_u(packed, (10, 1, ...))` multiplies
|
|
13
|
+
// adjacent bytes by 10 and 1, encoding the first pair-fold step in a
|
|
14
|
+
// vector op.
|
|
15
|
+
// 4. `i32x4.extadd_pairwise_i16x8_u` pairwise-sums adjacent u16 lanes into
|
|
16
|
+
// u32 lanes, completing the first pair-fold.
|
|
17
|
+
// 5. `i16x8.narrow_i32x4_u + i32x4.dot_i16x8_s(_, (100, 1, 100, 1, ...))`
|
|
18
|
+
// folds 4 u32 pair-values into 2 u32 group-values per lane via dot
|
|
19
|
+
// product.
|
|
20
|
+
|
|
21
|
+
// Lane-wise ASCII '0' (0x30), subtracted from every UTF-16 lane.
// @ts-expect-error: @lazy is a valid decorator
@lazy const SPLAT_30 = i16x8.splat(0x30);
// Lane-wise 9, the upper bound used by the unsigned digit range check.
// @ts-expect-error: @lazy is a valid decorator
@lazy const SPLAT_09 = i16x8.splat(9);
// All-zero vectors used as the second operand of the narrowing ops in the
// 8-digit kernels.
// @ts-expect-error: @lazy is a valid decorator
@lazy const ZERO_I16X8 = i16x8.splat(0);
// @ts-expect-error: @lazy is a valid decorator
@lazy const ZERO_I32X4 = i32x4.splat(0);

// Weights for the first pair-fold step (`digit_lo * 10 + digit_hi`).
// Eight active byte lanes; used by the 8-digit kernels.
// @ts-expect-error: @lazy is a valid decorator
@lazy const PACK_WEIGHTS_10_1 = i8x16(
  10, 1, 10, 1, 10, 1, 10, 1,
  0, 0, 0, 0, 0, 0, 0, 0,
);
// Same weights across all sixteen byte lanes; used by the 16-digit kernels.
// @ts-expect-error: @lazy is a valid decorator
@lazy const PACK_WEIGHTS_10_1_FULL = i8x16(
  10, 1, 10, 1, 10, 1, 10, 1,
  10, 1, 10, 1, 10, 1, 10, 1,
);

// Weights for the second fold step (`pair_lo * 100 + pair_hi`).
// Four active lanes for the 8-digit kernels, eight for the 16-digit kernels.
// @ts-expect-error: @lazy is a valid decorator
@lazy const PAIR_WEIGHTS_100_1 = i16x8(100, 1, 100, 1, 0, 0, 0, 0);
// @ts-expect-error: @lazy is a valid decorator
@lazy const PAIR_WEIGHTS_100_1_FULL = i16x8(100, 1, 100, 1, 100, 1, 100, 1);
|
|
75
|
+
|
|
76
|
+
/**
 * SIMD parse of eight UTF-16 ASCII digits (one 16-byte load) into a `u32`.
 *
 * Every lane is range-checked first; if any lane is not `'0'..'9'` the
 * function returns `U32.MAX_VALUE` instead of a value.
 *
 * @param srcStart Pointer to 16 source bytes (8 UTF-16 chars).
 * @returns The parsed 8-digit value, or `U32.MAX_VALUE` on invalid input.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse8Digits_SIMD(srcStart: usize): u32 {
  const lanes = i16x8.sub(load<v128>(srcStart), SPLAT_30);
  const outOfRange = i16x8.gt_u(lanes, SPLAT_09);
  if (v128.any_true(outOfRange)) return U32.MAX_VALUE;

  // u16 digits -> u8 digits -> weighted by (10, 1) -> two-digit sums.
  const bytes = i8x16.narrow_i16x8_u(lanes, ZERO_I16X8);
  const weighted = i16x8.extmul_low_i8x16_u(bytes, PACK_WEIGHTS_10_1);
  const twoDigit = i32x4.extadd_pairwise_i16x8_u(weighted);

  // Two-digit sums -> weighted by (100, 1) -> two four-digit groups.
  const twoDigit16 = i16x8.narrow_i32x4_u(twoDigit, ZERO_I32X4);
  const fourDigit = i32x4.dot_i16x8_s(twoDigit16, PAIR_WEIGHTS_100_1);

  const leading = <u32>i32x4.extract_lane(fourDigit, 0);
  const trailing = <u32>i32x4.extract_lane(fourDigit, 1);
  return leading * 10_000 + trailing;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Unvalidated variant of {@link parse8Digits_SIMD}: the same fold without the
 * digit range check. Used in consume-to-end paths.
 *
 * @param srcStart Pointer to 16 source bytes (8 UTF-16 chars).
 * @returns The parsed 8-digit value.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse8Digits_SIMD_Unsafe(srcStart: usize): u32 {
  const lanes = i16x8.sub(load<v128>(srcStart), SPLAT_30);

  // u16 digits -> u8 digits -> weighted by (10, 1) -> two-digit sums.
  const bytes = i8x16.narrow_i16x8_u(lanes, ZERO_I16X8);
  const weighted = i16x8.extmul_low_i8x16_u(bytes, PACK_WEIGHTS_10_1);
  const twoDigit = i32x4.extadd_pairwise_i16x8_u(weighted);

  // Two-digit sums -> weighted by (100, 1) -> two four-digit groups.
  const twoDigit16 = i16x8.narrow_i32x4_u(twoDigit, ZERO_I32X4);
  const fourDigit = i32x4.dot_i16x8_s(twoDigit16, PAIR_WEIGHTS_100_1);

  const leading = <u32>i32x4.extract_lane(fourDigit, 0);
  const trailing = <u32>i32x4.extract_lane(fourDigit, 1);
  return leading * 10_000 + trailing;
}
|
|
120
|
+
|
|
121
|
+
/**
 * SIMD parse of sixteen UTF-16 ASCII digits (two 16-byte loads) into a `u64`.
 *
 * The range checks of both halves are OR'd together so a single branch
 * covers all 16 lanes. Both halves are then packed into one byte vector,
 * folded into four 4-digit groups, and combined with scalar arithmetic.
 *
 * Returns `U64.MAX_VALUE` on any non-digit lane.
 *
 * @param srcStart Pointer to 32 source bytes (16 UTF-16 chars).
 * @returns The parsed 16-digit value, or `U64.MAX_VALUE` on invalid input.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse16Digits_SIMD(srcStart: usize): u64 {
  const lanesA = i16x8.sub(load<v128>(srcStart), SPLAT_30);
  const lanesB = i16x8.sub(load<v128>(srcStart, 16), SPLAT_30);

  const outOfRange = v128.or(
    i16x8.gt_u(lanesA, SPLAT_09),
    i16x8.gt_u(lanesB, SPLAT_09),
  );
  if (v128.any_true(outOfRange)) return U64.MAX_VALUE;

  // 16 u16 digits -> 16 u8 digits -> eight two-digit sums.
  const bytes = i8x16.narrow_i16x8_u(lanesA, lanesB);
  const twoDigitLo = i32x4.extadd_pairwise_i16x8_u(
    i16x8.extmul_low_i8x16_u(bytes, PACK_WEIGHTS_10_1_FULL),
  );
  const twoDigitHi = i32x4.extadd_pairwise_i16x8_u(
    i16x8.extmul_high_i8x16_u(bytes, PACK_WEIGHTS_10_1_FULL),
  );

  // Eight two-digit sums -> four four-digit groups.
  const fourDigit = i32x4.dot_i16x8_s(
    i16x8.narrow_i32x4_u(twoDigitLo, twoDigitHi),
    PAIR_WEIGHTS_100_1_FULL,
  );

  const first8 =
    <u64>i32x4.extract_lane(fourDigit, 0) * 10_000 +
    <u64>i32x4.extract_lane(fourDigit, 1);
  const last8 =
    <u64>i32x4.extract_lane(fourDigit, 2) * 10_000 +
    <u64>i32x4.extract_lane(fourDigit, 3);
  return first8 * 100_000_000 + last8;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Unvalidated variant of {@link parse16Digits_SIMD}: the same fold without
 * the digit range check. Used in consume-to-end paths.
 *
 * @param srcStart Pointer to 32 source bytes (16 UTF-16 chars).
 * @returns The parsed 16-digit value.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse16Digits_SIMD_Unsafe(srcStart: usize): u64 {
  const lanesA = i16x8.sub(load<v128>(srcStart), SPLAT_30);
  const lanesB = i16x8.sub(load<v128>(srcStart, 16), SPLAT_30);

  // 16 u16 digits -> 16 u8 digits -> eight two-digit sums.
  const bytes = i8x16.narrow_i16x8_u(lanesA, lanesB);
  const twoDigitLo = i32x4.extadd_pairwise_i16x8_u(
    i16x8.extmul_low_i8x16_u(bytes, PACK_WEIGHTS_10_1_FULL),
  );
  const twoDigitHi = i32x4.extadd_pairwise_i16x8_u(
    i16x8.extmul_high_i8x16_u(bytes, PACK_WEIGHTS_10_1_FULL),
  );

  // Eight two-digit sums -> four four-digit groups.
  const fourDigit = i32x4.dot_i16x8_s(
    i16x8.narrow_i32x4_u(twoDigitLo, twoDigitHi),
    PAIR_WEIGHTS_100_1_FULL,
  );

  const first8 =
    <u64>i32x4.extract_lane(fourDigit, 0) * 10_000 +
    <u64>i32x4.extract_lane(fourDigit, 1);
  const last8 =
    <u64>i32x4.extract_lane(fourDigit, 2) * 10_000 +
    <u64>i32x4.extract_lane(fourDigit, 3);
  return first8 * 100_000_000 + last8;
}
|
package/assembly/util/snp.ts
CHANGED
|
@@ -7,7 +7,10 @@ import { POW_TEN_TABLE_32, POW_TEN_TABLE_64 } from "../globals/tables";
|
|
|
7
7
|
import { atoi } from "./atoi";
|
|
8
8
|
|
|
9
9
|
// @ts-ignore: Decorator valid here
|
|
10
|
-
@inline export function snp<T extends number>(
|
|
10
|
+
@inline export function snp<T extends number>(
|
|
11
|
+
srcStart: usize,
|
|
12
|
+
srcEnd: usize,
|
|
13
|
+
): T {
|
|
11
14
|
// @ts-ignore: type
|
|
12
15
|
let val: T = 0;
|
|
13
16
|
let char = load<u16>(srcStart) - 48;
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
// Per-lane constants for a u64 that holds four 16-bit UTF-16 code units.
// Low byte of every lane.
const LANE_LO_4: u64 = 0x00ff_00ff_00ff_00ff;
// ASCII '0' (0x30) in every lane.
const ZERO_4: u64 = 0x0030_0030_0030_0030;
// Added to `lane - '0'` so that values 10..15 carry into bit 4.
const RANGE_ADD_4: u64 = 0x0006_0006_0006_0006;
// Bits 4..15 of every lane; any of them set marks a value outside 0..9.
const RANGE_MASK_4: u64 = 0xfff0_fff0_fff0_fff0;

// Keeps the low 16 bits of each 32-bit half; used by the pair-multiply fold.
const U32_LO_PAIR: u64 = 0x0000_ffff_0000_ffff;

/**
 * Multiplier for the last step of the 4-digit combine.
 *
 * Given `pairs = (cd << 32) | ab`, where `ab` and `cd` are two-digit fold
 * results in `[0, 99]`, the u64 product `pairs * FINAL_4_MAGIC` carries
 * `ab * 100 + cd` in its upper 32 bits (the cross-term of the multiply).
 * Shifting right by 32 therefore yields `1000a + 100b + 10c + d`.
 */
const FINAL_4_MAGIC: u64 = 0x0000_0064_0000_0001;
|
|
19
|
+
|
|
20
|
+
/**
 * Parse four UTF-16 ASCII digits in a `u64` into the 4-digit value, using
 * the baseline scalar combine. Kept for reference and head-to-head benches.
 *
 * Validation runs on the full 16-bit lanes. Masking each lane down to its
 * low byte before the range check would let a non-ASCII code unit whose low
 * byte happens to be `0x30..0x39` (for example U+0431) pass as a digit.
 * Once validation succeeds every lane is in `0..9`, so no mask is needed for
 * the combine either.
 *
 * Returns `U32.MAX_VALUE` on any non-digit lane.
 *
 * @param block Four UTF-16 code units packed into a `u64`.
 * @returns The parsed 4-digit value, or `U32.MAX_VALUE` on invalid input.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse4Digits_Baseline(block: u64): u32 {
  const digits = block - ZERO_4;
  // Any bit in 4..15 of a lane (directly, or after adding 6) means the lane
  // was outside '0'..'9'.
  if (((digits | (digits + RANGE_ADD_4)) & RANGE_MASK_4) != 0) {
    return U32.MAX_VALUE;
  }
  // Lowest lane is the first (most significant) digit.
  return <u32>(
    (<u32>(digits & 0xffff) * 1000 +
      <u32>((digits >> 16) & 0xffff) * 100 +
      <u32>((digits >> 32) & 0xffff) * 10 +
      <u32>(digits >> 48))
  );
}
|
|
42
|
+
|
|
43
|
+
/**
 * Parse four UTF-16 ASCII digits packed in a `u64` into their 4-digit value.
 *
 * Differences from the baseline combine:
 *
 *   1. No `& LANE_LO_4` up front. Valid ASCII lanes already have a zero high
 *      byte, and the range check below runs before any multiply, rejecting
 *      every input for which the missing AND could corrupt neighbouring
 *      lanes.
 *   2. The pair fold `digits * 10 + (digits >> 16)` is applied to the whole
 *      word and masked afterwards, saving one AND.
 *   3. The final combine is one multiply by {@link FINAL_4_MAGIC} followed by
 *      a shift.
 *
 * Returns `U32.MAX_VALUE` on any non-digit lane.
 *
 * @param block Four UTF-16 code units packed into a `u64`.
 * @returns The parsed 4-digit value, or `U32.MAX_VALUE` on invalid input.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse4Digits_PairMul(block: u64): u32 {
  const lanes = block - ZERO_4;
  const flagged = (lanes | (lanes + RANGE_ADD_4)) & RANGE_MASK_4;
  if (flagged != 0) {
    return U32.MAX_VALUE;
  }
  const folded = (lanes * 10 + (lanes >> 16)) & U32_LO_PAIR;
  const product = folded * FINAL_4_MAGIC;
  return <u32>(product >> 32);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Unvalidated variant of {@link parse4Digits_PairMul}. Intended for
 * consume-to-end paths where the caller has already bounded the digit range,
 * so per-stride validation isn't needed.
 *
 * @param block Four UTF-16 code units packed into a `u64`.
 * @returns The parsed 4-digit value.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse4Digits_PairMul_Unsafe(block: u64): u32 {
  const lanes = block - ZERO_4;
  const folded = (lanes * 10 + (lanes >> 16)) & U32_LO_PAIR;
  const product = folded * FINAL_4_MAGIC;
  return <u32>(product >> 32);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Parse eight UTF-16 ASCII digits, supplied as two consecutive `u64` loads
 * (16 source bytes), into one 8-digit `u32`.
 *
 * Both halves share a single combined range check. Each half is then folded
 * with the same pair-multiply steps as {@link parse4Digits_PairMul}, and the
 * two 4-digit results are merged as `lo * 10_000 + hi`.
 *
 * Returns `U32.MAX_VALUE` on any non-digit lane.
 *
 * @param lo The first `u64`, four UTF-16 code units.
 * @param hi The second `u64`, four UTF-16 code units.
 * @returns The parsed 8-digit value, or `U32.MAX_VALUE` on invalid input.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse8Digits_PairMul(lo: u64, hi: u64): u32 {
  const lanesLo = lo - ZERO_4;
  const lanesHi = hi - ZERO_4;

  const flagged =
    (lanesLo |
      (lanesLo + RANGE_ADD_4) |
      lanesHi |
      (lanesHi + RANGE_ADD_4)) &
    RANGE_MASK_4;
  if (flagged != 0) return U32.MAX_VALUE;

  const foldedLo = (lanesLo * 10 + (lanesLo >> 16)) & U32_LO_PAIR;
  const foldedHi = (lanesHi * 10 + (lanesHi >> 16)) & U32_LO_PAIR;
  const first4 = <u32>((foldedLo * FINAL_4_MAGIC) >> 32);
  const last4 = <u32>((foldedHi * FINAL_4_MAGIC) >> 32);
  return first4 * 10_000 + last4;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Unvalidated variant of {@link parse8Digits_PairMul}: the same fold without
 * the range check. Used in consume-to-end paths.
 *
 * @param lo The first `u64`, four UTF-16 code units.
 * @param hi The second `u64`, four UTF-16 code units.
 * @returns The parsed 8-digit value.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse8Digits_PairMul_Unsafe(lo: u64, hi: u64): u32 {
  const lanesLo = lo - ZERO_4;
  const lanesHi = hi - ZERO_4;
  const foldedLo = (lanesLo * 10 + (lanesLo >> 16)) & U32_LO_PAIR;
  const foldedHi = (lanesHi * 10 + (lanesHi >> 16)) & U32_LO_PAIR;
  const first4 = <u32>((foldedLo * FINAL_4_MAGIC) >> 32);
  const last4 = <u32>((foldedHi * FINAL_4_MAGIC) >> 32);
  return first4 * 10_000 + last4;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Non-digit lane mask for a `u64` holding four UTF-16 code units.
 *
 * Returns 0 when all four lanes are ASCII `'0'..'9'`. Otherwise the result
 * has one or more of bits 4..15 set inside the 16-bit lane of the first
 * non-digit code unit, and no bits set in any lower lane. Lanes above the
 * first non-digit one may also be flagged as a side effect of borrow
 * propagation, so only the lowest set bit is meaningful for locating the
 * boundary:
 *
 * ```ts
 * const mask = nonDigitMask4(block);
 * if (mask == 0) {
 *   // all four code units are digits
 * } else {
 *   const laneIdx = ctz(mask) >> 4; // index (0..3) of the first non-digit
 * }
 * ```
 *
 * The check runs on the full 16-bit lanes. Masking each lane to its low
 * byte first would treat a non-ASCII code unit whose low byte is
 * `0x30..0x39` (for example U+0431) as a digit.
 *
 * @param block Four UTF-16 code units packed into a `u64`.
 * @returns 0 if every lane is a digit, otherwise a mask whose lowest set bit
 *          lies in the first non-digit lane.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function nonDigitMask4(block: u64): u64 {
  const digits = block - ZERO_4;
  return (digits | (digits + RANGE_ADD_4)) & RANGE_MASK_4;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Converts sixteen UTF-16 ASCII digits (32 source bytes) into a single
 * 16-digit `u64` value using SWAR only.
 *
 * Shape of the computation: four `u64` loads, one validation mask shared by
 * all four words, four 4-digit folds that do not depend on one another, and
 * a two-level combine (4 + 4 digits twice, then 8 + 8 digits).
 *
 * Returns `U64.MAX_VALUE` on any non-digit lane.
 *
 * @param srcStart Pointer to the start of 32 source bytes (16 UTF-16 chars).
 * @returns The parsed 16-digit value, or `U64.MAX_VALUE` on invalid input.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse16Digits_SWAR(srcStart: usize): u64 {
  // All four loads happen up front, before the validity branch. Each word
  // covers four consecutive code units and has `ZERO_4` subtracted.
  const w0 = load<u64>(srcStart) - ZERO_4;
  const w1 = load<u64>(srcStart, 8) - ZERO_4;
  const w2 = load<u64>(srcStart, 16) - ZERO_4;
  const w3 = load<u64>(srcStart, 24) - ZERO_4;

  // Per-word range probes, OR-ed together so one branch covers all 16 lanes.
  const probe0 = w0 | (w0 + RANGE_ADD_4);
  const probe1 = w1 | (w1 + RANGE_ADD_4);
  const probe2 = w2 | (w2 + RANGE_ADD_4);
  const probe3 = w3 | (w3 + RANGE_ADD_4);
  const invalid = (probe0 | probe1 | probe2 | probe3) & RANGE_MASK_4;
  if (invalid != 0) return U64.MAX_VALUE;

  // Step 1 of the fold: merge each lane with its neighbour (lane * 10 + next).
  const pairs0 = (w0 * 10 + (w0 >> 16)) & U32_LO_PAIR;
  const pairs1 = (w1 * 10 + (w1 >> 16)) & U32_LO_PAIR;
  const pairs2 = (w2 * 10 + (w2 >> 16)) & U32_LO_PAIR;
  const pairs3 = (w3 * 10 + (w3 >> 16)) & U32_LO_PAIR;

  // Step 2: the 4-digit value sits in the upper 32 bits of the magic product.
  const quad0 = <u32>((pairs0 * FINAL_4_MAGIC) >> 32);
  const quad1 = <u32>((pairs1 * FINAL_4_MAGIC) >> 32);
  const quad2 = <u32>((pairs2 * FINAL_4_MAGIC) >> 32);
  const quad3 = <u32>((pairs3 * FINAL_4_MAGIC) >> 32);

  // Two independent 8-digit halves, merged with one multiply-add.
  const upper8 = <u64>quad0 * 10_000 + <u64>quad1;
  const lower8 = <u64>quad2 * 10_000 + <u64>quad3;
  return upper8 * 100_000_000 + lower8;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Validation-free counterpart of {@link parse16Digits_SWAR}: the loads, the
 * 4-digit folds and the final combine are the same, but no non-digit check
 * is performed. Used in consume-to-end paths.
 *
 * @param srcStart Pointer to the start of 32 source bytes (16 UTF-16 chars).
 * @returns The parsed 16-digit value.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function parse16Digits_SWAR_Unsafe(srcStart: usize): u64 {
  // Each word is handled start to finish: load, subtract `ZERO_4`, merge
  // neighbouring lanes (lane * 10 + next), then take the upper 32 bits of
  // the magic product as the 4-digit value.

  // Word 0: bytes 0..7.
  const w0 = load<u64>(srcStart) - ZERO_4;
  const pairs0 = (w0 * 10 + (w0 >> 16)) & U32_LO_PAIR;
  const quad0 = <u32>((pairs0 * FINAL_4_MAGIC) >> 32);

  // Word 1: bytes 8..15.
  const w1 = load<u64>(srcStart, 8) - ZERO_4;
  const pairs1 = (w1 * 10 + (w1 >> 16)) & U32_LO_PAIR;
  const quad1 = <u32>((pairs1 * FINAL_4_MAGIC) >> 32);

  // Word 2: bytes 16..23.
  const w2 = load<u64>(srcStart, 16) - ZERO_4;
  const pairs2 = (w2 * 10 + (w2 >> 16)) & U32_LO_PAIR;
  const quad2 = <u32>((pairs2 * FINAL_4_MAGIC) >> 32);

  // Word 3: bytes 24..31.
  const w3 = load<u64>(srcStart, 24) - ZERO_4;
  const pairs3 = (w3 * 10 + (w3 >> 16)) & U32_LO_PAIR;
  const quad3 = <u32>((pairs3 * FINAL_4_MAGIC) >> 32);

  // Combine 4 + 4 digits twice, then the two 8-digit halves.
  const upper8 = <u64>quad0 * 10_000 + <u64>quad1;
  const lower8 = <u64>quad2 * 10_000 + <u64>quad3;
  return upper8 * 100_000_000 + lower8;
}
|
package/assembly/util/swar.ts
CHANGED
|
@@ -23,7 +23,12 @@
|
|
|
23
23
|
// (c & 0xF) + 9 * (c >> 6)
|
|
24
24
|
block = (block & 0x0f000f000f000f) + ((block >> 6) & 0x03000300030003) * 9;
|
|
25
25
|
|
|
26
|
-
return <u16>(
|
|
26
|
+
return <u16>(
|
|
27
|
+
(((block >> 0) << 12) |
|
|
28
|
+
((block >> 16) << 8) |
|
|
29
|
+
((block >> 32) << 4) |
|
|
30
|
+
(block >> 48))
|
|
31
|
+
);
|
|
27
32
|
}
|
|
28
33
|
|
|
29
34
|
/**
|
|
@@ -48,9 +53,14 @@
|
|
|
48
53
|
*/
|
|
49
54
|
// @ts-expect-error: @inline is a valid decorator
@inline export function u16_to_hex4_swar(code: u16): u64 {
  // Split the code unit into its four nibbles, each widened to u64.
  const nibble3 = <u64>((code >> 12) & 0xf);
  const nibble2 = <u64>((code >> 8) & 0xf);
  const nibble1 = <u64>((code >> 4) & 0xf);
  const nibble0 = <u64>(code & 0xf);

  // One nibble per 16-bit lane; the most significant nibble lands in the
  // lowest lane.
  let lanes = nibble3 | (nibble2 << 16) | (nibble1 << 32) | (nibble0 << 48);

  // Adding 6 carries into bit 4 only for nibble values 10..15, so this is 1
  // in exactly the lanes that need a letter.
  const isLetter =
    ((lanes + 0x0006_0006_0006_0006) >> 4) & 0x0001_0001_0001_0001;

  // Every lane gets '0' (0x30); letter lanes get 39 more so 10 becomes 'a'.
  lanes += 0x0030_0030_0030_0030 + isLetter * 39;
  return lanes;
}
|
package/lib/as-bs.ts
CHANGED
|
@@ -21,7 +21,7 @@ export namespace bs {
|
|
|
21
21
|
export let offset: usize = buffer;
|
|
22
22
|
|
|
23
23
|
/** Byte length of the buffer. */
|
|
24
|
-
let bufferSize: usize = MIN_BUFFER_SIZE;
|
|
24
|
+
export let bufferSize: usize = MIN_BUFFER_SIZE;
|
|
25
25
|
|
|
26
26
|
/** Proposed size of output */
|
|
27
27
|
export let stackSize: usize = 0;
|
|
@@ -104,6 +104,19 @@ export namespace bs {
|
|
|
104
104
|
pauseStackSizes.length = index;
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
+
/**
 * Puts the buffer back into a clean, empty state. Call this after a throw
 * interrupts a serialize/deserialize op part-way through: such a partial run
 * can leave `offset` advanced and the pause stacks non-empty, which would
 * corrupt the next op.
 */
// @ts-expect-error: @inline is a valid decorator
@inline export function reset(): void {
  // Discard every saved pause frame.
  pauseStackSizes.length = 0;
  pauseOffsets.length = 0;

  // Clear the proposed output size and rewind the write cursor to the start
  // of the buffer.
  stackSize = 0;
  offset = buffer;
}
|
|
119
|
+
|
|
107
120
|
/**
|
|
108
121
|
* Proposes that the buffer size should be greater than or equal to the proposed size.
|
|
109
122
|
* If necessary, reallocates the buffer to the exact new size.
|
|
@@ -180,7 +193,8 @@ export namespace bs {
|
|
|
180
193
|
memory.copy(_out, buffer, len);
|
|
181
194
|
return changetype<T>(_out);
|
|
182
195
|
} else {
|
|
183
|
-
const pauseOffset =
|
|
196
|
+
const pauseOffset =
|
|
197
|
+
buffer + unchecked(pauseOffsets[pauseOffsets.length - 1]);
|
|
184
198
|
const len = offset - pauseOffset;
|
|
185
199
|
// @ts-expect-error: __new is a runtime builtin
|
|
186
200
|
const _out = __new(len, idof<T>());
|
|
@@ -330,15 +344,22 @@ export namespace sc {
|
|
|
330
344
|
// @ts-expect-error: JSON_CACHE may not be defined. If so, it will default to false.
|
|
331
345
|
export const CACHE_ENABLED: bool = isDefined(JSON_CACHE) ? JSON_CACHE : false;
|
|
332
346
|
// @ts-expect-error: JSON_CACHE_SIZE may not be defined. If so, it will default to 1MB.
|
|
333
|
-
export const CACHE_BYTES: usize = isDefined(JSON_CACHE_SIZE)
|
|
347
|
+
export const CACHE_BYTES: usize = isDefined(JSON_CACHE_SIZE)
|
|
348
|
+
? <usize>JSON_CACHE_SIZE
|
|
349
|
+
: 1 << 20;
|
|
334
350
|
/** Minimum serialized length to cache - smaller outputs aren't worth caching */
|
|
335
351
|
export const MIN_CACHE_LEN: usize = 128;
|
|
336
352
|
/** Size of the circular arena buffer for cached strings */
|
|
337
|
-
export const ARENA_SIZE: usize =
|
|
353
|
+
export const ARENA_SIZE: usize =
|
|
354
|
+
CACHE_BYTES >= MIN_CACHE_LEN ? CACHE_BYTES : MIN_CACHE_LEN;
|
|
338
355
|
|
|
339
356
|
/** Number of cache slots (power of 2 for efficient masking). Set to 0 when caching disabled. */
|
|
340
|
-
const CACHE_SIZE_BASE: i32 = CACHE_ENABLED
|
|
341
|
-
|
|
357
|
+
const CACHE_SIZE_BASE: i32 = CACHE_ENABLED
|
|
358
|
+
? i32(ARENA_SIZE >> 10 >= 1 ? ARENA_SIZE >> 10 : 1)
|
|
359
|
+
: 0;
|
|
360
|
+
export const CACHE_SIZE: usize = CACHE_ENABLED
|
|
361
|
+
? <usize>(1 << (32 - clz<i32>(CACHE_SIZE_BASE - 1)))
|
|
362
|
+
: 0;
|
|
342
363
|
/** Bitmask for fast modulo operation on cache index */
|
|
343
364
|
export const CACHE_MASK: usize = CACHE_SIZE > 0 ? CACHE_SIZE - 1 : 0;
|
|
344
365
|
|