numtypes 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/LICENSE +12 -0
- package/LICENSE-APACHE +201 -0
- package/LICENSE-MIT +21 -0
- package/README.md +652 -0
- package/dist/lib/index.d.ts +22 -0
- package/dist/lib/index.d.ts.map +1 -0
- package/dist/lib/index.js +2 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/transformer/analyze/analyze-source-file.d.ts +15 -0
- package/dist/transformer/analyze/analyze-source-file.d.ts.map +1 -0
- package/dist/transformer/analyze/analyze-source-file.js +605 -0
- package/dist/transformer/analyze/analyze-source-file.js.map +1 -0
- package/dist/transformer/analyze/get-contextual-domain.d.ts +19 -0
- package/dist/transformer/analyze/get-contextual-domain.d.ts.map +1 -0
- package/dist/transformer/analyze/get-contextual-domain.js +197 -0
- package/dist/transformer/analyze/get-contextual-domain.js.map +1 -0
- package/dist/transformer/analyze/get-expression-domain.d.ts +26 -0
- package/dist/transformer/analyze/get-expression-domain.d.ts.map +1 -0
- package/dist/transformer/analyze/get-expression-domain.js +804 -0
- package/dist/transformer/analyze/get-expression-domain.js.map +1 -0
- package/dist/transformer/analyze/type-domain.d.ts +41 -0
- package/dist/transformer/analyze/type-domain.d.ts.map +1 -0
- package/dist/transformer/analyze/type-domain.js +260 -0
- package/dist/transformer/analyze/type-domain.js.map +1 -0
- package/dist/transformer/ast.d.ts +10 -0
- package/dist/transformer/ast.d.ts.map +1 -0
- package/dist/transformer/ast.js +115 -0
- package/dist/transformer/ast.js.map +1 -0
- package/dist/transformer/diagnostics.d.ts +17 -0
- package/dist/transformer/diagnostics.d.ts.map +1 -0
- package/dist/transformer/diagnostics.js +30 -0
- package/dist/transformer/diagnostics.js.map +1 -0
- package/dist/transformer/domains.d.ts +11 -0
- package/dist/transformer/domains.d.ts.map +1 -0
- package/dist/transformer/domains.js +32 -0
- package/dist/transformer/domains.js.map +1 -0
- package/dist/transformer/index.d.ts +10 -0
- package/dist/transformer/index.d.ts.map +1 -0
- package/dist/transformer/index.js +60 -0
- package/dist/transformer/index.js.map +1 -0
- package/dist/transformer/operators.d.ts +16 -0
- package/dist/transformer/operators.d.ts.map +1 -0
- package/dist/transformer/operators.js +44 -0
- package/dist/transformer/operators.js.map +1 -0
- package/dist/transformer/options.d.ts +19 -0
- package/dist/transformer/options.d.ts.map +1 -0
- package/dist/transformer/options.js +17 -0
- package/dist/transformer/options.js.map +1 -0
- package/dist/transformer/symbols.d.ts +56 -0
- package/dist/transformer/symbols.d.ts.map +1 -0
- package/dist/transformer/symbols.js +270 -0
- package/dist/transformer/symbols.js.map +1 -0
- package/dist/transformer/transform/erase-imports.d.ts +14 -0
- package/dist/transformer/transform/erase-imports.d.ts.map +1 -0
- package/dist/transformer/transform/erase-imports.js +174 -0
- package/dist/transformer/transform/erase-imports.js.map +1 -0
- package/dist/transformer/transform/generated-coercions.d.ts +9 -0
- package/dist/transformer/transform/generated-coercions.d.ts.map +1 -0
- package/dist/transformer/transform/generated-coercions.js +22 -0
- package/dist/transformer/transform/generated-coercions.js.map +1 -0
- package/dist/transformer/transform/optimize-coercions.d.ts +11 -0
- package/dist/transformer/transform/optimize-coercions.d.ts.map +1 -0
- package/dist/transformer/transform/optimize-coercions.js +1702 -0
- package/dist/transformer/transform/optimize-coercions.js.map +1 -0
- package/dist/transformer/transform/transform-declaration-file.d.ts +9 -0
- package/dist/transformer/transform/transform-declaration-file.d.ts.map +1 -0
- package/dist/transformer/transform/transform-declaration-file.js +376 -0
- package/dist/transformer/transform/transform-declaration-file.js.map +1 -0
- package/dist/transformer/transform/transform-expression.d.ts +24 -0
- package/dist/transformer/transform/transform-expression.d.ts.map +1 -0
- package/dist/transformer/transform/transform-expression.js +545 -0
- package/dist/transformer/transform/transform-expression.js.map +1 -0
- package/dist/transformer/transform/transform-source-file.d.ts +10 -0
- package/dist/transformer/transform/transform-source-file.d.ts.map +1 -0
- package/dist/transformer/transform/transform-source-file.js +52 -0
- package/dist/transformer/transform/transform-source-file.js.map +1 -0
- package/dist/transformer/ts-compat.d.ts +4 -0
- package/dist/transformer/ts-compat.d.ts.map +1 -0
- package/dist/transformer/ts-compat.js +24 -0
- package/dist/transformer/ts-compat.js.map +1 -0
- package/docs/implementation-plan.md +335 -0
- package/docs/lib-implementation.md +77 -0
- package/docs/lowering-optimization-spec.md +1020 -0
- package/docs/project-structure.md +52 -0
- package/docs/transform-spec.md +2114 -0
- package/package.json +83 -0
package/README.md
ADDED
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
# numtypes
|
|
2
|
+
|
|
3
|
+
`numtypes` is a zero-runtime TypeScript numeric-domain transformer.
|
|
4
|
+
|
|
5
|
+
User code imports a small set of phantom symbols, writes ordinary TypeScript
|
|
6
|
+
with branded numeric types, and the transformer erases those symbols into the
|
|
7
|
+
JavaScript coercion idioms engines already understand:
|
|
8
|
+
|
|
9
|
+
| Type | Runtime closure shape | Intended domain |
|
|
10
|
+
| --- | --- | --- |
|
|
11
|
+
| `i32` | `x \| 0`, `Math.imul(a, b)` | signed int32 |
|
|
12
|
+
| `u32` | `x >>> 0`, `Math.imul(a, b) >>> 0` | unsigned 32-bit integer (uint32) |
|
|
13
|
+
| `f32` | `Math.fround(x)` | value rounded to float32 precision, still a JS `number` |
|
|
14
|
+
| `f64` | `+x` | JavaScript binary64 `number` |
|
|
15
|
+
|
|
16
|
+
The imported functions are compile-time markers, not runtime utilities. A
|
|
17
|
+
successful build should not keep calls to `i32`, `u32`, `f32`, or `f64` in the
|
|
18
|
+
emitted JavaScript unless you intentionally preserve phantom imports for
|
|
19
|
+
debugging.
|
|
20
|
+
|
|
21
|
+
## Why This Exists
|
|
22
|
+
|
|
23
|
+
JavaScript exposes one ordinary numeric value type, `number`, but engines have
|
|
24
|
+
many internal numeric representations. Hot code may be optimized as small
|
|
25
|
+
integers, int32, uint32, float64, or sometimes float32 when the source shape is
|
|
26
|
+
clear enough. Historically, hand-written JavaScript used coercion idioms such as
|
|
27
|
+
`| 0`, `>>> 0`, `Math.imul`, and `Math.fround` to make those shapes visible to
|
|
28
|
+
JIT compilers.
|
|
29
|
+
|
|
30
|
+
`numtypes` moves that idiom into TypeScript:
|
|
31
|
+
|
|
32
|
+
- TypeScript gets branded domain names (`i32`, `u32`, `f32`, `f64`).
|
|
33
|
+
- The transformer inserts the required coercions at arithmetic and storage
|
|
34
|
+
boundaries.
|
|
35
|
+
- Runtime JavaScript stays dependency-free.
|
|
36
|
+
- The generated code is benchmarked against plain JavaScript on current engines.
|
|
37
|
+
|
|
38
|
+
This is not a replacement for WebAssembly or SIMD. It is a source-to-source
|
|
39
|
+
lowering pass for JavaScript projects that want explicit numeric domains without
|
|
40
|
+
manually writing coercions after every operation.
|
|
41
|
+
|
|
42
|
+
## Status
|
|
43
|
+
|
|
44
|
+
The project is still early. The transformer is the main artifact; the public
|
|
45
|
+
runtime package exists mostly so TypeScript source can name the marker symbols.
|
|
46
|
+
|
|
47
|
+
Expect sharp semantics:
|
|
48
|
+
|
|
49
|
+
- arithmetic over branded values must stay inside a branded domain, or escape
|
|
50
|
+
explicitly with `as number`, `as unknown`, or `as any`;
|
|
51
|
+
- mixed domains such as `i32 + f32` are rejected unless the user casts one side;
|
|
52
|
+
- `as i32` is unchecked and emits no coercion by itself;
|
|
53
|
+
- `i32(x)` is checked and emits `x | 0`;
|
|
54
|
+
- declaration output can preserve branded types or erase them to `number`.
|
|
55
|
+
|
|
56
|
+
See [docs/transform-spec.md](docs/transform-spec.md) for the full transform
|
|
57
|
+
rules and [docs/lowering-optimization-spec.md](docs/lowering-optimization-spec.md)
|
|
58
|
+
for the coercion optimization pass.
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
npm install numtypes
|
|
64
|
+
npm install --save-dev typescript ts-patch
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
`numtypes` has a peer dependency on TypeScript. It also requires a build
|
|
68
|
+
pipeline that can install TypeScript custom transformers. The recommended path
|
|
69
|
+
is `ts-patch`, configured through `compilerOptions.plugins` and run with `tspc`.
|
|
70
|
+
The stock `tsc` CLI does not load third-party transformers from `tsconfig.json`
|
|
71
|
+
unless TypeScript has been patched.
|
|
72
|
+
|
|
73
|
+
## Quick Example
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
import { i32 } from "numtypes";
|
|
77
|
+
|
|
78
|
+
export function hashStep(hash: i32, byte: i32): i32 {
|
|
79
|
+
return i32((hash ^ byte) * 0x01000193);
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The source type checks as branded TypeScript. The emitted JavaScript is ordinary
|
|
84
|
+
number code:
|
|
85
|
+
|
|
86
|
+
```javascript
|
|
87
|
+
export function hashStep(hash, byte) {
|
|
88
|
+
return Math.imul(hash ^ byte, 0x01000193);
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
The package import disappears because the marker call has been consumed by the
|
|
93
|
+
transformer.
|
|
94
|
+
|
|
95
|
+
## Checked Casts And Unchecked Assertions
|
|
96
|
+
|
|
97
|
+
The call form performs the coercion:
|
|
98
|
+
|
|
99
|
+
```typescript
|
|
100
|
+
import { i32 } from "numtypes";
|
|
101
|
+
|
|
102
|
+
const checked = i32(0.9); // emits 0.9 | 0
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
The TypeScript assertion form is an unchecked promise:
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
import type { i32 } from "numtypes";
|
|
109
|
+
|
|
110
|
+
const unchecked = 0.9 as i32; // emits 0.9
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Assertions are useful at trusted boundaries, but they do not retroactively
|
|
114
|
+
coerce the asserted expression. If a checked runtime conversion is required, use
|
|
115
|
+
the function marker.
|
|
116
|
+
|
|
117
|
+
## Transformer Usage
|
|
118
|
+
|
|
119
|
+
`numtypes` is intended to be used through
|
|
120
|
+
[`ts-patch`](https://github.com/nonara/ts-patch). `ts-patch` lets TypeScript
|
|
121
|
+
load custom transformers from `tsconfig.json` and run them through a patched
|
|
122
|
+
compiler command.
|
|
123
|
+
|
|
124
|
+
Install `ts-patch` next to TypeScript:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
npm install --save-dev ts-patch typescript
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
`ts-patch` v4 targets TypeScript 6 and later. If your project is still on
|
|
131
|
+
TypeScript 5, use `ts-patch` v3.
|
|
132
|
+
|
|
133
|
+
Add the transformer to `tsconfig.json`:
|
|
134
|
+
|
|
135
|
+
```json
|
|
136
|
+
{
|
|
137
|
+
"compilerOptions": {
|
|
138
|
+
"plugins": [
|
|
139
|
+
{
|
|
140
|
+
"transform": "numtypes/transformer"
|
|
141
|
+
}
|
|
142
|
+
]
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Then compile with `tspc` instead of `tsc`:
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
npx tspc -p tsconfig.json
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
You can also install a persistent patch and keep using `tsc`:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
npx ts-patch install
|
|
157
|
+
npx tsc -p tsconfig.json
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
For declaration emit, the default is to preserve branded declaration types:
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
export declare function read(value: i32): f32;
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
If you want generated `.d.ts` files to expose only the runtime JavaScript
|
|
167
|
+
contract, add the declaration transformer as an `afterDeclarations` plugin:
|
|
168
|
+
|
|
169
|
+
```json
|
|
170
|
+
{
|
|
171
|
+
"compilerOptions": {
|
|
172
|
+
"declaration": true,
|
|
173
|
+
"plugins": [
|
|
174
|
+
{
|
|
175
|
+
"transform": "numtypes/transformer"
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
"transform": "numtypes/transformer",
|
|
179
|
+
"import": "createDeclarationTransformer",
|
|
180
|
+
"afterDeclarations": true,
|
|
181
|
+
"declarationTypes": "erase"
|
|
182
|
+
}
|
|
183
|
+
]
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
That declaration plugin rewrites branded declaration positions to `number` and
|
|
189
|
+
removes declaration-only numtypes imports that become unused.
|
|
190
|
+
|
|
191
|
+
## Options
|
|
192
|
+
|
|
193
|
+
| Option | Default | Meaning |
|
|
194
|
+
| --- | --- | --- |
|
|
195
|
+
| `declarationTypes` | `"preserve"` | `"preserve"` keeps `i32`/`u32`/`f32`/`f64` in `.d.ts`; `"erase"` rewrites them to `number` when passed to the `createDeclarationTransformer` `afterDeclarations` plugin. |
|
|
196
|
+
| `collectDiagnostics` | `true` | Emit transformer diagnostics for domain leaks, mixed domains, ambiguous unions, and marker misuse. |
|
|
197
|
+
| `preservePhantomImports` | `false` | Keep otherwise-erased imports from `numtypes`; mainly useful for debugging transformer output. |
|
|
198
|
+
| `optimizeTypedArrayElementAccess` | `false` | Allow proof-based removal of generated coercions around numeric TypedArray element reads when loop bounds prove in-bounds access. |
|
|
199
|
+
|
|
200
|
+
`optimizeTypedArrayElementAccess` is off by default because it trusts runtime
|
|
201
|
+
objects to match their TypeScript annotations. If a value is typed as
|
|
202
|
+
`Int32Array`, the optimized region assumes it is actually an intrinsic
|
|
203
|
+
`Int32Array`, not a monkey-patched or structurally similar object.
|
|
204
|
+
|
|
205
|
+
## Declaration Surface
|
|
206
|
+
|
|
207
|
+
By default, declarations preserve branded APIs:
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
export declare function read(value: i32): f32;
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
With `declarationTypes: "erase"`, declaration emit exposes the runtime contract:
|
|
214
|
+
|
|
215
|
+
```typescript
|
|
216
|
+
export declare function read(value: number): number;
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Use `"preserve"` for libraries that want downstream TypeScript users to keep the
|
|
220
|
+
same branded constraints. Use `"erase"` for libraries that treat numtypes as a
|
|
221
|
+
private implementation detail and publish a plain JavaScript numeric API.
|
|
222
|
+
|
|
223
|
+
## Domain Guidance
|
|
224
|
+
|
|
225
|
+
The practical takeaway from current benchmarks is conservative:
|
|
226
|
+
|
|
227
|
+
| Domain | Recommendation |
|
|
228
|
+
| --- | --- |
|
|
229
|
+
| `i32` | Useful for real performance-sensitive integer code, especially TypedArray-heavy kernels and multiply-heavy loops. Also useful for correctness when 32-bit overflow is intended. |
|
|
230
|
+
| `u32` | Useful for unsigned range semantics and hashing/bitwise correctness. Do not assume a broad speedup over good hand-written JavaScript. |
|
|
231
|
+
| `f32` | Useful as a precision contract. On current V8 it is slower; on SpiderMonkey the same shape can keep near parity when the engine lowers the pattern to float32. |
|
|
232
|
+
| `f64` | Useful mostly as a proof and documentation domain. JavaScript already uses binary64 `number`; extra unary `+` coercions rarely improve performance and can be neutral or slightly negative. |
|
|
233
|
+
|
|
234
|
+
In short: `i32` is the only domain that currently looks broadly practical as a
|
|
235
|
+
performance tool. `u32`, `f32`, and `f64` are still useful when the value domain
|
|
236
|
+
itself matters, but they should be treated as semantic constraints first and
|
|
237
|
+
performance hints second.
|
|
238
|
+
|
|
239
|
+
## Benchmark Methodology
|
|
240
|
+
|
|
241
|
+
Benchmarks are opt-in:
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
npm run bench
|
|
245
|
+
npm run bench:md
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
The algebra benchmarks compile a source string containing both:
|
|
249
|
+
|
|
250
|
+
1. a plain JavaScript `number` baseline; and
|
|
251
|
+
2. a numtypes-authored implementation.
|
|
252
|
+
|
|
253
|
+
The numtypes implementation is lowered by the real transformer at benchmark load
|
|
254
|
+
time. The benchmark therefore measures actual transformer output, including the
|
|
255
|
+
coercion optimization pass, not hand-written approximations.
|
|
256
|
+
|
|
257
|
+
Each comparison warms both implementations, alternates baseline/typed
|
|
258
|
+
measurement order, discards the first measured sample rounds, and reports the
|
|
259
|
+
median of the retained steady-state rounds. This matters because the first
|
|
260
|
+
optimized runs after dynamic import can still include speculation, tiering, and
|
|
261
|
+
recompilation outliers on V8.
|
|
262
|
+
|
|
263
|
+
`Rel speed` is:
|
|
264
|
+
|
|
265
|
+
```text
|
|
266
|
+
baseline runtime / transformed runtime
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Higher than `1.00x` means the transformed version is faster. Lower than `1.00x`
|
|
270
|
+
means the transformed version is slower.
|
|
271
|
+
|
|
272
|
+
## Benchmark Snapshot
|
|
273
|
+
|
|
274
|
+
Measured on:
|
|
275
|
+
|
|
276
|
+
```text
|
|
277
|
+
Node v24.4.1
|
|
278
|
+
V8 13.6.233.10-node.17
|
|
279
|
+
Windows x64
|
|
280
|
+
2026-06-21
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
These numbers are not contractual. They are a snapshot of one engine, one CPU,
|
|
284
|
+
one OS, and one set of hot-loop shapes.
|
|
285
|
+
|
|
286
|
+
### f32: Precision Contract, Not a V8 Speedup
|
|
287
|
+
|
|
288
|
+
The f32 suite compares transformed `Math.fround` kernels against plain f64
|
|
289
|
+
`number` kernels. Results differ by design because f32 has less precision.
|
|
290
|
+
|
|
291
|
+
| Benchmark | Rel speed | Precision |
|
|
292
|
+
| --- | ---: | --- |
|
|
293
|
+
| vec3 dot | 0.39x | ~8e-8 |
|
|
294
|
+
| vec3 cross | 0.18x | ~8e-8 |
|
|
295
|
+
| vec3 normalize | 0.54x | ~9e-8 |
|
|
296
|
+
| mat4 x mat4 | 0.36x | ~1e-7 |
|
|
297
|
+
| mat4 x vec4 | 0.15x | ~9e-8 |
|
|
298
|
+
| quat multiply | 0.15x | ~8e-8 |
|
|
299
|
+
| vec3 lerp | 0.18x | ~9e-8 |
|
|
300
|
+
| ray-triangle | 0.17x | ~1e-5 |
|
|
301
|
+
|
|
302
|
+
Current V8 treats `Math.fround` as real rounding work in these scalar kernels.
|
|
303
|
+
That means f32 source can be roughly 2x to 7x slower than the f64 baseline even though
|
|
304
|
+
the emitted shape is the standard JavaScript way to request binary32 rounding.
|
|
305
|
+
|
|
306
|
+
SpiderMonkey is different. Mozilla's IonMonkey work added an optimization pass
|
|
307
|
+
that recognizes `Float32Array`/`Math.fround` patterns and can emit float32
|
|
308
|
+
operations when the required identities hold. In a local scalar dot-product
|
|
309
|
+
probe, SpiderMonkey 153 kept a `Float32Array` + `Math.fround` variant around
|
|
310
|
+
parity with the f64 `Float64Array` baseline, while V8 13.6 did not.
|
|
311
|
+
|
|
312
|
+
The history matters here. This optimization came from the same period as
|
|
313
|
+
asm.js, Emscripten, and WebGL performance work, but it was not merely an
|
|
314
|
+
asm.js-only fast path. Mozilla's 2013 Float32 write-up describes the work as
|
|
315
|
+
general IonMonkey support for float32 operations, followed by an optimization
|
|
316
|
+
pass that recognizes `Float32Array` and `Math.fround`. The asm.js performance
|
|
317
|
+
article from the same period says the Float32 work was generic and applied to
|
|
318
|
+
any JavaScript that matched the optimizable shape; after that, SpiderMonkey and
|
|
319
|
+
Emscripten added float32 to the asm.js type system so asm.js code could benefit
|
|
320
|
+
from it specifically. In other words, SpiderMonkey's present advantage is best
|
|
321
|
+
understood as an asm.js-era optimization lineage that still benefits ordinary
|
|
322
|
+
JavaScript patterns, not as evidence that the active asm.js/OdinMonkey compiler
|
|
323
|
+
path is required for `Math.fround` to be fast.
|
|
324
|
+
|
|
325
|
+
This makes f32 engine-dependent. It is semantically valuable when you need
|
|
326
|
+
single-precision rounding. It is not currently a V8 performance optimization.
|
|
327
|
+
For V8, real f32 speedups are more likely to come from whole-kernel WebAssembly,
|
|
328
|
+
especially with SIMD, than from scalar `Math.fround` JavaScript.
|
|
329
|
+
|
|
330
|
+
### f64: Mostly Neutral
|
|
331
|
+
|
|
332
|
+
The f64 suite adds explicit unary `+` proofs around ordinary JavaScript number
|
|
333
|
+
math. Since JavaScript `number` is already binary64, the optimizer usually has
|
|
334
|
+
little to gain.
|
|
335
|
+
|
|
336
|
+
| Benchmark | Rel speed | Precision |
|
|
337
|
+
| --- | ---: | --- |
|
|
338
|
+
| f64 dot4 | 1.00x | exact |
|
|
339
|
+
| f64 dot4 typed-array | 1.01x | exact |
|
|
340
|
+
| horner poly | 0.97x | exact |
|
|
341
|
+
| f64 kahan sum | 1.01x | exact |
|
|
342
|
+
| f64 welford | 1.01x | exact |
|
|
343
|
+
| f64 biquad | 0.98x | exact |
|
|
344
|
+
| f64 black-scholes | 0.99x | exact |
|
|
345
|
+
| f64 newton sqrt | 1.00x | exact |
|
|
346
|
+
| f64 scale-add | 1.04x | exact |
|
|
347
|
+
| f64 recurrence | 1.01x | exact |
|
|
348
|
+
| f64 RK4 logistic | 0.99x | exact |
|
|
349
|
+
|
|
350
|
+
The result is roughly parity. Some cases are slightly faster, some are slightly
|
|
351
|
+
slower, and the differences are small enough that they should not drive API
|
|
352
|
+
design. Use `f64` when the type annotation helps the program express "this must
|
|
353
|
+
be numeric binary64-like data" and when explicit plain-number escapes are useful
|
|
354
|
+
for code review.
|
|
355
|
+
|
|
356
|
+
### i32: The Most Useful Performance Domain
|
|
357
|
+
|
|
358
|
+
The i32 suite has three families:
|
|
359
|
+
|
|
360
|
+
- overflow-oriented kernels where int32 closure is part of the intended
|
|
361
|
+
algorithm;
|
|
362
|
+
- bounded-number kernels where f64 `number` arithmetic is already exact, but the
|
|
363
|
+
source logically treats values as integers;
|
|
364
|
+
- intentionally naive full-width multiply kernels where `(a * b) | 0` is both
|
|
365
|
+
semantically wrong and slow compared with transformer-generated `Math.imul`.
|
|
366
|
+
|
|
367
|
+
| Benchmark | Rel speed | Precision |
|
|
368
|
+
| --- | ---: | --- |
|
|
369
|
+
| i32 dot4 imul | 1.01x | exact |
|
|
370
|
+
| i32 dot8 imul | 1.02x | exact |
|
|
371
|
+
| i32 fir4 | 1.01x | exact |
|
|
372
|
+
| i32 LCG | 1.04x | exact |
|
|
373
|
+
| i32 fixed IIR | 1.04x | exact |
|
|
374
|
+
| i32 bresenham | 1.05x | exact |
|
|
375
|
+
| i32 xorshift | 1.04x | exact |
|
|
376
|
+
| i32 collatz | 1.05x | exact |
|
|
377
|
+
| i32 affine mix | 1.03x | exact |
|
|
378
|
+
| i32 horner mix | 0.95x | exact |
|
|
379
|
+
| i32 bounded score | 0.90x | exact |
|
|
380
|
+
| i32 bounded smooth | 1.76x | exact |
|
|
381
|
+
| i32 bounded delta | 1.81x | exact |
|
|
382
|
+
| i32 bounded state | 1.04x | exact |
|
|
383
|
+
| i32 bounded poly | 0.85x | exact |
|
|
384
|
+
| i32 bounded dither | 0.87x | exact |
|
|
385
|
+
|
|
386
|
+
i32 is not magic. Bounded small-number code where ordinary f64 arithmetic is
|
|
387
|
+
already exact can still get slower, because int32 closure adds coercion work that
|
|
388
|
+
the baseline does not need. After median-based measurement, those losses are
|
|
389
|
+
much smaller than the first-run outliers, but they are real enough that hot paths
|
|
390
|
+
still need per-engine benchmarking. The general picture is better than the other
|
|
391
|
+
domains: multiply-heavy and TypedArray-heavy kernels often benefit, and
|
|
392
|
+
overflow-oriented integer algorithms become easier to write correctly.
|
|
393
|
+
|
|
394
|
+
The strongest case for i32 is the third family listed above. JavaScript code often tries
|
|
395
|
+
to express C-like signed 32-bit multiplication with `(a * b) | 0`. That is only
|
|
396
|
+
valid while the binary64 product still contains the low 32 bits exactly. For
|
|
397
|
+
full-width int32 values, the product can exceed `2^53`, low bits are lost before
|
|
398
|
+
the final `| 0`, and the result is no longer equivalent to 32-bit wraparound
|
|
399
|
+
multiplication. In those cases, `i32(a * b)` is not just a hint; it is a compact
|
|
400
|
+
way to request the correct operation, and the transformer lowers it through
|
|
401
|
+
`Math.imul`.
|
|
402
|
+
|
|
403
|
+
```typescript
|
|
404
|
+
// Not equivalent to int32 multiplication for full-width inputs.
|
|
405
|
+
const wrong = (a * b) | 0;
|
|
406
|
+
|
|
407
|
+
// Emits Math.imul(a, b), then preserves i32 closure for later operations.
|
|
408
|
+
const right = i32(a * b);
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
| Benchmark | Rel speed | Precision |
|
|
412
|
+
| --- | ---: | --- |
|
|
413
|
+
| i32 naive LCG | 5.07x | naive wrong (100% differ) |
|
|
414
|
+
| i32 naive fmix | 2.35x | naive wrong (100% differ) |
|
|
415
|
+
| i32 naive FNV-1a | 4.50x | naive wrong (100% differ) |
|
|
416
|
+
|
|
417
|
+
This table is intentionally not a fair "same result, faster code" comparison.
|
|
418
|
+
The baseline is wrong. The point is more practical: numtypes lets source code
|
|
419
|
+
state the intended int32 domain once, then emits the operation shape engines
|
|
420
|
+
already understand for full-width 32-bit multiplication.
|
|
421
|
+
|
|
422
|
+
The additional `Math.imul` probe is important here. On V8 TurboFan with
|
|
423
|
+
TypedArrays, `Math.imul` was much faster than `* | 0` for variable multiply and
|
|
424
|
+
multiply-accumulate loops. On ordinary JS arrays with small integer values,
|
|
425
|
+
`Math.imul` was sometimes slower than `* | 0`. That means `Math.imul` is the
|
|
426
|
+
right conservative lowering for correctness and many hot loops, but a future
|
|
427
|
+
optimizer may want a range-proven escape hatch for small-value normal-array
|
|
428
|
+
code.
|
|
429
|
+
|
|
430
|
+
Focused V8 disassembly checks are useful for interpreting the slower i32 rows.
|
|
431
|
+
With `--print-opt-code`, `Math.imul(x, y)` lowers to the x64 `imull`
|
|
432
|
+
instruction in these hot loops. A hand-written `(x * y) | 0` variant of the
|
|
433
|
+
same `bounded poly` kernel lowered to essentially the same integer multiply
|
|
434
|
+
sequence and was not faster.
|
|
435
|
+
|
|
436
|
+
The clearest slowdown is `i32 bounded poly`:
|
|
437
|
+
|
|
438
|
+
```typescript
|
|
439
|
+
const x: i32 = i32(input[i] % 1024);
|
|
440
|
+
out[i] = i32((x * x + i32(3) * x + i32(7)) % i32(10007));
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
The plain baseline is also optimized as integer code because V8 learns that
|
|
444
|
+
`input[i]` is a small non-negative value. Its hot path for `% 10007` uses a
|
|
445
|
+
short unsigned-style magic-multiply sequence:
|
|
446
|
+
|
|
447
|
+
```asm
|
|
448
|
+
andl rdx,0x3ff
|
|
449
|
+
imull rsi,rdx
|
|
450
|
+
mull rcx
|
|
451
|
+
shrl rdx,12
|
|
452
|
+
imull rdx,rdx,0x2717
|
|
453
|
+
subl rsi,rdx
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
The i32-lowered version must preserve signed int32 closure and JavaScript `%`
|
|
457
|
+
semantics, so the remainder path carries extra sign correction:
|
|
458
|
+
|
|
459
|
+
```asm
|
|
460
|
+
andl rdx,0x3ff
|
|
461
|
+
imull rsi,rdx
|
|
462
|
+
imull rcx
|
|
463
|
+
shrl rax,31
|
|
464
|
+
sarl rdx,12
|
|
465
|
+
addl rdx,rax
|
|
466
|
+
imull rdx,rdx,0x2717
|
|
467
|
+
subl rsi,rdx
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
This is a range-analysis gap. The transformer knows the expression is i32, but
|
|
471
|
+
it does not yet prove that the value is non-negative and small enough for the
|
|
472
|
+
shorter unsigned remainder path. `bounded score` and `bounded dither` are
|
|
473
|
+
different: they do not contain `%`, and V8 also lowers their small multiplications
|
|
474
|
+
or additions to ordinary integer instructions. Their small losses are best
|
|
475
|
+
understood as code-shape and guard/register-allocation effects from explicit
|
|
476
|
+
closure in already-exact bounded arithmetic.
|
|
477
|
+
`i32 horner mix` is near parity when measured in isolation and is not the same
|
|
478
|
+
bounded-remainder issue.
|
|
479
|
+
|
|
480
|
+
### u32: Correctness First
|
|
481
|
+
|
|
482
|
+
u32 is valuable for bitwise and hash code because unsigned closure matters. The
|
|
483
|
+
performance story is mixed.
|
|
484
|
+
|
|
485
|
+
| Benchmark | Rel speed | Precision |
|
|
486
|
+
| --- | ---: | --- |
|
|
487
|
+
| u32 LCG | 0.99x | exact |
|
|
488
|
+
| xorshift32 | 1.02x | exact |
|
|
489
|
+
| xorshift*32 | 1.00x | exact |
|
|
490
|
+
| mulberry32 correct | 0.99x | exact |
|
|
491
|
+
| wang hash32 | 0.96x | exact |
|
|
492
|
+
| murmur3 block | 0.95x | exact |
|
|
493
|
+
| jenkins oat | 0.99x | exact |
|
|
494
|
+
| CRC32 bitwise | 1.01x | exact |
|
|
495
|
+
| Adler-32 | 1.01x | exact |
|
|
496
|
+
| FNV-1a bytes | 1.02x | exact |
|
|
497
|
+
| u32 bounded prefix | 1.01x | exact |
|
|
498
|
+
| u32 bounded checksum | 1.10x | exact |
|
|
499
|
+
| u32 bounded counter | 0.99x | exact |
|
|
500
|
+
| u32 bounded pair hash | 1.23x | exact |
|
|
501
|
+
| u32 bounded rgb pack | 0.99x | exact |
|
|
502
|
+
| u32 bounded unpack sum | 2.74x | exact |
|
|
503
|
+
|
|
504
|
+
Some u32 cases are faster, especially when generated coercions are optimized
|
|
505
|
+
away in tight bounded loops. Other cases are neutral or slower. For real hashing
|
|
506
|
+
code, the more important point is often correctness: naive `number`
|
|
507
|
+
multiplication silently loses low bits when products exceed `2^53`.
|
|
508
|
+
|
|
509
|
+
| Benchmark | Rel speed | Precision |
|
|
510
|
+
| --- | ---: | --- |
|
|
511
|
+
| mulberry32 | 2.21x | naive wrong (100% differ) |
|
|
512
|
+
| murmur3 fmix | 2.10x | naive wrong (100% differ) |
|
|
513
|
+
|
|
514
|
+
The second table compares against intentionally naive baselines. It is not a
|
|
515
|
+
fair speed comparison because the baseline is wrong. It shows why `Math.imul`
|
|
516
|
+
exists: a plain `*` implementation can be fast or slow, but it is not equivalent
|
|
517
|
+
for full-width 32-bit integer multiplication.
|
|
518
|
+
|
|
519
|
+
## Engine Notes
|
|
520
|
+
|
|
521
|
+
### `Math.imul`
|
|
522
|
+
|
|
523
|
+
ECMAScript defines `Math.imul(x, y)` as 32-bit multiplication modulo `2^32`,
|
|
524
|
+
returned as a signed int32. MDN describes it as C-like 32-bit integer
|
|
525
|
+
multiplication, and the TC39 integer-math proposal notes that engines can infer
|
|
526
|
+
some integer optimizations but that explicit source operations are useful when
|
|
527
|
+
inference is not practical.
|
|
528
|
+
|
|
529
|
+
For numtypes, this means `i32(a * b)` and `u32(a * b)` lower through
|
|
530
|
+
`Math.imul` when both operands are integer-domain values. Rewriting every
|
|
531
|
+
integer multiply to plain `*` followed by a coercion would be incorrect for full 32-bit values, because the binary64 product can lose low bits.
|
|
532
|
+
|
|
533
|
+
### `Math.fround`
|
|
534
|
+
|
|
535
|
+
ECMAScript defines `Math.fround` as binary64 input, binary32 conversion with
|
|
536
|
+
round-to-nearest ties-to-even, then binary64 output. The return value is still a
|
|
537
|
+
JavaScript `number`; the operation just forces single-precision rounding at that
|
|
538
|
+
point.
|
|
539
|
+
|
|
540
|
+
SpiderMonkey's historical float32 optimization is designed around this shape:
|
|
541
|
+
`Float32Array` values and `Math.fround` calls provide enough information for the
|
|
542
|
+
JIT to use float32 operations when doing so preserves the required rounded
|
|
543
|
+
result. V8 currently does not show the same scalar speedup in this benchmark
|
|
544
|
+
set, even when forced to TurboFan.
|
|
545
|
+
|
|
546
|
+
The code-history trail is:
|
|
547
|
+
|
|
548
|
+
- [Bug 888109](https://bugzilla.mozilla.org/show_bug.cgi?id=888109),
|
|
549
|
+
"IonMonkey: Introduce Float32 general optimization", added the general
|
|
550
|
+
Float32 optimization work in the IonMonkey backend. The bug description
|
|
551
|
+
explicitly targets lowering `Float32Array` arithmetic from double
|
|
552
|
+
load/convert/add/store sequences to float32 assembly instructions.
|
|
553
|
+
- [Efficient float32 arithmetic in JavaScript](https://blog.mozilla.org/javascript/2013/11/07/efficient-float32-arithmetic-in-javascript/)
|
|
554
|
+
describes the implementation sequence: add general float32 support to
|
|
555
|
+
IonMonkey, add `Math.fround`, then add a recognition pass for
|
|
556
|
+
`Float32Array`/`Math.fround` patterns.
|
|
557
|
+
- [Gap between asm.js and native performance gets even narrower with float32 optimizations](https://hacks.mozilla.org/2013/12/gap-between-asm-js-and-native-performance-gets-even-narrower-with-float32-optimizations/)
|
|
558
|
+
connects that generic Float32 work back to asm.js and Emscripten: once the
|
|
559
|
+
generic optimization existed, asm.js gained a float32 type path so compiled
|
|
560
|
+
C/C++ float-heavy code could benefit.
|
|
561
|
+
- [Saying goodbye to asm.js](https://spidermonkey.dev/blog/2026/05/20/saying-goodbye-to-asmjs.html)
|
|
562
|
+
says SpiderMonkey's asm.js optimizations are disabled by default as of
|
|
563
|
+
Firefox 148 and planned for removal. That makes it important not to describe
|
|
564
|
+
current `Math.fround` behavior as relying on the active asm.js compiler. The
|
|
565
|
+
safer claim is that the optimization comes from the asm.js-era Float32 work
|
|
566
|
+
and remains relevant to ordinary JavaScript shapes.
|
|
567
|
+
|
|
568
|
+
### Unary `+`
|
|
569
|
+
|
|
570
|
+
Unary `+` proves "number-ness" at runtime. It can prevent accidental string
|
|
571
|
+
concatenation or object coercion from crossing into a numeric region, but it
|
|
572
|
+
does not create a narrower machine type than JavaScript's normal binary64
|
|
573
|
+
`number`. The optimizer can often remove redundant unary `+`, but the benchmark
|
|
574
|
+
results show that it should not be expected to produce a broad speedup.
|
|
575
|
+
|
|
576
|
+
## Optimization Pass
|
|
577
|
+
|
|
578
|
+
The initial lowering pass is intentionally conservative and may emit more
|
|
579
|
+
coercions than a human would write. A later optimization pass removes only
|
|
580
|
+
coercions generated by numtypes, not coercions the user wrote by hand.
|
|
581
|
+
|
|
582
|
+
This generated-only rule matters. If the user writes:
|
|
583
|
+
|
|
584
|
+
```typescript
|
|
585
|
+
const x = value | 0;
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
the optimizer preserves it. If numtypes generated the same coercion as part of a
|
|
589
|
+
closed expression and the optimizer can prove it redundant, the optimizer may remove it.
|
|
590
|
+
|
|
591
|
+
The optimizer currently handles cases such as:
|
|
592
|
+
|
|
593
|
+
- repeated closure of already-closed local values;
|
|
594
|
+
- closure propagation through selected arithmetic and bitwise expressions;
|
|
595
|
+
- u32 mask and modulo producer patterns;
|
|
596
|
+
- optional TypedArray element access proofs when
|
|
597
|
+
`optimizeTypedArrayElementAccess` is enabled.
|
|
598
|
+
|
|
599
|
+
See [docs/lowering-optimization-spec.md](docs/lowering-optimization-spec.md) for
|
|
600
|
+
the exact proof rules.
|
|
601
|
+
|
|
602
|
+
## Practical Usage Rules
|
|
603
|
+
|
|
604
|
+
Use `i32` when:
|
|
605
|
+
|
|
606
|
+
- the algorithm intentionally uses signed 32-bit overflow;
|
|
607
|
+
- multiplication must have C-like 32-bit semantics;
|
|
608
|
+
- the current code relies on `(a * b) | 0` and the operands may be full-width
|
|
609
|
+
int32 values, where the float64 product can exceed 2^53 and lose low bits before `| 0` truncates it;
|
|
610
|
+
- the hot loop is TypedArray-heavy or integer-heavy;
|
|
611
|
+
- you can benchmark the transformed output on your target engine.
|
|
612
|
+
|
|
613
|
+
Use `u32` when:
|
|
614
|
+
|
|
615
|
+
- unsigned range and bitwise behavior are part of correctness;
|
|
616
|
+
- the code implements hashes, checksums, PRNGs, encoders, or binary formats;
|
|
617
|
+
- performance is measured rather than assumed.
|
|
618
|
+
|
|
619
|
+
Use `f32` when:
|
|
620
|
+
|
|
621
|
+
- single-precision rounding is semantically meaningful;
|
|
622
|
+
- compatibility with WebGL, binary formats, or float32 data pipelines matters;
|
|
623
|
+
- your target engine is known to optimize the `Math.fround` pattern, or you
|
|
624
|
+
accept the V8 slowdown as the cost of precision control.
|
|
625
|
+
|
|
626
|
+
Use `f64` when:
|
|
627
|
+
|
|
628
|
+
- you want explicit numeric boundaries in TypeScript;
|
|
629
|
+
- you want declaration-level documentation that a value is meant to stay in the
|
|
630
|
+
numeric domain;
|
|
631
|
+
- you are not expecting a consistent speedup from unary `+`.
|
|
632
|
+
|
|
633
|
+
Avoid numtypes when:
|
|
634
|
+
|
|
635
|
+
- the code is ordinary application arithmetic where readability matters more
|
|
636
|
+
than numeric-domain control;
|
|
637
|
+
- the hot path is not benchmarked;
|
|
638
|
+
- the algorithm would be clearer or faster as WebAssembly/SIMD.
|
|
639
|
+
|
|
640
|
+
## References
|
|
641
|
+
|
|
642
|
+
- [ECMAScript `Math.imul`](https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-math.imul)
|
|
643
|
+
- [ECMAScript `Math.fround`](https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-math.fround)
|
|
644
|
+
- [MDN: `Math.imul()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul)
|
|
645
|
+
- [MDN: `Math.fround()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/fround)
|
|
646
|
+
- [Mozilla JavaScript Blog: Efficient float32 arithmetic in JavaScript](https://blog.mozilla.org/javascript/2013/11/07/efficient-float32-arithmetic-in-javascript/)
|
|
647
|
+
- [V8 docs: TurboFan](https://v8.dev/docs/turbofan)
|
|
648
|
+
- [TC39 proposal: Modulus and Additional Integer Math](https://github.com/tc39/proposal-integer-and-modulus-math)
|
|
649
|
+
|
|
650
|
+
## License
|
|
651
|
+
|
|
652
|
+
MIT OR Apache-2.0
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
declare const i32Brand: unique symbol;
|
|
2
|
+
declare const u32Brand: unique symbol;
|
|
3
|
+
declare const f32Brand: unique symbol;
|
|
4
|
+
declare const f64Brand: unique symbol;
|
|
5
|
+
export type i32 = number & {
|
|
6
|
+
readonly [i32Brand]: never;
|
|
7
|
+
};
|
|
8
|
+
export type u32 = number & {
|
|
9
|
+
readonly [u32Brand]: never;
|
|
10
|
+
};
|
|
11
|
+
export type f32 = number & {
|
|
12
|
+
readonly [f32Brand]: never;
|
|
13
|
+
};
|
|
14
|
+
export type f64 = number & {
|
|
15
|
+
readonly [f64Brand]: never;
|
|
16
|
+
};
|
|
17
|
+
export declare function i32(value: number): i32;
|
|
18
|
+
export declare function u32(value: number): u32;
|
|
19
|
+
export declare function f32(value: number): f32;
|
|
20
|
+
export declare function f64(value: number): f64;
|
|
21
|
+
export {};
|
|
22
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AACtC,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AACtC,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AACtC,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AAEtC,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/lib/index.ts"],"names":[],"mappings":""}
|