numtypes 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/LICENSE +12 -0
  3. package/LICENSE-APACHE +201 -0
  4. package/LICENSE-MIT +21 -0
  5. package/README.md +652 -0
  6. package/dist/lib/index.d.ts +22 -0
  7. package/dist/lib/index.d.ts.map +1 -0
  8. package/dist/lib/index.js +2 -0
  9. package/dist/lib/index.js.map +1 -0
  10. package/dist/transformer/analyze/analyze-source-file.d.ts +15 -0
  11. package/dist/transformer/analyze/analyze-source-file.d.ts.map +1 -0
  12. package/dist/transformer/analyze/analyze-source-file.js +605 -0
  13. package/dist/transformer/analyze/analyze-source-file.js.map +1 -0
  14. package/dist/transformer/analyze/get-contextual-domain.d.ts +19 -0
  15. package/dist/transformer/analyze/get-contextual-domain.d.ts.map +1 -0
  16. package/dist/transformer/analyze/get-contextual-domain.js +197 -0
  17. package/dist/transformer/analyze/get-contextual-domain.js.map +1 -0
  18. package/dist/transformer/analyze/get-expression-domain.d.ts +26 -0
  19. package/dist/transformer/analyze/get-expression-domain.d.ts.map +1 -0
  20. package/dist/transformer/analyze/get-expression-domain.js +804 -0
  21. package/dist/transformer/analyze/get-expression-domain.js.map +1 -0
  22. package/dist/transformer/analyze/type-domain.d.ts +41 -0
  23. package/dist/transformer/analyze/type-domain.d.ts.map +1 -0
  24. package/dist/transformer/analyze/type-domain.js +260 -0
  25. package/dist/transformer/analyze/type-domain.js.map +1 -0
  26. package/dist/transformer/ast.d.ts +10 -0
  27. package/dist/transformer/ast.d.ts.map +1 -0
  28. package/dist/transformer/ast.js +115 -0
  29. package/dist/transformer/ast.js.map +1 -0
  30. package/dist/transformer/diagnostics.d.ts +17 -0
  31. package/dist/transformer/diagnostics.d.ts.map +1 -0
  32. package/dist/transformer/diagnostics.js +30 -0
  33. package/dist/transformer/diagnostics.js.map +1 -0
  34. package/dist/transformer/domains.d.ts +11 -0
  35. package/dist/transformer/domains.d.ts.map +1 -0
  36. package/dist/transformer/domains.js +32 -0
  37. package/dist/transformer/domains.js.map +1 -0
  38. package/dist/transformer/index.d.ts +10 -0
  39. package/dist/transformer/index.d.ts.map +1 -0
  40. package/dist/transformer/index.js +60 -0
  41. package/dist/transformer/index.js.map +1 -0
  42. package/dist/transformer/operators.d.ts +16 -0
  43. package/dist/transformer/operators.d.ts.map +1 -0
  44. package/dist/transformer/operators.js +44 -0
  45. package/dist/transformer/operators.js.map +1 -0
  46. package/dist/transformer/options.d.ts +19 -0
  47. package/dist/transformer/options.d.ts.map +1 -0
  48. package/dist/transformer/options.js +17 -0
  49. package/dist/transformer/options.js.map +1 -0
  50. package/dist/transformer/symbols.d.ts +56 -0
  51. package/dist/transformer/symbols.d.ts.map +1 -0
  52. package/dist/transformer/symbols.js +270 -0
  53. package/dist/transformer/symbols.js.map +1 -0
  54. package/dist/transformer/transform/erase-imports.d.ts +14 -0
  55. package/dist/transformer/transform/erase-imports.d.ts.map +1 -0
  56. package/dist/transformer/transform/erase-imports.js +174 -0
  57. package/dist/transformer/transform/erase-imports.js.map +1 -0
  58. package/dist/transformer/transform/generated-coercions.d.ts +9 -0
  59. package/dist/transformer/transform/generated-coercions.d.ts.map +1 -0
  60. package/dist/transformer/transform/generated-coercions.js +22 -0
  61. package/dist/transformer/transform/generated-coercions.js.map +1 -0
  62. package/dist/transformer/transform/optimize-coercions.d.ts +11 -0
  63. package/dist/transformer/transform/optimize-coercions.d.ts.map +1 -0
  64. package/dist/transformer/transform/optimize-coercions.js +1702 -0
  65. package/dist/transformer/transform/optimize-coercions.js.map +1 -0
  66. package/dist/transformer/transform/transform-declaration-file.d.ts +9 -0
  67. package/dist/transformer/transform/transform-declaration-file.d.ts.map +1 -0
  68. package/dist/transformer/transform/transform-declaration-file.js +376 -0
  69. package/dist/transformer/transform/transform-declaration-file.js.map +1 -0
  70. package/dist/transformer/transform/transform-expression.d.ts +24 -0
  71. package/dist/transformer/transform/transform-expression.d.ts.map +1 -0
  72. package/dist/transformer/transform/transform-expression.js +545 -0
  73. package/dist/transformer/transform/transform-expression.js.map +1 -0
  74. package/dist/transformer/transform/transform-source-file.d.ts +10 -0
  75. package/dist/transformer/transform/transform-source-file.d.ts.map +1 -0
  76. package/dist/transformer/transform/transform-source-file.js +52 -0
  77. package/dist/transformer/transform/transform-source-file.js.map +1 -0
  78. package/dist/transformer/ts-compat.d.ts +4 -0
  79. package/dist/transformer/ts-compat.d.ts.map +1 -0
  80. package/dist/transformer/ts-compat.js +24 -0
  81. package/dist/transformer/ts-compat.js.map +1 -0
  82. package/docs/implementation-plan.md +335 -0
  83. package/docs/lib-implementation.md +77 -0
  84. package/docs/lowering-optimization-spec.md +1020 -0
  85. package/docs/project-structure.md +52 -0
  86. package/docs/transform-spec.md +2114 -0
  87. package/package.json +83 -0
package/README.md ADDED
@@ -0,0 +1,652 @@
1
+ # numtypes
2
+
3
+ `numtypes` is a zero-runtime TypeScript numeric-domain transformer.
4
+
5
+ User code imports a small set of phantom symbols, writes ordinary TypeScript
6
+ with branded numeric types, and the transformer erases those symbols into the
7
+ JavaScript coercion idioms engines already understand:
8
+
9
+ | Type | Runtime closure shape | Intended domain |
10
+ | --- | --- | --- |
11
+ | `i32` | `x \| 0`, `Math.imul(a, b)` | signed int32 |
12
+ | `u32` | `x >>> 0`, `Math.imul(a, b) >>> 0` | unsigned int32 |
13
+ | `f32` | `Math.fround(x)` | float32 value rounded back to JS `number` |
14
+ | `f64` | `+x` | JavaScript binary64 `number` |
15
+
16
+ The imported functions are compile-time markers, not runtime utilities. A
17
+ successful build should not keep calls to `i32`, `u32`, `f32`, or `f64` in the
18
+ emitted JavaScript unless you intentionally preserve phantom imports for
19
+ debugging.
20
+
21
+ ## Why This Exists
22
+
23
+ JavaScript exposes one ordinary numeric value type, `number`, but engines have
24
+ many internal numeric representations. Hot code may be optimized as small
25
+ integers, int32, uint32, float64, or sometimes float32 when the source shape is
26
+ clear enough. Historically, hand-written JavaScript used coercion idioms such as
27
+ `| 0`, `>>> 0`, `Math.imul`, and `Math.fround` to make those shapes visible to
28
+ JIT compilers.
29
+
30
+ `numtypes` moves that idiom into TypeScript:
31
+
32
+ - TypeScript gets branded domain names (`i32`, `u32`, `f32`, `f64`).
33
+ - The transformer inserts the required coercions at arithmetic and storage
34
+ boundaries.
35
+ - Runtime JavaScript stays dependency-free.
36
+ - The generated code is benchmarked against plain JavaScript on current engines.
37
+
38
+ This is not a replacement for WebAssembly or SIMD. It is a source-to-source
39
+ lowering pass for JavaScript projects that want explicit numeric domains without
40
+ manually writing coercions after every operation.
41
+
42
+ ## Status
43
+
44
+ The project is still early. The transformer is the main artifact; the public
45
+ runtime package exists mostly so TypeScript source can name the marker symbols.
46
+
47
+ Expect sharp semantics:
48
+
49
+ - arithmetic over branded values must stay inside a branded domain, or escape
50
+ explicitly with `as number`, `as unknown`, or `as any`;
51
+ - mixed domains such as `i32 + f32` are rejected unless the user casts one side;
52
+ - `as i32` is unchecked and emits no coercion by itself;
53
+ - `i32(x)` is checked and emits `x | 0`;
54
+ - declaration output can preserve branded types or erase them to `number`.
55
+
56
+ See [docs/transform-spec.md](docs/transform-spec.md) for the full transform
57
+ rules and [docs/lowering-optimization-spec.md](docs/lowering-optimization-spec.md)
58
+ for the coercion optimization pass.
59
+
60
+ ## Install
61
+
62
+ ```bash
63
+ npm install numtypes
64
+ npm install --save-dev typescript ts-patch
65
+ ```
66
+
67
+ `numtypes` has a peer dependency on TypeScript. It also requires a build
68
+ pipeline that can load TypeScript custom transformers. The recommended path
69
+ is `ts-patch`, configured through `compilerOptions.plugins` and run with `tspc`.
70
+ The stock `tsc` CLI does not load third-party transformers from `tsconfig.json`
71
+ unless TypeScript has been patched.
72
+
73
+ ## Quick Example
74
+
75
+ ```typescript
76
+ import { i32 } from "numtypes";
77
+
78
+ export function hashStep(hash: i32, byte: i32): i32 {
79
+ return i32((hash ^ byte) * 0x01000193);
80
+ }
81
+ ```
82
+
83
+ The source type checks as branded TypeScript. The emitted JavaScript is ordinary
84
+ number code:
85
+
86
+ ```javascript
87
+ export function hashStep(hash, byte) {
88
+ return Math.imul(hash ^ byte, 0x01000193);
89
+ }
90
+ ```
91
+
92
+ The package import disappears because the marker call has been consumed by the
93
+ transformer.
94
+
95
+ ## Checked Casts And Unchecked Assertions
96
+
97
+ The call form performs the coercion:
98
+
99
+ ```typescript
100
+ import { i32 } from "numtypes";
101
+
102
+ const checked = i32(0.9); // emits 0.9 | 0
103
+ ```
104
+
105
+ The TypeScript assertion form is an unchecked promise:
106
+
107
+ ```typescript
108
+ import type { i32 } from "numtypes";
109
+
110
+ const unchecked = 0.9 as i32; // emits 0.9
111
+ ```
112
+
113
+ Assertions are useful at trusted boundaries, but they do not retroactively
114
+ coerce the asserted expression. If a checked runtime conversion is required, use
115
+ the function marker.
116
+
117
+ ## Transformer Usage
118
+
119
+ `numtypes` is intended to be used through
120
+ [`ts-patch`](https://github.com/nonara/ts-patch). `ts-patch` lets TypeScript
121
+ load custom transformers from `tsconfig.json` and run them through a patched
122
+ compiler command.
123
+
124
+ Install `ts-patch` next to TypeScript:
125
+
126
+ ```bash
127
+ npm install --save-dev ts-patch typescript
128
+ ```
129
+
130
+ `ts-patch` v4 targets TypeScript 6 and later. If your project is still on
131
+ TypeScript 5, use `ts-patch` v3.
132
+
133
+ Add the transformer to `tsconfig.json`:
134
+
135
+ ```json
136
+ {
137
+ "compilerOptions": {
138
+ "plugins": [
139
+ {
140
+ "transform": "numtypes/transformer"
141
+ }
142
+ ]
143
+ }
144
+ }
145
+ ```
146
+
147
+ Then compile with `tspc` instead of `tsc`:
148
+
149
+ ```bash
150
+ npx tspc -p tsconfig.json
151
+ ```
152
+
153
+ You can also install a persistent patch and keep using `tsc`:
154
+
155
+ ```bash
156
+ npx ts-patch install
157
+ npx tsc -p tsconfig.json
158
+ ```
159
+
160
+ For declaration emit, the default is to preserve branded declaration types:
161
+
162
+ ```typescript
163
+ export declare function read(value: i32): f32;
164
+ ```
165
+
166
+ If you want generated `.d.ts` files to expose only the runtime JavaScript
167
+ contract, add the declaration transformer as an `afterDeclarations` plugin:
168
+
169
+ ```json
170
+ {
171
+ "compilerOptions": {
172
+ "declaration": true,
173
+ "plugins": [
174
+ {
175
+ "transform": "numtypes/transformer"
176
+ },
177
+ {
178
+ "transform": "numtypes/transformer",
179
+ "import": "createDeclarationTransformer",
180
+ "afterDeclarations": true,
181
+ "declarationTypes": "erase"
182
+ }
183
+ ]
184
+ }
185
+ }
186
+ ```
187
+
188
+ That declaration plugin rewrites branded declaration positions to `number` and
189
+ removes declaration-only numtypes imports that become unused.
190
+
191
+ ## Options
192
+
193
+ | Option | Default | Meaning |
194
+ | --- | --- | --- |
195
+ | `declarationTypes` | `"preserve"` | Preserve `i32`/`u32`/`f32`/`f64` in `.d.ts`, or erase them to `number` when passed to the `createDeclarationTransformer` `afterDeclarations` plugin. |
196
+ | `collectDiagnostics` | `true` | Emit transformer diagnostics for domain leaks, mixed domains, ambiguous unions, and marker misuse. |
197
+ | `preservePhantomImports` | `false` | Keep otherwise-erased imports from `numtypes`; mainly useful for debugging transformer output. |
198
+ | `optimizeTypedArrayElementAccess` | `false` | Allow proof-based removal of generated coercions around numeric TypedArray element reads when loop bounds prove in-bounds access. |
199
+
200
+ `optimizeTypedArrayElementAccess` is off by default because it trusts runtime
201
+ objects to match their TypeScript annotations. If a value is typed as
202
+ `Int32Array`, the optimized region assumes it is actually an intrinsic
203
+ `Int32Array`, not a monkey-patched or structurally similar object.
204
+
205
+ ## Declaration Surface
206
+
207
+ By default, declarations preserve branded APIs:
208
+
209
+ ```typescript
210
+ export declare function read(value: i32): f32;
211
+ ```
212
+
213
+ With `declarationTypes: "erase"`, declaration emit exposes the runtime contract:
214
+
215
+ ```typescript
216
+ export declare function read(value: number): number;
217
+ ```
218
+
219
+ Use `"preserve"` for libraries that want downstream TypeScript users to keep the
220
+ same branded constraints. Use `"erase"` for libraries that treat numtypes as a
221
+ private implementation detail and publish a plain JavaScript numeric API.
222
+
223
+ ## Domain Guidance
224
+
225
+ The practical takeaway from current benchmarks is conservative:
226
+
227
+ | Domain | Recommendation |
228
+ | --- | --- |
229
+ | `i32` | Useful for real performance-sensitive integer code, especially TypedArray-heavy kernels and multiply-heavy loops. Also useful for correctness when 32-bit overflow is intended. |
230
+ | `u32` | Useful for unsigned range semantics and hashing/bitwise correctness. Do not assume a broad speedup over good hand-written JavaScript. |
231
+ | `f32` | Useful as a precision contract. On current V8 it is slower; on SpiderMonkey the same shape can stay near parity when the engine lowers the pattern to float32. |
232
+ | `f64` | Useful mostly as a proof and documentation domain. JavaScript already uses binary64 `number`; extra unary `+` coercions rarely improve performance and can be neutral or slightly negative. |
233
+
234
+ In short: `i32` is the only domain that currently looks broadly practical as a
235
+ performance tool. `u32`, `f32`, and `f64` are still useful when the value domain
236
+ itself matters, but they should be treated as semantic constraints first and
237
+ performance hints second.
238
+
239
+ ## Benchmark Methodology
240
+
241
+ Benchmarks are opt-in:
242
+
243
+ ```bash
244
+ npm run bench
245
+ npm run bench:md
246
+ ```
247
+
248
+ The algebra benchmarks compile a source string containing both:
249
+
250
+ 1. a plain JavaScript `number` baseline; and
251
+ 2. a numtypes-authored implementation.
252
+
253
+ The numtypes implementation is lowered by the real transformer at benchmark load
254
+ time. The benchmark therefore measures actual transformer output, including the
255
+ coercion optimization pass, not hand-written approximations.
256
+
257
+ Each comparison warms both implementations, alternates baseline/typed
258
+ measurement order, discards the first measured sample rounds, and reports the
259
+ median of the retained steady-state rounds. This matters because the first
260
+ optimized runs after dynamic import can still include speculation, tiering, and
261
+ recompilation outliers on V8.
262
+
263
+ `Rel speed` is:
264
+
265
+ ```text
266
+ baseline runtime / transformed runtime
267
+ ```
268
+
269
+ Higher than `1.00x` means the transformed version is faster. Lower than `1.00x`
270
+ means the transformed version is slower.
271
+
272
+ ## Benchmark Snapshot
273
+
274
+ Measured on:
275
+
276
+ ```text
277
+ Node v24.4.1
278
+ V8 13.6.233.10-node.17
279
+ Windows x64
280
+ 2026-06-21
281
+ ```
282
+
283
+ These numbers are not contractual. They are a snapshot of one engine, one CPU,
284
+ one OS, and one set of hot-loop shapes.
285
+
286
+ ### f32: Precision Contract, Not a V8 Speedup
287
+
288
+ The f32 suite compares transformed `Math.fround` kernels against plain f64
289
+ `number` kernels. Results differ by design because f32 has less precision.
290
+
291
+ | Benchmark | Rel speed | Precision |
292
+ | --- | ---: | --- |
293
+ | vec3 dot | 0.39x | ~8e-8 |
294
+ | vec3 cross | 0.18x | ~8e-8 |
295
+ | vec3 normalize | 0.54x | ~9e-8 |
296
+ | mat4 x mat4 | 0.36x | ~1e-7 |
297
+ | mat4 x vec4 | 0.15x | ~9e-8 |
298
+ | quat multiply | 0.15x | ~8e-8 |
299
+ | vec3 lerp | 0.18x | ~9e-8 |
300
+ | ray-triangle | 0.17x | ~1e-5 |
301
+
302
+ Current V8 treats `Math.fround` as real rounding work in these scalar kernels.
303
+ That means f32 source can be roughly 2x to 7x slower than the f64 baseline even though
304
+ the emitted shape is the standard JavaScript way to request binary32 rounding.
305
+
306
+ SpiderMonkey is different. Mozilla's IonMonkey work added an optimization pass
307
+ that recognizes `Float32Array`/`Math.fround` patterns and can emit float32
308
+ operations when the required identities hold. In a local scalar dot-product
309
+ probe, SpiderMonkey 153 kept a `Float32Array` + `Math.fround` variant around
310
+ parity with the f64 `Float64Array` baseline, while V8 13.6 did not.
311
+
312
+ The history matters here. This optimization came from the same period as
313
+ asm.js, Emscripten, and WebGL performance work, but it was not merely an
314
+ asm.js-only fast path. Mozilla's 2013 Float32 write-up describes the work as
315
+ general IonMonkey support for float32 operations, followed by an optimization
316
+ pass that recognizes `Float32Array` and `Math.fround`. The asm.js performance
317
+ article from the same period says the Float32 work was generic and applied to
318
+ any JavaScript that matched the optimizable shape; after that, SpiderMonkey and
319
+ Emscripten added float32 to the asm.js type system so asm.js code could benefit
320
+ from it specifically. In other words, SpiderMonkey's present advantage is best
321
+ understood as an asm.js-era optimization lineage that still benefits ordinary
322
+ JavaScript patterns, not as evidence that the active asm.js/OdinMonkey compiler
323
+ path is required for `Math.fround` to be fast.
324
+
325
+ This makes f32 engine-dependent. It is semantically valuable when you need
326
+ single-precision rounding. It is not currently a V8 performance optimization.
327
+ For V8, real f32 speedups are more likely to come from whole-kernel WebAssembly,
328
+ especially with SIMD, than from scalar `Math.fround` JavaScript.
329
+
330
+ ### f64: Mostly Neutral
331
+
332
+ The f64 suite adds explicit unary `+` proofs around ordinary JavaScript number
333
+ math. Since JavaScript `number` is already binary64, the optimizer usually has
334
+ little to gain.
335
+
336
+ | Benchmark | Rel speed | Precision |
337
+ | --- | ---: | --- |
338
+ | f64 dot4 | 1.00x | exact |
339
+ | f64 dot4 typed-array | 1.01x | exact |
340
+ | horner poly | 0.97x | exact |
341
+ | f64 kahan sum | 1.01x | exact |
342
+ | f64 welford | 1.01x | exact |
343
+ | f64 biquad | 0.98x | exact |
344
+ | f64 black-scholes | 0.99x | exact |
345
+ | f64 newton sqrt | 1.00x | exact |
346
+ | f64 scale-add | 1.04x | exact |
347
+ | f64 recurrence | 1.01x | exact |
348
+ | f64 RK4 logistic | 0.99x | exact |
349
+
350
+ The result is roughly parity. Some cases are slightly faster, some are slightly
351
+ slower, and the differences are small enough that they should not drive API
352
+ design. Use `f64` when the type annotation helps the program express "this must
353
+ be numeric binary64-like data" and when explicit plain-number escapes are useful
354
+ for code review.
355
+
356
+ ### i32: The Most Useful Performance Domain
357
+
358
+ The i32 suite has three families:
359
+
360
+ - overflow-oriented kernels where int32 closure is part of the intended
361
+ algorithm;
362
+ - bounded-number kernels where f64 `number` arithmetic is already exact, but the
363
+ source logically treats values as integers;
364
+ - intentionally naive full-width multiply kernels where `(a * b) | 0` is both
365
+ semantically wrong and slow compared with transformer-generated `Math.imul`.
366
+
367
+ | Benchmark | Rel speed | Precision |
368
+ | --- | ---: | --- |
369
+ | i32 dot4 imul | 1.01x | exact |
370
+ | i32 dot8 imul | 1.02x | exact |
371
+ | i32 fir4 | 1.01x | exact |
372
+ | i32 LCG | 1.04x | exact |
373
+ | i32 fixed IIR | 1.04x | exact |
374
+ | i32 bresenham | 1.05x | exact |
375
+ | i32 xorshift | 1.04x | exact |
376
+ | i32 collatz | 1.05x | exact |
377
+ | i32 affine mix | 1.03x | exact |
378
+ | i32 horner mix | 0.95x | exact |
379
+ | i32 bounded score | 0.90x | exact |
380
+ | i32 bounded smooth | 1.76x | exact |
381
+ | i32 bounded delta | 1.81x | exact |
382
+ | i32 bounded state | 1.04x | exact |
383
+ | i32 bounded poly | 0.85x | exact |
384
+ | i32 bounded dither | 0.87x | exact |
385
+
386
+ i32 is not magic. Bounded small-number code where ordinary f64 arithmetic is
387
+ already exact can still get slower, because int32 closure adds coercion work that
388
+ the baseline does not need. After median-based measurement, those losses are
389
+ much smaller than the first-run outliers, but they are real enough that hot paths
390
+ still need per-engine benchmarking. The general picture is better than the other
391
+ domains: multiply-heavy and TypedArray-heavy kernels often benefit, and
392
+ overflow-oriented integer algorithms become easier to write correctly.
393
+
394
+ The strongest case for i32 is the last of the three families above. JavaScript code often tries
395
+ to express C-like signed 32-bit multiplication with `(a * b) | 0`. That is only
396
+ valid while the binary64 product still contains the low 32 bits exactly. For
397
+ full-width int32 values, the product can exceed `2^53`, low bits are lost before
398
+ the final `| 0`, and the result is no longer equivalent to 32-bit wraparound
399
+ multiplication. In those cases, `i32(a * b)` is not just a hint; it is a compact
400
+ way to request the correct operation, and the transformer lowers it through
401
+ `Math.imul`.
402
+
403
+ ```typescript
404
+ // Not equivalent to int32 multiplication for full-width inputs.
405
+ const wrong = (a * b) | 0;
406
+
407
+ // Emits Math.imul(a, b), then preserves i32 closure for later operations.
408
+ const right = i32(a * b);
409
+ ```
410
+
411
+ | Benchmark | Rel speed | Precision |
412
+ | --- | ---: | --- |
413
+ | i32 naive LCG | 5.07x | naive wrong (100% differ) |
414
+ | i32 naive fmix | 2.35x | naive wrong (100% differ) |
415
+ | i32 naive FNV-1a | 4.50x | naive wrong (100% differ) |
416
+
417
+ This table is intentionally not a fair "same result, faster code" comparison.
418
+ The baseline is wrong. The point is more practical: numtypes lets source code
419
+ state the intended int32 domain once, then emits the operation shape engines
420
+ already understand for full-width 32-bit multiplication.
421
+
422
+ The additional `Math.imul` probe is important here. On V8 TurboFan with
423
+ TypedArrays, `Math.imul` was much faster than `* | 0` for variable multiply and
424
+ multiply-accumulate loops. On ordinary JS arrays with small integer values,
425
+ `Math.imul` was sometimes slower than `* | 0`. That means `Math.imul` is the
426
+ right conservative lowering for correctness and many hot loops, but a future
427
+ optimizer may want a range-proven escape hatch for small-value normal-array
428
+ code.
429
+
430
+ Focused V8 disassembly checks are useful for interpreting the slower i32 rows.
431
+ With `--print-opt-code`, `Math.imul(x, y)` lowers to the x64 `imull`
432
+ instruction in these hot loops. A hand-written `(x * y) | 0` variant of the
433
+ same `bounded poly` kernel lowered to essentially the same integer multiply
434
+ sequence and was not faster.
435
+
436
+ The clearest slowdown is `i32 bounded poly`:
437
+
438
+ ```typescript
439
+ const x: i32 = i32(input[i] % 1024);
440
+ out[i] = i32((x * x + i32(3) * x + i32(7)) % i32(10007));
441
+ ```
442
+
443
+ The plain baseline is also optimized as integer code because V8 learns that
444
+ `input[i]` is a small non-negative value. Its hot path for `% 10007` uses a
445
+ short unsigned-style magic-multiply sequence:
446
+
447
+ ```asm
448
+ andl rdx,0x3ff
449
+ imull rsi,rdx
450
+ mull rcx
451
+ shrl rdx,12
452
+ imull rdx,rdx,0x2717
453
+ subl rsi,rdx
454
+ ```
455
+
456
+ The i32-lowered version must preserve signed int32 closure and JavaScript `%`
457
+ semantics, so the remainder path carries extra sign correction:
458
+
459
+ ```asm
460
+ andl rdx,0x3ff
461
+ imull rsi,rdx
462
+ imull rcx
463
+ shrl rax,31
464
+ sarl rdx,12
465
+ addl rdx,rax
466
+ imull rdx,rdx,0x2717
467
+ subl rsi,rdx
468
+ ```
469
+
470
+ This is a range-analysis gap. The transformer knows the expression is i32, but
471
+ it does not yet prove that the value is non-negative and small enough for the
472
+ shorter unsigned remainder path. `bounded score` and `bounded dither` are
473
+ different: they do not contain `%`, and V8 also lowers their small multiplications
474
+ or additions to ordinary integer instructions. Their small losses are best
475
+ understood as code-shape and guard/register-allocation effects from explicit
476
+ closure in already-exact bounded arithmetic.
477
+ `i32 horner mix` is near parity when measured in isolation and is not the same
478
+ bounded-remainder issue.
479
+
480
+ ### u32: Correctness First
481
+
482
+ u32 is valuable for bitwise and hash code because unsigned closure matters. The
483
+ performance story is mixed.
484
+
485
+ | Benchmark | Rel speed | Precision |
486
+ | --- | ---: | --- |
487
+ | u32 LCG | 0.99x | exact |
488
+ | xorshift32 | 1.02x | exact |
489
+ | xorshift*32 | 1.00x | exact |
490
+ | mulberry32 correct | 0.99x | exact |
491
+ | wang hash32 | 0.96x | exact |
492
+ | murmur3 block | 0.95x | exact |
493
+ | jenkins oat | 0.99x | exact |
494
+ | CRC32 bitwise | 1.01x | exact |
495
+ | Adler-32 | 1.01x | exact |
496
+ | FNV-1a bytes | 1.02x | exact |
497
+ | u32 bounded prefix | 1.01x | exact |
498
+ | u32 bounded checksum | 1.10x | exact |
499
+ | u32 bounded counter | 0.99x | exact |
500
+ | u32 bounded pair hash | 1.23x | exact |
501
+ | u32 bounded rgb pack | 0.99x | exact |
502
+ | u32 bounded unpack sum | 2.74x | exact |
503
+
504
+ Some u32 cases are faster, especially when generated coercions are optimized
505
+ away in tight bounded loops. Other cases are neutral or slower. For real hashing
506
+ code, the more important point is often correctness: naive `number`
507
+ multiplication silently loses low bits when products exceed `2^53`.
508
+
509
+ | Benchmark | Rel speed | Precision |
510
+ | --- | ---: | --- |
511
+ | mulberry32 | 2.21x | naive wrong (100% differ) |
512
+ | murmur3 fmix | 2.10x | naive wrong (100% differ) |
513
+
514
+ The second table compares against intentionally naive baselines. It is not a
515
+ fair speed comparison because the baseline is wrong. It shows why `Math.imul`
516
+ exists: a plain `*` implementation can be fast or slow, but it is not equivalent
517
+ for full-width 32-bit integer multiplication.
518
+
519
+ ## Engine Notes
520
+
521
+ ### `Math.imul`
522
+
523
+ ECMAScript defines `Math.imul(x, y)` as 32-bit multiplication modulo `2^32`,
524
+ returned as a signed int32. MDN describes it as C-like 32-bit integer
525
+ multiplication, and the TC39 integer-math proposal notes that engines can infer
526
+ some integer optimizations but that explicit source operations are useful when
527
+ inference is not practical.
528
+
529
+ For numtypes, this means `i32(a * b)` and `u32(a * b)` lower through
530
+ `Math.imul` when both operands are integer-domain values. Rewriting every
531
+ integer multiply to plain `*` followed by a coercion would be incorrect for full 32-bit values.
532
+
533
+ ### `Math.fround`
534
+
535
+ ECMAScript defines `Math.fround` as binary64 input, binary32 conversion with
536
+ round-to-nearest ties-to-even, then binary64 output. The return value is still a
537
+ JavaScript `number`; the operation just forces single-precision rounding at that
538
+ point.
539
+
540
+ SpiderMonkey's historical float32 optimization is designed around this shape:
541
+ `Float32Array` values and `Math.fround` calls provide enough information for the
542
+ JIT to use float32 operations when doing so preserves the required rounded
543
+ result. V8 currently does not show the same scalar speedup in this benchmark
544
+ set, even when forced to TurboFan.
545
+
546
+ The code-history trail is:
547
+
548
+ - [Bug 888109](https://bugzilla.mozilla.org/show_bug.cgi?id=888109),
549
+ "IonMonkey: Introduce Float32 general optimization", added the general
550
+ Float32 optimization work in the IonMonkey backend. The bug description
551
+ explicitly targets lowering `Float32Array` arithmetic from double
552
+ load/convert/add/store sequences to float32 assembly instructions.
553
+ - [Efficient float32 arithmetic in JavaScript](https://blog.mozilla.org/javascript/2013/11/07/efficient-float32-arithmetic-in-javascript/)
554
+ describes the implementation sequence: add general float32 support to
555
+ IonMonkey, add `Math.fround`, then add a recognition pass for
556
+ `Float32Array`/`Math.fround` patterns.
557
+ - [Gap between asm.js and native performance gets even narrower with float32 optimizations](https://hacks.mozilla.org/2013/12/gap-between-asm-js-and-native-performance-gets-even-narrower-with-float32-optimizations/)
558
+ connects that generic Float32 work back to asm.js and Emscripten: once the
559
+ generic optimization existed, asm.js gained a float32 type path so compiled
560
+ C/C++ float-heavy code could benefit.
561
+ - [Saying goodbye to asm.js](https://spidermonkey.dev/blog/2026/05/20/saying-goodbye-to-asmjs.html)
562
+ says SpiderMonkey's asm.js optimizations are disabled by default as of
563
+ Firefox 148 and planned for removal. That makes it important not to describe
564
+ current `Math.fround` behavior as relying on the active asm.js compiler. The
565
+ safer claim is that the optimization comes from the asm.js-era Float32 work
566
+ and remains relevant to ordinary JavaScript shapes.
567
+
568
+ ### Unary `+`
569
+
570
+ Unary `+` proves "number-ness" at runtime. It can prevent accidental string
571
+ concatenation or object coercion from crossing into a numeric region, but it
572
+ does not create a narrower machine type than JavaScript's normal binary64
573
+ `number`. The optimizer can often remove redundant unary `+`, but the benchmark
574
+ results show that it should not be expected to produce a broad speedup.
575
+
576
+ ## Optimization Pass
577
+
578
+ The initial lowering pass is intentionally conservative and may emit more
579
+ coercions than a human would write. A later optimization pass removes only
580
+ coercions generated by numtypes, not coercions the user wrote by hand.
581
+
582
+ This generated-only rule matters. If the user writes:
583
+
584
+ ```typescript
585
+ const x = value | 0;
586
+ ```
587
+
588
+ the optimizer preserves it. If numtypes generated the same coercion as part of a
589
+ closed expression and can prove it redundant, it may remove it.
590
+
591
+ The optimizer currently handles cases such as:
592
+
593
+ - repeated closure of already-closed local values;
594
+ - closure propagation through selected arithmetic and bitwise expressions;
595
+ - u32 mask and modulo producer patterns;
596
+ - optional TypedArray element access proofs when
597
+ `optimizeTypedArrayElementAccess` is enabled.
598
+
599
+ See [docs/lowering-optimization-spec.md](docs/lowering-optimization-spec.md) for
600
+ the exact proof rules.
601
+
602
+ ## Practical Usage Rules
603
+
604
+ Use `i32` when:
605
+
606
+ - the algorithm intentionally uses signed 32-bit overflow;
607
+ - multiplication must have C-like 32-bit semantics;
608
+ - the current code relies on `(a * b) | 0` and the operands may be full-width
609
+ int32 values;
610
+ - the hot loop is TypedArray-heavy or integer-heavy;
611
+ - you can benchmark the transformed output on your target engine.
612
+
613
+ Use `u32` when:
614
+
615
+ - unsigned range and bitwise behavior are part of correctness;
616
+ - the code implements hashes, checksums, PRNGs, encoders, or binary formats;
617
+ - performance is measured rather than assumed.
618
+
619
+ Use `f32` when:
620
+
621
+ - single-precision rounding is semantically meaningful;
622
+ - compatibility with WebGL, binary formats, or float32 data pipelines matters;
623
+ - your target engine is known to optimize the `Math.fround` pattern, or you
624
+ accept the V8 slowdown as the cost of precision control.
625
+
626
+ Use `f64` when:
627
+
628
+ - you want explicit numeric boundaries in TypeScript;
629
+ - you want declaration-level documentation that a value is meant to stay in the
630
+ numeric domain;
631
+ - you are not expecting a consistent speedup from unary `+`.
632
+
633
+ Avoid numtypes when:
634
+
635
+ - the code is ordinary application arithmetic where readability matters more
636
+ than numeric-domain control;
637
+ - the hot path is not benchmarked;
638
+ - the algorithm would be clearer or faster as WebAssembly/SIMD.
639
+
640
+ ## References
641
+
642
+ - [ECMAScript `Math.imul`](https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-math.imul)
643
+ - [ECMAScript `Math.fround`](https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-math.fround)
644
+ - [MDN: `Math.imul()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul)
645
+ - [MDN: `Math.fround()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/fround)
646
+ - [Mozilla JavaScript Blog: Efficient float32 arithmetic in JavaScript](https://blog.mozilla.org/javascript/2013/11/07/efficient-float32-arithmetic-in-javascript/)
647
+ - [V8 docs: TurboFan](https://v8.dev/docs/turbofan)
648
+ - [TC39 proposal: Modulus and Additional Integer Math](https://github.com/tc39/proposal-integer-and-modulus-math)
649
+
650
+ ## License
651
+
652
+ MIT OR Apache-2.0
@@ -0,0 +1,22 @@
1
+ declare const i32Brand: unique symbol;
2
+ declare const u32Brand: unique symbol;
3
+ declare const f32Brand: unique symbol;
4
+ declare const f64Brand: unique symbol;
5
+ export type i32 = number & {
6
+ readonly [i32Brand]: never;
7
+ };
8
+ export type u32 = number & {
9
+ readonly [u32Brand]: never;
10
+ };
11
+ export type f32 = number & {
12
+ readonly [f32Brand]: never;
13
+ };
14
+ export type f64 = number & {
15
+ readonly [f64Brand]: never;
16
+ };
17
+ export declare function i32(value: number): i32;
18
+ export declare function u32(value: number): u32;
19
+ export declare function f32(value: number): f32;
20
+ export declare function f64(value: number): f64;
21
+ export {};
22
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AACtC,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AACtC,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AACtC,OAAO,CAAC,MAAM,QAAQ,EAAE,OAAO,MAAM,CAAC;AAEtC,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,GAAG,GAAG,MAAM,GAAG;IACzB,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;CAC5B,CAAC;AAEF,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/lib/index.ts"],"names":[],"mappings":""}