numbl 0.3.0 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/binding.gyp +16 -33
  2. package/dist-cli/cli.js +20535 -25629
  3. package/dist-lib/graphics/types.d.ts +22 -0
  4. package/dist-lib/lib.d.ts +1 -0
  5. package/dist-lib/lib.js +59075 -63895
  6. package/dist-lib/numbl-core/executeCode.d.ts +11 -14
  7. package/dist-lib/numbl-core/executors/cJit/builtins.d.ts +30 -0
  8. package/dist-lib/numbl-core/executors/cJit/chainCodegen.d.ts +59 -0
  9. package/dist-lib/numbl-core/executors/cJit/chainExecutor.d.ts +27 -0
  10. package/dist-lib/numbl-core/executors/cJit/chainPass.d.ts +42 -0
  11. package/dist-lib/numbl-core/executors/cJit/codegen.d.ts +44 -0
  12. package/dist-lib/numbl-core/executors/cJit/compile.d.ts +45 -0
  13. package/dist-lib/numbl-core/executors/cJit/elemwiseCodegen.d.ts +23 -0
  14. package/dist-lib/numbl-core/executors/cJit/elemwiseStructural.d.ts +33 -0
  15. package/dist-lib/numbl-core/executors/cJit/fuseAnalyze.d.ts +39 -0
  16. package/dist-lib/numbl-core/executors/cJit/fuseCodegen.d.ts +16 -0
  17. package/dist-lib/numbl-core/executors/cJit/fuseExecutor.d.ts +28 -0
  18. package/dist-lib/numbl-core/executors/cJit/loopExecutor.d.ts +32 -0
  19. package/dist-lib/numbl-core/executors/cJit/register.d.ts +10 -0
  20. package/dist-lib/numbl-core/executors/cJit/whitelist.d.ts +15 -0
  21. package/dist-lib/numbl-core/executors/cache.d.ts +26 -0
  22. package/dist-lib/numbl-core/executors/context.d.ts +76 -0
  23. package/dist-lib/numbl-core/executors/index.d.ts +17 -0
  24. package/dist-lib/numbl-core/executors/jsJit/callExecutor.d.ts +25 -0
  25. package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jitCodegen.d.ts +2 -2
  26. package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jitCodegenHoist.d.ts +1 -1
  27. package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jsMultiReduction.d.ts +10 -13
  28. package/dist-lib/numbl-core/executors/jsJit/helpers/alloc.d.ts +12 -0
  29. package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpers.d.ts +2 -2
  30. package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpersComplex.d.ts +1 -1
  31. package/dist-lib/numbl-core/executors/jsJit/helpers/jitHelpersIndex.d.ts +33 -0
  32. package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpersTensor.d.ts +7 -7
  33. package/dist-lib/numbl-core/executors/jsJit/jitCall.d.ts +59 -0
  34. package/dist-lib/numbl-core/executors/jsJit/jitLoop.d.ts +53 -0
  35. package/dist-lib/numbl-core/executors/jsJit/jitTopLevel.d.ts +44 -0
  36. package/dist-lib/numbl-core/executors/jsJit/loopExecutor.d.ts +15 -0
  37. package/dist-lib/numbl-core/{jit/jitLoopAnalysis.d.ts → executors/jsJit/lower/blockAnalysis.d.ts} +5 -5
  38. package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitBailSafety.d.ts +1 -1
  39. package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLower.d.ts +18 -4
  40. package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerExpr.d.ts +11 -2
  41. package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerStmt.d.ts +2 -2
  42. package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerTypes.d.ts +2 -2
  43. package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/scalarEmit.d.ts +2 -2
  44. package/dist-lib/numbl-core/executors/jsJit/shared.d.ts +120 -0
  45. package/dist-lib/numbl-core/executors/jsJit/topLevelExecutor.d.ts +17 -0
  46. package/dist-lib/numbl-core/executors/lowering.d.ts +166 -0
  47. package/dist-lib/numbl-core/executors/plugins.d.ts +39 -0
  48. package/dist-lib/numbl-core/executors/registry.d.ts +148 -0
  49. package/dist-lib/numbl-core/executors/types.d.ts +103 -0
  50. package/dist-lib/numbl-core/functionResolve.d.ts +7 -0
  51. package/dist-lib/numbl-core/helpers/check-helpers.d.ts +4 -5
  52. package/dist-lib/numbl-core/helpers/linsolve.d.ts +2 -3
  53. package/dist-lib/numbl-core/helpers/prng.d.ts +1 -2
  54. package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +2 -1
  55. package/dist-lib/numbl-core/interpreter/builtins/misc.d.ts +4 -1
  56. package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +4 -91
  57. package/dist-lib/numbl-core/interpreter/interpreter.d.ts +33 -47
  58. package/dist-lib/numbl-core/interpreter/interpreterSpecialBuiltins.d.ts +6 -3
  59. package/dist-lib/numbl-core/interpreter/types.d.ts +27 -12
  60. package/dist-lib/numbl-core/{jit/jitTypes.d.ts → jitTypes.d.ts} +15 -1
  61. package/dist-lib/numbl-core/jsUserFunctions.d.ts +8 -0
  62. package/dist-lib/numbl-core/lowering/loweringContext.d.ts +24 -0
  63. package/dist-lib/numbl-core/native/lapack-bridge.d.ts +3 -3
  64. package/dist-lib/numbl-core/parser/types.d.ts +20 -0
  65. package/dist-lib/numbl-core/runtime/constructors.d.ts +6 -6
  66. package/dist-lib/numbl-core/runtime/cow.d.ts +33 -0
  67. package/dist-lib/numbl-core/runtime/index.d.ts +3 -2
  68. package/dist-lib/numbl-core/runtime/indexing.d.ts +6 -1
  69. package/dist-lib/numbl-core/runtime/plotBuiltinDispatch.d.ts +86 -0
  70. package/dist-lib/numbl-core/runtime/plotUtils.d.ts +17 -2
  71. package/dist-lib/numbl-core/runtime/refcount.d.ts +85 -0
  72. package/dist-lib/numbl-core/runtime/runtime.d.ts +27 -66
  73. package/dist-lib/numbl-core/runtime/runtimeDispatch.d.ts +2 -2
  74. package/dist-lib/numbl-core/runtime/runtimeIndexing.d.ts +2 -2
  75. package/dist-lib/numbl-core/runtime/runtimeMemberAccess.d.ts +1 -1
  76. package/dist-lib/numbl-core/runtime/runtimePlot.d.ts +1 -0
  77. package/dist-lib/numbl-core/runtime/struct-access.d.ts +2 -1
  78. package/dist-lib/numbl-core/runtime/types.d.ts +104 -62
  79. package/dist-lib/numbl-core/runtime/utils.d.ts +2 -8
  80. package/dist-lib/numbl-core/version.d.ts +1 -1
  81. package/dist-plot-viewer/assets/index-COAM8o1E.js +4426 -0
  82. package/dist-plot-viewer/index.html +1 -1
  83. package/native/lapack_linsolve.cpp +1 -1
  84. package/native/numbl_addon_common.h +2 -2
  85. package/native/ops/comparison.c +1 -1
  86. package/package.json +3 -6
  87. package/dist-lib/numbl-core/jit/c/abi.d.ts +0 -90
  88. package/dist-lib/numbl-core/jit/c/assemble.d.ts +0 -56
  89. package/dist-lib/numbl-core/jit/c/classify.d.ts +0 -70
  90. package/dist-lib/numbl-core/jit/c/compile.d.ts +0 -37
  91. package/dist-lib/numbl-core/jit/c/context.d.ts +0 -152
  92. package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +0 -20
  93. package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +0 -18
  94. package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +0 -42
  95. package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +0 -40
  96. package/dist-lib/numbl-core/jit/c/emit/index.d.ts +0 -14
  97. package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +0 -23
  98. package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +0 -25
  99. package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +0 -127
  100. package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +0 -58
  101. package/dist-lib/numbl-core/jit/c/epilogue.d.ts +0 -26
  102. package/dist-lib/numbl-core/jit/c/feasibility.d.ts +0 -44
  103. package/dist-lib/numbl-core/jit/c/prelude.d.ts +0 -37
  104. package/dist-lib/numbl-core/jit/c/visit.d.ts +0 -63
  105. package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +0 -46
  106. package/dist-lib/numbl-core/jit/e1/hash.d.ts +0 -10
  107. package/dist-lib/numbl-core/jit/e1/install.d.ts +0 -13
  108. package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +0 -54
  109. package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +0 -66
  110. package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +0 -13
  111. package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +0 -44
  112. package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +0 -34
  113. package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +0 -25
  114. package/dist-lib/numbl-core/jit/e2/cache.d.ts +0 -80
  115. package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +0 -55
  116. package/dist-lib/numbl-core/jit/e2/classify.d.ts +0 -119
  117. package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +0 -16
  118. package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +0 -79
  119. package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +0 -71
  120. package/dist-lib/numbl-core/jit/e2/install.d.ts +0 -11
  121. package/dist-lib/numbl-core/jit/e2/liveness.d.ts +0 -29
  122. package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +0 -49
  123. package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +0 -75
  124. package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +0 -24
  125. package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +0 -72
  126. package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +0 -29
  127. package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +0 -65
  128. package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +0 -69
  129. package/dist-lib/numbl-core/jit/fusion.d.ts +0 -71
  130. package/dist-lib/numbl-core/jit/fusionOps.d.ts +0 -25
  131. package/dist-lib/numbl-core/jit/heavyOps.d.ts +0 -15
  132. package/dist-lib/numbl-core/jit/index.d.ts +0 -7
  133. package/dist-lib/numbl-core/jit/jitLoop.d.ts +0 -25
  134. package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +0 -22
  135. package/dist-lib/numbl-core/jit/js/jitHelpersIndex.d.ts +0 -33
  136. package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +0 -17
  137. package/dist-lib/numbl-core/runtime/alloc.d.ts +0 -23
  138. package/dist-plot-viewer/assets/index-GiUNnMQg.js +0 -4426
  139. package/native/jit_runtime/jit_runtime.c +0 -261
  140. package/native/jit_runtime/jit_runtime.h +0 -204
@@ -1,44 +0,0 @@
1
- /**
2
- * e1 (experimental) — whole-function scalar kernel emission.
3
- *
4
- * Complements [kernelEmit.ts](./kernelEmit.ts) (which handles tensor
5
- * fusible chains) by covering the other big win case: a user function
6
- * that is entirely scalar arithmetic — e.g. the inner loop of a
7
- * Horner-style series, a Runge-Kutta step on a handful of doubles,
8
- * benchmarks/scalar_bench.m's `run_bench(N, M)`.
9
- *
10
- * Under `--opt e1`, when a JIT-able function's signature and body are
11
- * purely scalar, we call `generateC()` (the same emitter the C-JIT
12
- * uses at `--opt 2`) and wrap its output with a thin inline JS
13
- * function that shells out to `$h.compileKernel(...)`. The C source
14
- * and koffi signature are inlined as JS string literals, so
15
- * `--dump-js` shows the complete picture.
16
- *
17
- * Scope for the prototype:
18
- * - All params are scalar doubles / booleans (CParamDesc.kind === "scalar")
19
- * - All outputs are scalar / boolean (COutputDesc.kind === "scalar" | "boolean")
20
- * - No tic/toc, no Index reads (no errFlag), no disp(...) calls
21
- *
22
- * Anything outside that envelope returns `null` and the caller falls
23
- * back to the plain JS-JIT path, which still benefits from e1's
24
- * per-chain tensor kernels.
25
- */
26
- import type { FunctionDef } from "../../interpreter/types.js";
27
- import type { JitStmt, JitType } from "../jitTypes.js";
28
- import type { GeneratedFn } from "../jitLower.js";
29
- import type { Interpreter } from "../../interpreter/interpreter.js";
30
- export interface ScalarFnKernelResult {
31
- /** The inline-compileKernel JS source. The JIT caller splices this
32
- * in place of the normal JS-JIT body. */
33
- jsSource: string;
34
- /** Content-addressed kernel name from generateC, for logging. */
35
- kernelName: string;
36
- /** Raw C source (also embedded in `jsSource` as a string literal).
37
- * Exposed for `--dump-c` / logging. */
38
- cSource: string;
39
- }
40
- /**
41
- * Try to emit a whole-function scalar kernel for the given lowered IR.
42
- * Returns null when the function is not a pure-scalar candidate.
43
- */
44
- export declare function tryEmitScalarFnKernel(interp: Interpreter, fn: FunctionDef, body: JitStmt[], outputNames: string[], localVars: Set<string>, outputType: JitType | null, outputTypes: JitType[], argTypes: JitType[], nargout: number, generatedIRBodies: Map<string, GeneratedFn>): ScalarFnKernelResult | null;
@@ -1,34 +0,0 @@
1
- /**
2
- * e2 — per-assign / chain kernel driver.
3
- *
4
- * Entry point `tryE2Assign` is called from `interpreterExec.ts` for
5
- * every `Assign` statement when `interp.experimental === "e2"`.
6
- *
7
- * Multi-LHS chain detection: scans consecutive suppressed Assigns
8
- * regardless of LHS name. For each chain LHS, uses scope-body liveness
9
- * (via `interp._currentScopeBody`) to decide whether the LHS escapes
10
- * (materializes as an `out_<name>` buffer) or is purely chain-local
11
- * (kept as a per-element stack-local). Reads of a chain LHS before
12
- * its first assign in the chain become `in_<name>` parameters.
13
- *
14
- * On success:
15
- * - Single chain assign: handled like a one-stmt chain.
16
- * - Multi-stmt chain: one C kernel runs all assigns, only escape
17
- * LHSs materialize back to env. `interp._e2ChainAdvance` is set
18
- * so the surrounding loop skips the consumed sibling stmts.
19
- *
20
- * Compilation failures are hard errors (RuntimeError). Classification
21
- * bails (non-classifiable RHS, mismatched lengths, etc.) silently fall
22
- * through to the regular interpreter path.
23
- */
24
- import type { Stmt } from "../../parser/types.js";
25
- import type { Interpreter } from "../../interpreter/interpreter.js";
26
- /**
27
- * Try to compile a chain (1+ stmts) starting at `stmt`. Returns true
28
- * on success — `interp._e2ChainAdvance` is set to the count of EXTRA
29
- * sibling stmts the kernel consumed (0 for a single-stmt chain).
30
- * Returns false to fall back to the regular interpreter path.
31
- */
32
- export declare function tryE2Assign(interp: Interpreter, stmt: Stmt & {
33
- type: "Assign";
34
- }): boolean;
@@ -1,25 +0,0 @@
1
- /**
2
- * e2 — minimal AST `Expr` → `JitExpr` lowerer.
3
- *
4
- * Only handles the whitelist that `classify.ts` accepts: Number, Ident,
5
- * whitelisted Binary/Unary, whitelisted FuncCall. Types are read from
6
- * the live runtime environment (the caller passes in a per-name
7
- * `JitType` lookup), so there's no cross-branch unification.
8
- *
9
- * The classifier already replaced opaque subtrees with synthetic Ident
10
- * nodes whose `name` is also in `envTypes`, so this lowerer doesn't need
11
- * to know about opacity — every Ident it sees has a known runtime type.
12
- */
13
- import type { Expr } from "../../parser/types.js";
14
- import type { JitExpr, JitType } from "../jitTypes.js";
15
- export declare class E2LowerError extends Error {
16
- }
17
- export interface LowerOptions {
18
- /** When a `FuncCall{name, args}` has `name` in `envTypes` as a tensor,
19
- * treat it as tensor indexing and lower to an `Index` node instead
20
- * of looking up a builtin. Used by the e2 whole-loop kernel — the
21
- * chain emitters don't set this (their classifier has already
22
- * marked tensor-access FuncCalls as opaque). */
23
- resolveFuncCallAsTensorIndex?: boolean;
24
- }
25
- export declare function lowerAstToJitExpr(expr: Expr, envTypes: ReadonlyMap<string, JitType>, options?: LowerOptions): JitExpr;
@@ -1,80 +0,0 @@
1
- /**
2
- * e2 — per-AST-node compiled-kernel cache.
3
- *
4
- * Each AST `Expr` (the RHS of an `Assign` we've seen at least once) maps
5
- * to a per-signature cache: the same expression visited with a different
6
- * runtime type signature produces a different specialization. The
7
- * signature includes input names, scalar-vs-tensor, complex-or-not, and
8
- * the LHS name (since the kernel hard-codes which output to write).
9
- *
10
- * The Map is keyed by the AST node identity, not by source text — two
11
- * identical-looking `r = r .* y` statements at different file:line
12
- * positions get separate cache entries, so a recompile from a
13
- * different call-site doesn't poison earlier ones.
14
- *
15
- * The cache holds either an `E2CacheEntry` or the `E2_BAILED` sentinel
16
- * indicating that classification or compilation failed for this
17
- * signature; the sentinel prevents re-attempting the same hopeless
18
- * lowering on every invocation.
19
- */
20
- import type { Stmt, BinaryOperation } from "../../parser/types.js";
21
- export type CompiledKernelFn = (...args: unknown[]) => unknown;
22
- export declare const E2_BAILED: unique symbol;
23
- export interface E2ReductionInfo {
24
- /** Reduction op name (sum / prod / max / min / mean / any / all). */
25
- reduceName: string;
26
- /** Accumulator variable name in env. */
27
- accName: string;
28
- /** When true, the source pattern was `acc = acc OP reduce(...)`;
29
- * the driver applies the same OP to combine the kernel's scalar
30
- * output with the existing env value of `acc`. When false, the
31
- * source pattern was `acc = reduce(...)` and the kernel output is
32
- * written directly. */
33
- hasAccumulate: boolean;
34
- /** Only meaningful when `hasAccumulate` is true. */
35
- accOp?: BinaryOperation;
36
- }
37
- /** Complex-path partitioning info. Present iff the kernel was compiled
38
- * via the paired-buffer complex emitter. The driver uses these lists
39
- * to marshal complex tensors (two pointers per tensor), complex
40
- * scalars (two doubles per scalar), and to allocate complex output
41
- * buffers (data + imag Float64Arrays). */
42
- export interface E2ComplexInfo {
43
- complexTensorNames: string[];
44
- realTensorNames: string[];
45
- complexInputLhsNames: string[];
46
- realInputLhsNames: string[];
47
- complexScalarNames: string[];
48
- realScalarNames: string[];
49
- complexEscapeLhsNames: string[];
50
- realEscapeLhsNames: string[];
51
- }
52
- export interface E2CacheEntry {
53
- fn: CompiledKernelFn;
54
- /** Env tensor input names (combined — for diagnostics). When
55
- * `complex` is defined, the complex marshaling code uses
56
- * `complex.complexTensorNames` and `complex.realTensorNames`
57
- * instead of this. */
58
- tensorNames: string[];
59
- /** Chain LHS names that need `in_<name>` (between tensors and scalars). */
60
- inputLhsNames: string[];
61
- /** Ordered scalar input names. */
62
- scalarNames: string[];
63
- /** Chain LHS names that materialize via `out_<name>` (escape names). */
64
- escapeLhsNames: string[];
65
- /** Number of chain assigns this entry encodes (0 for a standalone
66
- * reduction kernel, 1 for a single-assign chain kernel, >=2 for
67
- * multi-stmt chains). */
68
- chainLength: number;
69
- /** Set when the kernel produces a trailing scalar reduction output.
70
- * The driver allocates a `Float64Array(1)` for `out_acc`, calls the
71
- * kernel, then combines the result with `env[accName]` per the
72
- * `accOp` and `hasAccumulate` fields. Complex chains never set this
73
- * — the complex emitter rejects trailing reductions. */
74
- reduction?: E2ReductionInfo;
75
- /** Paired-buffer complex path info. When set, the marshaling code
76
- * takes the complex branch. */
77
- complex?: E2ComplexInfo;
78
- }
79
- export declare function chainCacheGet(firstStmt: Stmt, sig: string): E2CacheEntry | typeof E2_BAILED | undefined;
80
- export declare function chainCacheSet(firstStmt: Stmt, sig: string, entry: E2CacheEntry | typeof E2_BAILED): void;
@@ -1,55 +0,0 @@
1
- /**
2
- * e2 — multi-LHS fused chain C kernel emission.
3
- *
4
- * Given a sequence of `JitExpr` RHSs each writing to a (possibly
5
- * distinct) chain LHS, produces one C function that runs every assign
6
- * in a single per-element loop. Each chain LHS becomes a stack-local
7
- * `double <name>` declared once at the top of the loop body. Within
8
- * the body, references to a chain-LHS name resolve to the stack-local
9
- * once the corresponding assign has run; before that point they
10
- * resolve to `in_<name>[i]` (so the kernel signature includes
11
- * `in_<lhsName>` for any chain LHS that's read before being written).
12
- *
13
- * After the per-iter assigns, every "escape" LHS (one that's actually
14
- * referenced by the rest of the function body) gets written to its
15
- * `out_<name>[i]` pointer. Chain-locals (only used inside the chain)
16
- * are dropped at the end of the iteration with no buffer materialized.
17
- *
18
- * void e2c_<hash>(int64_t n,
19
- * const double *in_<input1>, ...,
20
- * [const double *in_<lhs_needing_input>, ...,]
21
- * double s_<scalar1>, ...,
22
- * double *out_<escape_lhs1>, ...)
23
- * {
24
- * #pragma omp simd
25
- * for (int64_t i = 0; i < n; i++) {
26
- * double <chain_lhs1>, <chain_lhs2>, ...;
27
- * <chain_lhs1> = <stmt0_rhs_C>;
28
- * <chain_lhs2> = <stmt1_rhs_C>;
29
- * ...
30
- * out_<escape_lhs1>[i] = <escape_lhs1>;
31
- * out_<escape_lhs2>[i] = <escape_lhs2>;
32
- * }
33
- * }
34
- */
35
- import { type ChainAssignSpec, type KernelInputs } from "./emitShared.js";
36
- export type { ChainAssignSpec } from "./emitShared.js";
37
- export interface E2ChainEmitResult {
38
- kernelName: string;
39
- cSource: string;
40
- koffiSig: string;
41
- hash: string;
42
- /** Tensor input names in signature order — does NOT include any
43
- * in_<lhs> entries. */
44
- inputTensors: string[];
45
- /** Chain LHS names that appear as `in_<name>` in the signature, in
46
- * order. */
47
- inputLhsNames: string[];
48
- /** Scalar input names in signature order. */
49
- inputScalars: string[];
50
- /** Chain LHS names that appear as `out_<name>` in the signature,
51
- * in order. */
52
- escapeLhsNames: string[];
53
- chainLength: number;
54
- }
55
- export declare function emitE2ChainKernel(assigns: ChainAssignSpec[], inputs: KernelInputs, par?: boolean): E2ChainEmitResult;
@@ -1,119 +0,0 @@
1
- /**
2
- * e2 (experimental) — per-assign expression classifier.
3
- *
4
- * Walks an AST `Expr` and decides whether it can be compiled into a
5
- * single per-element C kernel. The classifier never evaluates anything
6
- * — it only inspects the AST shape and the names referenced.
7
- *
8
- * Whitelist:
9
- * - Number, Ident
10
- * - Binary with arithmetic / comparison ops
11
- * - Unary Plus, Minus, Not
12
- * - FuncCall to a whitelisted scalar math builtin
13
- *
14
- * Anything outside the whitelist is recorded as an "opaque root": the
15
- * driver is expected to evaluate that subtree via the interpreter and
16
- * bind the result to a fresh synthetic name, then re-classify with that
17
- * name in scope.
18
- *
19
- * The classifier returns a list of opaque-root subtrees and the
20
- * "rewritten" expression that uses synthetic names where the opaque
21
- * roots used to be. The driver is responsible for runtime type checks
22
- * and for actually evaluating the opaque subtrees.
23
- */
24
- import type { Expr } from "../../parser/types.js";
25
- import { BinaryOperation } from "../../parser/types.js";
26
- export { BinaryOperation } from "../../parser/types.js";
27
- /** Scalar math builtins that map cleanly to C99. Mirrors the JS-JIT
28
- * Math.* table plus pow / hypot / atan2 / etc. */
29
- export declare const E2_BUILTIN_WHITELIST: ReadonlySet<string>;
30
- /** One opaque subtree the driver must evaluate before invoking the
31
- * kernel. The classifier replaces it in `emittableExpr` with an Ident
32
- * named `syntheticName`. */
33
- export interface OpaqueRoot {
34
- syntheticName: string;
35
- expr: Expr;
36
- }
37
- export interface ClassifyResult {
38
- /** AST with opaque subtrees replaced by Ident(syntheticName) nodes. */
39
- emittableExpr: Expr;
40
- /** Subtrees the driver must evaluate via the interpreter. */
41
- opaqueRoots: OpaqueRoot[];
42
- /** Identifiers referenced in `emittableExpr` that originated from the
43
- * user's environment (i.e. NOT synthetic opaque-root bindings). The
44
- * driver looks these up in the env to determine input types. */
45
- envIdents: Set<string>;
46
- }
47
- /** Classify an expression. Always succeeds — the worst case is the
48
- * whole expression becomes a single opaque root, which the driver
49
- * will reject. */
50
- export declare function classifyExpr(expr: Expr): ClassifyResult;
51
- /** Heuristic gate: an expression is "worth" JIT'ing only when it does
52
- * some work — a bare Ident or Number is not. The driver also gates
53
- * on tensor size at runtime; this is just a structural pre-filter to
54
- * skip the cost of lowering trivial expressions. */
55
- export declare function isWorthCompiling(emittableExpr: Expr): boolean;
56
- /** A single classification entry for one assign in a chain. The chain
57
- * emitter consumes one of these per assign in order. */
58
- export interface ChainAssignClassification {
59
- /** The original AST stmt (kept so the cache can key on it). */
60
- stmt: import("../../parser/types.js").Stmt & {
61
- type: "Assign";
62
- };
63
- /** The classifier's rewritten RHS — opaque subtrees replaced by
64
- * Ident(syntheticName). */
65
- emittableExpr: Expr;
66
- /** Opaque subtrees this assign contributed (driver evaluates these
67
- * before the kernel call). */
68
- opaqueRoots: OpaqueRoot[];
69
- /** Identifiers referenced by this assign's emittableExpr. Includes
70
- * synthetic opaque-root names. */
71
- envIdents: Set<string>;
72
- /** True if the assign reads its own LHS (e.g. `r = r + x`). For the
73
- * first stmt of a chain this means the kernel needs `in_<lhs>` as
74
- * an input pointer; for later stmts it just means a chain-local
75
- * read. */
76
- selfReadsLhs: boolean;
77
- }
78
- export interface ChainClassification {
79
- /** Chain assigns, in source order. May have multiple distinct LHSs. */
80
- assigns: ChainAssignClassification[];
81
- }
82
- /** Detect a chain of consecutive suppressed classifiable Assigns
83
- * starting at `stmts[startIdx]`. Each LHS may be a different name;
84
- * the driver decides — using full-scope liveness — whether each LHS
85
- * becomes a chain-local or a materialized output buffer.
86
- *
87
- * The chain ends at the first non-Assign, the first unsuppressed
88
- * Assign, or the first Assign whose RHS classification is not worth
89
- * compiling.
90
- *
91
- * Returns null if the very first stmt isn't a chainable Assign. */
92
- export declare function classifyAssignChain(stmts: import("../../parser/types.js").Stmt[], startIdx: number): ChainClassification | null;
93
- /** Reduction op names whose semantics the e2 reduction emitter knows. */
94
- export declare const E2_REDUCTION_OPS: ReadonlySet<string>;
95
- export interface TrailingReductionMatch {
96
- /** The original Assign stmt — pinned for cache identity. */
97
- stmt: import("../../parser/types.js").Stmt & {
98
- type: "Assign";
99
- };
100
- /** LHS = accumulator name. */
101
- accName: string;
102
- /** Reduction op name. */
103
- reduceName: string;
104
- /** AST expression argument to the reduction call. The driver decides
105
- * whether to treat it as a Var-targeting-chain-local (for trailing-
106
- * after-chain) or as a standalone elemwise expression. */
107
- targetExpr: Expr;
108
- /** True for `acc = acc OP reduce(...)`; false for `acc = reduce(...)`. */
109
- hasAccumulate: boolean;
110
- /** The accumulate op (only meaningful when `hasAccumulate` is true). */
111
- accOp?: BinaryOperation;
112
- }
113
- /** Match an Assign of the form:
114
- * acc = reduce(arg)
115
- * acc = acc OP reduce(arg)
116
- * acc = reduce(arg) OP acc (commutative ops only)
117
- * where `reduce` is a single-argument call to a known reduction op.
118
- * Returns the matched details or null. */
119
- export declare function matchTrailingReduction(stmt: import("../../parser/types.js").Stmt): TrailingReductionMatch | null;
@@ -1,16 +0,0 @@
1
- /**
2
- * e2 — browser-safe indirection for the C compile driver.
3
- *
4
- * The driver in `c/compile.ts` is Node-only (it shells out to `cc` and
5
- * loads via koffi). The browser bundle includes the e2 modules but
6
- * NOT this driver — `setE2CompileFn` from `e2/install.ts` (Node only)
7
- * swaps in the real implementation. In the browser, the stub throws.
8
- */
9
- export type E2CompileFn = (cSource: string, koffiSig: string, kernelName: string, log?: (msg: string) => void) => ((...args: unknown[]) => unknown) | null;
10
- export declare function setE2CompileFn(fn: E2CompileFn): void;
11
- export declare function getE2CompileFn(): E2CompileFn;
12
- /** Minimum element count of the largest tensor input before we'll
13
- * consider compiling an e2 kernel. Below this, koffi overhead dwarfs
14
- * the work and falling through to the interpreter is faster.
15
- * Overridable via `NUMBL_E2_MIN_ELEMS`. */
16
- export declare function e2MinElems(): number;
@@ -1,79 +0,0 @@
1
- /**
2
- * e2 — complex multi-LHS fused chain C kernel emission (paired-buffer).
3
- *
4
- * Sister to [chainKernelEmit.ts](./chainKernelEmit.ts) for chains that
5
- * produce at least one complex tensor. Mirrors the codegen shape of
6
- * [e1/complexKernelEmit.ts](../e1/complexKernelEmit.ts) and uses the
7
- * same fusion envelope (+ - * .* unary +/- conj real imag, real/complex
8
- * widening, ImagLiteral). Anything outside that subset (`./`,
9
- * `abs(complex)`, transcendentals on complex) is rejected at the e2
10
- * lowerer level, which causes the driver to bail to the interpreter
11
- * per-op complex path — matching e1's fusion fallthrough behavior.
12
- *
13
- * void e2cc_<hash>(int64_t n,
14
- * const double *in_<cta>_re, const double *in_<cta>_im,
15
- * const double *in_<rtb>,
16
- * [in_<lhs_input>_re/_im or in_<lhs_input> ...,]
17
- * double s_<csc>_re, double s_<csc>_im,
18
- * double s_<rsc>,
19
- * [out_<lhs>_re, out_<lhs>_im or out_<lhs> ...])
20
- * {
21
- * #pragma omp simd
22
- * for (int64_t i = 0; i < n; i++) {
23
- * double <clhs1>_re, <clhs1>_im, ..., <rlhs1>, ...;
24
- * <clhs1>_re = ...; <clhs1>_im = ...;
25
- * <rlhs1> = ...;
26
- * out_<clhs>_re[i] = <clhs>_re; out_<clhs>_im[i] = <clhs>_im;
27
- * out_<rlhs>[i] = <rlhs>;
28
- * }
29
- * }
30
- *
31
- * Complex chains deliberately stick to `#pragma omp simd` regardless of
32
- * `--par`: per-element bodies are ~6 flops spread across paired re/im
33
- * buffers (memory-bandwidth-bound), and thread-spawn overhead dominates
34
- * the compute win at realistic N. Matches e1's stance.
35
- */
36
- import type { ChainAssignSpec } from "./emitShared.js";
37
- export interface E2ComplexKernelInputs {
38
- /** Env tensor names, split by complex-ness. */
39
- complexTensorNames: string[];
40
- realTensorNames: string[];
41
- /** Env scalar names, split by complex-ness. */
42
- complexScalarNames: string[];
43
- realScalarNames: string[];
44
- /** Chain LHS names that need `in_<name>` because they're read before
45
- * being written. Split by complex-ness of the LHS. */
46
- complexInputLhsNames: string[];
47
- realInputLhsNames: string[];
48
- /** Chain LHS names that escape the chain. Split by complex-ness. */
49
- complexEscapeLhsNames: string[];
50
- realEscapeLhsNames: string[];
51
- }
52
- export interface E2ComplexChainEmitResult {
53
- kernelName: string;
54
- cSource: string;
55
- koffiSig: string;
56
- hash: string;
57
- /** In signature order: complex tensors, real tensors. */
58
- complexInputTensors: string[];
59
- realInputTensors: string[];
60
- /** In signature order: complex input LHSs, real input LHSs. */
61
- complexInputLhsNames: string[];
62
- realInputLhsNames: string[];
63
- /** In signature order: complex scalars, real scalars. */
64
- complexInputScalars: string[];
65
- realInputScalars: string[];
66
- /** In signature order: complex escape LHSs, real escape LHSs. */
67
- complexEscapeLhsNames: string[];
68
- realEscapeLhsNames: string[];
69
- chainLength: number;
70
- }
71
- /** Per-assign LHS info — complex-ness of the RHS determines whether
72
- * this stmt emits a paired (re/im) local or a single real local. */
73
- export interface ComplexChainAssignSpec extends ChainAssignSpec {
74
- /** True when THIS stmt's RHS is complex. Chain-LHS type can differ
75
- * per reassignment; we track per-stmt so a `a = real; a = complex;`
76
- * sequence sees `a` as complex only after the second assign. */
77
- rhsIsComplex: boolean;
78
- }
79
- export declare function emitE2ComplexChainKernel(assigns: ComplexChainAssignSpec[], inputs: E2ComplexKernelInputs): E2ComplexChainEmitResult;
@@ -1,71 +0,0 @@
1
- /**
2
- * e2 — shared kernel-emission helpers used by both the chain emitter
3
- * and the reduction emitter. The two emitters build the same kernel
4
- * shape up to a few trailing differences (reduction init / combine /
5
- * out_acc output), so everything that's identical lives here.
6
- */
7
- import type { JitExpr } from "../jitTypes.js";
8
- import { type FusedTarget } from "../fusedScalarEmit.js";
9
- import type { ScalarOpTarget } from "../scalarEmit.js";
10
- /**
11
- * C helper included in every e2 kernel prologue.
12
- *
13
- * `-ffast-math` implies `-ffinite-math-only`, which lets the compiler
14
- * assume no NaN/Inf values and constant-fold `x != x` to 0 and `x == x`
15
- * to 1. The NaN-detection idiom `mask = x ~= x` would silently return all
16
- * zeros. `numbl_is_nan_fp` inspects the IEEE-754 bit pattern directly —
17
- * the optimizer cannot look through `memcpy` to apply finite-math
18
- * assumptions, so this survives the flag.
19
- */
20
- export declare const E2_C_PROLOGUE: string;
21
- /**
22
- * e2-specific scalar op target.
23
- *
24
- * Identical to `C_SCALAR_TARGET` except `binEq` / `binNe`: when both
25
- * operand strings are the same C expression (i.e. the source was `x == x`
26
- * or `x ~= x`), the standard `==` / `!=` forms are constant-folded to
27
- * 1 / 0 by `-ffinite-math-only`. Replace them with the bit-pattern NaN
28
- * helper so the self-comparison gives the correct IEEE 754 result.
29
- */
30
- export declare const E2_C_SCALAR_TARGET: ScalarOpTarget;
31
- export interface ChainAssignSpec {
32
- lhsName: string;
33
- rhs: JitExpr;
34
- }
35
- export interface KernelInputs {
36
- /** Regular env input tensor names (NOT chain LHSs). */
37
- tensorNames: string[];
38
- /** Scalar env input names. */
39
- scalarNames: string[];
40
- /** Chain LHS names that need `in_<name>` because they're read before
41
- * being written. */
42
- inputLhsNames: string[];
43
- /** Chain LHS names that escape the chain (materialized via
44
- * `out_<name>`). Does NOT include a reduce-target name — that one
45
- * is always chain-local by construction. */
46
- escapeLhsNames: string[];
47
- }
48
- export declare const cInputPtr: (name: string) => string;
49
- export declare const cOutputPtr: (name: string) => string;
50
- export declare const cScalarParam: (name: string) => string;
51
- /** FusedTarget for the per-element body. Resolves Var reads to either
52
- * the chain-local stack name (once the corresponding assign has run)
53
- * or `in_<name>[i]` (before that point), mangles scalar param names,
54
- * and dispatches whitelisted builtins through their `jitEmitC`. */
55
- export declare function makeFusedTarget(locallyAssigned: ReadonlySet<string>): FusedTarget;
56
- /** Unique chain LHS names in source order — for the leading
57
- * `double <a>, <b>;` declaration. */
58
- export declare function uniqueLhsOrdered(chain: ChainAssignSpec[]): string[];
59
- /** All tensor-typed names visible to emitFusedScalarExpr: regular env
60
- * tensors, `in_<lhs>` tensors, and chain LHSs. */
61
- export declare function allTensorVarsFor(inputs: KernelInputs, chain: ChainAssignSpec[]): Set<string>;
62
- /** Emit one `<lhs> = <rhsC>;` line per chain assign, growing
63
- * `locallyAssigned` as we go so later stmts resolve earlier LHSs to
64
- * the stack-local. */
65
- export declare function emitChainAssignLines(chain: ChainAssignSpec[], allTensorVars: ReadonlySet<string>, ft: FusedTarget, locallyAssigned: Set<string>): string[];
66
- /** Kernel param list (tensor → inputLhs → scalar → escapeLhs). Callers
67
- * append any trailing params (e.g. `double *out_acc`). */
68
- export declare function buildParamList(inputs: KernelInputs): string[];
69
- /** koffi type list in the same order as `buildParamList`. Callers
70
- * append any trailing entries. */
71
- export declare function buildKoffiParts(inputs: KernelInputs): string[];
@@ -1,11 +0,0 @@
1
- /**
2
- * e2 — Node-only install hook.
3
- *
4
- * Sets the module-level `e2CompileFn` to the real `compileAndLoad`
5
- * driver from `c/compile.ts`. The browser bundle never imports this
6
- * file, so `e2CompileFn` stays at the throwing stub and any attempt
7
- * to use `--opt e2` from the web fails with a clear message.
8
- *
9
- * Idempotent: re-importing in tests doesn't re-install.
10
- */
11
- export {};
@@ -1,29 +0,0 @@
1
- /**
2
- * e2 — AST liveness helpers.
3
- *
4
- * Used by the chain classifier to decide whether a chain LHS is
5
- * actually used outside the chain's own stmts. If not, it can be
6
- * compiled as a per-element stack-local instead of being materialized
7
- * as a tensor output buffer.
8
- *
9
- * The "scope" passed in is the innermost enclosing function body or
10
- * top-level script body — chosen so that for-bodies, if-bodies, etc.
11
- * are scanned recursively (MATLAB has flat function-level scoping for
12
- * locals, so a name introduced inside a for-loop is visible to other
13
- * stmts in the same function body).
14
- *
15
- * The walk excludes the chain's own stmts (and the trailing-reduction
16
- * stmt if any) from the scan, applying the exclusion at every nesting
17
- * level — so a chain inside a for-body whose LHS is read by another
18
- * stmt in the same function body counts as referenced, but the chain
19
- * stmts themselves don't trigger a false positive.
20
- */
21
- import type { Stmt } from "../../parser/types.js";
22
- /**
23
- * True iff `name` appears anywhere in `scopeBody` outside the stmts
24
- * listed in `excludeStmts`. The exclusion is by reference identity
25
- * and is applied at every nesting level — pass the chain stmts (and
26
- * the trailing-reduction stmt if any) so they don't trigger false
27
- * positives.
28
- */
29
- export declare function isNameReferencedOutsideStmts(scopeBody: readonly Stmt[], excludeStmts: ReadonlySet<Stmt>, name: string): boolean;
@@ -1,49 +0,0 @@
1
- /**
2
- * e2 whole-loop C JIT.
3
- *
4
- * For a `for varName = lo:hi <body> end` where the body fits a supported
5
- * shape, emit a single C function that runs all n iterations and call it
6
- * once, instead of walking the AST on every iteration.
7
- *
8
- * Without this path, `--opt e2` pays ~70–100 ns per iter on a trivial
9
- * `s = s + i` just for AST dispatch; a compiled C loop runs it in <1 ns.
10
- *
11
- * Current supported body shapes (all may mix in one loop):
12
- * - scalar assign `s = s + sin(i) * cos(i) + sqrt(i*0.01)`
13
- * - scalar indexed read `s = s + x(i)` (real tensor x)
14
- * - scalar indexed write `y(i) = sin(i*0.01)` (preallocated y)
15
- * - tensor local (elemwise) `c = a.*b + i*0.001` (per-element
16
- * expression is
17
- * inlined into any
18
- * consuming sum();
19
- * last-iter value
20
- * is also written
21
- * back to the env
22
- * for MATLAB
23
- * post-loop
24
- * visibility)
25
- * - reductions `s = s + sum(c)` (c is a
26
- * tensor_local —
27
- * chained
28
- * tensor_locals
29
- * fuse through)
30
- *
31
- * Not supported (falls through to the interpreter / other JIT paths):
32
- * - non-`lo:hi` loop shapes (stepped ranges, `for i = v`)
33
- * - complex or logical tensor inputs
34
- * - matrix-matrix / matrix-vector multiplication
35
- * - bsxfun / broadcast across shapes
36
- * - function-handle calls, user-function calls
37
- * - control flow inside the body (if / while / return)
38
- * - multi-dimensional tensor access
39
- */
40
- import type { Stmt } from "../../parser/types.js";
41
- import type { Interpreter } from "../../interpreter/interpreter.js";
42
- /**
43
- * Attempt to compile and execute a for-loop as one C kernel under
44
- * `--opt e2`. Returns true on success, false to fall back to the regular
45
- * interpreter path (the caller will run the loop normally).
46
- */
47
- export declare function tryE2Loop(interp: Interpreter, stmt: Stmt & {
48
- type: "For";
49
- }): boolean;