numbl 0.3.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +16 -33
- package/dist-cli/cli.js +20535 -25629
- package/dist-lib/graphics/types.d.ts +22 -0
- package/dist-lib/lib.d.ts +1 -0
- package/dist-lib/lib.js +59075 -63895
- package/dist-lib/numbl-core/executeCode.d.ts +11 -14
- package/dist-lib/numbl-core/executors/cJit/builtins.d.ts +30 -0
- package/dist-lib/numbl-core/executors/cJit/chainCodegen.d.ts +59 -0
- package/dist-lib/numbl-core/executors/cJit/chainExecutor.d.ts +27 -0
- package/dist-lib/numbl-core/executors/cJit/chainPass.d.ts +42 -0
- package/dist-lib/numbl-core/executors/cJit/codegen.d.ts +44 -0
- package/dist-lib/numbl-core/executors/cJit/compile.d.ts +45 -0
- package/dist-lib/numbl-core/executors/cJit/elemwiseCodegen.d.ts +23 -0
- package/dist-lib/numbl-core/executors/cJit/elemwiseStructural.d.ts +33 -0
- package/dist-lib/numbl-core/executors/cJit/fuseAnalyze.d.ts +39 -0
- package/dist-lib/numbl-core/executors/cJit/fuseCodegen.d.ts +16 -0
- package/dist-lib/numbl-core/executors/cJit/fuseExecutor.d.ts +28 -0
- package/dist-lib/numbl-core/executors/cJit/loopExecutor.d.ts +32 -0
- package/dist-lib/numbl-core/executors/cJit/register.d.ts +10 -0
- package/dist-lib/numbl-core/executors/cJit/whitelist.d.ts +15 -0
- package/dist-lib/numbl-core/executors/cache.d.ts +26 -0
- package/dist-lib/numbl-core/executors/context.d.ts +76 -0
- package/dist-lib/numbl-core/executors/index.d.ts +17 -0
- package/dist-lib/numbl-core/executors/jsJit/callExecutor.d.ts +25 -0
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jitCodegen.d.ts +2 -2
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jitCodegenHoist.d.ts +1 -1
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jsMultiReduction.d.ts +10 -13
- package/dist-lib/numbl-core/executors/jsJit/helpers/alloc.d.ts +12 -0
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpers.d.ts +2 -2
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpersComplex.d.ts +1 -1
- package/dist-lib/numbl-core/executors/jsJit/helpers/jitHelpersIndex.d.ts +33 -0
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpersTensor.d.ts +7 -7
- package/dist-lib/numbl-core/executors/jsJit/jitCall.d.ts +59 -0
- package/dist-lib/numbl-core/executors/jsJit/jitLoop.d.ts +53 -0
- package/dist-lib/numbl-core/executors/jsJit/jitTopLevel.d.ts +44 -0
- package/dist-lib/numbl-core/executors/jsJit/loopExecutor.d.ts +15 -0
- package/dist-lib/numbl-core/{jit/jitLoopAnalysis.d.ts → executors/jsJit/lower/blockAnalysis.d.ts} +5 -5
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitBailSafety.d.ts +1 -1
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLower.d.ts +18 -4
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerExpr.d.ts +11 -2
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerStmt.d.ts +2 -2
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerTypes.d.ts +2 -2
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/scalarEmit.d.ts +2 -2
- package/dist-lib/numbl-core/executors/jsJit/shared.d.ts +120 -0
- package/dist-lib/numbl-core/executors/jsJit/topLevelExecutor.d.ts +17 -0
- package/dist-lib/numbl-core/executors/lowering.d.ts +166 -0
- package/dist-lib/numbl-core/executors/plugins.d.ts +39 -0
- package/dist-lib/numbl-core/executors/registry.d.ts +148 -0
- package/dist-lib/numbl-core/executors/types.d.ts +103 -0
- package/dist-lib/numbl-core/functionResolve.d.ts +7 -0
- package/dist-lib/numbl-core/helpers/check-helpers.d.ts +4 -5
- package/dist-lib/numbl-core/helpers/linsolve.d.ts +2 -3
- package/dist-lib/numbl-core/helpers/prng.d.ts +1 -2
- package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +2 -1
- package/dist-lib/numbl-core/interpreter/builtins/misc.d.ts +4 -1
- package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +4 -91
- package/dist-lib/numbl-core/interpreter/interpreter.d.ts +33 -47
- package/dist-lib/numbl-core/interpreter/interpreterSpecialBuiltins.d.ts +6 -3
- package/dist-lib/numbl-core/interpreter/types.d.ts +27 -12
- package/dist-lib/numbl-core/{jit/jitTypes.d.ts → jitTypes.d.ts} +15 -1
- package/dist-lib/numbl-core/jsUserFunctions.d.ts +8 -0
- package/dist-lib/numbl-core/lowering/loweringContext.d.ts +24 -0
- package/dist-lib/numbl-core/native/lapack-bridge.d.ts +3 -3
- package/dist-lib/numbl-core/parser/types.d.ts +20 -0
- package/dist-lib/numbl-core/runtime/constructors.d.ts +6 -6
- package/dist-lib/numbl-core/runtime/cow.d.ts +33 -0
- package/dist-lib/numbl-core/runtime/index.d.ts +3 -2
- package/dist-lib/numbl-core/runtime/indexing.d.ts +6 -1
- package/dist-lib/numbl-core/runtime/plotBuiltinDispatch.d.ts +86 -0
- package/dist-lib/numbl-core/runtime/plotUtils.d.ts +17 -2
- package/dist-lib/numbl-core/runtime/refcount.d.ts +85 -0
- package/dist-lib/numbl-core/runtime/runtime.d.ts +27 -66
- package/dist-lib/numbl-core/runtime/runtimeDispatch.d.ts +2 -2
- package/dist-lib/numbl-core/runtime/runtimeIndexing.d.ts +2 -2
- package/dist-lib/numbl-core/runtime/runtimeMemberAccess.d.ts +1 -1
- package/dist-lib/numbl-core/runtime/runtimePlot.d.ts +1 -0
- package/dist-lib/numbl-core/runtime/struct-access.d.ts +2 -1
- package/dist-lib/numbl-core/runtime/types.d.ts +104 -62
- package/dist-lib/numbl-core/runtime/utils.d.ts +2 -8
- package/dist-lib/numbl-core/version.d.ts +1 -1
- package/dist-plot-viewer/assets/index-COAM8o1E.js +4426 -0
- package/dist-plot-viewer/index.html +1 -1
- package/native/lapack_linsolve.cpp +1 -1
- package/native/numbl_addon_common.h +2 -2
- package/native/ops/comparison.c +1 -1
- package/package.json +3 -6
- package/dist-lib/numbl-core/jit/c/abi.d.ts +0 -90
- package/dist-lib/numbl-core/jit/c/assemble.d.ts +0 -56
- package/dist-lib/numbl-core/jit/c/classify.d.ts +0 -70
- package/dist-lib/numbl-core/jit/c/compile.d.ts +0 -37
- package/dist-lib/numbl-core/jit/c/context.d.ts +0 -152
- package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +0 -20
- package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +0 -18
- package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +0 -42
- package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +0 -40
- package/dist-lib/numbl-core/jit/c/emit/index.d.ts +0 -14
- package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +0 -23
- package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +0 -25
- package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +0 -127
- package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +0 -58
- package/dist-lib/numbl-core/jit/c/epilogue.d.ts +0 -26
- package/dist-lib/numbl-core/jit/c/feasibility.d.ts +0 -44
- package/dist-lib/numbl-core/jit/c/prelude.d.ts +0 -37
- package/dist-lib/numbl-core/jit/c/visit.d.ts +0 -63
- package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +0 -46
- package/dist-lib/numbl-core/jit/e1/hash.d.ts +0 -10
- package/dist-lib/numbl-core/jit/e1/install.d.ts +0 -13
- package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +0 -54
- package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +0 -66
- package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +0 -13
- package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +0 -44
- package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +0 -34
- package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +0 -25
- package/dist-lib/numbl-core/jit/e2/cache.d.ts +0 -80
- package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +0 -55
- package/dist-lib/numbl-core/jit/e2/classify.d.ts +0 -119
- package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +0 -16
- package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +0 -79
- package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +0 -71
- package/dist-lib/numbl-core/jit/e2/install.d.ts +0 -11
- package/dist-lib/numbl-core/jit/e2/liveness.d.ts +0 -29
- package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +0 -49
- package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +0 -75
- package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +0 -24
- package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +0 -72
- package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +0 -29
- package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +0 -65
- package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +0 -69
- package/dist-lib/numbl-core/jit/fusion.d.ts +0 -71
- package/dist-lib/numbl-core/jit/fusionOps.d.ts +0 -25
- package/dist-lib/numbl-core/jit/heavyOps.d.ts +0 -15
- package/dist-lib/numbl-core/jit/index.d.ts +0 -7
- package/dist-lib/numbl-core/jit/jitLoop.d.ts +0 -25
- package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +0 -22
- package/dist-lib/numbl-core/jit/js/jitHelpersIndex.d.ts +0 -33
- package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +0 -17
- package/dist-lib/numbl-core/runtime/alloc.d.ts +0 -23
- package/dist-plot-viewer/assets/index-GiUNnMQg.js +0 -4426
- package/native/jit_runtime/jit_runtime.c +0 -261
- package/native/jit_runtime/jit_runtime.h +0 -204
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { type JitExpr } from "../../jitTypes.js";
|
|
2
|
-
import { type EmitCtx } from "../context.js";
|
|
3
|
-
/** Emit a value-expression. For scalars, returns a C `double` expression.
|
|
4
|
-
* For tensors, returns the data-variable name (the caller knows to also
|
|
5
|
-
* access the corresponding _len variable).
|
|
6
|
-
*
|
|
7
|
-
* Complex-typed expressions are *not* valid here — they produce a pair
|
|
8
|
-
* of doubles and must go through `emitComplex`. Reaching this function
|
|
9
|
-
* with a complex expression indicates a missed routing at the caller. */
|
|
10
|
-
export declare function emitExpr(expr: JitExpr, ctx: EmitCtx): string;
|
|
11
|
-
export declare function emitBinary(expr: JitExpr & {
|
|
12
|
-
tag: "Binary";
|
|
13
|
-
}, ctx: EmitCtx): string;
|
|
14
|
-
export declare function emitUnary(expr: JitExpr & {
|
|
15
|
-
tag: "Unary";
|
|
16
|
-
}, ctx: EmitCtx): string;
|
|
17
|
-
export declare function emitIndex(expr: JitExpr & {
|
|
18
|
-
tag: "Index";
|
|
19
|
-
}, ctx: EmitCtx): string;
|
|
20
|
-
export declare function emitCall(expr: JitExpr & {
|
|
21
|
-
tag: "Call";
|
|
22
|
-
}, ctx: EmitCtx): string;
|
|
23
|
-
export declare function emitTruthiness(expr: JitExpr, ctx: EmitCtx): string;
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Statement-level emission — the top-level dispatch for every JitStmt.
|
|
3
|
-
*
|
|
4
|
-
* Exports:
|
|
5
|
-
* - `emitStmts(lines, stmts, indent, ctx)` — public entry point; the
|
|
6
|
-
* outer function body is fed through this. Runs the fusion pass
|
|
7
|
-
* when `ctx.fuse` is set, otherwise emits one statement at a time.
|
|
8
|
-
* - `emitStmt` — per-statement dispatch used internally (also used
|
|
9
|
-
* inside If / For / While bodies via `emitStmts`).
|
|
10
|
-
* - `withPendingStmts` — scoped helper for nested expressions that
|
|
11
|
-
* need to hoist declarations above the calling statement.
|
|
12
|
-
*
|
|
13
|
-
* This module is thin on purpose: it routes to the specialized emitters
|
|
14
|
-
* in [./scalar.ts](./scalar.ts), [./complexScalar.ts](./complexScalar.ts),
|
|
15
|
-
* [./tensor.ts](./tensor.ts), [./assign.ts](./assign.ts),
|
|
16
|
-
* [./userCall.ts](./userCall.ts), and [./fused.ts](./fused.ts).
|
|
17
|
-
*/
|
|
18
|
-
import { type JitStmt } from "../../jitTypes.js";
|
|
19
|
-
import { type EmitCtx } from "../context.js";
|
|
20
|
-
/** Evaluate `fn` with `ctx.pendingStmts` set to `{lines, indent}` so any
|
|
21
|
-
* UserCall / RangeSliceRead nested inside can hoist its decl+call into
|
|
22
|
-
* `lines` ahead of the calling statement. Restores the prior value on
|
|
23
|
-
* exit (safe even if nested save/restore frames stack). */
|
|
24
|
-
export declare function withPendingStmts<T>(ctx: EmitCtx, lines: string[], indent: string, fn: () => T): T;
|
|
25
|
-
export declare function emitStmts(lines: string[], stmts: JitStmt[], indent: string, ctx: EmitCtx): void;
|
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
import { type JitExpr } from "../../jitTypes.js";
|
|
2
|
-
import { type EmitCtx } from "../context.js";
|
|
3
|
-
import { type ComplexTensorResult } from "./helpers.js";
|
|
4
|
-
/** Size a scratch tensor buffer to `lenExpr` doubles, freeing any stale
|
|
5
|
-
* buffer from a prior loop iteration first. A plain `if (!sData) malloc`
|
|
6
|
-
* would overflow the buffer if a later iteration demanded more bytes. */
|
|
7
|
-
export declare function emitScratchBufAlloc(lines: string[], indent: string, sData: string, sLen: string, lenExpr: string): void;
|
|
8
|
-
/** Complex scratch alloc: re + im buffers, both sized to `lenExpr`. The
|
|
9
|
-
* complex kernels in numbl_ops write both buffers unconditionally, so
|
|
10
|
-
* both must be valid pointers (len > 0).
|
|
11
|
-
*
|
|
12
|
-
* Same length-match fast-path as emitScratchBufAlloc: both buffers are
|
|
13
|
-
* kept in lockstep, so a single size check guards both. */
|
|
14
|
-
export declare function emitScratchBufAllocComplex(lines: string[], indent: string, sData: string, sDataIm: string, sLen: string, lenExpr: string): void;
|
|
15
|
-
/** Derive (d0, d1) C expressions for a dynamic tensor output whose
|
|
16
|
-
* shape is inherited from an elemwise operand. Uses the operand's
|
|
17
|
-
* static shape when known; else — when `ctx` is provided — falls
|
|
18
|
-
* back to the nearest tensor Var's runtime `_d0`/`_d1` locals
|
|
19
|
-
* (accurate even for partial-shape specializations). Final fallback
|
|
20
|
-
* recovers the missing dim from lenExpr and the other dim (e.g. row
|
|
21
|
-
* `[1, -1]` → `[1, len]`), or `[len, 1]` (column convention) when
|
|
22
|
-
* nothing is known. */
|
|
23
|
-
export declare function shapeExprsFor(shapeSrc: JitExpr | undefined, lenExpr: string, ctx?: EmitCtx): [string, string];
|
|
24
|
-
/** Find a tensor-length expression from a tensor expr tree (for pre-
|
|
25
|
-
* allocating scratch buffers). Returns a C expression for the length. */
|
|
26
|
-
export declare function findTensorLenExpr(expr: JitExpr, ctx: EmitCtx): string;
|
|
27
|
-
/** Ensure `destName`'s buffer is sized to exactly `lenExpr` doubles
|
|
28
|
-
* before an elemwise op writes into it. For locals and dynamic
|
|
29
|
-
* outputs, always free+malloc — loop iterations can produce different
|
|
30
|
-
* sizes, so a first-time-only malloc would overflow the buffer in
|
|
31
|
-
* later iterations. For fixed-size outputs (caller-aliased buffer),
|
|
32
|
-
* just records the length; freeing would corrupt the caller.
|
|
33
|
-
*
|
|
34
|
-
* When the destination is a dynamic output, also writes its _d0/_d1
|
|
35
|
-
* shape locals. Pass the tensor operand whose shape the result
|
|
36
|
-
* inherits (elemwise preserves operand shape); we use its static
|
|
37
|
-
* jitType.shape when available, else fall back to `[lenExpr, 1]`. */
|
|
38
|
-
export declare function emitEnsureTensorBuf(lines: string[], indent: string, destName: string, lenExpr: string, ctx: EmitCtx, shapeSrc?: JitExpr): void;
|
|
39
|
-
/** Emit an elemwise tensor-assign with aliasing safety and a
|
|
40
|
-
* steady-state fast path.
|
|
41
|
-
*
|
|
42
|
-
* Problem: the RHS of `dst = <elemwise-expr>` may read from `dst` itself
|
|
43
|
-
* (e.g. `r = r .* y + 3.0`). If we resized the dst buffer first and
|
|
44
|
-
* then the kernel read the old dst data, it would dangle. Pre-c54add0
|
|
45
|
-
* the kernel wrote inline into dst, which was only safe when no resize
|
|
46
|
-
* was needed — resizing freed the operand buffer before the kernel
|
|
47
|
-
* read it. c54add0 defused this via an unconditional scratch-transfer
|
|
48
|
-
* (eval into scratch, then free+malloc dst, then memcpy) but that
|
|
49
|
-
* added a full-tensor memcpy to every elemwise assign in hot loops.
|
|
50
|
-
*
|
|
51
|
-
* This helper splits at runtime: if dst is already the right size,
|
|
52
|
-
* emit the kernel directly into dst (no memcpy, no scratch). Else
|
|
53
|
-
* evaluate into a scratch, then realloc dst and memcpy. Both paths
|
|
54
|
-
* are aliasing-safe: the fast path doesn't free dst at all; the slow
|
|
55
|
-
* path reads operands before dst is freed.
|
|
56
|
-
*
|
|
57
|
-
* Fixed-size outputs (caller-owned buffer) skip the guard — their
|
|
58
|
-
* buffer is never freed, so the inline path is always safe.
|
|
59
|
-
*
|
|
60
|
-
* `emitKernel` emits the tensor kernel writing `targetLen` doubles
|
|
61
|
-
* into `targetData`. It is invoked exactly once; both paths share
|
|
62
|
-
* a single emission. Sub-expressions may use scratch buffers freely. */
|
|
63
|
-
export declare function emitElemwiseTensorAssign(lines: string[], indent: string, destName: string, lenExpr: string, shapeSrc: JitExpr | undefined, ctx: EmitCtx, emitKernel: (lines: string[], indent: string, targetData: string, targetLen: string) => void): void;
|
|
64
|
-
/** Complex-tensor sibling of `emitElemwiseTensorAssign`. Same runtime
|
|
65
|
-
* size-match scheme, but manages paired (re, im) buffers. The kernel
|
|
66
|
-
* callback receives both `targetData` and `targetDataIm` so it can
|
|
67
|
-
* write the paired output in one shot.
|
|
68
|
-
*
|
|
69
|
-
* When the kernel's operand aliases the target (e.g. `z = -z`, `z =
|
|
70
|
-
* conj(z)`), the callback MUST be element-wise safe — kernels like
|
|
71
|
-
* `numbl_complex_scalar_binary_elemwise` already are, but a memcpy
|
|
72
|
-
* from operand→target against a same-buffer alias is UB. Use an
|
|
73
|
-
* element-wise loop there. */
|
|
74
|
-
export declare function emitComplexElemwiseTensorAssign(lines: string[], indent: string, destName: string, lenExpr: string, shapeSrc: JitExpr | undefined, ctx: EmitCtx, emitKernel: (lines: string[], indent: string, targetData: string, targetDataIm: string, targetLen: string) => void): void;
|
|
75
|
-
/** Complex-tensor version of emitEnsureTensorBuf. Reallocates both re
|
|
76
|
-
* and im buffers in lockstep, so kernel writes into the paired
|
|
77
|
-
* destination don't mismatch in size. Fixed outputs keep their
|
|
78
|
-
* caller-aliased re buffer and (for complex) the caller-aliased im
|
|
79
|
-
* buffer — we only record the length. */
|
|
80
|
-
export declare function emitEnsureComplexTensorBuf(lines: string[], indent: string, destName: string, lenExpr: string, ctx: EmitCtx, shapeSrc?: JitExpr): void;
|
|
81
|
-
/** Emit a tensor expression as statements, returning the data/len vars. */
|
|
82
|
-
export declare function emitTensorExprToStmts(lines: string[], indent: string, expr: JitExpr, ctx: EmitCtx): {
|
|
83
|
-
data: string;
|
|
84
|
-
len: string;
|
|
85
|
-
};
|
|
86
|
-
/** For tensor binary/unary, we need multi-statement emission. This helper
|
|
87
|
-
* emits the operation as statements into `lines` and returns the result
|
|
88
|
-
* data pointer variable name. Callers that route the result into a
|
|
89
|
-
* named dst (vs. a scratch) must use the scratch-transfer pattern —
|
|
90
|
-
* let this helper write the full expression into a fresh scratch via
|
|
91
|
-
* `emitTensorExprToStmts`, then `emitEnsureTensorBuf` + memcpy into
|
|
92
|
-
* the dst. That ordering avoids clobbering an operand that aliases
|
|
93
|
-
* the dst (e.g. `r = r .* y + 3.0`). */
|
|
94
|
-
export declare function emitTensorBinaryStmts(lines: string[], indent: string, expr: JitExpr & {
|
|
95
|
-
tag: "Binary";
|
|
96
|
-
}, ctx: EmitCtx, destDataVar: string, destLenVar: string): void;
|
|
97
|
-
/** Emit a tensor expression in complex (paired-buffer) form, producing
|
|
98
|
-
* scratch locals for sub-expressions. Handles:
|
|
99
|
-
* - Var on a complex tensor: returns the name's `_data`/`_data_im`/`_len`.
|
|
100
|
-
* - Var on a real tensor: widens via `dataIm = NULL`.
|
|
101
|
-
* - Binary on a complex-typed tensor expr (the result is complex).
|
|
102
|
-
* - Unary Plus/Minus on a complex-typed tensor expr.
|
|
103
|
-
* - conj / real / imag on a complex tensor — real/imag are actually
|
|
104
|
-
* handled via the real tensor path (result is real), but conj stays
|
|
105
|
-
* complex and is lowered here.
|
|
106
|
-
*/
|
|
107
|
-
export declare function emitComplexTensorExprToStmts(lines: string[], indent: string, expr: JitExpr, ctx: EmitCtx): ComplexTensorResult;
|
|
108
|
-
/** Emit a complex tensor binary op into caller-provided dest buffers
|
|
109
|
-
* (both re and im). Handles all combos of real-tensor/complex-tensor/
|
|
110
|
-
* real-scalar/complex-scalar operands. Operand widening is kernel-side
|
|
111
|
-
* (NULL imag pointer or imag=0.0). */
|
|
112
|
-
export declare function emitComplexTensorBinaryStmts(lines: string[], indent: string, expr: JitExpr & {
|
|
113
|
-
tag: "Binary";
|
|
114
|
-
}, ctx: EmitCtx, destDataVar: string, destDataImVar: string, destLenVar: string): void;
|
|
115
|
-
/** Emit `dstData = src(a:b) copy` pattern into caller-provided dst data /
|
|
116
|
-
* len vars. */
|
|
117
|
-
export declare function emitRangeSliceReadToBuf(lines: string[], indent: string, expr: JitExpr & {
|
|
118
|
-
tag: "RangeSliceRead";
|
|
119
|
-
}, ctx: EmitCtx, destData: string, destLen: string): void;
|
|
120
|
-
/** Emit a `src(a:b)` RangeSliceRead into a fresh scratch buffer,
|
|
121
|
-
* returning the {data, len} pair for the result. */
|
|
122
|
-
export declare function emitRangeSliceReadToStmts(lines: string[], indent: string, expr: JitExpr & {
|
|
123
|
-
tag: "RangeSliceRead";
|
|
124
|
-
}, ctx: EmitCtx): {
|
|
125
|
-
data: string;
|
|
126
|
-
len: string;
|
|
127
|
-
};
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* UserCall emission — scalar returns in value position, and tensor
|
|
3
|
-
* returns from an Assign RHS.
|
|
4
|
-
*
|
|
5
|
-
* A UserCall lowers to a static C function (`jit_<jitName>`) generated
|
|
6
|
-
* alongside the outer function by [../assemble.ts](../assemble.ts).
|
|
7
|
-
* This module marshals arg slots per the callee's ABI and emits the
|
|
8
|
-
* call itself.
|
|
9
|
-
*
|
|
10
|
-
* Exports:
|
|
11
|
-
* - `emitUserCall` — scalar-return UserCall in value position (Assign
|
|
12
|
-
* RHS / nested expr). Stashes the result into a fresh `__ucN_out`
|
|
13
|
-
* local and returns the name as the expression text.
|
|
14
|
-
* - `emitUserCallArgSlots` — helper used by `emitUserCall` *and* the
|
|
15
|
-
* tensor-return variant in [./assign.ts](./assign.ts) to convert
|
|
16
|
-
* one arg into its ABI-slot expression list.
|
|
17
|
-
* - `emitUserCallTensorAssign` — tensor-return UserCall, only allowed
|
|
18
|
-
* as the top RHS of an Assign. Uses the dynamic-output ABI: the
|
|
19
|
-
* callee mallocs + transfers ownership via `double **` out-params.
|
|
20
|
-
*/
|
|
21
|
-
import type { JitExpr } from "../../jitTypes.js";
|
|
22
|
-
import type { TensorMeta } from "../classify.js";
|
|
23
|
-
import { type EmitCtx } from "../context.js";
|
|
24
|
-
/** Emit the C expressions for one arg's ABI slots, consulting the
|
|
25
|
-
* callee's paramDesc so the slot order matches the callee's signature.
|
|
26
|
-
* Scalars contribute one slot; tensors contribute data + len + optional
|
|
27
|
-
* d0 / d1. For the shape slots the caller falls back to
|
|
28
|
-
* `(int64_t)tensorLen(arg)` / `1` when its own arg-var wasn't classified
|
|
29
|
-
* with matching shape plumbing. */
|
|
30
|
-
export declare function emitUserCallArgSlots(a: JitExpr, paramDesc: {
|
|
31
|
-
kind: "scalar" | "complexScalar" | "tensor";
|
|
32
|
-
slots: {
|
|
33
|
-
kind: string;
|
|
34
|
-
}[];
|
|
35
|
-
}, ctx: EmitCtx): string[];
|
|
36
|
-
/** Scalar-return UserCall. Tensor args are marshaled via the callee's
|
|
37
|
-
* paramDescs (data + len + optional d0/d1 slots). The callee is emitted
|
|
38
|
-
* as `static void jit_<jitName>(...)` in the same .c file by
|
|
39
|
-
* `generateC`, with a trailing `__err_flag` pointer. We stash the
|
|
40
|
-
* return value in a fresh local and return its name as the expression
|
|
41
|
-
* text. Must be invoked from statement context so the decl + call can
|
|
42
|
-
* be inserted before the surrounding expression.
|
|
43
|
-
*
|
|
44
|
-
* Tensor-return UserCall is handled upstream by emitTensorAssign (only
|
|
45
|
-
* allowed as an Assign RHS), not here. */
|
|
46
|
-
export declare function emitUserCall(expr: JitExpr & {
|
|
47
|
-
tag: "UserCall";
|
|
48
|
-
}, ctx: EmitCtx): string;
|
|
49
|
-
/** Emit `dest = foo(...)` where foo returns a tensor via the dynamic-
|
|
50
|
-
* output ABI. Feasibility has already verified the callee's output[0]
|
|
51
|
-
* is a fresh-alloc dynamic output, so the callee fills
|
|
52
|
-
* `buf_out / out_len / d0_out / d1_out` and transfers ownership. The
|
|
53
|
-
* caller frees the old dest buffer (if any), takes the new buffer, and
|
|
54
|
-
* lets the epilogue free() it at end-of-scope alongside the other
|
|
55
|
-
* local tensors. */
|
|
56
|
-
export declare function emitUserCallTensorAssign(lines: string[], indent: string, destName: string, destMeta: TensorMeta, expr: JitExpr & {
|
|
57
|
-
tag: "UserCall";
|
|
58
|
-
}, ctx: EmitCtx): void;
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Function epilogue emission.
|
|
3
|
-
*
|
|
4
|
-
* The epilogue is the set of statements appended after the body of a
|
|
5
|
-
* generated function: output-slot writes (scalar out-pointers, tensor
|
|
6
|
-
* dynamic-output transfers), scratch buffer frees, and local / unshared
|
|
7
|
-
* tensor frees. Called once per function by `generateC` in
|
|
8
|
-
* [assemble.ts](./assemble.ts).
|
|
9
|
-
*
|
|
10
|
-
* The prelude is in [prelude.ts](./prelude.ts); both read from the same
|
|
11
|
-
* shared state (`ClassificationResult` + `EmitCtx`).
|
|
12
|
-
*/
|
|
13
|
-
import type { ClassificationResult } from "./classify.js";
|
|
14
|
-
import type { COutputDesc } from "./abi.js";
|
|
15
|
-
import { type EmitCtx } from "./context.js";
|
|
16
|
-
export interface EpilogueInput {
|
|
17
|
-
cls: ClassificationResult;
|
|
18
|
-
ctx: EmitCtx;
|
|
19
|
-
outputDescs: COutputDesc[];
|
|
20
|
-
/** Pure-input tensor params that were malloc'd in the prelude's
|
|
21
|
-
* unshare path — must be freed here. */
|
|
22
|
-
unshareTensorParams: Set<string>;
|
|
23
|
-
/** Indent string to prepend to each emitted line. */
|
|
24
|
-
indent: string;
|
|
25
|
-
}
|
|
26
|
-
export declare function buildEpilogue(input: EpilogueInput): string[];
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Feasibility prepass for the C-JIT path.
|
|
3
|
-
*
|
|
4
|
-
* Given the lowered JIT IR for a function and the argument types, decide
|
|
5
|
-
* whether the C codegen can handle it. On any construct that isn't in
|
|
6
|
-
* the whitelist, return `{ok: false, reason}` so the caller falls
|
|
7
|
-
* through to the JS-JIT path.
|
|
8
|
-
*
|
|
9
|
-
* The whitelist intentionally mirrors what [assemble.ts](./assemble.ts)
|
|
10
|
-
* can emit, which in turn mirrors what JS-JIT does. Widen all three
|
|
11
|
-
* together.
|
|
12
|
-
*
|
|
13
|
-
* Covered today: scalar arithmetic and control flow (numbers, booleans,
|
|
14
|
-
* complex-scalar pairs); real tensor params / locals / outputs with
|
|
15
|
-
* Index / AssignIndex (1-3D), RangeSliceRead, AssignIndexRange,
|
|
16
|
-
* AssignIndexCol, TensorLiteral, VConcatGrow, zeros/ones (1-2D); real
|
|
17
|
-
* tensor binary + unary + reductions via the jitCapabilities opcodes;
|
|
18
|
-
* complex tensor binary + unary (minus, conj, real, imag, abs) and
|
|
19
|
-
* flat reductions (sum, prod, any, all); UserCall (scalar args/return
|
|
20
|
-
* and real / complex tensor args/return when the callee passes its own
|
|
21
|
-
* feasibility check).
|
|
22
|
-
*/
|
|
23
|
-
import type { JitStmt, JitType } from "../jitTypes.js";
|
|
24
|
-
import type { GeneratedFn } from "../jitLower.js";
|
|
25
|
-
export type FeasibilityResult = {
|
|
26
|
-
ok: true;
|
|
27
|
-
} | {
|
|
28
|
-
ok: false;
|
|
29
|
-
reason: string;
|
|
30
|
-
line?: number;
|
|
31
|
-
};
|
|
32
|
-
/**
|
|
33
|
-
* Check if the lowered function can be handled by the C-JIT.
|
|
34
|
-
*
|
|
35
|
-
* `outputTypes` holds the types of every output variable in order;
|
|
36
|
-
* `outputType` is kept for backwards-compatibility and equals
|
|
37
|
-
* `outputTypes[0]` when present.
|
|
38
|
-
*
|
|
39
|
-
* Multi-output mirrors the JS-JIT's `return [out0, out1, ...]` shape:
|
|
40
|
-
* the generated C builds a `napi_value` array of length `nargout`,
|
|
41
|
-
* each entry boxed according to its type (scalar doubles, booleans, or
|
|
42
|
-
* tensors).
|
|
43
|
-
*/
|
|
44
|
-
export declare function checkCFeasibility(body: JitStmt[], paramNames: string[], argTypes: JitType[], outputType: JitType | null, outputTypes: JitType[], nargout: number, generatedIRBodies?: Map<string, GeneratedFn>): FeasibilityResult;
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Function prelude emission.
|
|
3
|
-
*
|
|
4
|
-
* The prelude is the set of C declarations at the top of a generated
|
|
5
|
-
* function, written before any statement from the body: shadowed param
|
|
6
|
-
* locals (for param-output seeding + unshare-at-entry), local tensor
|
|
7
|
-
* declarations, complex-scalar imag companions, and scratch buffer
|
|
8
|
-
* slots.
|
|
9
|
-
*
|
|
10
|
-
* Exported as `buildPrelude` — called once per function by `generateC`
|
|
11
|
-
* in [assemble.ts](./assemble.ts).
|
|
12
|
-
*
|
|
13
|
-
* The epilogue (tensor frees, out-pointer writes) is in
|
|
14
|
-
* [epilogue.ts](./epilogue.ts); both read from the same shared state
|
|
15
|
-
* (`ClassificationResult` + `EmitCtx`) populated upstream.
|
|
16
|
-
*/
|
|
17
|
-
import type { JitType } from "../jitTypes.js";
|
|
18
|
-
import type { ClassificationResult } from "./classify.js";
|
|
19
|
-
import { type EmitCtx } from "./context.js";
|
|
20
|
-
export interface PreludeInput {
|
|
21
|
-
cls: ClassificationResult;
|
|
22
|
-
ctx: EmitCtx;
|
|
23
|
-
params: string[];
|
|
24
|
-
argTypes: JitType[];
|
|
25
|
-
/** Names with `kind === "paramOutput"` (output name reuses a param name). */
|
|
26
|
-
paramOutputTensors: Set<string>;
|
|
27
|
-
/** Pure-input tensor params that need an unshare-at-entry malloc+memcpy
|
|
28
|
-
* (the body writes to them, and we must not mutate the caller's buffer). */
|
|
29
|
-
unshareTensorParams: Set<string>;
|
|
30
|
-
/** Locals to declare — outer-scope `localVars` minus params, sorted. */
|
|
31
|
-
allLocals: string[];
|
|
32
|
-
/** Names carrying `complex_or_number` scalar values (paired re+im locals). */
|
|
33
|
-
complexScalarVars: Set<string>;
|
|
34
|
-
/** Indent string to prepend to each emitted line. */
|
|
35
|
-
indent: string;
|
|
36
|
-
}
|
|
37
|
-
export declare function buildPrelude(input: PreludeInput): string[];
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Lightweight IR traversal helpers shared across the C-JIT subsystem.
|
|
3
|
-
*
|
|
4
|
-
* Several places in `jit/c/` need to walk a lowered IR body observing
|
|
5
|
-
* (but not transforming) expressions and statements: the feasibility
|
|
6
|
-
* fall-through paths, tensor-classification, hybrid-loop live-in/out
|
|
7
|
-
* analysis, and the shape-propagation / callee-discovery / complex-
|
|
8
|
-
* scalar scans in [assemble.ts](./assemble.ts). Each used to
|
|
9
|
-
* reimplement the same switch-on-tag descent.
|
|
10
|
-
*
|
|
11
|
-
* This module centralizes the descent. Three primitives, composable:
|
|
12
|
-
*
|
|
13
|
-
* - `walkExprNodes(expr, visit)` — post-order walk of every sub-node
|
|
14
|
-
* of `expr` (including `expr` itself). Every leaf calls `visit`
|
|
15
|
-
* once; nothing is skipped. Adding a new JitExpr tag means editing
|
|
16
|
-
* this one function.
|
|
17
|
-
*
|
|
18
|
-
* - `walkStmts(body, visit)` — pre-order walk of every statement in
|
|
19
|
-
* `body`, recursing into If/For/While nested bodies. Does NOT
|
|
20
|
-
* traverse expressions inside the stmt — callers that need that
|
|
21
|
-
* compose with `walkStmtExprs` + `walkExprNodes`.
|
|
22
|
-
*
|
|
23
|
-
* - `walkStmtExprs(stmt, visit)` — call `visit` on each top-level
|
|
24
|
-
* expression attached to `stmt` (the `expr` in an Assign, the
|
|
25
|
-
* `cond` in an If, the `start`/`end`/`step` in a For, etc.). Does
|
|
26
|
-
* NOT recurse into nested expression sub-nodes (use `walkExprNodes`
|
|
27
|
-
* for that) and does NOT walk into nested stmt bodies.
|
|
28
|
-
*
|
|
29
|
-
* The dispatchers in `feasibility.ts`, `emit/stmt.ts`, and
|
|
30
|
-
* `emit/fused.ts` keep their native switches — they produce structured
|
|
31
|
-
* results (feasibility verdicts, emitted C lines), so a callback-based
|
|
32
|
-
* observer doesn't fit their shape.
|
|
33
|
-
*/
|
|
34
|
-
import type { JitExpr, JitStmt } from "../jitTypes.js";
|
|
35
|
-
/**
|
|
36
|
-
* Walk every sub-node of `expr` in post-order (children first, then
|
|
37
|
-
* `expr` itself), calling `visit` on each. Leaves (NumberLiteral,
|
|
38
|
-
* ImagLiteral, Var, StringLiteral, MemberRead) are still visited once.
|
|
39
|
-
*
|
|
40
|
-
* Adding a new JitExpr tag: add a case here. Observer callers (which
|
|
41
|
-
* is all of them) don't need to know about tag-specific sub-node
|
|
42
|
-
* fields — this is the one place those are encoded.
|
|
43
|
-
*/
|
|
44
|
-
export declare function walkExprNodes(expr: JitExpr, visit: (e: JitExpr) => void): void;
|
|
45
|
-
/**
|
|
46
|
-
* Walk every statement in `body`, recursing into nested If / For /
|
|
47
|
-
* While bodies. Pre-order: `visit` is called on each stmt before
|
|
48
|
-
* descending. Does NOT traverse expressions inside the stmt.
|
|
49
|
-
*/
|
|
50
|
-
export declare function walkStmts(body: JitStmt[], visit: (s: JitStmt) => void): void;
|
|
51
|
-
/**
|
|
52
|
-
* Call `visit` on every top-level expression attached to `stmt` — the
|
|
53
|
-
* RHS of an Assign, the indices + value of an AssignIndex, the start /
|
|
54
|
-
* end / step of a For, the cond of an If / While, and so on. Does
|
|
55
|
-
* NOT recurse into expression sub-nodes (compose with `walkExprNodes`)
|
|
56
|
-
* and does NOT descend into nested stmt bodies (compose with
|
|
57
|
-
* `walkStmts`).
|
|
58
|
-
*
|
|
59
|
-
* For If, the `cond` of the primary branch AND each elseif is visited;
|
|
60
|
-
* the bodies themselves are stmt-trees, not exprs, and are reached via
|
|
61
|
-
* `walkStmts` recursion.
|
|
62
|
-
*/
|
|
63
|
-
export declare function walkStmtExprs(stmt: JitStmt, visit: (e: JitExpr) => void): void;
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e1 (experimental) — complex-tensor standalone C-kernel emission.
|
|
3
|
-
*
|
|
4
|
-
* Sister module to `kernelEmit.ts` (which handles real-tensor chains).
|
|
5
|
-
* Given a FusibleChain that produces at least one complex tensor, emit
|
|
6
|
-
* a paired-buffer C kernel of the form
|
|
7
|
-
*
|
|
8
|
-
* void k_<hash>(int64_t n,
|
|
9
|
-
* const double *in_<a>_re, const double *in_<a>_im, // complex tensor
|
|
10
|
-
* const double *in_<b>, // real tensor (widened)
|
|
11
|
-
* double s_<c>_re, double s_<c>_im, // complex scalar
|
|
12
|
-
* double s_<d>, // real scalar
|
|
13
|
-
* double *out_<y>_re, double *out_<y>_im) // complex output
|
|
14
|
-
* {
|
|
15
|
-
* #pragma omp simd
|
|
16
|
-
* for (int64_t i = 0; i < n; i++) {
|
|
17
|
-
* double __f_y_re = ...;
|
|
18
|
-
* double __f_y_im = ...;
|
|
19
|
-
* out_<y>_re[i] = __f_y_re;
|
|
20
|
-
* out_<y>_im[i] = __f_y_im;
|
|
21
|
-
* }
|
|
22
|
-
* }
|
|
23
|
-
*
|
|
24
|
-
* Supports the same fusion envelope as emitComplexPerElem in
|
|
25
|
-
* `c/emit/fused.ts`:
|
|
26
|
-
* - Binary: + - * .*
|
|
27
|
-
* - Unary: + -
|
|
28
|
-
* - Call: conj, real, imag
|
|
29
|
-
* - Operand widening: real tensor / real scalar read with im = 0
|
|
30
|
-
* - ImagLiteral: (0.0, 1.0) pair
|
|
31
|
-
*
|
|
32
|
-
* Complex chains do NOT carry a trailing reduction — `fusion.ts` drops
|
|
33
|
-
* the absorption for complex chains because the inline scalar
|
|
34
|
-
* accumulator can't hold a complex value. Kernels emitted here have no
|
|
35
|
-
* reduction output.
|
|
36
|
-
*/
|
|
37
|
-
import type { FusibleChain } from "../fusion.js";
|
|
38
|
-
import type { KernelEmitResult } from "./kernelEmit.js";
|
|
39
|
-
/**
|
|
40
|
-
* Emit a complex-tensor fused chain as a standalone C kernel.
|
|
41
|
-
*
|
|
42
|
-
* Returns null when the chain contains an expression the per-element
|
|
43
|
-
* walker doesn't support (abs, complex divide, transcendental on
|
|
44
|
-
* complex, etc.) — the caller falls back to the JS-JIT per-op path.
|
|
45
|
-
*/
|
|
46
|
-
export declare function emitComplexChainKernel(chain: FusibleChain, allTensorVars: ReadonlySet<string>, complexTensorNames: ReadonlySet<string>, complexScalarVars: ReadonlySet<string>, outputTensorNames: ReadonlySet<string>): KernelEmitResult | null;
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared content-hash helper for the e1 codegen.
|
|
3
|
-
*
|
|
4
|
-
* 64-bit FNV-1a over UTF-8 code units, returned as 16 hex chars.
|
|
5
|
-
* Deterministic, fully self-contained, and browser-safe (no Node
|
|
6
|
-
* `crypto` dependency). Cryptographic strength isn't needed — the
|
|
7
|
-
* hash is a content-addressed suffix for kernel names and
|
|
8
|
-
* `$h.$kernels[...]` cache keys.
|
|
9
|
-
*/
|
|
10
|
-
export declare function fnv1a64Hex(s: string): string;
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Node-only install shim for the e1 (experimental) kernel pipeline.
|
|
3
|
-
*
|
|
4
|
-
* Side-effect import from `cli.ts`. Replaces the `compileKernel` stub on
|
|
5
|
-
* the module-level `jitHelpers` object with a real implementation that
|
|
6
|
-
* shells out to `cc` via `compile.ts` and loads the result through koffi.
|
|
7
|
-
*
|
|
8
|
-
* Registration is idempotent — re-importing this module in tests won't
|
|
9
|
-
* re-install. The kernel cache on `jitHelpers.$kernels` is shared across
|
|
10
|
-
* all specializations in the process so the same fused chain used from
|
|
11
|
-
* two different JIT'd functions compiles only once.
|
|
12
|
-
*/
|
|
13
|
-
export {};
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e1 (experimental) — standalone C-kernel emission for fusible tensor
|
|
3
|
-
* chains used by the JS-JIT.
|
|
4
|
-
*
|
|
5
|
-
* Given a `FusibleChain` the normal JS fused codegen would emit, this
|
|
6
|
-
* module produces an equivalent standalone C function of the form
|
|
7
|
-
*
|
|
8
|
-
* void k_<hash>(int64_t n,
|
|
9
|
-
* const double *in_<x>, ...,
|
|
10
|
-
* double s_<scalar>, ...,
|
|
11
|
-
* double *out_<y>, ...)
|
|
12
|
-
* {
|
|
13
|
-
* #pragma omp simd
|
|
14
|
-
* for (int64_t i = 0; i < n; i++) {
|
|
15
|
-
* double f_tmp1 = <expr>;
|
|
16
|
-
* ...
|
|
17
|
-
* out_<y>[i] = <final>;
|
|
18
|
-
* }
|
|
19
|
-
* }
|
|
20
|
-
*
|
|
21
|
-
* It returns the full C source, a koffi signature string, a content-
|
|
22
|
-
* addressed hash, and the ordered list of JS expressions the generated
|
|
23
|
-
* code should pass as arguments — everything the JS codegen needs to
|
|
24
|
-
* emit a `$h.compileKernel(source, sig); kernel(n, x_data, y_data)`
|
|
25
|
-
* dispatch.
|
|
26
|
-
*
|
|
27
|
-
* The prototype deliberately handles only the common real-tensor chain
|
|
28
|
-
* shape: no reductions, no complex tensors, no dynamic-shape outputs.
|
|
29
|
-
* Any chain that falls outside that envelope causes `emitChainKernel`
|
|
30
|
-
* to return `null`, which signals the caller to fall back to the plain
|
|
31
|
-
* inline JS fused loop.
|
|
32
|
-
*/
|
|
33
|
-
import type { FusibleChain } from "../fusion.js";
|
|
34
|
-
/**
|
|
35
|
-
* A fused chain compiled to a standalone C kernel. The caller (the JS
|
|
36
|
-
* codegen) combines this with a runtime size threshold to emit
|
|
37
|
-
*
|
|
38
|
-
* if (n >= THRESHOLD) $h.<kernelName>(n, x_data, y_data)
|
|
39
|
-
* else <plain JS fused loop>
|
|
40
|
-
*/
|
|
41
|
-
export interface KernelEmitResult {
|
|
42
|
-
/** Hash-derived C function name, e.g. `nk_3a7f81b2`. */
|
|
43
|
-
kernelName: string;
|
|
44
|
-
/** Full C source: `#include` + function definition. */
|
|
45
|
-
cSource: string;
|
|
46
|
-
/** koffi function signature, e.g. `"void nk_3a7f81b2(int64_t, ...)"`. */
|
|
47
|
-
koffiSig: string;
|
|
48
|
-
/** Content hash over the final C source (stable id for caching). */
|
|
49
|
-
hash: string;
|
|
50
|
-
/** Ordered list of JS expressions to pass as call arguments. The
|
|
51
|
-
* caller emits something like `$h.<kernelName>(${jsCallArgs.join(",")})`. */
|
|
52
|
-
jsCallArgs: string[];
|
|
53
|
-
}
|
|
54
|
-
export declare function emitChainKernel(chain: FusibleChain, allTensorVars: ReadonlySet<string>, outputTensorNames: ReadonlySet<string>, par: boolean): KernelEmitResult | null;
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e1 — C kernel for multi-reduction scalar assigns.
|
|
3
|
-
*
|
|
4
|
-
* A MATLAB line like
|
|
5
|
-
*
|
|
6
|
-
* red_acc = red_acc + (sum(x) + mean(x) + max(x) + min(x));
|
|
7
|
-
*
|
|
8
|
-
* has four reductions over the same vector. The default JS-JIT path
|
|
9
|
-
* emits four `$h.tSum` / `$h.ib_*` helper calls, each of which scans
|
|
10
|
-
* the whole vector. This module emits a single-pass C kernel that
|
|
11
|
-
* computes every requested reduction in one loop and writes results
|
|
12
|
-
* into caller-allocated scalar slots.
|
|
13
|
-
*
|
|
14
|
-
* Specialised per op-set: a group of `{sum, max, min}` compiles to a
|
|
15
|
-
* different kernel than `{sum, mean, max, min}`. Source-addressed by
|
|
16
|
-
* FNV-1a hash so the JS `$h.$kernels[...]` cache dedupes repeated call
|
|
17
|
-
* sites.
|
|
18
|
-
*
|
|
19
|
-
* NaN handling: `-ffast-math` is on for the compile (matches the other
|
|
20
|
-
* e1 kernels), so naive `isnan` is folded to `false`. The kernel uses
|
|
21
|
-
* an inline bit-pattern NaN check to drive MATLAB's omit-NaN semantics
|
|
22
|
-
* for `max`/`min` and records an `any_non_nan` flag the JS side uses
|
|
23
|
-
* to map an all-NaN input to NaN.
|
|
24
|
-
*/
|
|
25
|
-
/** Reductions we can fuse into one pass. `any` / `all` are excluded
|
|
26
|
-
* because their short-circuit `break` would prematurely stop the
|
|
27
|
-
* other accumulators. */
|
|
28
|
-
export type MultiReduceOp = "sum" | "prod" | "max" | "min" | "mean";
|
|
29
|
-
export interface MultiReductionKernelInfo {
|
|
30
|
-
/** Hash-derived C function name, e.g. `mr_3a7f81b2...`. */
|
|
31
|
-
kernelName: string;
|
|
32
|
-
/** Full C source string. */
|
|
33
|
-
cSource: string;
|
|
34
|
-
/** koffi function signature. */
|
|
35
|
-
koffiSig: string;
|
|
36
|
-
/** Content hash. */
|
|
37
|
-
hash: string;
|
|
38
|
-
/**
|
|
39
|
-
* Output slot layout. Each reduction in the kernel writes to its own
|
|
40
|
-
* Float64 slot, in the order of this array. `any_non_nan` (a 0/1 flag
|
|
41
|
-
* stored as double) is at the end when `hasMinOrMax` is true.
|
|
42
|
-
* The JS caller allocates a `Float64Array(slotCount)` and reads slots
|
|
43
|
-
* by index after the call.
|
|
44
|
-
*/
|
|
45
|
-
slotNames: string[];
|
|
46
|
-
/** True when the kernel emits an `any_non_nan` slot at index
|
|
47
|
-
* `slotNames.length - 1`. */
|
|
48
|
-
hasAnyNonNan: boolean;
|
|
49
|
-
}
|
|
50
|
-
/**
|
|
51
|
-
* Build a multi-reduction kernel for the given op set. `ops` should be
|
|
52
|
-
* a deduplicated list of reductions to compute (e.g. ["sum", "max"]).
|
|
53
|
-
* The returned `slotNames` preserves insertion order for indexing; if
|
|
54
|
-
* the op set contains `max`/`min`, an extra `any_non_nan` slot is
|
|
55
|
-
* appended (the JS side uses it to override the sentinel max/min with
|
|
56
|
-
* NaN when every input element was NaN).
|
|
57
|
-
*
|
|
58
|
-
* When `par` is true, the per-element loop is emitted as
|
|
59
|
-
* `#pragma omp parallel for simd reduction(...)` with one reduction
|
|
60
|
-
* clause per accumulator and an `if(n >= T)` gate that falls back to
|
|
61
|
-
* serial below the threshold. Requires the caller to link with
|
|
62
|
-
* `-fopenmp`; e1's `install.ts` already does this when libgomp is
|
|
63
|
-
* available. When `par` is false, the loop is emitted as plain
|
|
64
|
-
* `#pragma omp simd` (SIMD-only, single-threaded).
|
|
65
|
-
*/
|
|
66
|
-
export declare function emitMultiReductionKernel(ops: readonly MultiReduceOp[], par?: boolean): MultiReductionKernelInfo;
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Runtime-overridable OpenMP availability flag for the e1 codegen path.
|
|
3
|
-
*
|
|
4
|
-
* `scalarFnKernel.ts` is transitively reachable from the JS-JIT module
|
|
5
|
-
* graph that Vite bundles for the web REPL, but `c/compile.ts` is
|
|
6
|
-
* Node-only (child_process, fs, ...). Importing `cJitOpenmpAvailable`
|
|
7
|
-
* directly from `compile.ts` would drag all of that into the browser
|
|
8
|
-
* bundle. Instead we default to `false` here and let Node-only
|
|
9
|
-
* `e1/install.ts` override the getter at install time — the same
|
|
10
|
-
* pattern used for the `compileKernel` stub in `jitHelpers.ts`.
|
|
11
|
-
*/
|
|
12
|
-
export declare function setOpenmpAvailableGetter(fn: () => boolean): void;
|
|
13
|
-
export declare function isOpenmpAvailable(): boolean;
|