numbl 0.3.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +16 -33
- package/dist-cli/cli.js +20535 -25629
- package/dist-lib/graphics/types.d.ts +22 -0
- package/dist-lib/lib.d.ts +1 -0
- package/dist-lib/lib.js +59075 -63895
- package/dist-lib/numbl-core/executeCode.d.ts +11 -14
- package/dist-lib/numbl-core/executors/cJit/builtins.d.ts +30 -0
- package/dist-lib/numbl-core/executors/cJit/chainCodegen.d.ts +59 -0
- package/dist-lib/numbl-core/executors/cJit/chainExecutor.d.ts +27 -0
- package/dist-lib/numbl-core/executors/cJit/chainPass.d.ts +42 -0
- package/dist-lib/numbl-core/executors/cJit/codegen.d.ts +44 -0
- package/dist-lib/numbl-core/executors/cJit/compile.d.ts +45 -0
- package/dist-lib/numbl-core/executors/cJit/elemwiseCodegen.d.ts +23 -0
- package/dist-lib/numbl-core/executors/cJit/elemwiseStructural.d.ts +33 -0
- package/dist-lib/numbl-core/executors/cJit/fuseAnalyze.d.ts +39 -0
- package/dist-lib/numbl-core/executors/cJit/fuseCodegen.d.ts +16 -0
- package/dist-lib/numbl-core/executors/cJit/fuseExecutor.d.ts +28 -0
- package/dist-lib/numbl-core/executors/cJit/loopExecutor.d.ts +32 -0
- package/dist-lib/numbl-core/executors/cJit/register.d.ts +10 -0
- package/dist-lib/numbl-core/executors/cJit/whitelist.d.ts +15 -0
- package/dist-lib/numbl-core/executors/cache.d.ts +26 -0
- package/dist-lib/numbl-core/executors/context.d.ts +76 -0
- package/dist-lib/numbl-core/executors/index.d.ts +17 -0
- package/dist-lib/numbl-core/executors/jsJit/callExecutor.d.ts +25 -0
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jitCodegen.d.ts +2 -2
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jitCodegenHoist.d.ts +1 -1
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/codegen}/jsMultiReduction.d.ts +10 -13
- package/dist-lib/numbl-core/executors/jsJit/helpers/alloc.d.ts +12 -0
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpers.d.ts +2 -2
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpersComplex.d.ts +1 -1
- package/dist-lib/numbl-core/executors/jsJit/helpers/jitHelpersIndex.d.ts +33 -0
- package/dist-lib/numbl-core/{jit/js → executors/jsJit/helpers}/jitHelpersTensor.d.ts +7 -7
- package/dist-lib/numbl-core/executors/jsJit/jitCall.d.ts +59 -0
- package/dist-lib/numbl-core/executors/jsJit/jitLoop.d.ts +53 -0
- package/dist-lib/numbl-core/executors/jsJit/jitTopLevel.d.ts +44 -0
- package/dist-lib/numbl-core/executors/jsJit/loopExecutor.d.ts +15 -0
- package/dist-lib/numbl-core/{jit/jitLoopAnalysis.d.ts → executors/jsJit/lower/blockAnalysis.d.ts} +5 -5
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitBailSafety.d.ts +1 -1
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLower.d.ts +18 -4
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerExpr.d.ts +11 -2
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerStmt.d.ts +2 -2
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/jitLowerTypes.d.ts +2 -2
- package/dist-lib/numbl-core/{jit → executors/jsJit/lower}/scalarEmit.d.ts +2 -2
- package/dist-lib/numbl-core/executors/jsJit/shared.d.ts +120 -0
- package/dist-lib/numbl-core/executors/jsJit/topLevelExecutor.d.ts +17 -0
- package/dist-lib/numbl-core/executors/lowering.d.ts +166 -0
- package/dist-lib/numbl-core/executors/plugins.d.ts +39 -0
- package/dist-lib/numbl-core/executors/registry.d.ts +148 -0
- package/dist-lib/numbl-core/executors/types.d.ts +103 -0
- package/dist-lib/numbl-core/functionResolve.d.ts +7 -0
- package/dist-lib/numbl-core/helpers/check-helpers.d.ts +4 -5
- package/dist-lib/numbl-core/helpers/linsolve.d.ts +2 -3
- package/dist-lib/numbl-core/helpers/prng.d.ts +1 -2
- package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +2 -1
- package/dist-lib/numbl-core/interpreter/builtins/misc.d.ts +4 -1
- package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +4 -91
- package/dist-lib/numbl-core/interpreter/interpreter.d.ts +33 -47
- package/dist-lib/numbl-core/interpreter/interpreterSpecialBuiltins.d.ts +6 -3
- package/dist-lib/numbl-core/interpreter/types.d.ts +27 -12
- package/dist-lib/numbl-core/{jit/jitTypes.d.ts → jitTypes.d.ts} +15 -1
- package/dist-lib/numbl-core/jsUserFunctions.d.ts +8 -0
- package/dist-lib/numbl-core/lowering/loweringContext.d.ts +24 -0
- package/dist-lib/numbl-core/native/lapack-bridge.d.ts +3 -3
- package/dist-lib/numbl-core/parser/types.d.ts +20 -0
- package/dist-lib/numbl-core/runtime/constructors.d.ts +6 -6
- package/dist-lib/numbl-core/runtime/cow.d.ts +33 -0
- package/dist-lib/numbl-core/runtime/index.d.ts +3 -2
- package/dist-lib/numbl-core/runtime/indexing.d.ts +6 -1
- package/dist-lib/numbl-core/runtime/plotBuiltinDispatch.d.ts +86 -0
- package/dist-lib/numbl-core/runtime/plotUtils.d.ts +17 -2
- package/dist-lib/numbl-core/runtime/refcount.d.ts +85 -0
- package/dist-lib/numbl-core/runtime/runtime.d.ts +27 -66
- package/dist-lib/numbl-core/runtime/runtimeDispatch.d.ts +2 -2
- package/dist-lib/numbl-core/runtime/runtimeIndexing.d.ts +2 -2
- package/dist-lib/numbl-core/runtime/runtimeMemberAccess.d.ts +1 -1
- package/dist-lib/numbl-core/runtime/runtimePlot.d.ts +1 -0
- package/dist-lib/numbl-core/runtime/struct-access.d.ts +2 -1
- package/dist-lib/numbl-core/runtime/types.d.ts +104 -62
- package/dist-lib/numbl-core/runtime/utils.d.ts +2 -8
- package/dist-lib/numbl-core/version.d.ts +1 -1
- package/dist-plot-viewer/assets/index-COAM8o1E.js +4426 -0
- package/dist-plot-viewer/index.html +1 -1
- package/native/lapack_linsolve.cpp +1 -1
- package/native/numbl_addon_common.h +2 -2
- package/native/ops/comparison.c +1 -1
- package/package.json +3 -6
- package/dist-lib/numbl-core/jit/c/abi.d.ts +0 -90
- package/dist-lib/numbl-core/jit/c/assemble.d.ts +0 -56
- package/dist-lib/numbl-core/jit/c/classify.d.ts +0 -70
- package/dist-lib/numbl-core/jit/c/compile.d.ts +0 -37
- package/dist-lib/numbl-core/jit/c/context.d.ts +0 -152
- package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +0 -20
- package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +0 -18
- package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +0 -42
- package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +0 -40
- package/dist-lib/numbl-core/jit/c/emit/index.d.ts +0 -14
- package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +0 -23
- package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +0 -25
- package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +0 -127
- package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +0 -58
- package/dist-lib/numbl-core/jit/c/epilogue.d.ts +0 -26
- package/dist-lib/numbl-core/jit/c/feasibility.d.ts +0 -44
- package/dist-lib/numbl-core/jit/c/prelude.d.ts +0 -37
- package/dist-lib/numbl-core/jit/c/visit.d.ts +0 -63
- package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +0 -46
- package/dist-lib/numbl-core/jit/e1/hash.d.ts +0 -10
- package/dist-lib/numbl-core/jit/e1/install.d.ts +0 -13
- package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +0 -54
- package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +0 -66
- package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +0 -13
- package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +0 -44
- package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +0 -34
- package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +0 -25
- package/dist-lib/numbl-core/jit/e2/cache.d.ts +0 -80
- package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +0 -55
- package/dist-lib/numbl-core/jit/e2/classify.d.ts +0 -119
- package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +0 -16
- package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +0 -79
- package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +0 -71
- package/dist-lib/numbl-core/jit/e2/install.d.ts +0 -11
- package/dist-lib/numbl-core/jit/e2/liveness.d.ts +0 -29
- package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +0 -49
- package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +0 -75
- package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +0 -24
- package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +0 -72
- package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +0 -29
- package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +0 -65
- package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +0 -69
- package/dist-lib/numbl-core/jit/fusion.d.ts +0 -71
- package/dist-lib/numbl-core/jit/fusionOps.d.ts +0 -25
- package/dist-lib/numbl-core/jit/heavyOps.d.ts +0 -15
- package/dist-lib/numbl-core/jit/index.d.ts +0 -7
- package/dist-lib/numbl-core/jit/jitLoop.d.ts +0 -25
- package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +0 -22
- package/dist-lib/numbl-core/jit/js/jitHelpersIndex.d.ts +0 -33
- package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +0 -17
- package/dist-lib/numbl-core/runtime/alloc.d.ts +0 -23
- package/dist-plot-viewer/assets/index-GiUNnMQg.js +0 -4426
- package/native/jit_runtime/jit_runtime.c +0 -261
- package/native/jit_runtime/jit_runtime.h +0 -204
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e2 whole-loop C emission.
|
|
3
|
-
*
|
|
4
|
-
* Given a classified loop body (`BodyStmt[]`) plus the parameter lists
|
|
5
|
-
* that describe how env values flow in and out, emit a single C
|
|
6
|
-
* function that runs the whole `for varName = lo:hi` loop in one call.
|
|
7
|
-
*
|
|
8
|
-
* Three BodyStmt shapes are supported:
|
|
9
|
-
*
|
|
10
|
-
* scalar_assign `s = s + sin(i)` → one C statement per iter
|
|
11
|
-
* tensor_write `y(i) = sin(i*0.01)` → `v_y[(int64_t)idx-1] = ...`
|
|
12
|
-
* tensor_local `c = a.*b + i*0.001` → no code emitted here; its
|
|
13
|
-
* per-element expression is
|
|
14
|
-
* substituted into whichever
|
|
15
|
-
* reduction consumes it
|
|
16
|
-
*
|
|
17
|
-
* Reductions: a `scalar_assign` carries a list of `sum(<tensor_local>)`
|
|
18
|
-
* rewrites that were pulled out of its RHS upstream. Each is emitted as
|
|
19
|
-
* an inline inner `for __j` loop that accumulates the tensor_local's
|
|
20
|
-
* per-element expression into a fresh local. Chained tensor_locals
|
|
21
|
-
* (`d = sqrt(c+1)` where c is itself a tensor_local) fuse through
|
|
22
|
-
* recursively, so no intermediate buffer is materialized.
|
|
23
|
-
*/
|
|
24
|
-
import type { JitExpr } from "../jitTypes.js";
|
|
25
|
-
/** Scalar math builtins we emit as direct C library calls. We bypass
|
|
26
|
-
* each IBuiltin's `jitEmitC` here because some of those reject based
|
|
27
|
-
* on type narrowing (e.g. `sqrt` requires `isNonneg` and we don't
|
|
28
|
-
* propagate sign through Binary ops) — but in a pure-real scalar loop
|
|
29
|
-
* the C semantics (NaN on negative sqrt, etc.) match what a MATLAB
|
|
30
|
-
* user gets from `sqrt` on real numeric input.
|
|
31
|
-
*
|
|
32
|
-
* Exported so the driver's pre-lowering analysis can treat these
|
|
33
|
-
* names as non-env references. */
|
|
34
|
-
export declare const LOOP_SCALAR_BUILTINS: Record<string, string>;
|
|
35
|
-
/** A fused reduction lifted out of a `scalar_assign`'s RHS.
|
|
36
|
-
* `sum(<tensorLocal>)` in the source becomes a synthetic scalar ident
|
|
37
|
-
* `synthName`; the emitter materializes it as an inline inner loop
|
|
38
|
-
* that accumulates `tensorLocal`'s per-element expression. */
|
|
39
|
-
export interface Reduction {
|
|
40
|
-
synthName: string;
|
|
41
|
-
tensorLocal: string;
|
|
42
|
-
op: "sum";
|
|
43
|
-
}
|
|
44
|
-
/** A body statement in a form ready for C emission. */
|
|
45
|
-
export type BodyStmt = {
|
|
46
|
-
kind: "scalar_assign";
|
|
47
|
-
name: string;
|
|
48
|
-
rhs: JitExpr;
|
|
49
|
-
reductions: Reduction[];
|
|
50
|
-
} | {
|
|
51
|
-
kind: "tensor_write";
|
|
52
|
-
name: string;
|
|
53
|
-
idxRhs: JitExpr;
|
|
54
|
-
rhs: JitExpr;
|
|
55
|
-
} | {
|
|
56
|
-
kind: "tensor_local";
|
|
57
|
-
name: string;
|
|
58
|
-
elemExpr: JitExpr;
|
|
59
|
-
lengthTensor: string;
|
|
60
|
-
};
|
|
61
|
-
/** Mangle a MATLAB scalar name to a C local-variable name. Prefix keeps
|
|
62
|
-
* it out of the way of our bookkeeping locals (`lo`, `hi`, `__iv`). */
|
|
63
|
-
export declare function v(name: string): string;
|
|
64
|
-
/** Name for the `int64_t` length companion that travels alongside each
|
|
65
|
-
* tensor param so inner reductions can bound their inline `__j` loop. */
|
|
66
|
-
export declare function lenN(name: string): string;
|
|
67
|
-
/** Names of all tensor_locals in the body, in body-declaration order.
|
|
68
|
-
* Callers use this to allocate matching output buffers in the same
|
|
69
|
-
* order as the kernel's param list. */
|
|
70
|
-
export declare function tensorLocalNames(body: BodyStmt[]): string[];
|
|
71
|
-
export declare function emitLoopKernel(scalarInputVars: string[], tensorInputVars: string[], tensorInoutVars: string[], inoutVars: string[], loopVar: string, body: BodyStmt[]): {
|
|
72
|
-
cSource: string;
|
|
73
|
-
kernelName: string;
|
|
74
|
-
koffiSig: string;
|
|
75
|
-
};
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e2 — multi-reduction driver.
|
|
3
|
-
*
|
|
4
|
-
* Handles a scalar `Assign` whose RHS contains TWO or more reduction
|
|
5
|
-
* calls (`sum`, `prod`, `max`, `min`, `mean`) over the same single
|
|
6
|
-
* tensor variable, e.g.
|
|
7
|
-
*
|
|
8
|
-
* red_acc = red_acc + sum(x) + mean(x) + max(x) + min(x);
|
|
9
|
-
*
|
|
10
|
-
* The default interpreter path makes one pass through the tensor per
|
|
11
|
-
* reduction (4× the memory traffic of the optimal). The e2 driver
|
|
12
|
-
* detects the pattern, compiles ONE kernel that computes every
|
|
13
|
-
* requested reduction in a single pass, and substitutes the reduction
|
|
14
|
-
* subtrees in the RHS with the kernel's scalar outputs before
|
|
15
|
-
* evaluating the residual expression.
|
|
16
|
-
*
|
|
17
|
-
* Reuses [e1/multiReductionKernel.ts](../e1/multiReductionKernel.ts)
|
|
18
|
-
* for the C emission (same shape works for both backends).
|
|
19
|
-
*/
|
|
20
|
-
import type { Stmt } from "../../parser/types.js";
|
|
21
|
-
import type { Interpreter } from "../../interpreter/interpreter.js";
|
|
22
|
-
export declare function tryE2MultiReduction(interp: Interpreter, stmt: Stmt & {
|
|
23
|
-
type: "Assign";
|
|
24
|
-
}): boolean;
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e2 — reduction kernel emission.
|
|
3
|
-
*
|
|
4
|
-
* Handles two related patterns in a single emitter:
|
|
5
|
-
*
|
|
6
|
-
* (A) Standalone reduction:
|
|
7
|
-
* acc = [acc OP] reduce(elemwiseExpr)
|
|
8
|
-
* Empty chain prefix; the kernel walks the inputs once and
|
|
9
|
-
* accumulates `reduce(per-element-expr)` into a scalar buffer.
|
|
10
|
-
*
|
|
11
|
-
* (B) Chain + trailing reduction:
|
|
12
|
-
* lhs1 = ...; lhs2 = ...; ...; lhsK = ...;
|
|
13
|
-
* acc = [acc OP] reduce(lhsK)
|
|
14
|
-
* The chain runs in the same per-element loop; lhsK is purely
|
|
15
|
-
* chain-local (never materialized) — the kernel accumulates
|
|
16
|
-
* reduce(lhsK) into the scalar buffer. Other chain LHSs may
|
|
17
|
-
* still escape (extra `out_<name>` outputs).
|
|
18
|
-
*
|
|
19
|
-
* Both cases use the same kernel shape:
|
|
20
|
-
*
|
|
21
|
-
* void e2r_<hash>(int64_t n,
|
|
22
|
-
* ..in_*.., ..in_lhs_input.., ..s_*..,
|
|
23
|
-
* ..out_escape.., double *out_acc)
|
|
24
|
-
* {
|
|
25
|
-
* double acc = <init>;
|
|
26
|
-
* #pragma omp simd
|
|
27
|
-
* for (int64_t i = 0; i < n; i++) {
|
|
28
|
-
* double <chain_lhs1>, ..., <chain_lhsK>;
|
|
29
|
-
* <chain_lhs1> = <stmt0_rhs_C>;
|
|
30
|
-
* ...
|
|
31
|
-
* <chain_lhsK> = <stmtK_rhs_C>;
|
|
32
|
-
* out_<escape>[i] = <escape>;
|
|
33
|
-
* <reduce-combine>(acc, <reduce_value_expr>);
|
|
34
|
-
* }
|
|
35
|
-
* *out_acc = acc;
|
|
36
|
-
* }
|
|
37
|
-
*
|
|
38
|
-
* For "mean": JS combines `acc /= n` after reading the buffer back.
|
|
39
|
-
* For "max"/"min": uses if-update inside the loop (works under
|
|
40
|
-
* `-ffast-math` + `#pragma omp simd`).
|
|
41
|
-
*/
|
|
42
|
-
import type { JitExpr } from "../jitTypes.js";
|
|
43
|
-
import { type ChainAssignSpec, type KernelInputs } from "./emitShared.js";
|
|
44
|
-
export interface ReductionEmitSpec {
|
|
45
|
-
/** Chain prefix (length 0 for standalone-reduction). */
|
|
46
|
-
chain: ChainAssignSpec[];
|
|
47
|
-
/** Reduction op name: sum, prod, max, min, mean, any, all. */
|
|
48
|
-
reduceName: string;
|
|
49
|
-
/** Per-element value expression to feed the reduction.
|
|
50
|
-
* - For (A) standalone: the elemwise expression `reduce(...)` was
|
|
51
|
-
* given.
|
|
52
|
-
* - For (B) chain + trailing: a `Var(lastChainLhsName)` JitExpr —
|
|
53
|
-
* the emitter resolves it to the stack-local. */
|
|
54
|
-
reduceValueExpr: JitExpr;
|
|
55
|
-
inputs: KernelInputs;
|
|
56
|
-
}
|
|
57
|
-
export interface E2ReductionEmitResult {
|
|
58
|
-
kernelName: string;
|
|
59
|
-
cSource: string;
|
|
60
|
-
koffiSig: string;
|
|
61
|
-
hash: string;
|
|
62
|
-
inputTensors: string[];
|
|
63
|
-
inputLhsNames: string[];
|
|
64
|
-
inputScalars: string[];
|
|
65
|
-
escapeLhsNames: string[];
|
|
66
|
-
/** True when the kernel produces a scalar reduction output (always
|
|
67
|
-
* true for this emitter; here for symmetry with other entries). */
|
|
68
|
-
hasReductionOutput: true;
|
|
69
|
-
reduceName: string;
|
|
70
|
-
chainLength: number;
|
|
71
|
-
}
|
|
72
|
-
export declare function emitE2ReductionKernel(spec: ReductionEmitSpec, par?: boolean): E2ReductionEmitResult;
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* e2 — whole-function scalar C-kernel driver.
|
|
3
|
-
*
|
|
4
|
-
* Mirrors what e1 does for pure-scalar functions (benchmarks/scalar_bench.m's
|
|
5
|
-
* `run_bench` is the motivating case) but triggers straight from the
|
|
6
|
-
* interpreter's `callUserFunction` entry, not through the JS-JIT outer.
|
|
7
|
-
* Under `--opt e2` the JS-JIT is disabled (optimization clamped to 0),
|
|
8
|
-
* so we can't lean on `tryEmitScalarFnKernel` + the `$h.compileKernel`
|
|
9
|
-
* plumbing; instead we invoke the shared lowering + C-emit pipeline
|
|
10
|
-
* directly and call the resulting koffi function with plain scalar
|
|
11
|
-
* args and Float64Array(1) out-buffers per output.
|
|
12
|
-
*
|
|
13
|
-
* Scope:
|
|
14
|
-
* - All args are scalar `number` or `boolean` RuntimeValues.
|
|
15
|
-
* - Declared outputs (the first `nargout || 1` of them) all lower to
|
|
16
|
-
* scalar / boolean types.
|
|
17
|
-
* - The body survives `checkCFeasibility` (no tic/toc, no Index
|
|
18
|
-
* writes, no disp, etc.).
|
|
19
|
-
*
|
|
20
|
-
* Outside this envelope we return `E2_SKIP` and the caller proceeds
|
|
21
|
-
* with the interpreter path. Compilation failures are HARD errors —
|
|
22
|
-
* mirrors the e2 multi-reduction/chain drivers' policy.
|
|
23
|
-
*/
|
|
24
|
-
import type { Interpreter } from "../../interpreter/interpreter.js";
|
|
25
|
-
import type { FunctionDef } from "../../interpreter/types.js";
|
|
26
|
-
export declare const E2_SKIP: unique symbol;
|
|
27
|
-
/** Try to run `fn(args)` via a whole-function C kernel. Returns
|
|
28
|
-
* `E2_SKIP` to fall through to the interpreter. */
|
|
29
|
-
export declare function tryE2ScalarFn(interp: Interpreter, fn: FunctionDef, args: unknown[], nargout: number): unknown | typeof E2_SKIP;
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Chain-level helpers shared by the JS and C fused-codegen backends.
|
|
3
|
-
*
|
|
4
|
-
* The per-element scalar expression walker lives in `fusedScalarEmit.ts`;
|
|
5
|
-
* this module covers the surrounding logic that decides which chain dests
|
|
6
|
-
* need a write-back to their tensor buffer, and the reduction-accumulator
|
|
7
|
-
* init/combine snippets for inline reductions.
|
|
8
|
-
*
|
|
9
|
-
* Reductions are parameterized over a small `ReductionLiterals` record so
|
|
10
|
-
* each backend supplies its own spelling of `0` vs `0.0`, `===` vs `==`,
|
|
11
|
-
* `-Infinity` vs `(-1.0/0.0)`, etc. — the control structure is identical.
|
|
12
|
-
*/
|
|
13
|
-
import { BinaryOperation } from "../parser/types.js";
|
|
14
|
-
import type { FusibleChain } from "./fusion.js";
|
|
15
|
-
/**
|
|
16
|
-
* Compute the set of distinct dest names in a fused chain and which of
|
|
17
|
-
* them require a write-back into their tensor buffer.
|
|
18
|
-
*
|
|
19
|
-
* A dest normally needs write-back; the exception is the chain's last
|
|
20
|
-
* tensor if it is fully consumed by a trailing reduction (in which case
|
|
21
|
-
* the scalar reduction accumulator is the only output — materialising
|
|
22
|
-
* the tensor buffer would be wasted work). If that last-dest tensor is
|
|
23
|
-
* ALSO a named output of the enclosing function, the write-back is kept
|
|
24
|
-
* so the caller sees the updated buffer.
|
|
25
|
-
*/
|
|
26
|
-
export declare function determineWriteBack(chain: FusibleChain, outputTensorNames: ReadonlySet<string>): {
|
|
27
|
-
destNames: Set<string>;
|
|
28
|
-
writeBack: Set<string>;
|
|
29
|
-
reductionConsumes: boolean;
|
|
30
|
-
};
|
|
31
|
-
/**
|
|
32
|
-
* Target-specific literal spellings used by the reduction helpers.
|
|
33
|
-
*
|
|
34
|
-
* The structure of the reduction snippets is identical between JS and
|
|
35
|
-
* C, but the literals differ: JS uses `1`, `-Infinity`, `===`/`!==`,
|
|
36
|
-
* while C uses `1.0`, `(-1.0/0.0)`, `==`/`!=`. The caller picks a
|
|
37
|
-
* record for its target and reuses it.
|
|
38
|
-
*/
|
|
39
|
-
export interface ReductionLiterals {
|
|
40
|
-
/** Additive identity (`0` for JS, `0.0` for C). */
|
|
41
|
-
zero: string;
|
|
42
|
-
/** Multiplicative identity / truthy (`1` or `1.0`). */
|
|
43
|
-
one: string;
|
|
44
|
-
/** Positive infinity literal (`Infinity` or `(1.0/0.0)`). */
|
|
45
|
-
posInf: string;
|
|
46
|
-
/** Negative infinity literal (`-Infinity` or `(-1.0/0.0)`). */
|
|
47
|
-
negInf: string;
|
|
48
|
-
/** Strict-equality operator (`===` for JS, `==` for C). */
|
|
49
|
-
eq: string;
|
|
50
|
-
/** Strict-inequality operator (`!==` for JS, `!=` for C). */
|
|
51
|
-
neq: string;
|
|
52
|
-
}
|
|
53
|
-
export declare const JS_REDUCTION_LITERALS: ReductionLiterals;
|
|
54
|
-
export declare const C_REDUCTION_LITERALS: ReductionLiterals;
|
|
55
|
-
/** Initial value expression for a reduction accumulator. */
|
|
56
|
-
export declare function reductionInit(reduceName: string, lits: ReductionLiterals): string;
|
|
57
|
-
/** Statement that folds a per-element `valueExpr` into the accumulator. */
|
|
58
|
-
export declare function reductionCombine(reduceName: string, accVar: string, valueExpr: string, lits: ReductionLiterals): string;
|
|
59
|
-
/**
|
|
60
|
-
* Statement that folds a per-chain `val` into an enclosing accumulator
|
|
61
|
-
* `dest` via the outer-loop op (e.g. `ir_acc = ir_acc + sum(...)`).
|
|
62
|
-
*
|
|
63
|
-
* Target-neutral: `+=` / `-=` / `*=` have identical syntax in JS and C.
|
|
64
|
-
*/
|
|
65
|
-
export declare function accumulateOp(op: BinaryOperation, dest: string, val: string): string;
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared per-element scalar-expression emission for fused loops.
|
|
3
|
-
*
|
|
4
|
-
* Both the JS-JIT and C-JIT fused-chain emitters walk the chain's
|
|
5
|
-
* expression trees and emit each sub-expression in "per-element"
|
|
6
|
-
* form — tensor Vars become `data[__i]` reads (or a scalar local for
|
|
7
|
-
* chain-produced intermediates), Binary/Unary/Call map to scalar
|
|
8
|
-
* operations that will run once per element of the fused loop.
|
|
9
|
-
*
|
|
10
|
-
* The walk itself is identical between the two backends; only the
|
|
11
|
-
* leaf syntax differs (JS `Math.sin` vs C `sin`, integer literal
|
|
12
|
-
* formatting, mangling prefix). A backend supplies a `FusedTarget`
|
|
13
|
-
* describing those leaves and a value-form `ScalarOpTarget` for the
|
|
14
|
-
* arithmetic/comparison/logical switches.
|
|
15
|
-
*
|
|
16
|
-
* Note: the op target used here must emit comparison / logical ops
|
|
17
|
-
* in *numeric* form (result is a double 0.0/1.0 suitable for tensor
|
|
18
|
-
* write-back). For C this coincides with the regular value target;
|
|
19
|
-
* for JS a second target instance is needed because value-form
|
|
20
|
-
* comparisons return a JS boolean.
|
|
21
|
-
*/
|
|
22
|
-
import type { JitExpr } from "./jitTypes.js";
|
|
23
|
-
import type { FusibleChain } from "./fusion.js";
|
|
24
|
-
import { type ScalarOpTarget } from "./scalarEmit.js";
|
|
25
|
-
/** Scalar local name for a chain-produced tensor intermediate. */
|
|
26
|
-
export declare function fusedLocal(name: string): string;
|
|
27
|
-
export interface FusedTarget {
|
|
28
|
-
/** Format a numeric literal (e.g. `1` for JS, `1.0` for C). */
|
|
29
|
-
formatNumber(v: number): string;
|
|
30
|
-
/** Mangle a scalar variable reference (non-tensor). */
|
|
31
|
-
mangle(name: string): string;
|
|
32
|
-
/**
|
|
33
|
-
* Emit a per-element read of tensor var `name` — i.e. the expression
|
|
34
|
-
* that yields `data[__i]` for that tensor. The backend decides how
|
|
35
|
-
* the data pointer is named and whether it's aliased locally.
|
|
36
|
-
*/
|
|
37
|
-
tensorElemRead(name: string): string;
|
|
38
|
-
/**
|
|
39
|
-
* Emit a read of tensor `name` at a runtime 1-based scalar index
|
|
40
|
-
* `idxC` — i.e. `data[(int64_t)idx - 1]`. Used by the e2 whole-loop
|
|
41
|
-
* kernel (scalar-context access; elemwise backends can leave this
|
|
42
|
-
* undefined, the emitter will throw on an Index node). Returns `null`
|
|
43
|
-
* to reject.
|
|
44
|
-
*/
|
|
45
|
-
tensorScalarIndexRead?(name: string, idxC: string): string | null;
|
|
46
|
-
/**
|
|
47
|
-
* Emit a call to a scalar math builtin. The backend decides which
|
|
48
|
-
* builtins it supports and how they map to library functions (e.g.
|
|
49
|
-
* JS `Math.sin` vs C `sin`). Return `null` to reject.
|
|
50
|
-
*
|
|
51
|
-
* `name` is the builtin name (e.g. `"sin"`, `"mod"`, `"rem"`);
|
|
52
|
-
* `args` are already-emitted per-element scalar expressions.
|
|
53
|
-
*/
|
|
54
|
-
emitBuiltinCall(name: string, args: string[]): string | null;
|
|
55
|
-
}
|
|
56
|
-
/** Shared walker: emit a JitExpr as a per-element scalar expression. */
|
|
57
|
-
export declare function emitFusedScalarExpr(expr: JitExpr, chainLocals: ReadonlySet<string>, allTensorVars: ReadonlySet<string>, opTarget: ScalarOpTarget, fusedTarget: FusedTarget): string;
|
|
58
|
-
/**
|
|
59
|
-
* Find the first tensor-param name referenced in a chain's assigns.
|
|
60
|
-
* Used by both backends to pick the length-determining tensor.
|
|
61
|
-
*/
|
|
62
|
-
export declare function findTensorParamInChain(chain: FusibleChain, paramTensors: ReadonlySet<string>, allTensorVars: ReadonlySet<string>): string | null;
|
|
63
|
-
/**
|
|
64
|
-
* Collect distinct tensor names referenced in the chain's expression
|
|
65
|
-
* trees that are NOT produced by the chain itself (i.e. read from
|
|
66
|
-
* outside: params or pre-existing locals). Both backends need this to
|
|
67
|
-
* pick a length-reference tensor when no formal param is in the chain.
|
|
68
|
-
*/
|
|
69
|
-
export declare function collectInputTensors(chain: FusibleChain, allTensorVars: ReadonlySet<string>): Set<string>;
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fusion analysis for JIT backends (shared by JS-JIT and C-JIT).
|
|
3
|
-
*
|
|
4
|
-
* Scans a statement list for runs of tensor element-wise assigns that
|
|
5
|
-
* can be collapsed into a single per-element `for` loop. Each such run
|
|
6
|
-
* is a "fusible chain."
|
|
7
|
-
*
|
|
8
|
-
* A chain breaks on:
|
|
9
|
-
* - control flow (If/For/While)
|
|
10
|
-
* - any non-Assign statement
|
|
11
|
-
* - a tensor assign whose RHS references a tensor that is NOT an input
|
|
12
|
-
* param and NOT previously assigned within the same chain
|
|
13
|
-
* - a scalar assign (left for the per-op emitter)
|
|
14
|
-
*
|
|
15
|
-
* An optional **trailing reduction** is absorbed when the statement
|
|
16
|
-
* immediately after a tensor chain is of the form
|
|
17
|
-
* `acc = acc + reduce(lastChainVar)` or
|
|
18
|
-
* `acc = reduce(lastChainVar)`
|
|
19
|
-
* where `reduce` is sum/prod/max/min/mean/any/all. Absorbing the
|
|
20
|
-
* reduction lets the fused loop emit an inline accumulator instead of
|
|
21
|
-
* materialising the intermediate buffer.
|
|
22
|
-
*/
|
|
23
|
-
import type { JitExpr, JitStmt } from "./jitTypes.js";
|
|
24
|
-
import { BinaryOperation } from "../parser/types.js";
|
|
25
|
-
/** One tensor assign inside a fusible chain. */
|
|
26
|
-
export interface FusedAssign {
|
|
27
|
-
/** Destination tensor variable name. */
|
|
28
|
-
destName: string;
|
|
29
|
-
/** RHS expression tree (all tensor ops are element-wise). */
|
|
30
|
-
expr: JitExpr;
|
|
31
|
-
}
|
|
32
|
-
/** A trailing reduction absorbed into the fused loop. */
|
|
33
|
-
export interface FusedReduction {
|
|
34
|
-
/** Scalar accumulator variable name (e.g. `chain_acc`). */
|
|
35
|
-
accName: string;
|
|
36
|
-
/** Reduction builtin name (e.g. `sum`). */
|
|
37
|
-
reduceName: string;
|
|
38
|
-
/** The tensor variable being reduced (last chain dest). */
|
|
39
|
-
tensorName: string;
|
|
40
|
-
/**
|
|
41
|
-
* When true, the scalar statement is `acc = acc OP reduce(tensor)`,
|
|
42
|
-
* and `accOp` says which binary op combines the old accumulator with
|
|
43
|
-
* the reduction result. When false, it's a plain `acc = reduce(tensor)`.
|
|
44
|
-
*/
|
|
45
|
-
hasAccumulate: boolean;
|
|
46
|
-
accOp?: BinaryOperation;
|
|
47
|
-
}
|
|
48
|
-
/** Describes one fusible chain found in a statement list. */
|
|
49
|
-
export interface FusibleChain {
|
|
50
|
-
/** Index of the first statement in the chain (within the parent list). */
|
|
51
|
-
startIdx: number;
|
|
52
|
-
/** Number of statements consumed (tensor assigns + optional reduction). */
|
|
53
|
-
length: number;
|
|
54
|
-
/** The tensor assigns to fuse. */
|
|
55
|
-
assigns: FusedAssign[];
|
|
56
|
-
/** Optional trailing reduction. */
|
|
57
|
-
reduction?: FusedReduction;
|
|
58
|
-
}
|
|
59
|
-
/**
|
|
60
|
-
* Scan a statement list and return all fusible chains.
|
|
61
|
-
*
|
|
62
|
-
* `paramTensors` is the set of tensor parameter names (input data that
|
|
63
|
-
* will be read via `data[i]` in the fused loop).
|
|
64
|
-
* `allTensorVars` is the full set of tensor-typed variables (params +
|
|
65
|
-
* locals + outputs).
|
|
66
|
-
* `allowedUnaryOps` optionally restricts which tensor unary Call names
|
|
67
|
-
* are fusible. Defaults to `FUSIBLE_TENSOR_UNARY_OPS` (full set).
|
|
68
|
-
* The JS backend passes a restricted set that excludes transcendentals
|
|
69
|
-
* (V8 can't vectorize them, so fusing them is slower than per-op calls).
|
|
70
|
-
*/
|
|
71
|
-
export declare function findFusibleChains(stmts: JitStmt[], paramTensors: ReadonlySet<string>, allTensorVars: ReadonlySet<string>, allowedUnaryOps?: ReadonlySet<string>): FusibleChain[];
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared fusible-operation name sets for JIT fusion analysis.
|
|
3
|
-
*
|
|
4
|
-
* Both the C-JIT and JS-JIT fusion paths use these to determine which
|
|
5
|
-
* tensor Call nodes are fusible element-wise unary ops or absorbable
|
|
6
|
-
* trailing reductions. The numeric op codes live in their respective
|
|
7
|
-
* backend files (feasibility.ts for C, jitHelpersTensor.ts for JS).
|
|
8
|
-
*/
|
|
9
|
-
/** Tensor unary builtins fusible into per-element loops. */
|
|
10
|
-
export declare const FUSIBLE_TENSOR_UNARY_OPS: ReadonlySet<string>;
|
|
11
|
-
/**
|
|
12
|
-
* JS-JIT-safe subset: excludes transcendentals (exp, sin, cos, tan, etc.)
|
|
13
|
-
* which V8 can't SIMD-vectorize. Fusing these into a scalar per-element
|
|
14
|
-
* loop is slower than calling libnumbl_ops per-op (which uses -fopenmp-simd).
|
|
15
|
-
* The C-JIT uses the full set because GCC/Clang vectorize via #pragma omp simd.
|
|
16
|
-
*/
|
|
17
|
-
export declare const FUSIBLE_TENSOR_UNARY_OPS_JS: ReadonlySet<string>;
|
|
18
|
-
/**
|
|
19
|
-
* Two-argument tensor element-wise builtins fusible into per-element loops.
|
|
20
|
-
* These are parsed as Call nodes (not Binary nodes) and need separate
|
|
21
|
-
* recognition in isPureElementwise / emitScalarExpr.
|
|
22
|
-
*/
|
|
23
|
-
export declare const FUSIBLE_TENSOR_BINARY_OPS: ReadonlySet<string>;
|
|
24
|
-
/** Tensor reduction builtins absorbable as trailing reductions. */
|
|
25
|
-
export declare const FUSIBLE_TENSOR_REDUCTION_OPS: ReadonlySet<string>;
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared heavy-op heuristic for fused-loop emitters (e1 and e2).
|
|
3
|
-
*
|
|
4
|
-
* Counts the number of "expensive" math operations in a JitExpr — the
|
|
5
|
-
* kind of work that's heavy enough per element that OpenMP thread-
|
|
6
|
-
* spawn overhead pays off at N >= 100k. Arithmetic-only chains skip
|
|
7
|
-
* the parallel-for pragma because threads slow them down: the body
|
|
8
|
-
* becomes memory-bandwidth-bound and adding threads only adds overhead.
|
|
9
|
-
*/
|
|
10
|
-
import type { JitExpr } from "./jitTypes.js";
|
|
11
|
-
export declare function countHeavyOps(expr: JitExpr): number;
|
|
12
|
-
/** Minimum element count before `#pragma omp parallel for simd` kicks
|
|
13
|
-
* in. Below this the thread-spawn cost dominates the work.
|
|
14
|
-
* Overridable via `NUMBL_OMP_THRESHOLD` for benchmarks. */
|
|
15
|
-
export declare function ompParallelThreshold(): number;
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* JIT compilation entry point for interpreter function calls.
|
|
3
|
-
*/
|
|
4
|
-
import type { Interpreter } from "../interpreter/interpreter.js";
|
|
5
|
-
import type { FunctionDef } from "../interpreter/types.js";
|
|
6
|
-
export declare const JIT_SKIP: unique symbol;
|
|
7
|
-
export declare function tryJitCall(interp: Interpreter, fn: FunctionDef, args: unknown[], nargout: number): unknown | typeof JIT_SKIP;
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* JIT compilation for inline for/while loops.
|
|
3
|
-
*
|
|
4
|
-
* When the interpreter encounters a for or while loop, these functions
|
|
5
|
-
* attempt to JIT-compile the loop as a synthetic function: inputs are
|
|
6
|
-
* variables from the enclosing scope, outputs are variables assigned
|
|
7
|
-
* inside the loop body. On success the compiled code runs and output
|
|
8
|
-
* values are written back to the interpreter environment.
|
|
9
|
-
*/
|
|
10
|
-
import type { Interpreter } from "../interpreter/interpreter.js";
|
|
11
|
-
import type { Stmt } from "../parser/types.js";
|
|
12
|
-
/**
|
|
13
|
-
* Attempt to JIT-compile and execute a for-loop statement.
|
|
14
|
-
* Returns true if JIT succeeded, false to fall back to interpretation.
|
|
15
|
-
*/
|
|
16
|
-
export declare function tryJitFor(interp: Interpreter, stmt: Stmt & {
|
|
17
|
-
type: "For";
|
|
18
|
-
}): boolean;
|
|
19
|
-
/**
|
|
20
|
-
* Attempt to JIT-compile and execute a while-loop statement.
|
|
21
|
-
* Returns true if JIT succeeded, false to fall back to interpretation.
|
|
22
|
-
*/
|
|
23
|
-
export declare function tryJitWhile(interp: Interpreter, stmt: Stmt & {
|
|
24
|
-
type: "While";
|
|
25
|
-
}): boolean;
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* JIT compilation for the top-level script body (the main workspace).
|
|
3
|
-
*
|
|
4
|
-
* Wraps the list of non-function, non-classdef statements of a script as
|
|
5
|
-
* a synthetic `FunctionDef` whose parameters are the live-in env vars
|
|
6
|
-
* and whose outputs are every variable assigned in the script. On
|
|
7
|
-
* success the compiled code runs once and all output values are written
|
|
8
|
-
* back to the interpreter's workspace env.
|
|
9
|
-
*
|
|
10
|
-
* Mirrors `tryJitLoop` in jitLoop.ts — same lowering, same JS/C backend
|
|
11
|
-
* pipeline, same progressive type widening. The differences:
|
|
12
|
-
* - the synthetic body is the list of stmts directly, not a single For/While
|
|
13
|
-
* - every assigned variable is live-out (the whole workspace is live)
|
|
14
|
-
* - cache key is per-Interpreter (a single script AST per interp run)
|
|
15
|
-
*/
|
|
16
|
-
import type { Interpreter } from "../interpreter/interpreter.js";
|
|
17
|
-
import type { Stmt } from "../parser/types.js";
|
|
18
|
-
/**
|
|
19
|
-
* Attempt to JIT-compile and execute the top-level script body.
|
|
20
|
-
* Returns true if JIT succeeded, false to fall back to interpretation.
|
|
21
|
-
*/
|
|
22
|
-
export declare function tryJitTopLevel(interp: Interpreter, stmts: Stmt[]): boolean;
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tensor indexing helpers for JIT-compiled code.
|
|
3
|
-
*
|
|
4
|
-
* Three tiers of helpers, from most general to most specialized:
|
|
5
|
-
* 1. Generic (idx1, idx2, idxN) — handle any base type, real or complex
|
|
6
|
-
* 2. Real-tensor fast path (idx1r, idx2r, idx3r) — skip type/imag checks
|
|
7
|
-
* 3. Hoisted-base (idx*r_h, set*r_h) — take pre-extracted data/len/shape
|
|
8
|
-
*
|
|
9
|
-
* All helpers use 1-based MATLAB indexing and include per-dimension bounds
|
|
10
|
-
* checks to ensure consistency with the interpreter.
|
|
11
|
-
*/
|
|
12
|
-
import { type FloatXArrayType, type RuntimeTensor } from "../../runtime/types.js";
|
|
13
|
-
export declare class JitBailToInterpreter extends Error {
|
|
14
|
-
readonly reason: string;
|
|
15
|
-
constructor(reason: string);
|
|
16
|
-
}
|
|
17
|
-
export declare function bce(): never;
|
|
18
|
-
export declare function idx1(base: unknown, i: number): unknown;
|
|
19
|
-
export declare function idx2(base: unknown, ri: number, ci: number): unknown;
|
|
20
|
-
export declare function idxN(base: unknown, indices: number[]): unknown;
|
|
21
|
-
export declare function idx1r(base: RuntimeTensor, i: number): number;
|
|
22
|
-
export declare function idx2r(base: RuntimeTensor, ri: number, ci: number): number;
|
|
23
|
-
export declare function idx3r(base: RuntimeTensor, i1: number, i2: number, i3: number): number;
|
|
24
|
-
export declare function idx1r_h(data: FloatXArrayType, len: number, i: number): number;
|
|
25
|
-
export declare function idx2r_h(data: FloatXArrayType, len: number, rows: number, ri: number, ci: number): number;
|
|
26
|
-
export declare function idx3r_h(data: FloatXArrayType, len: number, d0: number, d1: number, i1: number, i2: number, i3: number): number;
|
|
27
|
-
export declare function set1r_h(data: FloatXArrayType, len: number, i: number, v: number): void;
|
|
28
|
-
export declare function set2r_h(data: FloatXArrayType, len: number, rows: number, ri: number, ci: number, v: number): void;
|
|
29
|
-
export declare function set3r_h(data: FloatXArrayType, len: number, d0: number, d1: number, i1: number, i2: number, i3: number, v: number): void;
|
|
30
|
-
export declare function setRange1r_h(dstData: FloatXArrayType, dstLen: number, dstStart: number, dstEnd: number, srcData: FloatXArrayType, srcLen: number, srcStart: number, srcEnd: number): void;
|
|
31
|
-
export declare function subarrayCopy1r(srcData: FloatXArrayType, srcLen: number, start: number, end: number): RuntimeTensor;
|
|
32
|
-
export declare function subarrayCopy1rRow(srcData: FloatXArrayType, srcLen: number, start: number, end: number): RuntimeTensor;
|
|
33
|
-
export declare function setCol2r_h(dstData: FloatXArrayType, dstRows: number, dstLen: number, col: number, srcData: FloatXArrayType, srcLen: number): void;
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fused per-element loop emission for the JS-JIT.
|
|
3
|
-
*
|
|
4
|
-
* Given a FusibleChain (from fusion.ts), emits a single block-scoped
|
|
5
|
-
* JavaScript `for` loop that evaluates all the chain's tensor assigns
|
|
6
|
-
* as inline scalar expressions per element — no $h.tAdd / $h.tMul
|
|
7
|
-
* helper calls, no intermediate tensor allocations.
|
|
8
|
-
*
|
|
9
|
-
* Tensor var references become either:
|
|
10
|
-
* - `__<name>_data[__i]` for input params / pre-existing tensors
|
|
11
|
-
* - `__f_<name>` for chain-produced intermediates (scalar local)
|
|
12
|
-
*
|
|
13
|
-
* The optional trailing reduction is absorbed as an inline accumulator
|
|
14
|
-
* inside the same loop.
|
|
15
|
-
*/
|
|
16
|
-
import type { FusibleChain } from "../fusion.js";
|
|
17
|
-
export declare function emitJsFusedChain(lines: string[], indent: string, chain: FusibleChain, allTensorVars: ReadonlySet<string>, paramTensors: ReadonlySet<string>, outputTensorNames: ReadonlySet<string>, _localTensorNames: ReadonlySet<string>, complexTensorNames: ReadonlySet<string>, complexScalarVars: ReadonlySet<string>, mangle: (n: string) => string, experimental?: string, par?: boolean): void;
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Allocate an UNINITIALIZED typed array — skips the zero-fill that
|
|
3
|
-
* `new Float64Array(n)` / `new FloatXArray(n)` perform by default.
|
|
4
|
-
*
|
|
5
|
-
* On Node, `Buffer.allocUnsafe` returns un-zeroed memory; wrapping it
|
|
6
|
-
* in a TypedArray view costs ~10× less than the zero-fill for a 16 MB
|
|
7
|
-
* buffer (~45 µs vs ~470 µs at N=2M doubles).
|
|
8
|
-
*
|
|
9
|
-
* SAFETY CONTRACT (very important):
|
|
10
|
-
* The caller MUST write every element before reading it. Any element
|
|
11
|
-
* that is read before being written will contain arbitrary stale bytes
|
|
12
|
-
* from recently-freed memory. If you cannot guarantee full coverage,
|
|
13
|
-
* use `new Float64Array(n)` / `new FloatXArray(n)` instead.
|
|
14
|
-
*
|
|
15
|
-
* In non-Node environments (browser, Deno without node-compat, …) where
|
|
16
|
-
* `Buffer` is unavailable, we fall back to the zero-filling constructor
|
|
17
|
-
* — still correct, just slower.
|
|
18
|
-
*/
|
|
19
|
-
import { FloatXArray } from "./types.js";
|
|
20
|
-
type FloatXInstance = InstanceType<typeof FloatXArray>;
|
|
21
|
-
export declare function uninitFloat64(n: number): Float64Array<ArrayBuffer>;
|
|
22
|
-
export declare function uninitFloatX(n: number): FloatXInstance;
|
|
23
|
-
export {};
|