webinfer 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -25
- package/dist/activation/index.d.ts +30 -0
- package/dist/core/context.d.ts +60 -0
- package/dist/core/paged-kv-cache.d.ts +33 -0
- package/dist/core/tensor.d.ts +38 -19
- package/dist/core/types.d.ts +27 -0
- package/dist/decode/index.d.ts +65 -0
- package/dist/gemm/index.d.ts +25 -0
- package/dist/index.d.ts +26 -21
- package/dist/index.js +2439 -4872
- package/dist/kernels/activation.wgsl.d.ts +14 -0
- package/dist/kernels/batch-decode-paged.wgsl.d.ts +12 -0
- package/dist/kernels/batch-prefill-paged.wgsl.d.ts +13 -0
- package/dist/kernels/decode-attention.wgsl.d.ts +16 -0
- package/dist/kernels/gemm.wgsl.d.ts +17 -0
- package/dist/kernels/page.wgsl.d.ts +10 -0
- package/dist/kernels/prefill-attention.wgsl.d.ts +17 -0
- package/dist/kernels/rmsnorm.wgsl.d.ts +10 -0
- package/dist/kernels/rope.wgsl.d.ts +19 -0
- package/dist/kernels/sampling.wgsl.d.ts +23 -0
- package/dist/norm/index.d.ts +43 -0
- package/dist/page/index.d.ts +21 -0
- package/dist/prefill/index.d.ts +69 -0
- package/dist/rope/index.d.ts +37 -0
- package/dist/sampling/index.d.ts +53 -4
- package/package.json +1 -1
- package/dist/attention/block-sparse/format.d.ts +0 -52
- package/dist/attention/block-sparse/patterns/causal.d.ts +0 -16
- package/dist/attention/block-sparse/patterns/sliding.d.ts +0 -22
- package/dist/attention/block-sparse/patterns/tree.d.ts +0 -65
- package/dist/attention/cascaded-inference.d.ts +0 -29
- package/dist/attention/flash-attention.d.ts +0 -30
- package/dist/attention/index.d.ts +0 -118
- package/dist/attention/paged-attention.d.ts +0 -40
- package/dist/attention/paged-kv/block-manager.d.ts +0 -102
- package/dist/attention/paged-kv/index.d.ts +0 -5
- package/dist/attention/paged-kv/page-table.d.ts +0 -165
- package/dist/attention/scheduler.d.ts +0 -40
- package/dist/core/buffer-pool.d.ts +0 -18
- package/dist/core/device.d.ts +0 -23
- package/dist/core/tdr.d.ts +0 -114
- package/dist/inference/engine.d.ts +0 -69
- package/dist/inference/generate.d.ts +0 -30
- package/dist/inference/index.d.ts +0 -7
- package/dist/inference/types.d.ts +0 -161
- package/dist/jit/compiler.d.ts +0 -23
- package/dist/jit/kernel-cache.d.ts +0 -21
- package/dist/model/gguf.d.ts +0 -90
- package/dist/model/index.d.ts +0 -16
- package/dist/model/safetensors.d.ts +0 -38
- package/dist/model/types.d.ts +0 -182
- package/dist/ops/activations.d.ts +0 -43
- package/dist/ops/elementwise.d.ts +0 -38
- package/dist/ops/embedding.d.ts +0 -30
- package/dist/ops/matmul.d.ts +0 -21
- package/dist/ops/normalization.d.ts +0 -63
- package/dist/ops/reshape.d.ts +0 -39
- package/dist/ops/rope.d.ts +0 -32
- package/dist/ops/softmax.d.ts +0 -18
- package/dist/quantization/index.d.ts +0 -6
- package/dist/quantization/qmatmul.d.ts +0 -38
- package/dist/quantization/quantize.d.ts +0 -52
- package/dist/sampling/beam-search.d.ts +0 -87
- package/dist/sampling/sampler.d.ts +0 -72
- package/dist/sampling/speculative.d.ts +0 -65
- package/dist/sampling/top-k.d.ts +0 -24
- package/dist/sampling/top-p.d.ts +0 -14
- package/dist/tvm/adapter.d.ts +0 -81
- package/dist/tvm/index.d.ts +0 -8
- package/dist/tvm/ops.d.ts +0 -26
- package/dist/tvm/types.d.ts +0 -35
package/dist/tvm/types.d.ts
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TVM Runtime Type Re-exports
|
|
3
|
-
* Re-exports types from @mlc-ai/web-runtime (tvmjs)
|
|
4
|
-
*/
|
|
5
|
-
export type { Instance as TVMInstance, Tensor as TVMTensor, PackedFunc, DLDevice, DLDataType, TVMObject, TVMArray, Module as TVMModule, VirtualMachine, Scalar, GPUDeviceDetectOutput, } from "@mlc-ai/web-runtime";
|
|
6
|
-
export { instantiate, detectGPUDevice } from "@mlc-ai/web-runtime";
|
|
7
|
-
import type { Tensor } from "@mlc-ai/web-runtime";
|
|
8
|
-
/**
|
|
9
|
-
* Extended WebGPU context interface for buffer access
|
|
10
|
-
* Note: bufferTable and gpuBufferFromPtr are private in tvmjs
|
|
11
|
-
* This interface is for internal use to attempt zero-copy when possible
|
|
12
|
-
*/
|
|
13
|
-
export interface ExtendedWebGPUContext {
|
|
14
|
-
device: GPUDevice;
|
|
15
|
-
sync(): Promise<void>;
|
|
16
|
-
bufferTable?: (GPUBuffer | undefined)[];
|
|
17
|
-
gpuBufferFromPtr?(ptr: number): GPUBuffer;
|
|
18
|
-
}
|
|
19
|
-
/**
|
|
20
|
-
* Map webinfer dtype to TVM dtype string
|
|
21
|
-
*/
|
|
22
|
-
export declare function toTVMDType(dtype: "f32" | "f16" | "i32" | "u32"): string;
|
|
23
|
-
/**
|
|
24
|
-
* Map TVM dtype string to webinfer dtype
|
|
25
|
-
*/
|
|
26
|
-
export declare function fromTVMDType(dtype: string): "f32" | "f16" | "i32" | "u32";
|
|
27
|
-
/**
|
|
28
|
-
* Check if a TVM tensor is on WebGPU device
|
|
29
|
-
*/
|
|
30
|
-
export declare function isWebGPUTensor(tensor: Tensor): boolean;
|
|
31
|
-
/**
|
|
32
|
-
* Get the data pointer from a TVM tensor
|
|
33
|
-
* This is used to look up the GPUBuffer in WebGPUContext.bufferTable
|
|
34
|
-
*/
|
|
35
|
-
export declare function getTensorDataPtr(tensor: Tensor): number;
|