webinfer 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +40 -25
  2. package/dist/activation/index.d.ts +30 -0
  3. package/dist/core/context.d.ts +60 -0
  4. package/dist/core/paged-kv-cache.d.ts +33 -0
  5. package/dist/core/tensor.d.ts +38 -19
  6. package/dist/core/types.d.ts +27 -0
  7. package/dist/decode/index.d.ts +65 -0
  8. package/dist/gemm/index.d.ts +25 -0
  9. package/dist/index.d.ts +26 -21
  10. package/dist/index.js +2439 -4872
  11. package/dist/kernels/activation.wgsl.d.ts +14 -0
  12. package/dist/kernels/batch-decode-paged.wgsl.d.ts +12 -0
  13. package/dist/kernels/batch-prefill-paged.wgsl.d.ts +13 -0
  14. package/dist/kernels/decode-attention.wgsl.d.ts +16 -0
  15. package/dist/kernels/gemm.wgsl.d.ts +17 -0
  16. package/dist/kernels/page.wgsl.d.ts +10 -0
  17. package/dist/kernels/prefill-attention.wgsl.d.ts +17 -0
  18. package/dist/kernels/rmsnorm.wgsl.d.ts +10 -0
  19. package/dist/kernels/rope.wgsl.d.ts +19 -0
  20. package/dist/kernels/sampling.wgsl.d.ts +23 -0
  21. package/dist/norm/index.d.ts +43 -0
  22. package/dist/page/index.d.ts +21 -0
  23. package/dist/prefill/index.d.ts +69 -0
  24. package/dist/rope/index.d.ts +37 -0
  25. package/dist/sampling/index.d.ts +53 -4
  26. package/package.json +1 -1
  27. package/dist/attention/block-sparse/format.d.ts +0 -52
  28. package/dist/attention/block-sparse/patterns/causal.d.ts +0 -16
  29. package/dist/attention/block-sparse/patterns/sliding.d.ts +0 -22
  30. package/dist/attention/block-sparse/patterns/tree.d.ts +0 -65
  31. package/dist/attention/cascaded-inference.d.ts +0 -29
  32. package/dist/attention/flash-attention.d.ts +0 -30
  33. package/dist/attention/index.d.ts +0 -118
  34. package/dist/attention/paged-attention.d.ts +0 -40
  35. package/dist/attention/paged-kv/block-manager.d.ts +0 -102
  36. package/dist/attention/paged-kv/index.d.ts +0 -5
  37. package/dist/attention/paged-kv/page-table.d.ts +0 -165
  38. package/dist/attention/scheduler.d.ts +0 -40
  39. package/dist/core/buffer-pool.d.ts +0 -18
  40. package/dist/core/device.d.ts +0 -23
  41. package/dist/core/tdr.d.ts +0 -114
  42. package/dist/inference/engine.d.ts +0 -69
  43. package/dist/inference/generate.d.ts +0 -30
  44. package/dist/inference/index.d.ts +0 -7
  45. package/dist/inference/types.d.ts +0 -161
  46. package/dist/jit/compiler.d.ts +0 -23
  47. package/dist/jit/kernel-cache.d.ts +0 -21
  48. package/dist/model/gguf.d.ts +0 -90
  49. package/dist/model/index.d.ts +0 -16
  50. package/dist/model/safetensors.d.ts +0 -38
  51. package/dist/model/types.d.ts +0 -182
  52. package/dist/ops/activations.d.ts +0 -43
  53. package/dist/ops/elementwise.d.ts +0 -38
  54. package/dist/ops/embedding.d.ts +0 -30
  55. package/dist/ops/matmul.d.ts +0 -21
  56. package/dist/ops/normalization.d.ts +0 -63
  57. package/dist/ops/reshape.d.ts +0 -39
  58. package/dist/ops/rope.d.ts +0 -32
  59. package/dist/ops/softmax.d.ts +0 -18
  60. package/dist/quantization/index.d.ts +0 -6
  61. package/dist/quantization/qmatmul.d.ts +0 -38
  62. package/dist/quantization/quantize.d.ts +0 -52
  63. package/dist/sampling/beam-search.d.ts +0 -87
  64. package/dist/sampling/sampler.d.ts +0 -72
  65. package/dist/sampling/speculative.d.ts +0 -65
  66. package/dist/sampling/top-k.d.ts +0 -24
  67. package/dist/sampling/top-p.d.ts +0 -14
  68. package/dist/tvm/adapter.d.ts +0 -81
  69. package/dist/tvm/index.d.ts +0 -8
  70. package/dist/tvm/ops.d.ts +0 -26
  71. package/dist/tvm/types.d.ts +0 -35
@@ -1,35 +0,0 @@
1
- /**
2
- * TVM Runtime Type Re-exports
3
- * Re-exports types from @mlc-ai/web-runtime (tvmjs)
4
- */
5
- export type { Instance as TVMInstance, Tensor as TVMTensor, PackedFunc, DLDevice, DLDataType, TVMObject, TVMArray, Module as TVMModule, VirtualMachine, Scalar, GPUDeviceDetectOutput, } from "@mlc-ai/web-runtime";
6
- export { instantiate, detectGPUDevice } from "@mlc-ai/web-runtime";
7
- import type { Tensor } from "@mlc-ai/web-runtime";
8
- /**
9
- * Extended WebGPU context interface for buffer access
10
- * Note: bufferTable and gpuBufferFromPtr are private in tvmjs
11
- * This interface is for internal use to attempt zero-copy when possible
12
- */
13
- export interface ExtendedWebGPUContext {
14
- device: GPUDevice;
15
- sync(): Promise<void>;
16
- bufferTable?: (GPUBuffer | undefined)[];
17
- gpuBufferFromPtr?(ptr: number): GPUBuffer;
18
- }
19
- /**
20
- * Map webinfer dtype to TVM dtype string
21
- */
22
- export declare function toTVMDType(dtype: "f32" | "f16" | "i32" | "u32"): string;
23
- /**
24
- * Map TVM dtype string to webinfer dtype
25
- */
26
- export declare function fromTVMDType(dtype: string): "f32" | "f16" | "i32" | "u32";
27
- /**
28
- * Check if a TVM tensor is on WebGPU device
29
- */
30
- export declare function isWebGPUTensor(tensor: Tensor): boolean;
31
- /**
32
- * Get the data pointer from a TVM tensor
33
- * This is used to look up the GPUBuffer in WebGPUContext.bufferTable
34
- */
35
- export declare function getTensorDataPtr(tensor: Tensor): number;