webinfer 0.0.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/LICENSE +201 -0
  2. package/dist/attention/block-sparse/format.d.ts +52 -0
  3. package/dist/attention/block-sparse/patterns/causal.d.ts +16 -0
  4. package/dist/attention/block-sparse/patterns/sliding.d.ts +22 -0
  5. package/dist/attention/flash-attention.d.ts +30 -0
  6. package/dist/attention/index.d.ts +9 -0
  7. package/dist/attention/paged-kv/block-manager.d.ts +102 -0
  8. package/dist/attention/paged-kv/index.d.ts +5 -0
  9. package/dist/attention/paged-kv/page-table.d.ts +99 -0
  10. package/dist/attention/scheduler.d.ts +40 -0
  11. package/dist/core/buffer-pool.d.ts +18 -0
  12. package/dist/core/device.d.ts +23 -0
  13. package/dist/core/tensor.d.ts +25 -0
  14. package/dist/index.d.ts +22 -0
  15. package/dist/index.js +4228 -0
  16. package/dist/inference/engine.d.ts +69 -0
  17. package/dist/inference/generate.d.ts +30 -0
  18. package/dist/inference/index.d.ts +7 -0
  19. package/dist/inference/types.d.ts +161 -0
  20. package/dist/jit/compiler.d.ts +23 -0
  21. package/dist/jit/kernel-cache.d.ts +21 -0
  22. package/dist/model/gguf.d.ts +90 -0
  23. package/dist/model/index.d.ts +16 -0
  24. package/dist/model/safetensors.d.ts +38 -0
  25. package/dist/model/types.d.ts +182 -0
  26. package/dist/ops/activations.d.ts +43 -0
  27. package/dist/ops/elementwise.d.ts +38 -0
  28. package/dist/ops/embedding.d.ts +30 -0
  29. package/dist/ops/matmul.d.ts +21 -0
  30. package/dist/ops/normalization.d.ts +24 -0
  31. package/dist/ops/reshape.d.ts +39 -0
  32. package/dist/ops/rope.d.ts +32 -0
  33. package/dist/ops/softmax.d.ts +18 -0
  34. package/dist/quantization/index.d.ts +6 -0
  35. package/dist/quantization/qmatmul.d.ts +38 -0
  36. package/dist/quantization/quantize.d.ts +52 -0
  37. package/dist/sampling/index.d.ts +6 -0
  38. package/dist/sampling/sampler.d.ts +39 -0
  39. package/dist/sampling/top-k.d.ts +24 -0
  40. package/dist/sampling/top-p.d.ts +14 -0
  41. package/package.json +54 -0
@@ -0,0 +1,22 @@
1
+ /**
2
+ * WebInfer - High-performance LLM inference kernels for WebGPU
3
+ * "The cuDNN/FlashInfer of WebGPU"
4
+ */
5
+ export { WebInferDevice, type DeviceInfo } from "./core/device.ts";
6
+ export { Tensor, type DType } from "./core/tensor.ts";
7
+ export { BufferPool } from "./core/buffer-pool.ts";
8
+ export { KernelCache, type CacheStats } from "./jit/kernel-cache.ts";
9
+ export { WGSLCompiler, type MatMulConfig } from "./jit/compiler.ts";
10
+ export { matmul, matmulCPU, getMatMulCacheStats } from "./ops/matmul.ts";
11
+ export { layerNorm, layerNormCPU, rmsNorm, rmsNormCPU, } from "./ops/normalization.ts";
12
+ export { rope, ropeCPU, computeRoPEFrequencies, type RoPEConfig, } from "./ops/rope.ts";
13
+ export { gelu, geluCPU, geluExactCPU, silu, siluCPU, relu, reluCPU, sigmoidCPU, } from "./ops/activations.ts";
14
+ export { softmaxGPU, softmaxCPU, logSoftmaxCPU } from "./ops/softmax.ts";
15
+ export { add, addCPU, mul, mulCPU, scale, scaleCPU, addScalarCPU, fmaCPU, } from "./ops/elementwise.ts";
16
+ export { embedding, embeddingCPU, batchedEmbeddingCPU, } from "./ops/embedding.ts";
17
+ export { transpose2D, transpose2DCPU, transposeCPU, reshapeCPU, permuteCPU, } from "./ops/reshape.ts";
18
+ export { quantizeToInt8, quantizeToInt4, dequantizeInt8, dequantizeInt4, quantizationError, getMemorySavings, qmatmulInt8CPU, qmatmulInt4CPU, qmatmulInt8BlockCPU, estimateQMatMulFlops, estimateQMatMulBandwidth, type QuantConfig, type QuantizedTensor, } from "./quantization/index.ts";
19
+ export { flashAttention, attentionCPU, type AttentionConfig, buildBlockSparseCSR, getSparsityRatio, estimateMemorySavings, type BlockSparseCSR, type AttentionPattern, buildCausalMask, getCausalSparsity, buildSlidingWindowMask, buildCausalSlidingWindowMask, getSlidingWindowSparsity, AttentionScheduler, type ChunkPlan, PagedKVCache, type PagedKVCacheConfig, type SequenceEntry, BlockManager, ContinuousBatchScheduler, type BlockManagerConfig, type AllocationPolicy, type AllocationRequest, } from "./attention/index.ts";
20
+ export { topK, topKCPU, topKFilter, topPFilter, topPFilterCPU, sample, sampleCPU, sampleGreedy, sampleFromProbs, softmax, applyRepetitionPenalty, type SamplingConfig, } from "./sampling/index.ts";
21
+ export { type ModelFormat, type SafetensorsDType, GGUFQuantType, GGUFMetadataValueType, type TensorInfo, type SafetensorsHeader, type ModelMetadata, type LoadedModel, type LoadOptions, parseSafetensorsHeader, loadSafetensors, loadSafetensorsFromUrl, isSafetensors, parseGGUFHeader, loadGGUF, loadGGUFFromUrl, loadGGUFTensor, isGGUF, loadModel, } from "./model/index.ts";
22
+ export { type ModelConfig, type InferenceConfig, type GenerationConfig, type GenerationResult, type StreamToken, type FinishReason, type ForwardResult, DEFAULT_GENERATION_CONFIG, normalizeGenerationConfig, InferenceEngine, generate, generateStream, greedyDecode, sampleNextToken, } from "./inference/index.ts";