compressed-tensors 0.9.5a20250509__tar.gz → 0.9.5a20250513__tar.gz

This diff compares the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (125)
  1. {compressed_tensors-0.9.5a20250509/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250513}/PKG-INFO +1 -1
  2. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/quant_args.py +55 -3
  3. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/quant_scheme.py +13 -1
  4. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/version.py +1 -1
  5. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  6. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/.gitkeep +0 -0
  7. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/actions/test/action.yml +0 -0
  8. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/scripts/step-status +0 -0
  9. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/build-test.yml +0 -0
  10. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/build.yml +0 -0
  11. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/report.yml +0 -0
  12. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/test-check.yaml +0 -0
  13. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/test.yml +0 -0
  14. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/trigger-all.yml +0 -0
  15. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.github/workflows/upload.yml +0 -0
  16. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/.gitignore +0 -0
  17. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/LICENSE +0 -0
  18. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/Makefile +0 -0
  19. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/README.md +0 -0
  20. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  21. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/bit_packing/int4_config.json +0 -0
  22. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/bitmask_compression.ipynb +0 -0
  23. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  24. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  25. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/llama_1.1b/example_quant_config.json +0 -0
  26. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  27. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/examples/quantize_and_pack_int4.ipynb +0 -0
  28. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/pyproject.toml +0 -0
  29. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/setup.cfg +0 -0
  30. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/setup.py +0 -0
  31. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/__init__.py +0 -0
  32. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/README.md +0 -0
  33. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/__init__.py +0 -0
  34. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/base.py +0 -0
  35. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/__init__.py +0 -0
  36. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/base.py +0 -0
  37. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/helpers.py +0 -0
  38. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  39. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  40. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  41. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  42. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  43. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  44. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  45. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  46. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  47. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  48. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  49. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  50. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  51. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  52. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/config/__init__.py +0 -0
  53. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/config/base.py +0 -0
  54. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/config/dense.py +0 -0
  55. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  56. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  57. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/linear/__init__.py +0 -0
  58. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  59. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/__init__.py +0 -0
  60. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  61. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  62. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  63. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  64. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  65. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  66. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/quant_config.py +0 -0
  67. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  68. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  69. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/registry/__init__.py +0 -0
  70. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/registry/registry.py +0 -0
  71. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/__init__.py +0 -0
  72. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/helpers.py +0 -0
  73. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/offload.py +0 -0
  74. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/permutations_24.py +0 -0
  75. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/permute.py +0 -0
  76. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  77. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  78. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  79. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  80. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors.egg-info/requires.txt +0 -0
  81. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  82. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/__init__.py +0 -0
  83. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/conftest.py +0 -0
  84. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/__init__.py +0 -0
  85. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/model_compressors/__init__.py +0 -0
  86. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  87. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  88. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  89. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  90. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  91. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  92. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  93. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  94. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  95. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  96. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  97. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_configs/__init__.py +0 -0
  98. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_configs/test_base.py +0 -0
  99. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  100. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_linear/__init__.py +0 -0
  101. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_linear/test_compressed_linear.py +0 -0
  102. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/__init__.py +0 -0
  103. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/__init__.py +0 -0
  104. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/conftest.py +0 -0
  105. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  106. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  107. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  108. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  109. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  110. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  111. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  112. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_configs/__init__.py +0 -0
  113. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  114. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  115. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_quant_args.py +0 -0
  116. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_quant_config.py +0 -0
  117. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_quant_scheme.py +0 -0
  118. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  119. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_registry.py +0 -0
  120. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_utils/__init__.py +0 -0
  121. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_utils/test_helpers.py +0 -0
  122. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_utils/test_offload.py +0 -0
  123. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/test_utils/test_safetensors_load.py +0 -0
  124. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/tests/testing_utils.py +0 -0
  125. {compressed_tensors-0.9.5a20250509 → compressed_tensors-0.9.5a20250513}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.5a20250509
3
+ Version: 0.9.5a20250513
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -24,6 +24,8 @@ from pydantic import BaseModel, Field, field_validator, model_validator
24
24
 
25
25
  __all__ = [
26
26
  "FP8_DTYPE",
27
+ "FP8_E4M3_DATA",
28
+ "FP4_E2M1_DATA",
27
29
  "QuantizationType",
28
30
  "QuantizationStrategy",
29
31
  "QuantizationArgs",
@@ -31,6 +33,48 @@ __all__ = [
31
33
  "ActivationOrdering",
32
34
  ]
33
35
 
36
+
37
+ class FloatArgs:
38
+ exponent: int
39
+ mantissa: int
40
+ bits: int
41
+ max: float
42
+ min: float
43
+ dtype: Optional[torch.dtype] = None
44
+
45
+
46
+ class FP4_E2M1_DATA(FloatArgs):
47
+ exponent = 2
48
+ mantissa = 1
49
+ bits = 4
50
+ max = 6.0
51
+ min = -6.0
52
+
53
+ @staticmethod
54
+ def cast_to_fp4(x):
55
+ sign = torch.sign(x)
56
+ x = torch.abs(x)
57
+ x[(x >= 0.0) & (x <= 0.25)] = 0.0
58
+ x[(x > 0.25) & (x < 0.75)] = 0.5
59
+ x[(x >= 0.75) & (x <= 1.25)] = 1.0
60
+ x[(x > 1.25) & (x < 1.75)] = 1.5
61
+ x[(x >= 1.75) & (x <= 2.5)] = 2.0
62
+ x[(x > 2.5) & (x < 3.5)] = 3.0
63
+ x[(x >= 3.5) & (x <= 5.0)] = 4.0
64
+ x[x > 5.0] = 6.0
65
+ return x * sign
66
+
67
+
68
+ class FP8_E4M3_DATA(FloatArgs):
69
+ exponent = 4
70
+ mantissa = 3
71
+ bits = 8
72
+ max = torch.finfo(torch.float8_e4m3fn).max
73
+ min = torch.finfo(torch.float8_e4m3fn).min
74
+ dtype = torch.float8_e4m3fn
75
+
76
+
77
+ # TODO: Remove soon in favour of a more descriptive FloatArgs
34
78
  FP8_DTYPE = torch.float8_e4m3fn
35
79
 
36
80
 
@@ -162,7 +206,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
162
206
  return value
163
207
 
164
208
  @model_validator(mode="after")
165
- def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
209
+ def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
166
210
  # extract user-passed values from dictionary
167
211
  strategy = model.strategy
168
212
  group_size = model.group_size
@@ -234,7 +278,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
234
278
 
235
279
  def pytorch_dtype(self) -> torch.dtype:
236
280
  if self.type == QuantizationType.FLOAT:
237
- return FP8_DTYPE
281
+ if self.num_bits == 8:
282
+ return FP8_E4M3_DATA.dtype
283
+ else:
284
+ raise NotImplementedError("Only num_bits in (8) are supported")
238
285
  elif self.type == QuantizationType.INT:
239
286
  if self.num_bits <= 8:
240
287
  return torch.int8
@@ -263,7 +310,12 @@ def round_to_quantized_type(
263
310
  """
264
311
  original_dtype = tensor.dtype
265
312
  if args.type == QuantizationType.FLOAT:
266
- rounded = tensor.to(FP8_DTYPE)
313
+ if args.num_bits == 8:
314
+ rounded = tensor.to(FP8_E4M3_DATA.dtype)
315
+ elif args.num_bits == 4:
316
+ rounded = FP4_E2M1_DATA.cast_to_fp4(tensor)
317
+ else:
318
+ raise NotImplementedError("Only num_bits in (4, 8) are supported")
267
319
  elif args.type == QuantizationType.INT:
268
320
  rounded = torch.round(tensor)
269
321
  else:
@@ -48,7 +48,7 @@ class QuantizationScheme(BaseModel):
48
48
  output_activations: Optional[QuantizationArgs] = None
49
49
 
50
50
  @model_validator(mode="after")
51
- def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
51
+ def validate_model_after(model: "QuantizationScheme") -> "QuantizationScheme":
52
52
  inputs = model.input_activations
53
53
  outputs = model.output_activations
54
54
 
@@ -100,6 +100,17 @@ def is_preset_scheme(name: str) -> bool:
100
100
 
101
101
  UNQUANTIZED = dict()
102
102
 
103
+ NVFP4A16 = dict(
104
+ weights=QuantizationArgs(
105
+ num_bits=4,
106
+ type=QuantizationType.FLOAT,
107
+ strategy=QuantizationStrategy.GROUP,
108
+ symmetric=True,
109
+ dynamic=False,
110
+ group_size=16,
111
+ )
112
+ )
113
+
103
114
  # 8 bit integer weights and 8 bit activations quantization
104
115
  INT8_W8A8 = dict(
105
116
  weights=QuantizationArgs(
@@ -225,4 +236,5 @@ PRESET_SCHEMES = {
225
236
  # Float weight and activation schemes
226
237
  "FP8": FP8,
227
238
  "FP8_DYNAMIC": FP8_DYNAMIC,
239
+ "NVFP4A16": NVFP4A16,
228
240
  }
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.9.5.a20250509'
20
+ __version__ = version = '0.9.5.a20250513'
21
21
  __version_tuple__ = version_tuple = (0, 9, 5)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.5a20250509
3
+ Version: 0.9.5a20250513
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.