compressed-tensors 0.9.5a20250502__tar.gz → 0.9.5a20250509__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. {compressed_tensors-0.9.5a20250502/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250509}/PKG-INFO +1 -1
  2. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +2 -2
  3. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +1 -0
  4. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/quantized_compressors/base.py +9 -9
  5. compressed_tensors-0.9.5a20250509/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +190 -0
  6. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/config/base.py +1 -0
  7. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/safetensors_load.py +10 -10
  8. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/version.py +1 -1
  9. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  10. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  11. compressed_tensors-0.9.5a20250509/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +43 -0
  12. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/.gitkeep +0 -0
  13. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/actions/test/action.yml +0 -0
  14. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/scripts/step-status +0 -0
  15. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/build-test.yml +0 -0
  16. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/build.yml +0 -0
  17. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/report.yml +0 -0
  18. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/test-check.yaml +0 -0
  19. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/test.yml +0 -0
  20. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/trigger-all.yml +0 -0
  21. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.github/workflows/upload.yml +0 -0
  22. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/.gitignore +0 -0
  23. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/LICENSE +0 -0
  24. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/Makefile +0 -0
  25. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/README.md +0 -0
  26. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  27. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/bit_packing/int4_config.json +0 -0
  28. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/bitmask_compression.ipynb +0 -0
  29. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  30. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  31. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/llama_1.1b/example_quant_config.json +0 -0
  32. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  33. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/examples/quantize_and_pack_int4.ipynb +0 -0
  34. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/pyproject.toml +0 -0
  35. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/setup.cfg +0 -0
  36. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/setup.py +0 -0
  37. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/__init__.py +0 -0
  38. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/README.md +0 -0
  39. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/__init__.py +0 -0
  40. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/base.py +0 -0
  41. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/__init__.py +0 -0
  42. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/base.py +0 -0
  43. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/helpers.py +0 -0
  44. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  46. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  47. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  48. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  49. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  50. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  51. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  52. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  53. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  54. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/config/__init__.py +0 -0
  55. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/config/dense.py +0 -0
  56. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  57. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  58. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/linear/__init__.py +0 -0
  59. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  60. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/__init__.py +0 -0
  61. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  62. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  63. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  64. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  65. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  66. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  67. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/quant_args.py +0 -0
  68. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/quant_config.py +0 -0
  69. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  70. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  71. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  72. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/registry/__init__.py +0 -0
  73. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/registry/registry.py +0 -0
  74. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/__init__.py +0 -0
  75. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/helpers.py +0 -0
  76. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/offload.py +0 -0
  77. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/permutations_24.py +0 -0
  78. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/permute.py +0 -0
  79. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  80. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  81. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors.egg-info/requires.txt +0 -0
  82. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  83. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/__init__.py +0 -0
  84. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/conftest.py +0 -0
  85. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/__init__.py +0 -0
  86. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/model_compressors/__init__.py +0 -0
  87. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  88. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  89. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  90. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  91. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  92. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  93. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  94. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  95. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  96. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  97. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_configs/__init__.py +0 -0
  98. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_configs/test_base.py +0 -0
  99. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  100. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_linear/__init__.py +0 -0
  101. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_linear/test_compressed_linear.py +0 -0
  102. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/__init__.py +0 -0
  103. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/__init__.py +0 -0
  104. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/conftest.py +0 -0
  105. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  106. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  107. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  108. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  109. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  110. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  111. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  112. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_configs/__init__.py +0 -0
  113. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  114. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  115. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_quant_args.py +0 -0
  116. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_quant_config.py +0 -0
  117. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_quant_scheme.py +0 -0
  118. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  119. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_registry.py +0 -0
  120. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_utils/__init__.py +0 -0
  121. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_utils/test_helpers.py +0 -0
  122. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_utils/test_offload.py +0 -0
  123. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/test_utils/test_safetensors_load.py +0 -0
  124. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/tests/testing_utils.py +0 -0
  125. {compressed_tensors-0.9.5a20250502 → compressed_tensors-0.9.5a20250509}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.5a20250502
3
+ Version: 0.9.5a20250509
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -573,8 +573,8 @@ class ModelCompressor:
573
573
  :param model: The model whose weights are to be updated.
574
574
  """
575
575
 
576
- for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
577
- module = operator.attrgetter(name)(model)
576
+ for mod_path, data in tqdm(dense_weight_generator, desc="Decompressing model"):
577
+ module = operator.attrgetter(mod_path)(model)
578
578
 
579
579
  params_device = next(module.parameters()).device
580
580
  device = "cpu" if has_offloaded_params(module) else params_device
@@ -15,4 +15,5 @@
15
15
 
16
16
  from .base import *
17
17
  from .naive_quantized import *
18
+ from .nvfp4_quantized import *
18
19
  from .pack_quantized import *
@@ -195,33 +195,33 @@ class BaseQuantizationCompressor(BaseCompressor):
195
195
  weight_mappings = get_nested_weight_mappings(
196
196
  path_to_model, self.compression_param_names
197
197
  )
198
- for weight_name in weight_mappings.keys():
198
+ for module_path in weight_mappings.keys():
199
199
  weight_data = {}
200
- for param_name, safe_path in weight_mappings[weight_name].items():
201
- full_name = merge_names(weight_name, param_name)
200
+ for param_name, safe_path in weight_mappings[module_path].items():
201
+ full_name = merge_names(module_path, param_name)
202
202
  with safe_open(safe_path, framework="pt", device=device) as f:
203
203
  weight_data[param_name] = f.get_tensor(full_name)
204
204
  if "weight_scale" in weight_data:
205
- quant_args = names_to_scheme[weight_name].weights
205
+ quant_args = names_to_scheme[module_path].weights
206
206
  decompressed = self.decompress_weight(
207
207
  compressed_data=weight_data, quantization_args=quant_args
208
208
  )
209
209
  weight_data["weight"] = decompressed
210
- yield weight_name, weight_data
210
+ yield module_path, weight_data
211
211
 
212
212
  def _decompress_from_state_dict(self, state_dict, names_to_scheme):
213
213
  weight_mappings = get_nested_mappings_from_state_dict(
214
214
  state_dict, self.compression_param_names
215
215
  )
216
- for weight_name in weight_mappings.keys():
216
+ for module_path in weight_mappings.keys():
217
217
  weight_data = {}
218
- for param_name, param_value in weight_mappings[weight_name].items():
218
+ for param_name, param_value in weight_mappings[module_path].items():
219
219
  weight_data[param_name] = param_value
220
220
 
221
221
  if "weight_scale" in weight_data:
222
- quant_args = names_to_scheme[weight_name]
222
+ quant_args = names_to_scheme[module_path]
223
223
  decompressed = self.decompress_weight(
224
224
  compressed_data=weight_data, quantization_args=quant_args
225
225
  )
226
226
  weight_data["weight"] = decompressed
227
- yield weight_name, weight_data
227
+ yield module_path, weight_data
@@ -0,0 +1,190 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from typing import Dict, Optional, Tuple
17
+
18
+ import numpy
19
+ import torch
20
+ from compressed_tensors.compressors.base import BaseCompressor
21
+ from compressed_tensors.compressors.quantized_compressors.base import (
22
+ BaseQuantizationCompressor,
23
+ )
24
+ from compressed_tensors.config import CompressionFormat
25
+ from compressed_tensors.quantization import QuantizationArgs
26
+ from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
27
+ from torch import Tensor
28
+
29
+
30
+ __all__ = ["pack_fp4_to_uint8", "unpack_fp4_from_uint8"]
31
+
32
+ FLOAT_TO_E2M1 = [
33
+ 0.0,
34
+ 0.5,
35
+ 1.0,
36
+ 1.5,
37
+ 2.0,
38
+ 3.0,
39
+ 4.0,
40
+ 6.0,
41
+ ]
42
+
43
+
44
+ @BaseCompressor.register(name=CompressionFormat.nvfp4_pack_quantized.value)
45
+ class NVFP4PackedCompressor(BaseQuantizationCompressor):
46
+ """
47
+ Implements compression of FP4 values. Weights of each quantized layer
48
+ are packed into uint8. Only supports symmetric weight compression for now.
49
+ """
50
+
51
+ @property
52
+ def compression_param_names(self) -> Tuple[str]:
53
+ """
54
+ Returns a tuple of compression parameter names introduced by
55
+ the compressor during compression
56
+ """
57
+ return (
58
+ "weight_packed",
59
+ "weight_scale",
60
+ "weight_zero_point",
61
+ "weight_global_scale",
62
+ )
63
+
64
+ def compress_weight(
65
+ self,
66
+ weight: Tensor,
67
+ scale: Tensor,
68
+ global_scale: Tensor,
69
+ quantization_args: QuantizationArgs,
70
+ device: Optional[torch.device] = None,
71
+ zero_point: Optional[torch.Tensor] = None,
72
+ g_idx: Optional[torch.Tensor] = None,
73
+ ) -> Dict[str, torch.Tensor]:
74
+
75
+ quantized_weight = quantize(
76
+ x=weight,
77
+ scale=scale,
78
+ global_scale=global_scale,
79
+ zero_point=zero_point,
80
+ args=quantization_args,
81
+ )
82
+ compressed_dict = {}
83
+ weight_packed = pack_fp4_to_uint8(quantized_weight)
84
+ if device is not None:
85
+ weight_packed = weight_packed.to(device)
86
+ compressed_dict["weight_packed"] = weight_packed
87
+ return compressed_dict
88
+
89
+ def decompress_weight(
90
+ self,
91
+ compressed_data: Dict[str, Tensor],
92
+ quantization_args: Optional[QuantizationArgs] = None,
93
+ ) -> torch.Tensor:
94
+
95
+ weight = compressed_data["weight_packed"]
96
+ scale = compressed_data["weight_scale"]
97
+ global_scale = compressed_data["weight_global_scale"]
98
+ m, n = weight.shape
99
+ # TODO: use a user provided dequant dtype
100
+ unpacked = unpack_fp4_from_uint8(weight, m, n * 2)
101
+ decompressed_weight = dequantize(
102
+ x_q=unpacked, scale=scale, global_scale=global_scale, dtype=unpacked.dtype
103
+ )
104
+
105
+ return decompressed_weight
106
+
107
+
108
+ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
109
+ """
110
+ Packs a tensor with values in the fp4 range into uint8.
111
+ As there are 16 valid fp4 values, two fp4 values can be
112
+ packed into one uint8. Each fp4 value is mapped to its
113
+ particular index (e.g. 0.5 is mapped to index 1, 6.0 is mapped
114
+ to index 7) which is then represented using 4 bits. Consecutive
115
+ pairs of 4 bits are then packed into an uint8.
116
+
117
+ :param x: tensor to pack
118
+ returns: a packed tensor in uint8
119
+ """
120
+
121
+ m, n = x.shape
122
+ device = x.device
123
+
124
+ # Create lookup table for FP4 values to indices
125
+ # Map the absolute values to 0-7 indices
126
+ kE2M1 = torch.tensor(FLOAT_TO_E2M1, device=device, dtype=x.dtype)
127
+
128
+ # Find closest valid FP4 value index for each element
129
+ abs_x = torch.abs(x)
130
+ abs_indices = torch.zeros_like(abs_x, dtype=torch.long)
131
+ for i, val in enumerate(kE2M1):
132
+ abs_indices = torch.where(torch.isclose(abs_x, val), i, abs_indices)
133
+
134
+ # Apply sign bit (bit 3) to get final 4-bit representation
135
+ indices = abs_indices + (torch.signbit(x) << 3).to(torch.long)
136
+
137
+ # Reshape to prepare for packing pairs of values
138
+ indices = indices.reshape(-1)
139
+
140
+ # Handle odd length by padding if necessary
141
+ if indices.numel() % 2 != 0:
142
+ indices = torch.cat([indices, torch.zeros(1, dtype=torch.long, device=device)])
143
+
144
+ # Reshape to pair consecutive elements
145
+ indices = indices.reshape(-1, 2)
146
+
147
+ # Pack pairs of 4-bit values into 8-bit values
148
+ packed = (indices[:, 0] | (indices[:, 1] << 4)).to(torch.uint8)
149
+
150
+ return packed.reshape(m, n // 2)
151
+
152
+
153
+ kE2M1ToFloat = torch.tensor(
154
+ [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
155
+ )
156
+
157
+ # reference: https://github.com/vllm-project/vllm/pull/16362
158
+ def unpack_fp4_from_uint8(
159
+ a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
160
+ ) -> torch.Tensor:
161
+ """
162
+ Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
163
+ (i.e. first four bits correspond to one fp4 value, last four correspond to a consecutive
164
+ fp4 value). The bits represent an index, which are mapped to an fp4 value.
165
+
166
+ :param a: tensor to unpack
167
+ :param m: original dim 0 size of the unpacked tensor
168
+ :param n: original dim 1 size of the unpacked tensor
169
+ :param dtype: dense dtype to cast the unpacked tensor to
170
+ """
171
+ assert a.dtype == torch.uint8
172
+
173
+ # Vectorized nibble processing
174
+ a_flat = a.flatten()
175
+ high = (a_flat & 0xF0) >> 4 # Upper nibbles
176
+ low = a_flat & 0x0F # Lower nibbles
177
+
178
+ # Combine nibbles for batch processing
179
+ combined = torch.stack((low, high), dim=1).flatten()
180
+
181
+ # Vectorized sign and magnitude extraction
182
+ signs = (combined & 0x08).to(torch.bool) # Sign bits
183
+ abs_vals = (combined & 0x07).to(torch.long) # Magnitude indices
184
+
185
+ # Device-aware lookup and sign application
186
+ kE2M1 = kE2M1ToFloat.to(device=a.device)
187
+ values = kE2M1[abs_vals] * torch.where(signs, -1.0, 1.0)
188
+
189
+ # Reshape to final form
190
+ return values.reshape(m, n).to(dtype=dtype)
@@ -32,6 +32,7 @@ class CompressionFormat(Enum):
32
32
  naive_quantized = "naive-quantized"
33
33
  pack_quantized = "pack-quantized"
34
34
  marlin_24 = "marlin-24"
35
+ nvfp4_pack_quantized = "nvfp4-pack-quantized"
35
36
 
36
37
 
37
38
  @unique
@@ -234,11 +234,11 @@ def get_nested_weight_mappings(
234
234
  for key, file_location in weight_mappings.items():
235
235
  matched = False
236
236
  for param_name in params_to_nest:
237
- dense_param = match_param_name(key, param_name)
238
- if dense_param:
239
- if dense_param not in nested_weight_mappings:
240
- nested_weight_mappings[dense_param] = {}
241
- nested_weight_mappings[dense_param][param_name] = file_location
237
+ module_path = match_param_name(key, param_name)
238
+ if module_path:
239
+ if module_path not in nested_weight_mappings:
240
+ nested_weight_mappings[module_path] = {}
241
+ nested_weight_mappings[module_path][param_name] = file_location
242
242
  matched = True
243
243
  if return_unmatched_params and not matched:
244
244
  unmatched_params[key] = file_location
@@ -271,11 +271,11 @@ def get_nested_mappings_from_state_dict(
271
271
  nested_weight_mappings = {}
272
272
  for key in state_dict.keys():
273
273
  for param_name in params_to_nest:
274
- dense_param = match_param_name(key, param_name)
275
- if dense_param:
276
- if dense_param not in nested_weight_mappings:
277
- nested_weight_mappings[dense_param] = {}
278
- nested_weight_mappings[dense_param][param_name] = state_dict[key]
274
+ module_path = match_param_name(key, param_name)
275
+ if module_path:
276
+ if module_path not in nested_weight_mappings:
277
+ nested_weight_mappings[module_path] = {}
278
+ nested_weight_mappings[module_path][param_name] = state_dict[key]
279
279
  return nested_weight_mappings
280
280
 
281
281
 
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.9.5.a20250502'
20
+ __version__ = version = '0.9.5.a20250509'
21
21
  __version_tuple__ = version_tuple = (0, 9, 5)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.5a20250502
3
+ Version: 0.9.5a20250509
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -41,6 +41,7 @@ src/compressed_tensors/compressors/model_compressors/model_compressor.py
41
41
  src/compressed_tensors/compressors/quantized_compressors/__init__.py
42
42
  src/compressed_tensors/compressors/quantized_compressors/base.py
43
43
  src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
44
+ src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py
44
45
  src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
45
46
  src/compressed_tensors/compressors/sparse_compressors/__init__.py
46
47
  src/compressed_tensors/compressors/sparse_compressors/base.py
@@ -87,6 +88,7 @@ tests/test_compressors/model_compressors/test_model_compressor.py
87
88
  tests/test_compressors/quantized_compressors/__init__.py
88
89
  tests/test_compressors/quantized_compressors/test_fp8_quant.py
89
90
  tests/test_compressors/quantized_compressors/test_int_quant.py
91
+ tests/test_compressors/quantized_compressors/test_nvfp4_quant.py
90
92
  tests/test_compressors/quantized_compressors/test_pack_quant.py
91
93
  tests/test_compressors/sparse_compressors/__init__.py
92
94
  tests/test_compressors/sparse_compressors/test_bitmask.py
@@ -0,0 +1,43 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ from compressed_tensors.compressors.quantized_compressors.nvfp4_quantized import (
17
+ pack_fp4_to_uint8,
18
+ unpack_fp4_from_uint8,
19
+ )
20
+
21
+
22
+ def test_pack_unpack():
23
+ x = torch.Tensor(
24
+ [
25
+ [-0.5000, -6.0000, -0.5000, -1.5000, -1.0000, 6.0000, 0.0000, -0.0000],
26
+ [-1.0000, -6.0000, -0.5000, -0.0000, 0.5000, 0.5000, -0.0000, 0.0000],
27
+ [-3.0000, -6.0000, -0.5000, -2.0000, -0.5000, -1.5000, -0.0000, -0.0000],
28
+ [1.5000, 6.0000, -0.0000, -0.5000, 1.0000, 1.0000, -0.0000, 0.0000],
29
+ ]
30
+ )
31
+
32
+ dense_dtype = torch.bfloat16
33
+ x = x.to(dense_dtype)
34
+ m, n = x.shape
35
+ packed = pack_fp4_to_uint8(x)
36
+ assert packed.dtype == torch.uint8
37
+ unpacked = unpack_fp4_from_uint8(packed, m, n, dtype=dense_dtype)
38
+ assert unpacked.dtype == dense_dtype
39
+
40
+ assert torch.equal(unpacked, x)  # torch.equal treats -0.0 and 0.0 as equal, so also verify sign bits below
41
+ sign_bitx = torch.signbit(x)
42
+ sign_bitout = torch.signbit(unpacked)
43
+ assert torch.equal(sign_bitout, sign_bitx)