compressed-tensors 0.10.2a20250611__tar.gz → 0.10.2a20250613__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (145)
  1. {compressed_tensors-0.10.2a20250611/src/compressed_tensors.egg-info → compressed_tensors-0.10.2a20250613}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/setup.py +1 -0
  3. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/factory/hadamard.py +1 -1
  4. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/factory/random_hadamard.py +1 -1
  5. compressed_tensors-0.10.2a20250613/src/compressed_tensors/transform/utils/hadamard.py +160 -0
  6. compressed_tensors-0.10.2a20250613/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  7. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/offload.py +85 -50
  8. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/version.py +1 -1
  9. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  10. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors.egg-info/SOURCES.txt +1 -0
  11. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_transform/factory/test_correctness.py +2 -2
  12. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_transform/factory/test_memory.py +2 -2
  13. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_transform/utils/test_hadamard.py +38 -32
  14. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_utils/test_offload.py +66 -12
  15. compressed_tensors-0.10.2a20250611/src/compressed_tensors/transform/utils/hadamard.py +0 -161
  16. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/.gitkeep +0 -0
  17. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/actions/test/action.yml +0 -0
  18. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/scripts/step-status +0 -0
  19. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/build-test.yml +0 -0
  20. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/build.yml +0 -0
  21. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/report.yml +0 -0
  22. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/test-check.yaml +0 -0
  23. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/test.yml +0 -0
  24. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/trigger-all.yml +0 -0
  25. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.github/workflows/upload.yml +0 -0
  26. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/.gitignore +0 -0
  27. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/LICENSE +0 -0
  28. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/Makefile +0 -0
  29. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/README.md +0 -0
  30. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  31. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/bit_packing/int4_config.json +0 -0
  32. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/bitmask_compression.ipynb +0 -0
  33. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  34. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  35. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/llama_1.1b/example_quant_config.json +0 -0
  36. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  37. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/examples/quantize_and_pack_int4.ipynb +0 -0
  38. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/pyproject.toml +0 -0
  39. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/setup.cfg +0 -0
  40. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/__init__.py +0 -0
  41. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/README.md +0 -0
  42. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/__init__.py +0 -0
  43. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/base.py +0 -0
  44. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/__init__.py +0 -0
  45. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/base.py +0 -0
  46. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/helpers.py +0 -0
  47. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  48. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  49. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  51. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  52. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  53. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  54. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  55. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  56. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  57. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  58. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  59. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  60. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  61. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/config/__init__.py +0 -0
  62. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/config/base.py +0 -0
  63. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/config/dense.py +0 -0
  64. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  65. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  66. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/linear/__init__.py +0 -0
  67. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  68. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/__init__.py +0 -0
  69. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  70. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  71. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  72. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  73. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  74. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  75. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/quant_args.py +0 -0
  76. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/quant_config.py +0 -0
  77. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  78. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  79. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  80. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/registry/__init__.py +0 -0
  81. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/registry/registry.py +0 -0
  82. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/__init__.py +0 -0
  83. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  84. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/factory/base.py +0 -0
  85. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  86. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/transform_args.py +0 -0
  87. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/transform_config.py +0 -0
  88. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  89. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  90. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/transform/utils/utils.py +0 -0
  91. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/__init__.py +0 -0
  92. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/helpers.py +0 -0
  93. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/permutations_24.py +0 -0
  94. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/permute.py +0 -0
  95. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  96. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  97. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  98. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors.egg-info/requires.txt +0 -0
  99. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  100. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/__init__.py +0 -0
  101. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/conftest.py +0 -0
  102. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/__init__.py +0 -0
  103. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/model_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  105. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  107. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  108. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  109. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  110. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  112. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  113. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  115. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_configs/__init__.py +0 -0
  116. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_configs/test_base.py +0 -0
  117. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  118. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_linear/__init__.py +0 -0
  119. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_linear/test_compressed_linear.py +0 -0
  120. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/__init__.py +0 -0
  121. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/__init__.py +0 -0
  122. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/conftest.py +0 -0
  123. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  124. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  125. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  126. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  127. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  128. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  129. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  130. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_configs/__init__.py +0 -0
  131. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  132. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  133. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_quant_args.py +0 -0
  134. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_quant_config.py +0 -0
  135. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_quant_scheme.py +0 -0
  136. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  137. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_registry.py +0 -0
  138. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_transform/test_transform_args.py +0 -0
  139. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_transform/test_transform_config.py +0 -0
  140. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_transform/test_transform_scheme.py +0 -0
  141. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_utils/__init__.py +0 -0
  142. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_utils/test_helpers.py +0 -0
  143. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/test_utils/test_safetensors_load.py +0 -0
  144. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/tests/testing_utils.py +0 -0
  145. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250613}/utils/copyright.py +0 -0
PKG-INFO:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.10.2a20250611
+ Version: 0.10.2a20250613
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
setup.py:
@@ -113,5 +113,6 @@ setup(
      extras_require=_setup_extras(),
      install_requires=_setup_install_requires(),
      package_dir={"": "src"},
+     package_data={"": ["transform/utils/hadamards.safetensors"]},
      packages=_setup_packages(),
  )
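The new `package_data` entry ships `hadamards.safetensors` inside the wheel so the known Hadamard matrices are available at runtime. A minimal sanity check, assuming the package is installed from this build:

from pathlib import Path

import compressed_tensors.transform.utils.hadamard as hadamard_utils

# the bundled data file should resolve next to the installed module
data_file = Path(hadamard_utils.__file__).parent / "hadamards.safetensors"
assert data_file.exists(), "hadamards.safetensors was not packaged"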
src/compressed_tensors/transform/factory/hadamard.py:
@@ -59,7 +59,7 @@ class HadamardFactory(TransformFactory):
          return HadamardTransform(weight, args)

      def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-         data = deterministic_hadamard_matrix(size)
+         data = deterministic_hadamard_matrix(size, dtype, device)
          data = data.to(dtype=dtype, device=device)
          return Parameter(data, requires_grad=self.scheme.requires_grad)

src/compressed_tensors/transform/factory/random_hadamard.py:
@@ -29,6 +29,6 @@ class RandomHadamardFactory(HadamardFactory):
      """

      def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-         data = random_hadamard_matrix(size, self.generator)
+         data = random_hadamard_matrix(size, dtype, device, self.generator)
          data = data.to(dtype=dtype, device=device)
          return Parameter(data, requires_grad=self.scheme.requires_grad)
compressed_tensors-0.10.2a20250613/src/compressed_tensors/transform/utils/hadamard.py (new file):
@@ -0,0 +1,160 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from pathlib import Path
+ from typing import Optional
+
+ import torch
+ from safetensors import safe_open
+
+
+ REPO_PATH = Path(__file__).parent / "hadamards.safetensors"
+
+
+ __all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix", "is_pow2"]
+
+
+ # note that hadamard matrix multiplication can be accelerated using a library such as
+ # https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
+
+
+ def deterministic_hadamard_matrix(
+     size: int,
+     dtype: torch.dtype = torch.bfloat16,
+     device: torch.device = torch.device("cpu"),
+ ) -> torch.Tensor:
+     """
+     Construct an n-by-n Hadamard matrix, using Sylvester's construction.
+     `n` must be a power of 2.
+
+     Adapted from https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py  # noqa: E501
+
+     :param size: order of the matrix, must be a power of 2
+     :param dtype: data type of matrix
+     :param device: device to construct matrix on
+     :return: hadamard matrix of size `size`
+     """
+     if size <= 0:
+         raise ValueError("Cannot construct deterministic hadamard of size <= 0")
+
+     log2 = int(math.log2(size))
+     if size != 2**log2:
+         raise ValueError("Cannot construct deterministic hadamard of size != 2^n")
+
+     H = torch.tensor([[1]], dtype=dtype, device=device)
+
+     # Sylvester's construction
+     for _ in range(log2):
+         H = torch.vstack((torch.hstack((H, H)), torch.hstack((H, -H))))
+
+     return H / math.sqrt(size)
+
+
+ def random_hadamard_matrix(
+     size: int,
+     dtype: torch.dtype = torch.bfloat16,
+     device: torch.device = torch.device("cpu"),
+     gen: Optional[torch.Generator] = None,
+ ) -> torch.Tensor:
+     """
+     Produces a randomly generated Hadamard matrix. Differs from
+     `deterministic_hadamard_matrix` in that this function supports non powers of 2
+     and randomization using a seeded generator
+
+     Adapted from https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py  # noqa: E501
+     Known matrices were retrieved from N. J. A. Sloane's Library of Hadamard Matrices http://www.neilsloane.com/hadamard/  # noqa: E501
+
+     :param size: the dimension of the hadamard matrix
+     :param dtype: data type of matrix
+     :param device: device to construct matrix on
+     :param gen: optional generator for random values
+     :return: randomly generated hadamard matrix
+     """
+     Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=dtype)  # cpu
+     Q = Q.to(device=device)
+     Q = Q * 2 - 1
+     Q = torch.diag(Q)
+     return _matmul_hadU(Q) / math.sqrt(size)
+
+
+ def is_pow2(n: int) -> bool:
+     """
+     Check if a number is a power of 2
+
+     :param n: number to check
+     :return: True iff `n` is a power of 2
+     """
+     return n > 0 and (n & (n - 1) == 0)
+
+
+ def _fetch_hadamard_divisor(
+     n: int,
+     dtype: torch.dtype,
+     device: torch.device = torch.device("cpu"),
+     file_path: str = REPO_PATH,
+ ) -> Optional[torch.Tensor]:
+     """
+     Fetch a known hadamard matrix from the given file path. The returned matrix will
+     be of size `k` such that `n / k` is a power of two. Return None if no such
+     matrix exists.
+
+     Note: This function reopens the safetensors file every time it is called.
+     This is technically inefficient, but a very small runtime cost and simpler
+     than forcing callers to manage the file open context
+
+     :param n: size of known hadamard matrix
+     :return: a known hadamard matrix of size `n` if one exists, else None
+     """
+     with safe_open(file_path, framework="pt", device=str(device)) as file:
+         divisors = sorted((int(key) for key in file.keys()), reverse=True)
+         for divisor in divisors:
+             if n % divisor == 0 and is_pow2(n // divisor):
+                 return file.get_tensor(str(divisor)).to(dtype=dtype)
+
+     return None
+
+
+ def _matmul_hadU(X: torch.Tensor) -> torch.Tensor:
+     size = X.size(0)
+     dtype = X.dtype
+     device = X.device
+
+     # Check if we have the determined hadamard matrix
+     hadK = _fetch_hadamard_divisor(size, dtype, device=device)
+     if hadK is None:
+         raise ValueError(f"Cannot construct random hadamard matrix of size {size}")
+     K = hadK.size(0)
+
+     # Reshape diag matrix with randomized -1/+1
+     input = X.clone().view(-1, size, 1)
+     output = input.clone()
+     while input.shape[1] > K:
+         input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2])
+         output = output.view(input.shape)
+         output[:, :, 0, :] = input[:, :, 0, :] + input[:, :, 1, :]
+         output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :]
+         output = output.view(input.shape[0], input.shape[1], -1)
+         (input, output) = (output, input)
+     assert input.shape[1] == K
+     del output
+
+     # Do not explicitly repeat - OOM
+     # input = torch.bmm(
+     #     hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
+     # Use bcast instead
+     input = hadK.view(1, K, K).to(input) @ input
+
+     # normalize
+     return input.view(X.shape)
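A brief usage sketch of the new signatures (sizes are borrowed from the updated tests below; float32 on CPU is chosen here for illustration and is not the library's bfloat16 default):

import torch
from compressed_tensors.transform.utils.hadamard import (
    deterministic_hadamard_matrix,
    random_hadamard_matrix,
)

# power-of-two size via Sylvester's construction, built directly at dtype/device
h = deterministic_hadamard_matrix(4096, dtype=torch.float32, device=torch.device("cpu"))
assert torch.allclose(h @ h.T, torch.eye(4096), atol=1e-3)

# non-power-of-two size, factored through a known divisor in hadamards.safetensors
# (3584 = 28 * 128); a seeded generator makes the result reproducible
gen = torch.Generator().manual_seed(42)
r = random_hadamard_matrix(3584, dtype=torch.float32, device=torch.device("cpu"), gen=gen)
assert torch.allclose(r @ r.T, torch.eye(3584), atol=1e-3)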
src/compressed_tensors/utils/offload.py:
@@ -14,27 +14,30 @@
  """
  Utilities associated with offloading functionality provided by `accelerate`.

- | ----------------------------------------------------------------------------------------------------- | # noqa: E501
- | Operation | Without offloading support | With offloading support | # noqa: E501
- | --------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
- | Add | module.register_parameter(name, param) | register_offload_parameter(module, name, param) | # noqa: E501
- | Check | N/A | has_offloaded_params(module) | # noqa: E501
- | Onload | N/A | with align_module_device(module) | # noqa: E501
- | Update | module.name.data.copy_(new_data) | update_offload_parameter(module, name, new_data) | # noqa: E501
- | Delete | del module.name | delete_offload_parameter(module, name) | # noqa: E501
- | ----------------------------------------------------------------------------------------------------- | # noqa: E501
+ | ------------------------------------------------------------------------------------------------------ | # noqa: E501
+ | Operation | Without offloading support | With offloading support | # noqa: E501
+ | ---------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
+ | Add | module.register_parameter(name, param) | register_offload_parameter(module, name, param) | # noqa: E501
+ | Check | N/A | has_offloaded_params(module) | # noqa: E501
+ | Onload | N/A | with align_module_device(module) | # noqa: E501
+ | Update | module.name.data.copy_(new_data) | update_offload_parameter(module, name, new_data) | # noqa: E501
+ | Delete | del module.name | delete_offload_parameter(module, name) | # noqa: E501
+ | Add Module | module.register_module(name, child) | register_offload_module(name, child) | # noqa: E501
+ | Del Module | del module.name | delete_offload_module(module, name) | # noqa: E501
+ | ------------------------------------------------------------------------------------------------------ | # noqa: E501
  """

  import contextlib
  import warnings
  from functools import wraps
- from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Union
+ from operator import attrgetter
+ from typing import Any, Callable, Dict, Iterable, Literal, Optional, Tuple, Union

  import torch
+ from compressed_tensors.utils import patch_attr


  try:
-     from accelerate import dispatch_model
      from accelerate.hooks import (
          AlignDevicesHook,
          add_hook_to_module,
@@ -45,10 +48,12 @@ try:
      from accelerate.utils import (
          OffloadedWeightsLoader,
          PrefixedDataset,
+         find_tied_parameters,
          set_module_tensor_to_device,
      )

      _has_accelerate = True
+
  except ImportError:
      _has_accelerate = False
      AlignDevicesHook = None
@@ -58,8 +63,8 @@ except ImportError:
      PrefixedDataset = None
      set_module_tensor_to_device = None
      named_module_tensors = None
-     dispatch_model = None
      attach_align_device_hook = None
+     find_tied_parameters = None


  __all__ = [
@@ -78,14 +83,14 @@ __all__ = [
      "align_module_device",
      "register_offload_module",
      "delete_offload_module",
-     "force_cpu_offload",
+     "offloaded_dispatch",
+     "disable_offloading",
  ]


  def check_accelerate(fallback: Any):
      def decorator(func: Callable[[Any], Any]):
          if not _has_accelerate:
-
              if fallback == "error":

                  @wraps(func)
@@ -211,7 +216,7 @@ def register_offload_parameter(
  def update_offload_parameter(
      module: torch.nn.Module,
      name: str,
-     data: Optional[torch.Tensor],
+     data: torch.Tensor,
      offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
  ):
      """
@@ -224,7 +229,7 @@ def update_offload_parameter(
      :param offload_device: device on which weight will be offloaded to. If None is
          provided, then infer device from parameters on module
      """
-     param = getattr(module, name)
+     param: torch.nn.Parameter = getattr(module, name)
      if param.data.shape != data.shape:
          warnings.warn(
              f"Shape of parameter being updated {param.data.shape} does not match shape "
@@ -232,7 +237,7 @@ def update_offload_parameter(
          )

      # copy data into onloaded parameter if applicable
-     if param.device != torch.device("meta"):
+     if param.device != torch.device("meta") and data is not param.data:
          param.data.copy_(data)

      # update offload dict
@@ -479,46 +484,76 @@ def delete_offload_module(base: torch.nn.Module, name: str):


  @check_accelerate(fallback="error")
- def force_cpu_offload(
-     module: torch.nn.Module, execution_device: torch.device
+ def offloaded_dispatch(
+     module: torch.nn.Module,
+     execution_device: torch.device,
+     offload_device: Union[torch.device, Literal["disk"]] = torch.device("cpu"),
  ) -> torch.nn.Module:
      """
-     Force cpu offloading a module, primarily used for testing
+     Unlike `dispatch_model`, this function forces a module (and its submodules) to
+     offload all parameters and replace them with meta tensors, utilizing the
+     `AlignDevicesHook` to control onloading and offloading.

      :param module: module containing parameters to offload
-     :param execution_device: execution device submodules
-     :return: module with hooks to perform cpu offloading
-     """
-     # edge case: there is a bug in `dispatch_model` which causes
-     # the function to only work if the model contains submodules
-     if next(module.children(), None) is None:
-         attach_align_device_hook(
-             module,
-             execution_device=execution_device,
-             offload=True,
-             weights_map=module.state_dict(),
-             tied_params_map={},
-         )
-         return module
-
-     device_map = {}
-
-     def collect_device_map(name: List[str], module: torch.nn.Module):
-         if next(module.parameters(recurse=False), None) is not None:
-             device_map[".".join(name)] = "cpu"
-             return
+     :param execution_device: device that modules will onload and execute on
+     :param offload_device: device that module parameters will offload to
+     :return: module with offloading device hooks
+     """
+     if offload_device == "disk":
+         raise NotImplementedError("Disk offloading is not currently supported")
+
+     # create weights map
+     state_dict = module.state_dict()
+     state_dict = {key: val.to(offload_device) for key, val in state_dict.items()}
+     weights_map = OffloadedWeightsLoader(state_dict=state_dict, device=offload_device)
+
+     # create tied params map
+     tied_params = find_tied_parameters(module)
+     tied_params_map = {}
+     for group in tied_params:
+         for param_name in group:
+             data_ptr = attrgetter(param_name)(module).data_ptr()
+             tied_params_map[data_ptr] = {}
+
+     # recursively attaches hooks to all submodules
+     attach_align_device_hook(
+         module,
+         execution_device=execution_device,
+         offload=True,
+         weights_map=weights_map,
+         tied_params_map=tied_params_map,
+     )
+     return module

-     else:
-         for submodule_name, submodule in module.named_children():
-             name.append(submodule_name)
-             collect_device_map(name, submodule)
-             name.pop()

-     collect_device_map([], module)
+ @contextlib.contextmanager
+ def disable_offloading():
+     """
+     Keep modules onloaded and disable offloading until this context exits.
+     Affects modules which have been hooked with accelerate's `AlignDevicesHook`
+     """
+     original_pre_forward = AlignDevicesHook.pre_forward
+     onloaded_modules: Dict[torch.nn.Module, Tuple[AlignDevicesHook, bool]] = dict()
+
+     # onload once and disable any future onloading/offloading steps
+     def keep_onload_pre_forward(self: AlignDevicesHook, module, *args, **kwargs):
+         ret = original_pre_forward(self, module, *args, **kwargs)
+         if module not in onloaded_modules:
+             onloaded_modules[module] = (self, self.offload)
+             self.offload = False
+         return ret
+
+     # use the patched pre_forward function within the context
+     with patch_attr(AlignDevicesHook, "pre_forward", keep_onload_pre_forward):
+         yield

-     return dispatch_model(
-         module, device_map, main_device=execution_device, force_hooks=True
-     )
+     # manually offload all modules that were onloaded
+     # update any parameters which may have changed
+     for module, (hook, offload) in onloaded_modules.items():
+         hook.offload = offload
+         for name, param in module.named_parameters():
+             update_offload_parameter(module, name, param.data)
+         hook.post_forward(module, None)


  """ Upstreamed Functions """
src/compressed_tensors/version.py:
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.10.2.a20250611'
+ __version__ = version = '0.10.2.a20250613'
  __version_tuple__ = version_tuple = (0, 10, 2)
src/compressed_tensors.egg-info/PKG-INFO:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.10.2a20250611
+ Version: 0.10.2a20250613
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
src/compressed_tensors.egg-info/SOURCES.txt:
@@ -82,6 +82,7 @@ src/compressed_tensors/transform/factory/matrix_multiply.py
  src/compressed_tensors/transform/factory/random_hadamard.py
  src/compressed_tensors/transform/utils/__init__.py
  src/compressed_tensors/transform/utils/hadamard.py
+ src/compressed_tensors/transform/utils/hadamards.safetensors
  src/compressed_tensors/transform/utils/utils.py
  src/compressed_tensors/utils/__init__.py
  src/compressed_tensors/utils/helpers.py
tests/test_transform/factory/test_correctness.py:
@@ -19,7 +19,7 @@ from compressed_tensors.transform import (
      TransformFactory,
      TransformScheme,
  )
- from compressed_tensors.utils import align_modules, force_cpu_offload
+ from compressed_tensors.utils import offloaded_dispatch
  from tests.testing_utils import requires_accelerate, requires_gpu


@@ -75,7 +75,7 @@ def test_correctness_model(scheme, offload=False):
      # load model
      model = TransformableModel(2, 4, 8, 16, 32, 64)
      if offload:
-         model = force_cpu_offload(model, torch.device("cuda"))
+         model = offloaded_dispatch(model, torch.device("cuda"))

      # create factory
      scheme.apply = [
tests/test_transform/factory/test_memory.py:
@@ -22,7 +22,7 @@ from compressed_tensors.transform import (
      TransformFactory,
      TransformScheme,
  )
- from compressed_tensors.utils import align_modules, force_cpu_offload
+ from compressed_tensors.utils import align_modules, offloaded_dispatch
  from tests.testing_utils import requires_accelerate, requires_gpu


@@ -58,7 +58,7 @@ def test_memory_sharing(scheme, offload=False):
      # load model (maybe with offloading)
      model = TransformableModel(2, 2, 4, 4, 8, 8)
      if offload:
-         force_cpu_offload(model, torch.device("cuda"))
+         offloaded_dispatch(model, torch.device("cuda"))

      # add transforms to model
      factory.apply_to_model(model)
tests/test_transform/utils/test_hadamard.py:
@@ -13,46 +13,48 @@
  # limitations under the License.


- import numpy
  import pytest
  import torch
  from compressed_tensors.transform.utils.hadamard import (
-     _get_had12,
-     _get_had20,
      deterministic_hadamard_matrix,
+     is_pow2,
      random_hadamard_matrix,
  )
+ from tests.testing_utils import requires_gpu


- @pytest.mark.parametrize(
-     "had_func",
-     [
-         _get_had12,
-         _get_had20,
-     ],
- )
- def test_packed_hadamard_compliant(had_func):
-     had_matrix = had_func()
-     size = had_matrix.size(0)
-     # HH.T == nI
-     product = had_matrix @ had_matrix.T
-     assert torch.equal(product, size * torch.eye(size))
+ _sizes_to_test = [
+     768,  # gpt2 small
+     1024,  # gpt2 medium
+     1280,  # qwen_2_5_vl vision
+     1600,  # gpt2 xl
+     2048,  # gpt3 small
+     3584,  # qwen_2_5_vl
+     3840,  # qwen_2_5_vl vision qkv
+     4096,  # llama3
+     7168,  # deepseek_v3
+     14336,  # llama3 intermediate
+     18432,  # deepseek_v3 intermediate
+     18944,  # qwen_2_5_vl intermediate
+ ]
+ _atol = 1e-1  # bfloat16 is low precision for large matrices


- @pytest.mark.parametrize(
-     "size",
-     [4096, 2048],
- )
+ @requires_gpu
+ @pytest.mark.parametrize("size", _sizes_to_test)
  def test_random_hadamard_matrix_compliant(size):
-     had_matrix = random_hadamard_matrix(size)
-     product = torch.round(had_matrix @ had_matrix.T)
-     assert torch.equal(product, torch.eye(size))
+     # (H / sqrt(n))(H.T / sqrt(n)) == I
+     matrix = random_hadamard_matrix(size, device="cuda")
+     product = matrix @ matrix.T
+     eye = torch.eye(size, dtype=product.dtype, device="cuda")
+     assert torch.allclose(product, eye, atol=_atol)


  def test_random_hadamard_generator():
+     # check that generation is deterministic with a seed
      generator = torch.Generator().manual_seed(42)
-     one = random_hadamard_matrix(2048, generator)
-     two = random_hadamard_matrix(2048, generator)
+     one = random_hadamard_matrix(2048, gen=generator)
+     two = random_hadamard_matrix(2048, gen=generator)

      one_true = torch.tensor(
          [
@@ -73,12 +75,16 @@ def test_random_hadamard_generator():
      assert torch.all(two[:3, :3].sign() == two_true.sign())


- @pytest.mark.parametrize(
-     "size",
-     [1024],
- )
+ @requires_gpu
+ @pytest.mark.parametrize("size", _sizes_to_test)
  def test_deterministic_hadamard_compliant(size):
-     had_matrix = deterministic_hadamard_matrix(size)
+     if not is_pow2(size):
+         with pytest.raises(ValueError):
+             matrix = deterministic_hadamard_matrix(size, device="cuda")
+         return
+
      # (H / sqrt(n))(H.T / sqrt(n)) == I
-     product = had_matrix @ had_matrix.T
-     assert numpy.array_equal(product, numpy.eye(size))
+     matrix = deterministic_hadamard_matrix(size, device="cuda")
+     product = matrix @ matrix.T
+     eye = torch.eye(size, dtype=product.dtype, device="cuda")
+     assert torch.allclose(product, eye, atol=_atol)
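The GPU tests above validate at the bfloat16 default with a loose tolerance; as a sketch, the same properties can be spot-checked without a GPU at float32 and a tighter tolerance (non-power-of-two sizes taken from `_sizes_to_test`, assuming their divisors ship in hadamards.safetensors):

import pytest
import torch
from compressed_tensors.transform.utils.hadamard import (
    deterministic_hadamard_matrix,
    is_pow2,
    random_hadamard_matrix,
)

# non-power-of-two sizes resolve through the bundled known matrices
for size in (768, 1280, 3584):
    assert not is_pow2(size)
    m = random_hadamard_matrix(size, dtype=torch.float32, device=torch.device("cpu"))
    assert torch.allclose(m @ m.T, torch.eye(size), atol=1e-3)

# ...and are rejected by the Sylvester-only constructor
with pytest.raises(ValueError):
    deterministic_hadamard_matrix(768)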