compressed-tensors 0.10.2a20250611__tar.gz → 0.10.2a20250612__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. {compressed_tensors-0.10.2a20250611/src/compressed_tensors.egg-info → compressed_tensors-0.10.2a20250612}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/offload.py +49 -48
  3. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/version.py +1 -1
  4. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  5. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_transform/factory/test_correctness.py +2 -2
  6. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_transform/factory/test_memory.py +2 -2
  7. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_utils/test_offload.py +14 -8
  8. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/.gitkeep +0 -0
  9. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/actions/test/action.yml +0 -0
  10. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/scripts/step-status +0 -0
  11. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/build-test.yml +0 -0
  12. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/build.yml +0 -0
  13. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/report.yml +0 -0
  14. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/test-check.yaml +0 -0
  15. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/test.yml +0 -0
  16. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/trigger-all.yml +0 -0
  17. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.github/workflows/upload.yml +0 -0
  18. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/.gitignore +0 -0
  19. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/LICENSE +0 -0
  20. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/Makefile +0 -0
  21. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/README.md +0 -0
  22. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  23. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/bit_packing/int4_config.json +0 -0
  24. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/bitmask_compression.ipynb +0 -0
  25. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  26. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  27. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/llama_1.1b/example_quant_config.json +0 -0
  28. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  29. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/examples/quantize_and_pack_int4.ipynb +0 -0
  30. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/pyproject.toml +0 -0
  31. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/setup.cfg +0 -0
  32. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/setup.py +0 -0
  33. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/__init__.py +0 -0
  34. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/README.md +0 -0
  35. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/__init__.py +0 -0
  36. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/base.py +0 -0
  37. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/__init__.py +0 -0
  38. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/base.py +0 -0
  39. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/helpers.py +0 -0
  40. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  41. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  42. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  43. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  44. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  45. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  46. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  47. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  48. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  49. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  50. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  51. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  52. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  53. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  54. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/config/__init__.py +0 -0
  55. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/config/base.py +0 -0
  56. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/config/dense.py +0 -0
  57. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  58. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  59. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/linear/__init__.py +0 -0
  60. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  61. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/__init__.py +0 -0
  62. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  63. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  64. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  65. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  66. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  67. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  68. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/quant_args.py +0 -0
  69. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/quant_config.py +0 -0
  70. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  71. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  72. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  73. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/registry/__init__.py +0 -0
  74. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/registry/registry.py +0 -0
  75. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/__init__.py +0 -0
  76. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  77. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/factory/base.py +0 -0
  78. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  79. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  80. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  81. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/transform_args.py +0 -0
  82. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/transform_config.py +0 -0
  83. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  84. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  85. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  86. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/transform/utils/utils.py +0 -0
  87. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/__init__.py +0 -0
  88. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/helpers.py +0 -0
  89. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/permutations_24.py +0 -0
  90. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/permute.py +0 -0
  91. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  92. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  93. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  94. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  95. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors.egg-info/requires.txt +0 -0
  96. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  97. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/__init__.py +0 -0
  98. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/conftest.py +0 -0
  99. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/__init__.py +0 -0
  100. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/model_compressors/__init__.py +0 -0
  101. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  102. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  103. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  104. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  105. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  106. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  107. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  108. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  109. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  110. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  112. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_configs/__init__.py +0 -0
  113. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_configs/test_base.py +0 -0
  114. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  115. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_linear/__init__.py +0 -0
  116. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_linear/test_compressed_linear.py +0 -0
  117. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/__init__.py +0 -0
  118. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/__init__.py +0 -0
  119. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/conftest.py +0 -0
  120. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  121. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  122. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  123. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  124. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  125. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  126. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  127. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_configs/__init__.py +0 -0
  128. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  129. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  130. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_quant_args.py +0 -0
  131. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_quant_config.py +0 -0
  132. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_quant_scheme.py +0 -0
  133. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  134. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_registry.py +0 -0
  135. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_transform/test_transform_args.py +0 -0
  136. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_transform/test_transform_config.py +0 -0
  137. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_transform/test_transform_scheme.py +0 -0
  138. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_transform/utils/test_hadamard.py +0 -0
  139. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_utils/__init__.py +0 -0
  140. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_utils/test_helpers.py +0 -0
  141. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/test_utils/test_safetensors_load.py +0 -0
  142. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/tests/testing_utils.py +0 -0
  143. {compressed_tensors-0.10.2a20250611 → compressed_tensors-0.10.2a20250612}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.2a20250611
3
+ Version: 0.10.2a20250612
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -14,27 +14,29 @@
14
14
  """
15
15
  Utilities associated with offloading functionality provided by `accelerate`.
16
16
 
17
- | ----------------------------------------------------------------------------------------------------- | # noqa: E501
18
- | Operation | Without offloading support | With offloading support | # noqa: E501
19
- | --------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
20
- | Add | module.register_parameter(name, param) | register_offload_parameter(module, name, param) | # noqa: E501
21
- | Check | N/A | has_offloaded_params(module) | # noqa: E501
22
- | Onload | N/A | with align_module_device(module) | # noqa: E501
23
- | Update | module.name.data.copy_(new_data) | update_offload_parameter(module, name, new_data) | # noqa: E501
24
- | Delete | del module.name | delete_offload_parameter(module, name) | # noqa: E501
25
- | ----------------------------------------------------------------------------------------------------- | # noqa: E501
17
+ | ------------------------------------------------------------------------------------------------------ | # noqa: E501
18
+ | Operation | Without offloading support | With offloading support | # noqa: E501
19
+ | ---------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
20
+ | Add | module.register_parameter(name, param) | register_offload_parameter(module, name, param) | # noqa: E501
21
+ | Check | N/A | has_offloaded_params(module) | # noqa: E501
22
+ | Onload | N/A | with align_module_device(module) | # noqa: E501
23
+ | Update | module.name.data.copy_(new_data) | update_offload_parameter(module, name, new_data) | # noqa: E501
24
+ | Delete | del module.name | delete_offload_parameter(module, name) | # noqa: E501
25
+ | Add Module | module.register_module(name, child) | register_offload_module(module, name, child) | # noqa: E501
26
+ | Del Module | del module.name | delete_offload_module(module, name) | # noqa: E501
27
+ | ------------------------------------------------------------------------------------------------------ | # noqa: E501
26
28
  """
27
29
 
28
30
  import contextlib
29
31
  import warnings
30
32
  from functools import wraps
31
- from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Union
33
+ from operator import attrgetter
34
+ from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
32
35
 
33
36
  import torch
34
37
 
35
38
 
36
39
  try:
37
- from accelerate import dispatch_model
38
40
  from accelerate.hooks import (
39
41
  AlignDevicesHook,
40
42
  add_hook_to_module,
@@ -45,10 +47,12 @@ try:
45
47
  from accelerate.utils import (
46
48
  OffloadedWeightsLoader,
47
49
  PrefixedDataset,
50
+ find_tied_parameters,
48
51
  set_module_tensor_to_device,
49
52
  )
50
53
 
51
54
  _has_accelerate = True
55
+
52
56
  except ImportError:
53
57
  _has_accelerate = False
54
58
  AlignDevicesHook = None
@@ -58,8 +62,8 @@ except ImportError:
58
62
  PrefixedDataset = None
59
63
  set_module_tensor_to_device = None
60
64
  named_module_tensors = None
61
- dispatch_model = None
62
65
  attach_align_device_hook = None
66
+ find_tied_parameters = None
63
67
 
64
68
 
65
69
  __all__ = [
@@ -78,14 +82,13 @@ __all__ = [
78
82
  "align_module_device",
79
83
  "register_offload_module",
80
84
  "delete_offload_module",
81
- "force_cpu_offload",
85
+ "offloaded_dispatch",
82
86
  ]
83
87
 
84
88
 
85
89
  def check_accelerate(fallback: Any):
86
90
  def decorator(func: Callable[[Any], Any]):
87
91
  if not _has_accelerate:
88
-
89
92
  if fallback == "error":
90
93
 
91
94
  @wraps(func)
@@ -479,46 +482,44 @@ def delete_offload_module(base: torch.nn.Module, name: str):
479
482
 
480
483
 
481
484
  @check_accelerate(fallback="error")
482
- def force_cpu_offload(
483
- module: torch.nn.Module, execution_device: torch.device
485
+ def offloaded_dispatch(
486
+ module: torch.nn.Module,
487
+ execution_device: torch.device,
488
+ offload_device: Union[torch.device, Literal["disk"]] = torch.device("cpu"),
484
489
  ) -> torch.nn.Module:
485
490
  """
486
- Force cpu offloading a module, primarily used for testing
491
+ Unlike `dispatch_model`, this function forces a module (and its submodules) to
492
+ offload all parameters and replace them with meta tensors, utilizing the
493
+ `AlignDevicesHook` to control onloading and offloading.
487
494
 
488
495
  :param module: module containing parameters to offload
489
- :param execution_device: execution device submodules
490
- :return: module with hooks to perform cpu offloading
496
+ :param execution_device: device that modules will onload and execute on
497
+ :param offload_device: device that module parameters will offload to
498
+ :return: module with offloading device hooks
491
499
  """
492
- # edge case: there is a bug in `dispatch_model` which causes
493
- # the function to only work if the model contains submodules
494
- if next(module.children(), None) is None:
495
- attach_align_device_hook(
496
- module,
497
- execution_device=execution_device,
498
- offload=True,
499
- weights_map=module.state_dict(),
500
- tied_params_map={},
501
- )
502
- return module
503
-
504
- device_map = {}
505
-
506
- def collect_device_map(name: List[str], module: torch.nn.Module):
507
- if next(module.parameters(recurse=False), None) is not None:
508
- device_map[".".join(name)] = "cpu"
509
- return
510
-
511
- else:
512
- for submodule_name, submodule in module.named_children():
513
- name.append(submodule_name)
514
- collect_device_map(name, submodule)
515
- name.pop()
516
-
517
- collect_device_map([], module)
518
-
519
- return dispatch_model(
520
- module, device_map, main_device=execution_device, force_hooks=True
500
+ if offload_device == "disk":
501
+ raise NotImplementedError("Disk offloading is not currently supported")
502
+
503
+ # create weights map
504
+ weights_map = OffloadedWeightsLoader(state_dict=module.state_dict(), device="cpu")
505
+
506
+ # create tied params map
507
+ tied_params = find_tied_parameters(module)
508
+ tied_params_map = {}
509
+ for group in tied_params:
510
+ for param_name in group:
511
+ data_ptr = attrgetter(param_name)(module).data_ptr()
512
+ tied_params_map[data_ptr] = {}
513
+
514
+ # recursively attaches hooks to all submodules
515
+ attach_align_device_hook(
516
+ module,
517
+ execution_device=execution_device,
518
+ offload=True,
519
+ weights_map=weights_map,
520
+ tied_params_map=tied_params_map,
521
521
  )
522
+ return module
522
523
 
523
524
 
524
525
  """ Upstreamed Functions """
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.2.a20250611'
20
+ __version__ = version = '0.10.2.a20250612'
21
21
  __version_tuple__ = version_tuple = (0, 10, 2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.2a20250611
3
+ Version: 0.10.2a20250612
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -19,7 +19,7 @@ from compressed_tensors.transform import (
19
19
  TransformFactory,
20
20
  TransformScheme,
21
21
  )
22
- from compressed_tensors.utils import align_modules, force_cpu_offload
22
+ from compressed_tensors.utils import offloaded_dispatch
23
23
  from tests.testing_utils import requires_accelerate, requires_gpu
24
24
 
25
25
 
@@ -75,7 +75,7 @@ def test_correctness_model(scheme, offload=False):
75
75
  # load model
76
76
  model = TransformableModel(2, 4, 8, 16, 32, 64)
77
77
  if offload:
78
- model = force_cpu_offload(model, torch.device("cuda"))
78
+ model = offloaded_dispatch(model, torch.device("cuda"))
79
79
 
80
80
  # create factory
81
81
  scheme.apply = [
@@ -22,7 +22,7 @@ from compressed_tensors.transform import (
22
22
  TransformFactory,
23
23
  TransformScheme,
24
24
  )
25
- from compressed_tensors.utils import align_modules, force_cpu_offload
25
+ from compressed_tensors.utils import align_modules, offloaded_dispatch
26
26
  from tests.testing_utils import requires_accelerate, requires_gpu
27
27
 
28
28
 
@@ -58,7 +58,7 @@ def test_memory_sharing(scheme, offload=False):
58
58
  # load model (maybe with offloading)
59
59
  model = TransformableModel(2, 2, 4, 4, 8, 8)
60
60
  if offload:
61
- force_cpu_offload(model, torch.device("cuda"))
61
+ offloaded_dispatch(model, torch.device("cuda"))
62
62
 
63
63
  # add transforms to model
64
64
  factory.apply_to_model(model)
@@ -19,9 +19,9 @@ from compressed_tensors.utils import (
19
19
  delete_offload_module,
20
20
  delete_offload_parameter,
21
21
  disable_hf_hook,
22
- force_cpu_offload,
23
22
  get_execution_device,
24
23
  has_offloaded_params,
24
+ offloaded_dispatch,
25
25
  register_offload_module,
26
26
  register_offload_parameter,
27
27
  update_offload_parameter,
@@ -111,7 +111,7 @@ def test_register_offload_parameter():
111
111
 
112
112
  # register a param prior to offloading
113
113
  register_offload_parameter(module, "c", parameter)
114
- assert hasattr(module, "c") and module.c == parameter
114
+ assert module.c == parameter
115
115
 
116
116
  # offloading, check that added param was offloaded
117
117
  attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
@@ -119,7 +119,7 @@ def test_register_offload_parameter():
119
119
 
120
120
  # register a param after offloading, check that added param was offloaded
121
121
  register_offload_parameter(module, "d", parameter)
122
- assert hasattr(module, "d") and module.d.device == torch.device("meta")
122
+ assert module.d.device == torch.device("meta")
123
123
  assert module._hf_hook.weights_map["d"].device == torch.device("cpu")
124
124
 
125
125
  # added parameters can be onloaded and offloaded
@@ -358,7 +358,7 @@ def test_register_offload_module(exec_device):
358
358
  # with offloading
359
359
  model = ExampleModel()
360
360
  child = torch.nn.Linear(2, 3)
361
- force_cpu_offload(model, exec_device)
361
+ offloaded_dispatch(model, exec_device)
362
362
  register_offload_module(model, "child", child)
363
363
  register_offload_module(model.linear, "child", child)
364
364
  assert child in model.children()
@@ -386,7 +386,7 @@ def test_delete_offload_module(exec_device):
386
386
  # with offloading
387
387
  model = ExampleModel()
388
388
  child = torch.nn.Linear(2, 3)
389
- force_cpu_offload(model, exec_device)
389
+ offloaded_dispatch(model, exec_device)
390
390
  register_offload_module(model, "child", child)
391
391
  register_offload_module(model.linear, "child", child)
392
392
  delete_offload_module(model, "child")
@@ -398,10 +398,10 @@ def test_delete_offload_module(exec_device):
398
398
  @requires_gpu
399
399
  @requires_accelerate()
400
400
  @pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
401
- def test_force_cpu_offload(exec_device):
401
+ def test_offloaded_dispatch(exec_device):
402
402
  # single module
403
403
  module = torch.nn.Linear(1, 2)
404
- module = force_cpu_offload(module, exec_device)
404
+ module = offloaded_dispatch(module, exec_device)
405
405
  assert has_offloaded_params(module)
406
406
  assert module._hf_hook.offload
407
407
  assert module.weight.device == torch.device("meta")
@@ -413,7 +413,7 @@ def test_force_cpu_offload(exec_device):
413
413
 
414
414
  # model
415
415
  model = ExampleModel()
416
- model = force_cpu_offload(model, exec_device)
416
+ model = offloaded_dispatch(model, exec_device)
417
417
  assert not has_offloaded_params(model)
418
418
 
419
419
  assert has_offloaded_params(model.linear)
@@ -424,3 +424,9 @@ def test_force_cpu_offload(exec_device):
424
424
 
425
425
  # can run
426
426
  model(torch.empty(1, device=exec_device))
427
+
428
+ # can add new params
429
+ parameter = torch.nn.Parameter(torch.tensor(1.0))
430
+ register_offload_parameter(module, "new_param", parameter)
431
+ assert module.new_param.device == torch.device("meta")
432
+ assert module._hf_hook.weights_map["new_param"].device == torch.device("cpu")