compressed-tensors 0.10.3a20250707__tar.gz → 0.10.3a20250709__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/actions/test/action.yml +32 -0
  2. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/test.yml +17 -0
  3. {compressed_tensors-0.10.3a20250707/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250709}/PKG-INFO +1 -1
  4. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +12 -8
  5. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/base.py +6 -7
  6. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +22 -18
  7. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +19 -5
  8. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/apply.py +8 -9
  9. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/initialize.py +1 -1
  10. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/quant_config.py +1 -5
  11. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/utils/helpers.py +12 -36
  12. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/__init__.py +1 -0
  13. compressed_tensors-0.10.3a20250709/src/compressed_tensors/transform/apply.py +32 -0
  14. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/base.py +8 -5
  15. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/__init__.py +1 -0
  16. compressed_tensors-0.10.3a20250709/src/compressed_tensors/utils/internal.py +29 -0
  17. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/version.py +1 -1
  18. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  19. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  20. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/model_compressors/test_model_compressor.py +60 -0
  21. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_apply.py +1 -2
  22. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/factory/test_correctness.py +14 -22
  23. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/factory/test_memory.py +16 -23
  24. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/.gitkeep +0 -0
  25. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/scripts/step-status +0 -0
  26. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/build-test.yml +0 -0
  27. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/build.yml +0 -0
  28. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/report.yml +0 -0
  29. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/test-check.yaml +0 -0
  30. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/trigger-all.yml +0 -0
  31. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/upload.yml +0 -0
  32. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.gitignore +0 -0
  33. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/LICENSE +0 -0
  34. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/Makefile +0 -0
  35. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/README.md +0 -0
  36. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  37. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/bit_packing/int4_config.json +0 -0
  38. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/bitmask_compression.ipynb +0 -0
  39. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  40. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  41. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/example_quant_config.json +0 -0
  42. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  43. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/quantize_and_pack_int4.ipynb +0 -0
  44. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/pyproject.toml +0 -0
  45. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/setup.cfg +0 -0
  46. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/setup.py +0 -0
  47. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/__init__.py +0 -0
  48. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/README.md +0 -0
  49. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/__init__.py +0 -0
  50. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/base.py +0 -0
  51. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/__init__.py +0 -0
  52. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/base.py +0 -0
  53. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/helpers.py +0 -0
  54. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  55. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  56. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  57. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  58. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  59. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  60. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  61. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  62. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  63. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  64. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/__init__.py +0 -0
  65. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/base.py +0 -0
  66. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/dense.py +0 -0
  67. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  68. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  69. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/linear/__init__.py +0 -0
  70. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  71. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/__init__.py +0 -0
  72. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  73. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  74. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  75. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  76. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/quant_args.py +0 -0
  77. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  78. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  79. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/registry/__init__.py +0 -0
  80. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/registry/registry.py +0 -0
  81. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  82. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  83. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  84. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  85. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/transform_args.py +0 -0
  86. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/transform_config.py +0 -0
  87. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  88. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  89. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  90. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  91. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/utils.py +0 -0
  92. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/helpers.py +0 -0
  93. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/offload.py +0 -0
  94. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/permutations_24.py +0 -0
  95. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/permute.py +0 -0
  96. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  97. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  98. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  99. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/requires.txt +0 -0
  100. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  101. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/__init__.py +0 -0
  102. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/conftest.py +0 -0
  103. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/model_compressors/__init__.py +0 -0
  105. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  107. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  108. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  109. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  110. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  112. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  113. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  115. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_configs/__init__.py +0 -0
  116. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_configs/test_base.py +0 -0
  117. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  118. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_linear/__init__.py +0 -0
  119. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_linear/test_compressed_linear.py +0 -0
  120. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/__init__.py +0 -0
  121. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/__init__.py +0 -0
  122. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/conftest.py +0 -0
  123. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  124. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  125. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  126. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  127. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  128. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  129. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_configs/__init__.py +0 -0
  130. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  131. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  132. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_quant_args.py +0 -0
  133. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_quant_config.py +0 -0
  134. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_quant_scheme.py +0 -0
  135. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  136. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_registry.py +0 -0
  137. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/conftest.py +0 -0
  138. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/test_transform_args.py +0 -0
  139. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/test_transform_config.py +0 -0
  140. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/test_transform_scheme.py +0 -0
  141. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/utils/test_hadamard.py +0 -0
  142. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/__init__.py +0 -0
  143. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/test_helpers.py +0 -0
  144. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/test_offload.py +0 -0
  145. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/test_safetensors_load.py +0 -0
  146. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/testing_utils.py +0 -0
  147. {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/utils/copyright.py +0 -0
@@ -7,6 +7,10 @@ inputs:
7
7
  suitename:
8
8
  description: "test suite name"
9
9
  required: true
10
+ code_coverage:
11
+ description: whether to collect code coverage metrics during test run
12
+ type: boolean
13
+ default: false
10
14
  outputs:
11
15
  status:
12
16
  description: "final status from test"
@@ -44,9 +48,37 @@ runs:
44
48
  run: |
45
49
  source ${{ inputs.venv }}/bin/activate
46
50
  rm -rf src
51
+
52
+ if [[ "${ENABLE_COVERAGE}" == "true" ]]; then
53
+ echo "::group::Installing code coverage requirements via pip"
54
+ pip install bashlex https://github.com/neuralmagic/pytest-nm-releng/archive/v0.4.0.tar.gz
55
+ pip install coverage pytest-cov
56
+
57
+ # Adding Code coverage to the tests
58
+ nmre-generate-coverage-flags --package "compressed_tensors" --output-file ".coverage_flags.sh"
59
+ source .coverage_flags.sh
60
+ echo "::endgroup::"
61
+ fi
62
+
63
+ echo "::group::running tests"
64
+ echo "PYTEST_ADDOPTS set to: ${PYTEST_ADDOPTS}"
65
+
47
66
  SUCCESS=0
48
67
  pytest tests --junitxml=test-results/report.xml -o junit_suite_name="${{ inputs.suitename }}" || SUCCESS=$?
49
68
  echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
69
+ echo "::endgroup::"
70
+
71
+ if [[ "${ENABLE_COVERAGE}" == "true" ]]; then
72
+ echo "::group::consolidating coverage reports"
73
+ mkdir -p coverage-results
74
+ mv .coverage coverage-results/ || echo ".coverage file not found"
75
+ mv coverage-html coverage-results/ || echo "coverage-html folder not found"
76
+ mv coverage.json coverage-results/ || echo "coverage.json file not found"
77
+ echo "::endgroup::"
78
+ fi
79
+
50
80
  deactivate
51
81
  exit ${SUCCESS}
52
82
  shell: bash
83
+ env:
84
+ ENABLE_COVERAGE: ${{ inputs.code_coverage || false }}
@@ -25,6 +25,10 @@ on:
25
25
  run_id:
26
26
  description: run id of the BUILD job that generated the assets
27
27
  type: string
28
+ code_coverage:
29
+ description: whether to collect code coverage metrics during test run
30
+ type: boolean
31
+ default: false
28
32
 
29
33
  # makes workflow manually callable
30
34
  workflow_dispatch:
@@ -51,6 +55,10 @@ on:
51
55
  run_id:
52
56
  description: run id of the BUILD job that generated the assets
53
57
  type: string
58
+ code_coverage:
59
+ description: whether to collect code coverage metrics during test run
60
+ type: boolean
61
+ default: false
54
62
 
55
63
  jobs:
56
64
 
@@ -124,6 +132,7 @@ jobs:
124
132
  with:
125
133
  venv: ${{ steps.create_venv.outputs.penv }}
126
134
  suitename: test-${{ inputs.python }}-${{ inputs.test_label }}
135
+ code_coverage: ${{ inputs.code_coverage }}
127
136
 
128
137
  - name: summary
129
138
  uses: neuralmagic/nm-actions/actions/summary-test@v1.13.0
@@ -146,3 +155,11 @@ jobs:
146
155
  name: report-${{ inputs.test_label }}.xml
147
156
  path: test-results/report.xml
148
157
  retention-days: 5
158
+
159
+ - name: upload coverage report
160
+ uses: actions/upload-artifact@v4
161
+ if: (success() || failure()) && inputs.code_coverage
162
+ with:
163
+ name: coverage-results
164
+ path: coverage-results/*
165
+ retention-days: 5
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250707
3
+ Version: 0.10.3a20250709
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -42,10 +42,7 @@ from compressed_tensors.quantization import (
42
42
  load_pretrained_quantization_parameters,
43
43
  )
44
44
  from compressed_tensors.quantization.lifecycle import expand_target_names
45
- from compressed_tensors.quantization.utils import (
46
- is_module_quantized,
47
- iter_named_leaf_modules,
48
- )
45
+ from compressed_tensors.quantization.utils import is_module_quantized
49
46
  from compressed_tensors.utils import (
50
47
  align_module_device,
51
48
  delete_offload_parameter,
@@ -393,9 +390,16 @@ class ModelCompressor:
393
390
  )
394
391
 
395
392
  for prefix, module in tqdm(model.named_modules(), desc="Compressing model"):
393
+
396
394
  if prefix in module_to_scheme or prefix in sparse_compression_targets:
395
+ module_device = get_execution_device(module).type
396
+ is_meta = (module_device == "meta")
397
+
398
+ exec_device = "meta" if is_meta else "cpu"
399
+ onloading_device = "meta" if is_meta else module_device
400
+
397
401
  # in the future, support compression on same device
398
- with align_module_device(module, execution_device="cpu"):
402
+ with align_module_device(module, execution_device=exec_device):
399
403
  state_dict = module.state_dict(prefix=f"{prefix}.")
400
404
 
401
405
  # quantization first
@@ -404,6 +408,7 @@ class ModelCompressor:
404
408
  state_dict,
405
409
  names_to_scheme=module_to_scheme,
406
410
  show_progress=False,
411
+ compression_device=exec_device,
407
412
  )
408
413
 
409
414
  # sparsity second
@@ -415,7 +420,6 @@ class ModelCompressor:
415
420
  )
416
421
 
417
422
  # remove any existing parameters
418
- exec_device = get_execution_device(module)
419
423
  offload_device = get_offloaded_device(module)
420
424
  for name, _ in list(module.named_parameters()):
421
425
  delete_offload_parameter(module, name)
@@ -423,7 +427,7 @@ class ModelCompressor:
423
427
  # replace with compressed parameters
424
428
  for name, value in state_dict.items():
425
429
  name = name.removeprefix(f"{prefix}.")
426
- value = value.to(exec_device)
430
+ value = value.to(onloading_device)
427
431
  param = torch.nn.Parameter(value, requires_grad=False)
428
432
  register_offload_parameter(module, name, param, offload_device)
429
433
 
@@ -747,7 +751,7 @@ def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
747
751
  """
748
752
  return {
749
753
  fix_fsdp_module_name(name): module.quantization_scheme
750
- for name, module in iter_named_leaf_modules(model)
754
+ for name, module in model.named_modules()
751
755
  if is_module_quantized(module)
752
756
  }
753
757
 
@@ -72,6 +72,7 @@ class BaseQuantizationCompressor(BaseCompressor):
72
72
  model_state: Dict[str, Tensor],
73
73
  names_to_scheme: Dict[str, QuantizationScheme],
74
74
  show_progress: bool = False,
75
+ compression_device: str = "cpu",
75
76
  **kwargs,
76
77
  ) -> Dict[str, Tensor]:
77
78
  """
@@ -85,7 +86,6 @@ class BaseQuantizationCompressor(BaseCompressor):
85
86
  """
86
87
  uncompressed_names = list(model_state.keys())
87
88
  compressed_dict = {}
88
- save_device = "cpu"
89
89
 
90
90
  # compress values
91
91
  desc = "Compressing with quantization"
@@ -104,10 +104,10 @@ class BaseQuantizationCompressor(BaseCompressor):
104
104
 
105
105
  # is scale does not exist, then weight cannot be compressed
106
106
  if scale is None:
107
- compressed_dict[name] = value.to(save_device)
107
+ compressed_dict[name] = value.to(compression_device)
108
108
  continue
109
109
 
110
- # compress values on cpu (memory movement too expensive)
110
+ # compress values on meta if loading from meta otherwise on cpu (memory movement too expensive)
111
111
  module_path = prefix[:-1] if prefix.endswith(".") else prefix
112
112
  quant_args = names_to_scheme[module_path].weights
113
113
  compressed_values = self.compress_weight(
@@ -117,12 +117,12 @@ class BaseQuantizationCompressor(BaseCompressor):
117
117
  global_scale=global_scale,
118
118
  g_idx=g_idx,
119
119
  quantization_args=quant_args,
120
- device="cpu",
120
+ device=compression_device,
121
121
  )
122
122
 
123
123
  # update state dict
124
124
  for key, value in compressed_values.items():
125
- compressed_dict[prefix + key] = value.to(save_device)
125
+ compressed_dict[prefix + key] = value.to(compression_device)
126
126
 
127
127
  else:
128
128
  # omit saving zero points for symmetric or packed quantization
@@ -133,8 +133,7 @@ class BaseQuantizationCompressor(BaseCompressor):
133
133
  # TODO: does this case actually occur?
134
134
  elif name.endswith("g_idx") and torch.any(value <= -1):
135
135
  continue
136
-
137
- compressed_dict[name] = value.to(save_device)
136
+ compressed_dict[name] = value.to(compression_device)
138
137
 
139
138
  return compressed_dict
140
139
 
@@ -220,30 +220,34 @@ def pack_to_int32(
220
220
  if num_bits < 1:
221
221
  raise ValueError(f"num_bits must be at least 1, got {num_bits}")
222
222
 
223
- # convert to unsigned for packing
223
+ # Convert to unsigned range for packing, matching quantization offset
224
224
  offset = 1 << (num_bits - 1)
225
225
  value = (value + offset).to(torch.uint8)
226
- value = value.cpu().numpy().astype(np.uint32)
226
+ device = value.device
227
+
227
228
  pack_factor = 32 // num_bits
228
229
 
229
- # pad input tensor and initialize packed output
230
- packed_size = math.ceil(value.shape[packed_dim] / pack_factor)
231
- padding = packed_size * pack_factor - value.shape[packed_dim]
232
- value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
230
+ if packed_dim == 0:
231
+ value = value.transpose(0, 1)
233
232
 
234
- # pack values
235
- if packed_dim == 1:
236
- packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
237
- for i in range(pack_factor):
238
- packed |= value[:, i::pack_factor] << num_bits * i
239
- else:
240
- packed = np.zeros((packed_size, value.shape[1]), dtype=np.uint32)
241
- for i in range(pack_factor):
242
- packed |= value[i::pack_factor, :] << num_bits * i
233
+ rows, cols = value.shape
234
+ padded_cols = math.ceil(cols / pack_factor) * pack_factor
235
+ pad_len = padded_cols - cols
236
+
237
+ if pad_len > 0:
238
+ value = torch.nn.functional.pad(value, (0, pad_len))
239
+
240
+ num_groups = padded_cols // pack_factor
241
+
242
+ # Use int32 here
243
+ reshaped = value.view(rows, num_groups, pack_factor).to(torch.int32)
244
+ bit_shifts = torch.arange(pack_factor, device=device, dtype=torch.int32) * num_bits
245
+ packed = (reshaped << bit_shifts).sum(dim=2, dtype=torch.int32)
246
+
247
+ if packed_dim == 0:
248
+ packed = packed.transpose(0, 1)
243
249
 
244
- # convert back to signed and torch
245
- packed = np.ascontiguousarray(packed).view(np.int32)
246
- return torch.from_numpy(packed)
250
+ return packed
247
251
 
248
252
 
249
253
  def unpack_from_int32(
@@ -56,8 +56,10 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
56
56
  bitmask_tensor = Sparse24BitMaskTensor.from_dense(
57
57
  value, self.config.sparsity_structure
58
58
  )
59
- bitmask_dict = bitmask_tensor.dict(name_prefix=name, device="cpu")
60
- return bitmask_dict
59
+ return bitmask_tensor.dict(
60
+ name_prefix=name,
61
+ device="meta" if value.is_meta else "cpu",
62
+ )
61
63
 
62
64
  def decompress_weight(self, weight_data):
63
65
  data = Sparse24BitMaskTensor.from_compressed_data(**weight_data)
@@ -90,9 +92,14 @@ class Sparse24BitMaskTensor:
90
92
  :return: instantiated compressed tensor
91
93
  """
92
94
  shape = list(tensor.shape)
93
- compressed, bitmask = sparse24_bitmask_compress(
94
- tensor.cpu(), sparsity_structure=sparsity_structure
95
- )
95
+ if tensor.is_meta:
96
+ compressed, bitmask = sparse24_bitmask_compress(
97
+ tensor, sparsity_structure=sparsity_structure
98
+ )
99
+ else:
100
+ compressed, bitmask = sparse24_bitmask_compress(
101
+ tensor.cpu(), sparsity_structure=sparsity_structure
102
+ )
96
103
  return Sparse24BitMaskTensor(
97
104
  shape=shape,
98
105
  compressed=compressed,
@@ -169,6 +176,13 @@ def sparse24_bitmask_compress(
169
176
  SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
170
177
  ), "Only 2:4 sparsity is supported"
171
178
 
179
+ if tensor.is_meta:
180
+ num_rows, num_cols = tensor.shape
181
+ compressed_values = torch.empty((num_rows, num_cols // 2), dtype=tensor.dtype, device="meta")
182
+ packed_cols = (num_cols + 7) // 8
183
+ bitmasks_packed = torch.empty((num_rows, packed_cols), dtype=torch.uint8, device="meta")
184
+ return compressed_values, bitmasks_packed
185
+
172
186
  bytemasks = get_24_bytemasks(tensor=tensor)
173
187
 
174
188
  if tensor.dtype == FP8_DTYPE:
@@ -38,8 +38,6 @@ from compressed_tensors.quantization.utils import (
38
38
  KV_CACHE_TARGETS,
39
39
  infer_quantization_status,
40
40
  is_kv_cache_quant_scheme,
41
- iter_named_leaf_modules,
42
- iter_named_quantizable_modules,
43
41
  )
44
42
  from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
45
43
  from compressed_tensors.utils.offload import update_parameter_data
@@ -87,7 +85,7 @@ def load_pretrained_quantization_parameters(
87
85
  model_path = get_safetensors_folder(model_name_or_path)
88
86
  mapping = get_quantization_parameter_to_path_mapping(model_path)
89
87
 
90
- for name, submodule in iter_named_leaf_modules(model):
88
+ for name, submodule in model.named_modules():
91
89
  if not is_module_quantized(submodule):
92
90
  continue
93
91
  if submodule.quantization_scheme.input_activations is not None:
@@ -152,11 +150,7 @@ def apply_quantization_config(
152
150
  # list of submodules to ignore
153
151
  ignored_submodules = defaultdict(list)
154
152
  # mark appropriate layers for quantization by setting their quantization schemes
155
- for name, submodule in iter_named_quantizable_modules(
156
- model,
157
- include_children=True,
158
- include_attn=True,
159
- ): # child modules and attention modules
153
+ for name, submodule in model.named_modules():
160
154
  # potentially fix module name to remove FSDP wrapper prefix
161
155
  name = fix_fsdp_module_name(name)
162
156
  if matches := find_name_or_class_matches(name, submodule, config.ignore):
@@ -287,7 +281,7 @@ def expand_target_names(
287
281
  """
288
282
  return {
289
283
  name
290
- for name, module in iter_named_leaf_modules(model)
284
+ for name, module in model.named_modules()
291
285
  if is_target(name, module, targets, ignore)
292
286
  }
293
287
 
@@ -328,6 +322,11 @@ def find_name_or_class_matches(
328
322
  2. matches on regex patterns
329
323
  3. matches on module names
330
324
  """
325
+ from compressed_tensors import InternalModule
326
+
327
+ if isinstance(module, InternalModule):
328
+ return []
329
+
331
330
  targets = sorted(targets, key=lambda x: ("re:" in x, x))
332
331
  if isinstance(targets, Iterable):
333
332
  matches = _find_matches(name, targets) + _find_matches(
@@ -189,7 +189,7 @@ def _initialize_scale_zero_point(
189
189
  else:
190
190
  # TODO: consider erroring out in the future as if the dtype if not one of these,
191
191
  # there is likely bug
192
- if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
192
+ if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
193
193
  scale_dtype = torch.float16
194
194
  zp_dtype = quantization_args.pytorch_dtype()
195
195
 
@@ -22,9 +22,7 @@ from compressed_tensors.quantization.quant_scheme import (
22
22
  preset_name_to_scheme,
23
23
  )
24
24
  from compressed_tensors.quantization.utils import (
25
- calculate_compression_ratio,
26
25
  is_module_quantized,
27
- iter_named_quantizable_modules,
28
26
  module_type,
29
27
  parse_out_kv_cache_args,
30
28
  )
@@ -177,9 +175,7 @@ class QuantizationConfig(BaseModel):
177
175
  quantization_status = None
178
176
  ignore = {}
179
177
  quantization_type_names = set()
180
- for name, submodule in iter_named_quantizable_modules(
181
- model, include_children=True, include_attn=True
182
- ):
178
+ for name, submodule in model.named_modules():
183
179
  layer_type = module_type(submodule)
184
180
  if not is_module_quantized(submodule):
185
181
  if layer_type not in ignore:
@@ -26,6 +26,7 @@ from compressed_tensors.quantization.quant_args import (
26
26
  QuantizationType,
27
27
  )
28
28
  from compressed_tensors.quantization.quant_scheme import QuantizationScheme
29
+ from compressed_tensors.utils import deprecated
29
30
  from torch import FloatTensor, IntTensor, Tensor
30
31
  from torch.nn import Module
31
32
  from tqdm import tqdm
@@ -36,7 +37,6 @@ __all__ = [
36
37
  "is_module_quantized",
37
38
  "is_model_quantized",
38
39
  "module_type",
39
- "calculate_compression_ratio",
40
40
  "get_torch_bit_depth",
41
41
  "can_quantize",
42
42
  "parse_out_kv_cache_args",
@@ -276,12 +276,7 @@ def is_model_quantized(model: Module) -> bool:
276
276
  :param model: pytorch model
277
277
  :return: True if model is quantized, False otherwise
278
278
  """
279
-
280
- for _, submodule in iter_named_leaf_modules(model):
281
- if is_module_quantized(submodule):
282
- return True
283
-
284
- return False
279
+ return any(is_module_quantized(submodule) for submodule in model.modules())
285
280
 
286
281
 
287
282
  def module_type(module: Module) -> str:
@@ -294,6 +289,11 @@ def module_type(module: Module) -> str:
294
289
  return type(module).__name__
295
290
 
296
291
 
292
+ @deprecated(
293
+ message="This function will be removed in a future release. "
294
+ "Please use `model.named_modules()` and filter by "
295
 + "compressed_tensors.InternalModule if necessary"
296
+ )
297
297
  def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
298
298
  """
299
299
  Yields modules that do not have any submodules except observers. The observers
@@ -320,6 +320,11 @@ def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None
320
320
  yield name, submodule
321
321
 
322
322
 
323
+ @deprecated(
324
+ message="This function will be removed in a future release. "
325
+ "Please use `model.named_modules()` and filter by "
326
+ "compressed_tensors.InternalModule if neceessary"
327
+ )
323
328
  def iter_named_quantizable_modules(
324
329
  model: Module,
325
330
  include_children: bool = True,
@@ -330,7 +335,6 @@ def iter_named_quantizable_modules(
330
335
  Yield name and submodule of
331
336
  - leaf modules, set by include_children
332
337
  - attention modyles, set by include_attn
333
-
334
338
  :param model: model to get leaf modules of
335
339
  :param include_children: flag to get the leaf modules
336
340
  :param inlcude_attn: flag to get the attention modules
@@ -397,34 +401,6 @@ def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:
397
401
  return bit_depth > quant_args.num_bits
398
402
 
399
403
 
400
- def calculate_compression_ratio(model: Module) -> float:
401
- """
402
- Calculates the quantization compression ratio of a pytorch model, based on the
403
- number of bits needed to represent the total weights in compressed form. Does not
404
- take into account activation quantizatons.
405
-
406
- :param model: pytorch module to calculate compression ratio for
407
- :return: compression ratio of the whole model
408
- """
409
- total_compressed = 0.0
410
- total_uncompressed = 0.0
411
- for name, submodule in tqdm(
412
- iter_named_leaf_modules(model),
413
- desc="Calculating quantization compression ratio",
414
- ):
415
- for parameter in model.parameters():
416
- uncompressed_bits = get_torch_bit_depth(parameter)
417
- compressed_bits = uncompressed_bits
418
- if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
419
- compressed_bits = submodule.quantization_scheme.weights.num_bits
420
-
421
- num_weights = parameter.numel()
422
- total_compressed += compressed_bits * num_weights
423
- total_uncompressed += uncompressed_bits * num_weights
424
-
425
- return total_uncompressed / total_compressed
426
-
427
-
428
404
  def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
429
405
  """
430
406
  Check whether the QuantizationScheme targets the kv cache.
@@ -23,3 +23,4 @@ from .factory.base import *
23
23
  from .factory.hadamard import *
24
24
  from .factory.matrix_multiply import *
25
25
  from .factory.random_hadamard import *
26
+ from .apply import *
@@ -0,0 +1,32 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ from compressed_tensors.transform import TransformConfig, TransformFactory
17
+
18
+
19
+ __all__ = ["apply_transform_config"]
20
+
21
+
22
+ def apply_transform_config(model: torch.nn.Module, config: TransformConfig):
23
+ """
24
+ Apply a transform config to a model. Weight transforms are fused into weights, while
25
+ activation transforms are attached as submodules and trigger via pytorch hooks
26
+
27
+ :param model: model to apply config to
28
+ :param config: transform config to apply
29
+ """
30
+ for name, scheme in config.config_groups.items():
31
+ factory = TransformFactory.from_scheme(scheme, name=name)
32
+ factory.apply_to_model(model)
@@ -17,6 +17,7 @@ from typing import Optional
17
17
 
18
18
  import torch
19
19
  import torch.nn.utils.parametrize as P
20
+ from compressed_tensors import InternalModule
20
21
  from compressed_tensors.quantization.lifecycle import is_target # TODO: move to utils
21
22
  from compressed_tensors.registry.registry import RegistryMixin, T
22
23
  from compressed_tensors.transform import (
@@ -26,6 +27,7 @@ from compressed_tensors.transform import (
26
27
  )
27
28
  from compressed_tensors.utils import (
28
29
  align_module_device,
30
+ delete_offload_module,
29
31
  has_offloaded_params,
30
32
  patch_attr,
31
33
  register_offload_module,
@@ -99,7 +101,7 @@ class TransformFactory(RegistryMixin, ABC):
99
101
  # create transform as submodule
100
102
  transform_name = f"{self.name}_{args.location.value}"
101
103
  transform = self.create_transform(module, args)
102
- register_offload_module(module, transform_name, transform) # (1)
104
+ register_offload_module(module, transform_name, transform)
103
105
 
104
106
  # register input transformation hook
105
107
  if args.location == TransformLocation.INPUT:
@@ -118,6 +120,7 @@ class TransformFactory(RegistryMixin, ABC):
118
120
  assert isinstance(module, torch.nn.Linear)
119
121
  assert module.bias is None
120
122
 
123
+ # fuse transform into weight
121
124
  with torch.no_grad(), align_module_device(module):
122
125
  update_offload_parameter(module, "weight", transform(module.weight))
123
126
 
@@ -128,6 +131,9 @@ class TransformFactory(RegistryMixin, ABC):
128
131
  raise ValueError("Offloaded training is not supported")
129
132
  P.register_parametrization(module, "weight", transform)
130
133
 
134
+ # transform is no longer needed (unfusing is not supported)
135
+ delete_offload_module(module, transform_name)
136
+
131
137
  # register output transformation hook
132
138
  elif args.location == TransformLocation.OUTPUT:
133
139
 
@@ -140,11 +146,8 @@ class TransformFactory(RegistryMixin, ABC):
140
146
  else:
141
147
  raise NotImplementedError()
142
148
 
143
- # (1) even in the `weight` cases, this submodule attachment is needed in order
144
- # to support saving in the frozen state
145
-
146
149
 
147
- class TransformBase(Module, ABC):
150
+ class TransformBase(InternalModule, ABC):
148
151
  """
149
152
  Represents the application of a transform accord to TransformArgs
150
153
  """
@@ -14,6 +14,7 @@
14
14
  # flake8: noqa
15
15
 
16
16
  from .helpers import *
17
+ from .internal import *
17
18
  from .offload import *
18
19
  from .permutations_24 import *
19
20
  from .permute import *
@@ -0,0 +1,29 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+
17
+
18
+ __all__ = ["InternalModule"]
19
+
20
+
21
+ class InternalModule(torch.nn.Module):
22
+ """
23
 + Abstract base class for modules which are not a part of the model definition.
24
+ `torch.nn.Module`s which inherit from this class will not be targeted by configs
25
+
26
 + This is typically used to skip applying configs to `Observers` and `Transforms`
27
+ """
28
+
29
+ pass
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250707'
20
+ __version__ = version = '0.10.3.a20250709'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250707
3
+ Version: 0.10.3a20250709
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -72,6 +72,7 @@ src/compressed_tensors/quantization/utils/helpers.py
72
72
  src/compressed_tensors/registry/__init__.py
73
73
  src/compressed_tensors/registry/registry.py
74
74
  src/compressed_tensors/transform/__init__.py
75
+ src/compressed_tensors/transform/apply.py
75
76
  src/compressed_tensors/transform/transform_args.py
76
77
  src/compressed_tensors/transform/transform_config.py
77
78
  src/compressed_tensors/transform/transform_scheme.py
@@ -86,6 +87,7 @@ src/compressed_tensors/transform/utils/hadamards.safetensors
86
87
  src/compressed_tensors/transform/utils/utils.py
87
88
  src/compressed_tensors/utils/__init__.py
88
89
  src/compressed_tensors/utils/helpers.py
90
+ src/compressed_tensors/utils/internal.py
89
91
  src/compressed_tensors/utils/offload.py
90
92
  src/compressed_tensors/utils/permutations_24.py
91
93
  src/compressed_tensors/utils/permute.py