compressed-tensors 0.12.3a20251009__tar.gz → 0.12.3a20251013__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/test.yml +1 -1
  2. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/trigger-all.yml +1 -1
  3. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/PKG-INFO +1 -1
  4. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/base.py +0 -3
  5. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/apply.py +2 -28
  6. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/initialize.py +1 -1
  7. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_args.py +7 -0
  8. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/version.py +1 -1
  9. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
  10. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/conftest.py +0 -21
  11. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_initialize.py +0 -7
  12. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_configs/test_strategies.py +0 -31
  13. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_utils/test_helpers.py +0 -1
  14. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/.gitkeep +0 -0
  15. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/actions/test/action.yml +0 -0
  16. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/scripts/step-status +0 -0
  17. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/build-test.yml +0 -0
  18. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/build.yml +0 -0
  19. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/post-release-nightly-build.yml +0 -0
  20. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/quality-check.yaml +0 -0
  21. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/report.yml +0 -0
  22. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/test-check.yaml +0 -0
  23. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/upload.yml +0 -0
  24. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.gitignore +0 -0
  25. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/LICENSE +0 -0
  26. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/Makefile +0 -0
  27. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/README.md +0 -0
  28. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  29. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/bit_packing/int4_config.json +0 -0
  30. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/bitmask_compression.ipynb +0 -0
  31. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  32. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  33. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/example_quant_config.json +0 -0
  34. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  35. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/quantize_and_pack_int4.ipynb +0 -0
  36. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/pyproject.toml +0 -0
  37. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/setup.cfg +0 -0
  38. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/setup.py +0 -0
  39. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/__init__.py +0 -0
  40. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/README.md +0 -0
  41. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/__init__.py +0 -0
  42. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/__init__.py +0 -0
  43. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/base.py +0 -0
  44. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/helpers.py +0 -0
  45. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  46. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  47. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  48. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  49. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  50. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  51. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  52. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  53. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  54. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  55. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  56. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  57. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  58. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  59. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/__init__.py +0 -0
  60. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/base.py +0 -0
  61. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/dense.py +0 -0
  62. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/format.py +0 -0
  63. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  64. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  65. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/linear/__init__.py +0 -0
  66. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  67. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/logger.py +0 -0
  68. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/__init__.py +0 -0
  69. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  70. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  71. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  72. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  73. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_config.py +0 -0
  74. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  75. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  76. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  77. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  78. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/registry/__init__.py +0 -0
  79. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/registry/registry.py +0 -0
  80. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/__init__.py +0 -0
  81. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/apply.py +0 -0
  82. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  83. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/base.py +0 -0
  84. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  85. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  86. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  87. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/transform_args.py +0 -0
  88. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/transform_config.py +0 -0
  89. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  90. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  91. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  92. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  93. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  94. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/__init__.py +0 -0
  95. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/helpers.py +0 -0
  96. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/internal.py +0 -0
  97. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/match.py +0 -0
  98. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/offload.py +0 -0
  99. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/permutations_24.py +0 -0
  100. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  101. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  102. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/type.py +0 -0
  103. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  104. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  105. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/requires.txt +0 -0
  106. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  107. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/__init__.py +0 -0
  108. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/mock_observer.py +0 -0
  109. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/__init__.py +0 -0
  110. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/model_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  112. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  113. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  114. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  115. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  116. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  117. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  118. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  119. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  120. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  121. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  122. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_configs/__init__.py +0 -0
  123. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_configs/test_base.py +0 -0
  124. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_configs/test_infer_quant.py +0 -0
  125. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  126. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_linear/__init__.py +0 -0
  127. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_linear/test_compressed_linear.py +0 -0
  128. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/__init__.py +0 -0
  129. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/__init__.py +0 -0
  130. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/conftest.py +0 -0
  131. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  132. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  133. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  134. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  135. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  136. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  137. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_configs/__init__.py +0 -0
  138. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  139. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_quant_args.py +0 -0
  140. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_quant_config.py +0 -0
  141. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_quant_scheme.py +0 -0
  142. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_registry.py +0 -0
  143. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/conftest.py +0 -0
  144. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/factory/test_correctness.py +0 -0
  145. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/factory/test_memory.py +0 -0
  146. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/factory/test_serialization.py +0 -0
  147. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/test_transform_args.py +0 -0
  148. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/test_transform_config.py +0 -0
  149. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/test_transform_scheme.py +0 -0
  150. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/utils/test_hadamard.py +0 -0
  151. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/__init__.py +0 -0
  152. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_helpers.py +0 -0
  153. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_match.py +0 -0
  154. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_offload.py +0 -0
  155. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_safetensors_load.py +0 -0
  156. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_type.py +0 -0
  157. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/testing_utils.py +0 -0
  158. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/utils/copyright.py +0 -0
@@ -82,7 +82,7 @@ jobs:
82
82
 
83
83
  - name: set python
84
84
  id: set_python
85
- uses: actions/setup-python@v5
85
+ uses: actions/setup-python@v6
86
86
  with:
87
87
  python-version: ${{ inputs.python }}
88
88
 
@@ -49,6 +49,6 @@ jobs:
49
49
  push_to_pypi: ${{ (github.event.schedule == '30 0 * * *') || inputs.push_to_pypi || false }}
50
50
  test_configs: '[{"python":"3.11.4","label":"k8s-util","timeout":"40","code_coverage":true},
51
51
  {"python":"3.10.12","label":"k8s-util","timeout":"40"},
52
- {"python":"3.9.17","label":"k8s-h100-solo","timeout":"40"},
52
+ {"python":"3.13","label":"k8s-h100-solo","timeout":"40"},
53
53
  {"python":"3.12.6","label":"k8s-a100-duo","timeout":"40"}]'
54
54
  secrets: inherit
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251009
3
+ Version: 0.12.3a20251013
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -20,6 +20,3 @@ TRANSFORM_CONFIG_NAME = "transform_config"
20
20
  # required fields
21
21
  COMPRESSION_VERSION_NAME = "version"
22
22
  QUANTIZATION_METHOD_NAME = "quant_method"
23
-
24
- # auxillary configs
25
- KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
@@ -15,7 +15,7 @@
15
15
  import logging
16
16
  from collections import OrderedDict
17
17
  from copy import deepcopy
18
- from typing import Dict, Iterable, List, Optional
18
+ from typing import Dict, List, Optional
19
19
  from typing import OrderedDict as OrderedDictType
20
20
  from typing import Union
21
21
 
@@ -34,7 +34,7 @@ from compressed_tensors.quantization.utils import (
34
34
  KV_CACHE_TARGETS,
35
35
  is_kv_cache_quant_scheme,
36
36
  )
37
- from compressed_tensors.utils.helpers import deprecated, replace_module
37
+ from compressed_tensors.utils.helpers import replace_module
38
38
  from compressed_tensors.utils.match import match_named_modules, match_targets
39
39
  from compressed_tensors.utils.offload import update_parameter_data
40
40
  from compressed_tensors.utils.safetensors_load import get_safetensors_folder
@@ -45,7 +45,6 @@ from torch.nn import Module
45
45
  __all__ = [
46
46
  "load_pretrained_quantization_parameters",
47
47
  "apply_quantization_config",
48
- "find_name_or_class_matches",
49
48
  ]
50
49
 
51
50
  from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -208,31 +207,6 @@ def process_kv_cache_config(
208
207
  return config
209
208
 
210
209
 
211
- @deprecated(
212
- message="This function is deprecated and will be removed in a future release."
213
- "Please use `match_targets` from `compressed_tensors.utils.match` instead."
214
- )
215
- def find_name_or_class_matches(
216
- name: str, module: Module, targets: Iterable[str], check_contains: bool = False
217
- ) -> List[str]:
218
- """
219
- Returns all targets that match the given name or the class name.
220
- Returns empty list otherwise.
221
- The order of the output `matches` list matters.
222
- The entries are sorted in the following order:
223
- 1. matches on exact strings
224
- 2. matches on regex patterns
225
- 3. matches on module names
226
- """
227
- if check_contains:
228
- raise NotImplementedError(
229
- "This function is deprecated, and the check_contains=True option has been"
230
- " removed."
231
- )
232
-
233
- return match_targets(name, module, targets)
234
-
235
-
236
210
  def _load_quant_args_from_mapping(
237
211
  base_name: str, module_name: str, module: Module, mapping: Dict
238
212
  ):
@@ -199,7 +199,7 @@ def initialize_qparams(
199
199
  expected_shape = (1,)
200
200
 
201
201
  elif strategy == QuantizationStrategy.TOKEN:
202
- expected_shape = (1, 1)
202
+ raise ValueError("Cannot perform static token quantization")
203
203
 
204
204
  elif strategy == QuantizationStrategy.CHANNEL:
205
205
  if len(observed_shape) < 2:
@@ -264,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
264
264
  actorder = model.actorder
265
265
  dynamic = model.dynamic
266
266
  observer = model.observer
267
+ dynamic = model.dynamic
267
268
 
268
269
  # infer strategy
269
270
  if strategy is None:
@@ -279,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
279
280
  "strategy='group' and group_size = -1 for 'channel'"
280
281
  )
281
282
 
283
+ # validate token strategy
284
+ if strategy == QuantizationStrategy.TOKEN and not dynamic:
285
+ raise ValueError(
286
+ "Cannot perform static token quantization, please use `dynamic=True`"
287
+ )
288
+
282
289
  # validate group strategy
283
290
  if strategy == QuantizationStrategy.GROUP:
284
291
  if group_size is None or group_size <= 0:
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251009'
20
+ __version__ = version = '0.12.3.a20251013'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251009
3
+ Version: 0.12.3a20251013
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -29,27 +29,6 @@ def _get_dim(dim: int, value: torch.Tensor):
29
29
  return reduce_dims
30
30
 
31
31
 
32
- @pytest.fixture
33
- def mock_per_token_calibration():
34
- def update_scale_zp(module: torch.nn.Module, base_name: str, value: torch.Tensor):
35
- quantization_scheme = getattr(module, "quantization_scheme", None)
36
- if not quantization_scheme:
37
- # no quantization scheme nothing to do
38
- return
39
-
40
- arg_name = "weights" if base_name == "weight" else f"{base_name}_activations"
41
- args = getattr(quantization_scheme, arg_name, None)
42
-
43
- dim = _get_dim({0, 1}, value)
44
- min_val = torch.amin(value, dim=dim, keepdims=True)
45
- max_val = torch.amax(value, dim=dim, keepdims=True)
46
- scale, zp = calculate_qparams(min_val, max_val, args)
47
- update_parameter_data(module, scale, f"{base_name}_scale")
48
- update_parameter_data(module, zp, f"{base_name}_zero_point")
49
-
50
- return update_scale_zp
51
-
52
-
53
32
  @pytest.fixture
54
33
  def mock_per_group_calibration():
55
34
  def update_scale_zp(
@@ -176,10 +176,6 @@ def test_initialize_module_for_quantization_offloaded(
176
176
  QuantizationArgs(strategy="block", block_structure=[2, 4]),
177
177
  None,
178
178
  ),
179
- (
180
- QuantizationArgs(strategy="token"),
181
- QuantizationArgs(strategy="token"),
182
- ),
183
179
  ],
184
180
  )
185
181
  def test_initialize_quantization_parameters(weights, input_activations):
@@ -238,9 +234,6 @@ def test_initialize_quantization_parameters(weights, input_activations):
238
234
  # For activations or when block_structure is None
239
235
  expected_shape = (1,)
240
236
 
241
- elif args.strategy == QuantizationStrategy.TOKEN:
242
- expected_shape = (1, 1)
243
-
244
237
  if not args.dynamic:
245
238
  assert getattr(layer, f"{q_param_name}_scale").shape == expected_shape
246
239
  assert getattr(layer, f"{q_param_name}_zero_point").shape == expected_shape
@@ -105,34 +105,3 @@ def test_group(
105
105
  model_shape[1],
106
106
  int(model_shape[0] / group_size),
107
107
  )
108
-
109
-
110
- @torch.no_grad
111
- @pytest.mark.parametrize("input_symmetry", [True, False])
112
- @pytest.mark.parametrize("weight_symmetry", [True, False])
113
- @pytest.mark.parametrize("input_shape", [(32, 256), (300, 200), (400, 400)])
114
- def test_token(
115
- mock_per_channel_calibration,
116
- mock_per_token_calibration,
117
- input_symmetry,
118
- weight_symmetry,
119
- input_shape,
120
- ):
121
- model = Linear(input_shape[1], 256)
122
- quant_config = create_config(
123
- input_symmetry,
124
- weight_symmetry,
125
- w_strategy=QuantizationStrategy.CHANNEL,
126
- i_strategy=QuantizationStrategy.TOKEN,
127
- )
128
- apply_quantization_config(model, quant_config)
129
-
130
- inputs = torch.randn(input_shape)
131
- mock_per_channel_calibration(model, base_name="weight", value=model.weight)
132
- mock_per_token_calibration(model, base_name="input", value=inputs)
133
-
134
- assert model.input_scale.shape == (1, 1)
135
- assert model.input_zero_point.shape == (1, 1)
136
-
137
- assert model.weight_scale.shape == (256, 1)
138
- assert model.weight_zero_point.shape == (256, 1)
@@ -50,7 +50,6 @@ from compressed_tensors.quantization.utils import (
50
50
  ]
51
51
  ),
52
52
  ),
53
- (True, "token", torch.Size([1, 1])),
54
53
  ],
55
54
  )
56
55
  def test_calculate_qparams(keepdims, strategy, exp_shape):